Added notice_log

This commit is contained in:
emdee 2022-11-29 12:54:36 +00:00
parent 204a6adc48
commit d08b34fd57
3 changed files with 335 additions and 210 deletions

View file

@ -4,21 +4,22 @@
PROG=exclude_badExits.py
SOCKS_PORT=9050
CAFILE=/etc/ssl/certs/ca-certificates.crt
# you may have a special python for installed packages
EXE=`which python3.bash`
$EXE exclude_badExits.py --help > exclude_badExits.hlp &
# an example of running exclude_badExits with full debugging
# expected to take an hour or so
declare -a LARGS
LARGS=(
--log_level 10
)
# you may have a special python for installed packages
EXE=`which python3.bash`
LARGS+=(
--strict_nodes 1
# --strict_nodes 1
--points_timeout 120
--log_level 10
--https_cafile $CAFILE
)
LARGS+=(
--proxy-host 127.0.0.1
--proxy-port $SOCKS_PORT
--https_cafile $CAFILE
)
if [ -f '/run/tor/control' ] ; then
@ -34,8 +35,9 @@ LARGS+=( --white_onions $ddg )
# you may need to be the tor user to read /run/tor/control
grep -q ^debian-tor /etc/group && TORU=debian-tor || {
grep -q ^tor /etc/group && TORU=tor
}
sudo -u $TORU $EXE exclude_badExits.py "${LARGS[@]}" \
}
# --saved_only
sudo -u $TORU $EXE exclude_badExits.py "${LARGS[@]}" "$@" \
2>&1|tee exclude_badExits6.log
# The DEBUG statements contain the detail of why the relay was considered bad.

View file

@ -37,7 +37,7 @@ By default all sections of the goodnodes.yaml are used as a whitelist.
BadNodes:
ExcludeExitNodes:
BadExit:
# $0000000000000000000000000000000000000007
- 0000000000000000000000000000000000000007
```
That part requires [PyYAML](https://pyyaml.org/wiki/PyYAML)
https://github.com/yaml/pyyaml/ or ```ruamel```: do
@ -99,7 +99,9 @@ For usage, do ```python3 exclude_badExits.py --help`
import argparse
import os
import json
import re
import sys
import tempfile
import time
from io import StringIO
@ -157,24 +159,30 @@ try:
except ImportError:
oPARSER = None
oCONTACT_RE = re.compile(r'([^:]*)(\s+)(email|url|proof|ciissversion|abuse|gpg):')
ETC_DIR = '/usr/local/etc/tor/yaml'
aTRUST_DB = {}
aTRUST_DB_INDEX = {}
aGOOD_CONTACTS_DB = {}
aGOOD_CONTACTS_FPS = {}
aBAD_CONTACTS_DB = {}
aRELAYS_DB = {}
aRELAYS_DB_INDEX = {}
aFP_EMAIL = {}
aDOMAIN_FPS = {}
sDETAILS_URL = "https://metrics.torproject.org/rs.html#details/"
# You can call this while bootstrapping
sEXCLUDE_EXIT_KEY = 'ExcludeNodes'
sEXCLUDE_EXIT_GROUP = 'ExcludeNodes'
sINCLUDE_EXIT_KEY = 'ExitNodes'
oBAD_ROOT = 'BadNodes'
oBAD_NODES = safe_load("""
aBAD_NODES = safe_load("""
BadNodes:
ExcludeDomains: []
ExcludeNodes:
# BadExit will be overwritten
BadExit: []
# list MyBadExit in --bad_sections if you want it used
MyBadExit: []
""")
sGOOD_ROOT = 'GoodNodes'
@ -193,30 +201,32 @@ GoodNodes:
lKNOWN_NODNS = []
tMAYBE_NODNS = set()
def lYamlBadNodes(sFile,
                  section=sEXCLUDE_EXIT_GROUP,
                  tWanted=None):
    """Load the bad-nodes YAML file and return the fingerprints to exclude.

    sFile   -- path of the YAML file; when it exists its content replaces
               the module-level aBAD_NODES.
    section -- accepted for backward compatibility; the lookup below always
               uses sEXCLUDE_EXIT_GROUP.
    tWanted -- set of sub-section names under
               aBAD_NODES[oBAD_ROOT][sEXCLUDE_EXIT_GROUP] whose lists are
               concatenated; defaults to {'BadExit'}.

    Side effects: rebinds aBAD_NODES (when the file loads to a mapping) and
    tMAYBE_NODNS (built-in yKNOWN_NODNS plus the file's excluded domains).

    Returns a list of fingerprints; empty when yaml is unavailable.
    """
    global aBAD_NODES
    global tMAYBE_NODNS

    lBad = []
    if tWanted is None:
        tWanted = {'BadExit'}
    if not yaml:
        return lBad

    if os.path.exists(sFile):
        with open(sFile, 'rt') as oFd:
            aLoaded = safe_load(oFd)
        # An empty file loads as None; keep the built-in skeleton in that
        # case so later aBAD_NODES[oBAD_ROOT] lookups keep working.
        if isinstance(aLoaded, dict):
            aBAD_NODES = aLoaded

    aRoot = aBAD_NODES.get(oBAD_ROOT) or {}
    aGroup = aRoot.get(sEXCLUDE_EXIT_GROUP) or {}
    for sub in tWanted:
        if sub not in aGroup:
            # a section named on the command line but absent from the file
            LOG.warning(f"{sub} is not a section of {sEXCLUDE_EXIT_GROUP} in {sFile}")
            continue
        # a section present but left empty in YAML loads as None
        lBad += aGroup[sub] or []

    tMAYBE_NODNS = set(safe_load(StringIO(yKNOWN_NODNS)))
    tMAYBE_NODNS.update(aRoot.get(sEXCLUDE_DOMAINS) or [])
    return lBad
def lYamlGoodNodes(sFile='/etc/tor/torrc-goodnodes.yaml'):
@ -252,9 +262,19 @@ def bdomain_is_bad(domain, fp):
tBAD_URLS = set()
# Obfuscated spellings of '@' seen in relay ContactInfo fields.
# sCleanEmail() replaces them with '@' one after another in list order, so an
# entry that contains an earlier entry (e.g. 'atsignhere' after 'atsign') can
# no longer match as a whole by the time it is reached.
# Only '@' stand-ins belong here: '.' stand-ins go in lDOT_REPS.
lAT_REPS = ['[]', ' at ', '(at)', '[at]', '<at>', '(att)', '_at_',
            '~at~', '.at.', '!at!', '<a>t', '<(a)>', '|__at-|', '<:at:>',
            '[__at ]', '"a t"', 'removeme at ', ' a7 ', '{at-}',
            '[at}', 'atsign', '-at-', '(at_sign)', 'a.t',
            'atsignhere', ' _a_ ', ' (at-sign) ', "'at sign'",
            '(a)', ' atsign ', '(at symbol)', ' anat ', '=at=',
            ' [a] ', '<a-t<>', '[at sign]',
            '"at"', '{at}', '-----symbol for email----', '[at@]',
            '(at sign here)', '==at', '/\t',
            ]
# Obfuscated spellings of '.' seen in relay ContactInfo fields.
# sCleanEmail() replaces them with '.' one after another in list order, after
# the lAT_REPS pass has run.
lDOT_REPS = [' point ', ' dot ', '[dot]', '(dot)', '_dot_', '!dot!', '<.>',
             '<:dot:>', '|dot--|', ' d07 ', '<dot=>', '(dot]', '{dot)',
             'd.t', "'dot'", '(d)', '-dot-', ' adot ',
             ' . ', '[punto]', '(point)', '"dot"', '{.}',
             '--separator--', '|=dot|', ' period ', ')dot(',
             ]
lNO_EMAIL = [
'<nobody at example dot com>',
@ -279,18 +299,26 @@ lNO_EMAIL = [
'your@email.com',
r'<nothing/at\\mail.de>',
]
#
lMORONS = ['hoster:Quintex Alliance Consulting ']
def sCleanEmail(s):
    """Return contact text *s* lower-cased with obfuscated email syntax undone.

    Every token in lAT_REPS becomes '@' and every token in lDOT_REPS becomes
    '.'.  For a token that does not itself start with a space, the
    space-padded form is replaced first so the padding is swallowed too.
    '(dash)' and 'hyphen ' become '-', and each placeholder address listed
    in lNO_EMAIL is replaced by '?'.

    A None or empty *s* yields ''.
    """
    if not s:
        return ''
    s = s.lower()
    # the '@' pass must run before the '.' pass; both apply in list order
    for lReps, sChar in ((lAT_REPS, '@'), (lDOT_REPS, '.')):
        for elt in lReps:
            if not elt.startswith(' '):
                s = s.replace(' ' + elt + ' ', sChar)
            s = s.replace(elt, sChar)
    s = s.replace('(dash)', '-')
    s = s.replace('hyphen ', '-')
    for elt in lNO_EMAIL:
        s = s.replace(elt, '?')
    return s
lATS = ['abuse', 'email']
lEMAILS = ['abuse', 'email']
lINTS = ['ciissversion', 'uplinkbw', 'signingkeylifetime', 'memory']
lBOOLS = ['dnssec', 'dnsqname', 'aesni', 'autoupdate', 'dnslocalrootzone',
'sandbox', 'offlinemasterkey']
@ -305,7 +333,7 @@ def aCleanContact(a):
a[elt] = True
else:
a[elt] = False
for elt in lATS:
for elt in lEMAILS:
if elt not in a: continue
a[elt] = sCleanEmail(a[elt])
if 'url' in a.keys():
@ -324,8 +352,8 @@ def bVerifyContact(a=None, fp=None, https_cafile=None):
global aFP_EMAIL
global tBAD_URLS
global lKNOWN_NODNS
global aTRUST_DB
global aTRUST_DB_INDEX
global aGOOD_CONTACTS_DB
global aGOOD_CONTACTS_FPS
assert a
assert fp
assert https_cafile
@ -346,10 +374,10 @@ def bVerifyContact(a=None, fp=None, https_cafile=None):
LOG.warn(f"{fp} 'proof' not in {keys}")
return a
if aTRUST_DB_INDEX and fp in aTRUST_DB_INDEX.keys():
aCachedContact = aTRUST_DB_INDEX[fp]
if aGOOD_CONTACTS_FPS and fp in aGOOD_CONTACTS_FPS.keys():
aCachedContact = aGOOD_CONTACTS_FPS[fp]
if aCachedContact['email'] == a['email']:
LOG.info(f"{fp} in aTRUST_DB_INDEX")
LOG.info(f"{fp} in aGOOD_CONTACTS_FPS")
return aCachedContact
if 'url' not in keys:
@ -377,53 +405,16 @@ def bVerifyContact(a=None, fp=None, https_cafile=None):
lKNOWN_NODNS.append(domain)
return a
if a['proof'] in ['dns-rsa']:
# only support uri for now
if False and ub_ctx:
fp_domain = fp + '.' + domain
if idns_validate(fp_domain,
libunbound_resolv_file='resolv.conf',
dnssec_DS_file='dnssec-root-trust',
) == 0:
pass
LOG.warn(f"{fp} proof={a['proof']} - assumed good")
a['fps'] = [fp]
aTRUST_DB_INDEX[fp] = a
return a
return True
# async
# If we keep a cache of FPs that we have gotten by downloading a URL
# we can avoid re-downloading the URL of other FP in the list of relays.
# If we parallelize the gathering of the URLs, we may have simultaneous
# gathers of the same URL from different relays, defeating the advantage
# of going parallel. The cache is global aDOMAIN_FPS.
def aVerifyContact(a=None, fp=None, https_cafile=None, timeout=20, host='127.0.0.1', port=9050, oargs=None):
global aFP_EMAIL
global tBAD_URLS
global lKNOWN_NODNS
global aDOMAIN_FPS
assert a
assert fp
assert https_cafile
r = bVerifyContact(a=a, fp=fp, https_cafile=https_cafile)
if r is not True:
return r
domain = a['url'].replace('https://', '').replace('http://', '').rstrip('/')
if domain in aDOMAIN_FPS.keys():
a['fps'] = aDOMAIN_FPS[domain]
return a
# LOG.debug(f"{len(keys)} contact fields for {fp}")
url = a['url'] + "/.well-known/tor-relay/rsa-fingerprint.txt"
if url in aDOMAIN_FPS.keys():
a['fps'] = aDOMAIN_FPS[url]
return a
def oVerifyUrl(url, domain, fp=None, https_cafile=None, timeout=20, host='127.0.0.1', port=9050, oargs=None):
if bAreWeConnected() is False:
raise SystemExit("we are not connected")
if url in tBAD_URLS:
LOG.debug(f"BC Known bad url from {domain} for {fp}")
return None
o = None
try:
if httpx:
LOG.debug(f"Downloading from {domain} for {fp}")
@ -438,35 +429,99 @@ def aVerifyContact(a=None, fp=None, https_cafile=None, timeout=20, host='127.0.0
content_type='text/plain')
# requests response: text "reason", "status_code"
except AttributeError as e:
LOG.exception(f"AttributeError downloading from {domain} {e}")
LOG.exception(f"BC AttributeError downloading from {domain} {e}")
tBAD_URLS.add(url)
except CertificateError as e:
LOG.warn(f"CertificateError downloading from {domain} {e}")
tBAD_URLS.add(a['url'])
LOG.warn(f"BC CertificateError downloading from {domain} {e}")
tBAD_URLS.add(url)
except TrustorError as e:
if e.args == "HTTP Errorcode 404":
aFP_EMAIL[fp] = a['email']
LOG.warn(f"TrustorError 404 from {domain} {e.args}")
LOG.warn(f"BC TrustorError 404 from {domain} {e.args}")
else:
LOG.warn(f"TrustorError downloading from {domain} {e.args}")
tBAD_URLS.add(a['url'])
LOG.warn(f"BC TrustorError downloading from {domain} {e.args}")
tBAD_URLS.add(url)
except (urllib3.exceptions.MaxRetryError, urllib3.exceptions.ProtocolError,) as e: # noqa
#
# maybe offline - not bad
LOG.warn(f"MaxRetryError downloading from {domain} {e}")
LOG.warn(f"BC MaxRetryError downloading from {domain} {e}")
except (BaseException) as e:
LOG.error(f"Exception {type(e)} downloading from {domain} {e}")
LOG.error(f"BC Exception {type(e)} downloading from {domain} {e}")
else:
a = aContactFps(oargs, a, o, domain)
LOG.debug(f"Downloaded from {domain} {len(a['fps'])} FPs for {fp}")
aDOMAIN_FPS[domain] = a['fps']
return o
return None
# async
# If we keep a cache of FPs that we have gotten by downloading a URL
# we can avoid re-downloading the URL of other FP in the list of relays.
# If we parallelize the gathering of the URLs, we may have simultaneous
# gathers of the same URL from different relays, defeating the advantage
# of going parallel. The cache is global aDOMAIN_FPS.
def aVerifyContact(a=None, fp=None, https_cafile=None, timeout=20, host='127.0.0.1', port=9050, oargs=None):
# Check the parsed contact dict `a` of the relay with fingerprint `fp` and
# return it with a['fps'] filled in (or return whatever bVerifyContact()
# handed back when that is not exactly True).
# timeout/host/port/oargs are only passed through to oVerifyUrl() and
# aContactFps().
# NOTE(review): indentation is not visible in this view, so the nesting of
# the dns-rsa branch below is not certain - confirm against the real file.
global aFP_EMAIL
global tBAD_URLS
global lKNOWN_NODNS
global aDOMAIN_FPS
global aBAD_CONTACTS_DB
assert a
assert fp
assert https_cafile
# strip any scheme and trailing slashes, then force the url to https
domain = a['url'].replace('https://', '').replace('http://', '').rstrip('/')
a['url'] = 'https://' + domain
# cache hit: fingerprints for this domain were already gathered
if domain in aDOMAIN_FPS.keys():
a['fps'] = aDOMAIN_FPS[domain]
return a
r = bVerifyContact(a=a, fp=fp, https_cafile=https_cafile)
if r is not True:
return r
# NOTE(review): this tests a['url'], while the failure path below adds `url`,
# which for non-dns-rsa proofs carries the /.well-known/... suffix - confirm
# the two are meant to be the same key.
if a['url'] in tBAD_URLS:
a['fps'] = []
return a
if a['proof'] == 'dns-rsa':
if ub_ctx:
# the name looked up is "<fp>.<domain>"; a result of 0 is treated as valid
fp_domain = fp + '.' + domain
if idns_validate(fp_domain,
libunbound_resolv_file='resolv.conf',
dnssec_DS_file='dnssec-root-trust',
) == 0:
LOG.warn(f"{fp} proof={a['proof']} - validated good")
a['fps'] = [fp]
aGOOD_CONTACTS_FPS[fp] = a
else:
a['fps'] = []
return a
# only test url for now drop through
url = a['url']
# NOTE(review): a['fps'] is read here but nothing earlier in this function
# assigns it on this path - presumably KeyError unless the contact already
# carries 'fps'; it is also keyed by url where the other writes use domain.
aDOMAIN_FPS[url] = a['fps']
else:
url = a['url'] + "/.well-known/tor-relay/rsa-fingerprint.txt"
o = oVerifyUrl(url, domain, fp=fp, https_cafile=https_cafile, timeout=timeout, host=host, port=port, oargs=oargs)
if not o:
# download failed: remember the url and the contact as bad
LOG.warn(f"BC Failed Download from {url} ")
a['fps'] = []
tBAD_URLS.add(url)
aBAD_CONTACTS_DB[fp] = a
elif a['proof'] == 'dns-rsa':
# well let the test of the URL be enough for now
LOG.debug(f"Downloaded from {url} ")
a['fps'] = [fp]
aDOMAIN_FPS[domain] = a['fps']
elif a['proof'] == 'uri-rsa':
# aContactFps() returns the contact dict with a['fps'] set from the download
a = aContactFps(oargs, a, o, domain)
if a['fps']:
LOG.debug(f"Downloaded from {url} {len(a['fps'])} FPs for {fp}")
else:
aBAD_CONTACTS_DB[fp] = a
LOG.debug(f"BC Downloaded from {url} NO FPs for {fp}")
aDOMAIN_FPS[domain] = a['fps']
return a
def aContactFps(oargs, a, o, domain):
global aFP_EMAIL
global tBAD_URLS
global lKNOWN_NODNS
global aDOMAIN_FPS
if hasattr(o, 'status'):
@ -496,7 +551,7 @@ def aContactFps(oargs, a, o, domain):
with open(sfile, 'wt') as oFd:
oFd.write(data)
except Exception as e:
LOG.warn(f"Error wirting {sfile} {e}")
LOG.warn(f"Error writing {sfile} {e}")
a['modified'] = int(time.time())
if not l:
@ -506,7 +561,6 @@ def aContactFps(oargs, a, o, domain):
and len(elt) == 40 \
and not elt.startswith('#')]
LOG.info(f"Downloaded from {domain} {len(a['fps'])} FPs")
aDOMAIN_FPS[domain] = a['fps']
return a
def aParseContact(contact, fp):
@ -516,23 +570,33 @@ def aParseContact(contact, fp):
"""
a = {}
if not contact:
LOG.warn(f"null contact for {fp}")
LOG.warn(f"BC null contact for {fp}")
LOG.debug(f"{fp} {contact}")
return {}
contact = contact.split(r'\n')[0]
for elt in lMORONS:
contact = contact.replace(elt)
m = oCONTACT_RE.match(contact)
# 450 matches!
if m and m.groups and len(m.groups(0)) > 2 and m.span()[1] > 0:
i = len(m.groups(0)[0]) + len(m.groups(0)[1])
contact = contact[i:]
# shlex?
lelts = contact.split(' ')
if not lelts:
LOG.warn(f"empty contact for {fp}")
LOG.warn(f"BC empty contact for {fp}")
LOG.debug(f"{fp} {contact}")
return {}
for elt in lelts:
if ':' not in elt:
if elt == 'DFRI':
# oddball
continue
# hoster:Quintex Alliance Consulting
LOG.warn(f"no : in {elt} for {contact} in {fp}")
return {}
LOG.warn(f"BC no : in {elt} for {contact} in {fp}")
# return {}
# try going with what we have
break
(key , val,) = elt.split(':', 1)
if key == '':
continue
@ -599,12 +663,14 @@ def oMainArgparser(_=None):
parser.add_argument('--bad_nodes', type=str,
default=os.path.join(ETC_DIR, 'badnodes.yaml'),
help="Yaml file of bad nodes that should also be excluded")
parser.add_argument('--bad_on', type=str, default='Empty,NotGood',
parser.add_argument('--bad_on', type=str, default='Empty,NoEmail,NotGood',
help="comma sep list of conditions - Empty,NoEmail,NotGood")
parser.add_argument('--bad_contacts', type=str,
default=os.path.join(ETC_DIR, 'badcontacts.yaml'),
help="Yaml file of bad contacts that bad FPs are using")
parser.add_argument('--saved_only', default=False,
action='store_true',
help="Just use the info in the last *.yaml files without querying the Tor controller")
parser.add_argument('--strict_nodes', type=str, default=0,
choices=['0', '1'],
help="Set StrictNodes: 1 is less anonymous but more secure, although some onion sites may be unreachable")
@ -615,14 +681,17 @@ def oMainArgparser(_=None):
parser.add_argument('--log_level', type=int, default=20,
help="10=debug 20=info 30=warn 40=error")
parser.add_argument('--bad_sections', type=str,
default='MyBadExit',
help="sections of the badnodes.yaml to use, comma separated, '' BROKEN")
default='BadExit',
help="sections of the badnodes.yaml to use, in addition to BadExit, comma separated")
parser.add_argument('--white_onions', type=str,
default='',
help="comma sep. list of onions to whitelist their introduction points - BROKEN")
parser.add_argument('--torrc_output', type=str,
default=os.path.join(ETC_DIR, 'torrc.new'),
help="Write the torrc configuration to a file")
parser.add_argument('--notice_log', type=str,
default='',
help="Parse the notice log for relays and services (not yet)")
parser.add_argument('--relays_output', type=str,
default=os.path.join(ETC_DIR, 'relays.json'),
help="Write the download relays in json to a file")
@ -634,40 +703,43 @@ def oMainArgparser(_=None):
return parser
def vwrite_good_contacts(oargs):
    """Write the good and bad contact databases to YAML files.

    aGOOD_CONTACTS_DB is dumped to ``oargs.good_contacts + '.tmp'`` and then
    moved over ``oargs.good_contacts``; a previous file is kept as
    ``oargs.good_contacts + '.bak'``.  aBAD_CONTACTS_DB is dumped to
    ``oargs.good_contacts + '.bad'``.
    """
    global aGOOD_CONTACTS_DB
    global aBAD_CONTACTS_DB

    good_contacts_tmp = oargs.good_contacts + '.tmp'
    with open(good_contacts_tmp, 'wt') as oFYaml:
        yaml.dump(aGOOD_CONTACTS_DB, oFYaml)
    if os.path.exists(oargs.good_contacts):
        # os.replace overwrites a .bak left by an earlier run on every OS
        os.replace(oargs.good_contacts, oargs.good_contacts + '.bak')
    os.replace(good_contacts_tmp, oargs.good_contacts)
    LOG.info(f"Wrote {len(list(aGOOD_CONTACTS_DB.keys()))} good contact details to {oargs.good_contacts}")

    # derive the name from the target path, not by substituting '.tmp'
    # anywhere it happens to occur in the directory part
    bad_contacts_file = oargs.good_contacts + '.bad'
    with open(bad_contacts_file, 'wt') as oFYaml:
        yaml.dump(aBAD_CONTACTS_DB, oFYaml)
def vwrite_badnodes(oargs, aBAD_NODES, slen):
    """Dump *aBAD_NODES* as YAML to ``oargs.bad_nodes``.

    The data is written to ``oargs.bad_nodes + '.tmp'`` first and then moved
    into place; a previous file is kept as ``oargs.bad_nodes + '.bak'``.
    *slen* is only used in the log message.  Nothing is written when
    *aBAD_NODES* is empty or ``oargs.bad_nodes`` is unset.
    """
    if not aBAD_NODES or not oargs.bad_nodes:
        return
    tmp = oargs.bad_nodes + '.tmp'
    bak = oargs.bad_nodes + '.bak'
    with open(tmp, 'wt') as oFYaml:
        yaml.dump(aBAD_NODES, oFYaml)
    if os.path.exists(oargs.bad_nodes):
        # os.replace overwrites a .bak left by an earlier run on every OS
        os.replace(oargs.bad_nodes, bak)
    os.replace(tmp, oargs.bad_nodes)
    # report success only once the file is really in place
    LOG.info(f"Wrote {slen} to {oargs.bad_nodes}")
def vwrite_goodnodes(oargs, oGOOD_NODES, ilen):
    """Dump *oGOOD_NODES* as YAML to ``oargs.good_nodes``.

    The data is written to ``oargs.good_nodes + '.tmp'`` first and then moved
    into place; a previous file is kept as ``oargs.good_nodes + '.bak'``.
    *ilen* is only used in the log message.  Nothing is written when
    ``oargs.good_nodes`` is unset.
    """
    if not oargs.good_nodes:
        return
    tmp = oargs.good_nodes + '.tmp'
    bak = oargs.good_nodes + '.bak'
    with open(tmp, 'wt') as oFYaml:
        yaml.dump(oGOOD_NODES, oFYaml)
    if os.path.exists(oargs.good_nodes):
        # os.replace overwrites a .bak left by an earlier run on every OS
        os.replace(oargs.good_nodes, bak)
    os.replace(tmp, oargs.good_nodes)
    # report success only once the file is really in place
    LOG.info(f"Wrote {ilen} good relays to {oargs.good_nodes}")
def lget_onionoo_relays(oargs):
import requests
@ -780,18 +852,19 @@ def vsetup_logging(log_level, logfile='', stream=sys.stdout):
LOG.addHandler(oHandler)
LOG.info(f"SSetting log_level to {log_level!s}")
def vwritefinale(oargs, lNotInaRELAYS_DB=None):
    """Log the closing summary and pointers to per-relay information.

    oargs            -- parsed arguments; not used here.
    lNotInaRELAYS_DB -- optional list of fingerprints missing from the
                        relays database.  When omitted, the module-level
                        lNOT_IN_RELAYS_DB is used if it has been created.
    """
    lMissing = lNotInaRELAYS_DB
    if lMissing is None:
        # the global is only created once iMain has run, so look it up
        # defensively instead of assuming it exists
        lMissing = globals().get('lNOT_IN_RELAYS_DB') or []
    if lMissing:
        LOG.warning(f"{len(lMissing)} relays from stem were not in onionoo.torproject.org")

    LOG.info("For info on a FP, use: https://nusenu.github.io/OrNetStats/w/relay/<FP>.html")
    LOG.info("For info on relays, try: https://onionoo.torproject.org/details")
    # https://onionoo.torproject.org/details
    LOG.info("although it's often broken")
def bProcessContact(b, texclude_set, aBadContacts, iFakeContact=0):
global aTRUST_DB
global aTRUST_DB_INDEX
global aGOOD_CONTACTS_DB
global aGOOD_CONTACTS_FPS
sofar = ''
fp = b['fp']
# need to skip urllib3.exceptions.MaxRetryError
@ -813,15 +886,16 @@ def bProcessContact(b, texclude_set, aBadContacts, iFakeContact=0):
LOG.info(f"{fp} GOOD {b['url']} {sofar}")
# add our contact info to the trustdb
aTRUST_DB[fp] = b
aGOOD_CONTACTS_DB[fp] = b
for elt in b['fps']:
aTRUST_DB_INDEX[elt] = b
aGOOD_CONTACTS_FPS[elt] = b
return True
def bCheckFp(relay, sofar, lConds, texclude_set, lNotInaRELAYS_DB):
global aTRUST_DB
global aTRUST_DB_INDEX
def bCheckFp(relay, sofar, lConds, texclude_set):
global aGOOD_CONTACTS_DB
global aGOOD_CONTACTS_FPS
global lNOT_IN_RELAYS_DB
if not is_valid_fingerprint(relay.fingerprint):
LOG.warn('Invalid Fingerprint: %s' % relay.fingerprint)
@ -830,17 +904,17 @@ def bCheckFp(relay, sofar, lConds, texclude_set, lNotInaRELAYS_DB):
fp = relay.fingerprint
if aRELAYS_DB and fp not in aRELAYS_DB.keys():
LOG.warn(f"{fp} not in aRELAYS_DB")
lNotInaRELAYS_DB += [fp]
lNOT_IN_RELAYS_DB += [fp]
if not relay.exit_policy.is_exiting_allowed():
if sEXCLUDE_EXIT_KEY == sEXCLUDE_EXIT_KEY:
if sEXCLUDE_EXIT_GROUP == sEXCLUDE_EXIT_GROUP:
pass # LOG.debug(f"{fp} not an exit {sofar}")
else:
pass # LOG.warn(f"{fp} not an exit {sofar}")
# return None
# great contact had good fps and we are in them
if fp in aTRUST_DB_INDEX.keys():
if fp in aGOOD_CONTACTS_FPS.keys():
# a cached entry
return None
@ -856,8 +930,8 @@ def bCheckFp(relay, sofar, lConds, texclude_set, lNotInaRELAYS_DB):
contact = sCleanEmail(relay.contact)
# fail if the contact has no email - unreliable
if ('NoEmail' in lConds and relay.contact and
('@' not in contact and 'email:' not in contact)):
if 'NoEmail' in lConds and relay.contact and \
('@' not in contact):
LOG.info(f"{fp} skipping contact - NoEmail {contact} {sofar}")
LOG.debug(f"{fp} {relay.contact} {sofar}")
texclude_set.add(fp)
@ -881,8 +955,8 @@ def bCheckFp(relay, sofar, lConds, texclude_set, lNotInaRELAYS_DB):
return True
def oMainPreamble(lArgs):
global aTRUST_DB
global aTRUST_DB_INDEX
global aGOOD_CONTACTS_DB
global aGOOD_CONTACTS_FPS
parser = oMainArgparser()
oargs = parser.parse_args(lArgs)
@ -899,20 +973,20 @@ def oMainPreamble(lArgs):
if sFile and os.path.exists(sFile):
try:
with open(sFile, 'rt') as oFd:
aTRUST_DB = safe_load(oFd)
LOG.info(f"{len(aTRUST_DB.keys())} trusted contacts from {sFile}")
aGOOD_CONTACTS_DB = safe_load(oFd)
LOG.info(f"{len(aGOOD_CONTACTS_DB.keys())} trusted contacts from {sFile}")
# reverse lookup of fps to contacts
# but...
for (k, v,) in aTRUST_DB.items():
for (k, v,) in aGOOD_CONTACTS_DB.items():
if 'modified' not in v.keys():
v['modified'] = int(time.time())
aTRUST_DB_INDEX[k] = v
if 'fps' in aTRUST_DB[k].keys():
for fp in aTRUST_DB[k]['fps']:
if fp in aTRUST_DB_INDEX:
aGOOD_CONTACTS_FPS[k] = v
if 'fps' in aGOOD_CONTACTS_DB[k].keys():
for fp in aGOOD_CONTACTS_DB[k]['fps']:
if fp in aGOOD_CONTACTS_FPS:
continue
aTRUST_DB_INDEX[fp] = v
LOG.info(f"{len(aTRUST_DB_INDEX.keys())} good relays from {sFile}")
aGOOD_CONTACTS_FPS[fp] = v
LOG.info(f"{len(aGOOD_CONTACTS_FPS.keys())} good relays from {sFile}")
except Exception as e:
LOG.exception(f"Error reading YAML TrustDB {sFile} {e}")
@ -935,9 +1009,9 @@ def oStemController(oargs):
# does it work dynamically?
return 2
elt = controller.get_conf(sEXCLUDE_EXIT_KEY)
elt = controller.get_conf(sEXCLUDE_EXIT_GROUP)
if elt and elt != '{??}':
LOG.warn(f"{sEXCLUDE_EXIT_KEY} is in use already")
LOG.warn(f"{sEXCLUDE_EXIT_GROUP} is in use already")
return controller
@ -951,14 +1025,34 @@ def tWhitelistSet(oargs, controller):
if sGOOD_ROOT in oGOOD_NODES and 'Relays' in oGOOD_NODES[sGOOD_ROOT] and \
'IntroductionPoints' in oGOOD_NODES[sGOOD_ROOT]['Relays'].keys():
t = set(oGOOD_NODES[sGOOD_ROOT]['Relays']['IntroductionPoints'])
if oargs.notice_log and os.path.exists(oargs.notice_log):
tmp = tempfile.mktemp()
i = os.system(f"grep 'Every introduction point for service' {oargs.notice_log} |sed -e 's/.* service //' -e 's/ is .*//'|sort -u |sed -e '/ /d' > {tmp}")
if i:
with open(tmp, 'rt') as oFd:
lnew = oFd.readlines()
t.update(set(lnew))
LOG.info(f"Whitelist {len(lnew)} services from {oargs.notice_log}")
os.remove(tmp)
w = set()
if sGOOD_ROOT in oGOOD_NODES and 'Services' in oGOOD_NODES[sGOOD_ROOT].keys():
w = set(oGOOD_NODES[sGOOD_ROOT]['Services'])
twhitelist_set.update(w)
if len(w) > 0:
LOG.info(f"Whitelist {len(t)} relays from Services")
LOG.info(f"Whitelist {len(w)} relays from {sGOOD_ROOT}/Services")
if oargs.notice_log and os.path.exists(oargs.notice_log):
tmp = tempfile.mktemp()
i = os.system(f"grep 'Wanted to contact directory mirror \$' /var/lib/tor/.SelekTOR/3xx/cache/9050/notice.log|sed -e 's/.* \$//' -e 's/[~ ].*//'|sort -u > {tmp}")
if i:
with open(tmp, 'rt') as oFd:
lnew = oFd.readlines()
w.update(set(lnew))
LOG.info(f"Whitelist {len(lnew)} relays from {oargs.notice_log}")
os.remove(tmp)
twhitelist_set.update(w)
w = set()
if 'Onions' in oGOOD_NODES[sGOOD_ROOT].keys():
# Provides the descriptor for a hidden service. The **address** is the
@ -977,63 +1071,68 @@ def tWhitelistSet(oargs, controller):
def tExcludeSet(oargs):
    """Return the set of fingerprints preloaded from the bad-nodes YAML file.

    'BadExit' is always read; the comma separated names in
    ``oargs.bad_sections`` are read in addition.  An empty set is returned
    when ``oargs.bad_nodes`` is unset or the file does not exist.
    """
    texclude_set = set()
    sections = {'BadExit'}
    if oargs.bad_nodes and os.path.exists(oargs.bad_nodes):
        if oargs.bad_sections:
            # tolerate "A, B" and stray commas: strip blanks, drop empties
            sections.update(elt.strip()
                            for elt in oargs.bad_sections.split(',')
                            if elt.strip())
        texclude_set = set(lYamlBadNodes(oargs.bad_nodes,
                                         tWanted=sections,
                                         section=sEXCLUDE_EXIT_GROUP))
        LOG.info(f"Preloaded {len(texclude_set)} bad fps")
    return texclude_set
# async
def iMain(lArgs):
global aTRUST_DB
global aTRUST_DB_INDEX
global oBAD_NODES
global aGOOD_CONTACTS_DB
global aGOOD_CONTACTS_FPS
global aBAD_CONTACTS_DB
global aBAD_NODES
global oGOOD_NODES
global lKNOWN_NODNS
global aRELAYS_DB
global aRELAYS_DB_INDEX
global tBAD_URLS
global lNOT_IN_RELAYS_DB
oargs = oMainPreamble(lArgs)
controller = oStemController(oargs)
twhitelist_set = tWhitelistSet(oargs, controller)
texclude_set = tExcludeSet(oargs)
ttrust_db_index = aTRUST_DB_INDEX.keys()
tdns_urls = set()
ttrust_db_index = aGOOD_CONTACTS_FPS.keys()
iFakeContact = 0
iTotalContacts = 0
aBadContacts = {}
lNotInaRELAYS_DB = []
lNOT_IN_RELAYS_DB = []
iR = 0
relays = controller.get_server_descriptors()
lqueue = []
socksu = f"socks5://{oargs.proxy_host}:{oargs.proxy_port}"
if oargs.saved_only:
relays = []
for relay in relays:
iR += 1
fp = relay.fingerprint = relay.fingerprint.upper()
sofar = f"G:{len(aTRUST_DB.keys())} U:{len(tdns_urls)} F:{iFakeContact} BF:{len(texclude_set)} GF:{len(ttrust_db_index)} TC:{iTotalContacts} #{iR}"
sofar = f"G:{len(aGOOD_CONTACTS_DB.keys())} F:{iFakeContact} BF:{len(texclude_set)} GF:{len(ttrust_db_index)} TC:{iTotalContacts} #{iR}"
lConds = oargs.bad_on.split(',')
r = bCheckFp(relay, sofar, lConds, texclude_set, lNotInaRELAYS_DB)
r = bCheckFp(relay, sofar, lConds, texclude_set)
if r is not True: continue
# if it has a ciissversion in contact we count it in total
iTotalContacts += 1
# only proceed if 'NotGood' not in lConds:
if 'NotGood' not in lConds: continue
if 'NotGood' not in lConds:
continue
# fail if the contact does not have url: to pass
a = aParseContact(relay.contact, fp)
if not a:
LOG.warn(f"{fp} contact did not parse {sofar}")
LOG.warn(f"{fp} BC contact did not parse {sofar}")
texclude_set.add(fp)
aBAD_CONTACTS_DB[fp] = a
continue
if 'url' in a and a['url']:
@ -1048,23 +1147,17 @@ def iMain(lArgs):
# fail if the contact uses a domain we already know does not resolve
if domain in lKNOWN_NODNS:
# The fp is using a contact with a URL we know is bogus
LOG.info(f"{fp} skipping in lKNOWN_NODNS {a} {sofar}")
LOG.info(f"{fp} BC skipping in lKNOWN_NODNS {a} {sofar}")
LOG.debug(f"{fp} {relay} {sofar}")
texclude_set.add(fp)
aBAD_CONTACTS_DB[fp] = a
continue
# drop through
if 'dns-rsa' in relay.contact.lower():
# skip if the contact uses a dns-rsa url we dont handle
target = f"{fp}.{domain}"
LOG.info(f"skipping 'dns-rsa' {target} {sofar}")
tdns_urls.add(target)
continue
if 'proof:uri-rsa' in relay.contact.lower():
if 'proof' in a and a['proof'] in ['uri-rsa', 'dns-rsa']:
if domain in aDOMAIN_FPS.keys(): continue
a['fp'] = fp
if httpx:
a['fp'] = fp
lqueue.append(asyncio.create_task(
aVerifyContact(a=a,
fp=fp,
@ -1099,14 +1192,12 @@ def iMain(lArgs):
LOG.info(f"Filtered {len(twhitelist_set)} whitelisted relays")
texclude_set = texclude_set.difference(twhitelist_set)
# accept the dns-rsa urls for now until we test them
texclude_set = texclude_set.difference(tdns_urls)
LOG.info(f"{len(list(aTRUST_DB.keys()))} good contacts out of {iTotalContacts}")
LOG.info(f"{len(list(aGOOD_CONTACTS_DB.keys()))} good contacts out of {iTotalContacts}")
if oargs.torrc_output and texclude_set:
with open(oargs.torrc_output, 'wt') as oFTorrc:
oFTorrc.write(f"{sEXCLUDE_EXIT_KEY} {','.join(texclude_set)}\n")
oFTorrc.write(f"{sINCLUDE_EXIT_KEY} {','.join(aTRUST_DB_INDEX.keys())}\n")
oFTorrc.write(f"{sEXCLUDE_EXIT_GROUP} {','.join(texclude_set)}\n")
oFTorrc.write(f"{sINCLUDE_EXIT_KEY} {','.join(aGOOD_CONTACTS_FPS.keys())}\n")
oFTorrc.write(f"{sINCLUDE_GUARD_KEY} {','.join(oGOOD_NODES[sGOOD_ROOT]['EntryNodes'])}\n")
LOG.info(f"Wrote tor configuration to {oargs.torrc_output}")
oFTorrc.close()
@ -1117,35 +1208,37 @@ def iMain(lArgs):
yaml.dump(aBadContacts, oFYaml)
oFYaml.close()
if oargs.good_contacts != '' and aTRUST_DB:
if oargs.good_contacts != '' and aGOOD_CONTACTS_DB:
vwrite_good_contacts(oargs)
oBAD_NODES[oBAD_ROOT][sEXCLUDE_EXIT_KEY]['BadExit'] = list(texclude_set)
oBAD_NODES[oBAD_ROOT][sEXCLUDE_DOMAINS] = lKNOWN_NODNS
vwrite_badnodes(oargs, oBAD_NODES, str(len(texclude_set)))
aBAD_NODES[oBAD_ROOT][sEXCLUDE_EXIT_GROUP]['BadExit'] = list(texclude_set)
aBAD_NODES[oBAD_ROOT][sEXCLUDE_DOMAINS] = lKNOWN_NODNS
if oargs.bad_nodes:
vwrite_badnodes(oargs, aBAD_NODES, str(len(texclude_set)))
oGOOD_NODES['GoodNodes']['Relays']['ExitNodes'] = list(aTRUST_DB_INDEX.keys())
oGOOD_NODES['GoodNodes']['Relays']['ExitNodes'] = list(aGOOD_CONTACTS_FPS.keys())
# EntryNodes are readony
vwrite_goodnodes(oargs, oGOOD_NODES, len(aTRUST_DB_INDEX.keys()))
if oargs.good_nodes:
vwrite_goodnodes(oargs, oGOOD_NODES, len(aGOOD_CONTACTS_FPS.keys()))
vwritefinale(oargs, lNotInaRELAYS_DB)
vwritefinale(oargs)
retval = 0
try:
logging.getLogger('stem').setLevel(30)
if texclude_set:
try:
LOG.info(f"{sEXCLUDE_EXIT_KEY} {len(texclude_set)} net bad exit relays")
controller.set_conf(sEXCLUDE_EXIT_KEY, list(texclude_set))
LOG.info(f"controller {sEXCLUDE_EXIT_GROUP} {len(texclude_set)} net bad relays")
controller.set_conf(sEXCLUDE_EXIT_GROUP, list(texclude_set))
except (Exception, stem.InvalidRequest, stem.SocketClosed,) as e: # noqa
LOG.error(f"Failed setting {sEXCLUDE_EXIT_KEY} bad exit relays in Tor {e}")
LOG.error(f"Failed setting {sEXCLUDE_EXIT_GROUP} bad exit relays in Tor {e}")
LOG.debug(repr(texclude_set))
retval += 1
if aTRUST_DB_INDEX.keys():
l = [elt for elt in aTRUST_DB_INDEX.keys() if len (elt) == 40]
if aGOOD_CONTACTS_FPS.keys():
l = [elt for elt in aGOOD_CONTACTS_FPS.keys() if len (elt) == 40]
try:
LOG.info(f"{sINCLUDE_EXIT_KEY} {len(l)} good relays")
LOG.info(f"controller {sINCLUDE_EXIT_KEY} {len(l)} good relays")
controller.set_conf(sINCLUDE_EXIT_KEY, l)
except (Exception, stem.InvalidRequest, stem.SocketClosed) as e: # noqa
LOG.error(f"Failed setting {sINCLUDE_EXIT_KEY} good exit nodes in Tor {e}")
@ -1165,8 +1258,13 @@ def iMain(lArgs):
cur = controller.get_conf('StrictNodes')
if oargs.strict_nodes and int(cur) != oargs.strict_nodes:
LOG.info(f"OVERRIDING StrictNodes to {oargs.strict_nodes}")
controller.set_conf('StrictNodes', oargs.strict_nodes)
cur = controller.get_conf('StrictNodes')
if int(cur) != oargs.strict_nodes:
LOG.warn(f"OVERRIDING StrictNodes NOT {oargs.strict_nodes}")
else:
LOG.info(f"OVERRODE StrictNodes to {oargs.strict_nodes}")
else:
LOG.info(f"StrictNodes is set to {cur}")
@ -1188,7 +1286,6 @@ def iMain(lArgs):
except Exception as e:
LOG.warn(str(e))
sys.stdout.write("dns-rsa domains:\n" +'\n'.join(tdns_urls) +'\n')
return retval
if __name__ == '__main__':

View file

@ -33,6 +33,32 @@ bHAVE_TORR = shutil.which('tor-resolve')
# in the wild we'll keep a copy here so we can avoid retesting
yKNOWN_NODNS = """
---
- for-privacy.net
- backup.spekadyon.org
- verification-for-nusenu.net
- prsv.ch
- ezyn.de
- dfri.se
- dtf.contact
- galtland.network
- dotsrc.org
- nicdex.com
- unzane.com
- a9.wtf
- tor.skankhunt42.pw
- tor-exit-3.aa78i2efsewr0neeknk.xyz
- privacysvcs.net
- apt96.com
- mkg20001.io
- kryptonit.org
- sebastian-elisa-pfeifer.eu
- nx42.de
- www.defcon.org
- 0x0.is
- transliberation.today
- tor-exit-2.aa78i2efsewr0neeknk.xyz
- interfesse.net
- axims.net
- a9.wtf
- heraldonion.org
- linkspartei.org