Async added and removed

This commit is contained in:
emdee 2022-11-27 01:10:18 +00:00
parent 08626942d3
commit 204a6adc48
4 changed files with 483 additions and 258 deletions

View file

@ -14,7 +14,7 @@ LARGS=(
# you may have a special python for installed packages
EXE=`which python3.bash`
LARGS+=(
--strict_nodes 0
--strict_nodes 1
--points_timeout 120
--proxy-host 127.0.0.1
--proxy-port $SOCKS_PORT

View file

@ -136,8 +136,14 @@ from support_onions import (bAreWeConnected, icheck_torrc, lIntroductionPoints,
yKNOWN_NODNS, zResolveDomain)
from trustor_poc import TrustorError, idns_validate
from trustor_poc import oDownloadUrlUrllib3 as oDownloadUrl
try:
import xxxhttpx
import asyncio
from trustor_poc import oDownloadUrlHttpx
except:
httpx = None
from trustor_poc import oDownloadUrlUrllib3Socks as oDownloadUrl
global LOG
import logging
import warnings
@ -157,16 +163,32 @@ aTRUST_DB_INDEX = {}
aRELAYS_DB = {}
aRELAYS_DB_INDEX = {}
aFP_EMAIL = {}
aDOMAIN_FPS = {}
sDETAILS_URL = "https://metrics.torproject.org/rs.html#details/"
# You can call this while bootstrapping
sEXCLUDE_EXIT_KEY = 'ExcludeNodes'
sINCLUDE_EXIT_KEY = 'ExitNodes'
sINCLUDE_GUARD_KEY = 'EntryNodes'
oBAD_NODES = {}
oBAD_ROOT = 'BadNodes'
oBAD_NODES[oBAD_ROOT] = {}
oBAD_NODES[oBAD_ROOT]['ExcludeNodes'] = {}
oBAD_NODES = safe_load("""
BadNodes:
ExcludeDomains: []
ExcludeNodes:
BadExit: []
""")
sGOOD_ROOT = 'GoodNodes'
sINCLUDE_GUARD_KEY = 'EntryNodes'
sEXCLUDE_DOMAINS = 'ExcludeDomains'
oGOOD_NODES = safe_load("""
GoodNodes:
EntryNodes: []
Relays:
ExitNodes: []
IntroductionPoints: []
Onions: []
Services: []
""")
lKNOWN_NODNS = []
tMAYBE_NODNS = set()
@ -184,21 +206,19 @@ def lYamlBadNodes(sFile,
oBAD_NODES = safe_load(oFd)
# BROKEN
# root = 'ExcludeNodes'
# root = sEXCLUDE_EXIT_KEY
# for elt in o[oBAD_ROOT][root][section].keys():
# if lWanted and elt not in lWanted: continue
# # l += o[oBAD_ROOT][root][section][elt]
l = oBAD_NODES[oBAD_ROOT]['ExcludeNodes']['BadExit']
l = oBAD_NODES[oBAD_ROOT][sEXCLUDE_EXIT_KEY]['BadExit']
tMAYBE_NODNS = set(safe_load(StringIO(yKNOWN_NODNS)))
root = 'ExcludeDomains'
root = sEXCLUDE_DOMAINS
if root in oBAD_NODES[oBAD_ROOT] and oBAD_NODES[oBAD_ROOT][root]:
tMAYBE_NODNS.extend(oBAD_NODES[oBAD_ROOT][root])
return l
oGOOD_NODES = {}
oGOOD_ROOT = 'GoodNodes'
def lYamlGoodNodes(sFile='/etc/tor/torrc-goodnodes.yaml'):
global oGOOD_NODES
l = []
@ -207,8 +227,8 @@ def lYamlGoodNodes(sFile='/etc/tor/torrc-goodnodes.yaml'):
with open(sFile, 'rt') as oFd:
o = safe_load(oFd)
oGOOD_NODES = o
if 'GuardNodes' in o[oGOOD_ROOT].keys():
l = o[oGOOD_ROOT]['GuardNodes']
if 'EntryNodes' in o[sGOOD_ROOT].keys():
l = o[sGOOD_ROOT]['EntryNodes']
# yq '.Nodes.IntroductionPoints|.[]' < /etc/tor/torrc-goodnodes.yaml
return l
@ -236,18 +256,20 @@ lAT_REPS = ['[]', ' at ', '(at)', '[at]', '<at>', '(att)', '_at_',
lDOT_REPS = [' point ', ' dot ', '[dot]', '(dot)', '_dot_', '!dot!', '<.>',
'<:dot:>', '|dot--|',
]
lNO_EMAIL = ['<nobody at example dot com>',
'not@needed.com',
lNO_EMAIL = [
'<nobody at example dot com>',
'<nobody at none of your business xyz>',
'<not-set@example.com>',
'not a person <nomail at yet dot com>',
r'<nothing/at\\mail.de>',
'@snowden',
'ano ano@fu.dk',
'anonymous',
'anonymous@buzzzz.com',
'check http://highwaytohoell.de',
'no-spam@tor.org',
'no@no.no',
'noreply@bytor.com',
'not a person <nomail at yet dot com>',
'not@needed.com',
'not@needed.com',
'not@re.al',
'nothanks',
@ -255,6 +277,7 @@ lNO_EMAIL = ['<nobody at example dot com>',
'ur@mom.com',
'your@e-mail',
'your@email.com',
r'<nothing/at\\mail.de>',
]
def sCleanEmail(s):
s = s.lower()
@ -297,17 +320,26 @@ def aCleanContact(a):
a.update({'fps': []})
return a
def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050):
def bVerifyContact(a=None, fp=None, https_cafile=None):
global aFP_EMAIL
global tBAD_URLS
global lKNOWN_NODNS
global aTRUST_DB
global aTRUST_DB_INDEX
assert a
assert fp
assert https_cafile
keys = list(a.keys())
a = aCleanContact(a)
a['fp'] = fp
if 'email' not in keys:
a['email'] = ''
if 'ciissversion' not in keys:
aFP_EMAIL[fp] = a['email']
LOG.warn(f"{fp} 'ciissversion' not in {keys}")
a['ciissversion'] = 2
return a
# test the url for fps and add it to the array
if 'proof' not in keys:
aFP_EMAIL[fp] = a['email']
@ -343,7 +375,7 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050)
aFP_EMAIL[fp] = a['email']
LOG.debug(f"{fp} {domain} does not resolve")
lKNOWN_NODNS.append(domain)
return {}
return a
if a['proof'] in ['dns-rsa']:
# only support uri for now
@ -354,16 +386,56 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050)
dnssec_DS_file='dnssec-root-trust',
) == 0:
pass
LOG.warn(f"{fp} proof={a['proof']} not supported yet")
LOG.warn(f"{fp} proof={a['proof']} - assumed good")
a['fps'] = [fp]
aTRUST_DB_INDEX[fp] = a
return a
return True
# async
# If we keep a cache of FPs that we have gotten by downloading a URL
# we can avoid re-downloading the URL for the other FPs in the list of relays.
# If we parallelize the gathering of the URLs, we may have simultaneous
# gathers of the same URL from different relays, defeating the advantage
# of going parallel. The cache is the global aDOMAIN_FPS.
def aVerifyContact(a=None, fp=None, https_cafile=None, timeout=20, host='127.0.0.1', port=9050, oargs=None):
global aFP_EMAIL
global tBAD_URLS
global lKNOWN_NODNS
global aDOMAIN_FPS
assert a
assert fp
assert https_cafile
r = bVerifyContact(a=a, fp=fp, https_cafile=https_cafile)
if r is not True:
return r
domain = a['url'].replace('https://', '').replace('http://', '').rstrip('/')
if domain in aDOMAIN_FPS.keys():
a['fps'] = aDOMAIN_FPS[domain]
return a
# LOG.debug(f"{len(keys)} contact fields for {fp}")
url = a['url'] + "/.well-known/tor-relay/rsa-fingerprint.txt"
if url in aDOMAIN_FPS.keys():
a['fps'] = aDOMAIN_FPS[url]
return a
if bAreWeConnected() is False:
raise SystemExit("we are not connected")
try:
LOG.debug(f"Downloading from {domain} for {fp}")
o = oDownloadUrl(url, https_cafile,
timeout=timeout, host=host, port=port,
content_type='text/plain')
if httpx:
LOG.debug(f"Downloading from {domain} for {fp}")
# await
o = oDownloadUrl(url, https_cafile,
timeout=timeout, host=host, port=port,
content_type='text/plain')
else:
LOG.debug(f"Downloading from {domain} for {fp}")
o = oDownloadUrl(url, https_cafile,
timeout=timeout, host=host, port=port,
content_type='text/plain')
# requests response: text "reason", "status_code"
except AttributeError as e:
LOG.exception(f"AttributeError downloading from {domain} {e}")
@ -384,34 +456,57 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050)
except (BaseException) as e:
LOG.error(f"Exception {type(e)} downloading from {domain} {e}")
else:
if hasattr(o, 'status'):
status_code = o.status
else:
status_code = o.status_code
if status_code >= 300:
aFP_EMAIL[fp] = a['email']
LOG.warn(f"Error from {domain} {status_code} {o.reason}")
# any reason retry?
tBAD_URLS.add(a['url'])
return a
a = aContactFps(oargs, a, o, domain)
LOG.debug(f"Downloaded from {domain} {len(a['fps'])} FPs for {fp}")
aDOMAIN_FPS[domain] = a['fps']
url = a['url']
aDOMAIN_FPS[url] = a['fps']
return a
if hasattr(o, 'text'):
data = o.text
else:
data = str(o.data, 'UTF-8')
l = data.upper().strip().split('\n')
LOG.debug(f"Downloaded from {domain} {len(l)} lines {len(data)} bytes")
def aContactFps(oargs, a, o, domain):
    """Extract the relay fingerprints from a downloaded proof file.

    :param oargs: parsed arguments, or None; when ``oargs.wellknown_output``
        is set, a copy of the downloaded text is saved beneath it as
        ``<domain>/.well-known/tor-relay/rsa-fingerprint.txt``.
    :param a: contact dict; ``fp``, ``email`` and ``url`` are read,
        ``modified`` and ``fps`` are written.
    :param o: HTTP response exposing either ``status``/``data`` or
        ``status_code``/``text``.
    :param domain: domain the file was fetched from; used as the key of the
        global aDOMAIN_FPS cache.
    :returns: the (mutated) contact dict ``a``.
    """
    global aFP_EMAIL
    global tBAD_URLS
    global lKNOWN_NODNS
    global aDOMAIN_FPS

    # fp is not a parameter of this function; the fingerprint travels
    # inside the contact dict.
    fp = a.get('fp', '')

    status_code = o.status if hasattr(o, 'status') else o.status_code
    if status_code >= 300:
        aFP_EMAIL[fp] = a.get('email', '')
        LOG.warning(f"Error from {domain} {status_code} {getattr(o, 'reason', '')}")
        # any reason retry?
        tBAD_URLS.add(a['url'])
        return a

    data = o.text if hasattr(o, 'text') else str(o.data, 'UTF-8')
    lines = data.upper().strip().split('\n')
    LOG.debug(f"Downloaded from {domain} {len(lines)} lines {len(data)} bytes")

    # oargs may be None, so read the option defensively
    wellknown_output = getattr(oargs, 'wellknown_output', None)
    if wellknown_output:
        # NOTE(review): domain is derived from relay-supplied contact info
        # and is joined into a filesystem path unchecked - confirm it cannot
        # contain path separators or '..'.
        sdir = os.path.join(wellknown_output, domain,
                            '.well-known', 'tor-relay')
        # computed before the try so the error message can always name it
        sfile = os.path.join(sdir, "rsa-fingerprint.txt")
        try:
            os.makedirs(sdir, exist_ok=True)
            with open(sfile, 'wt') as oFd:
                oFd.write(data)
        except Exception as e:
            LOG.warning(f"Error wirting {sfile} {e}")

    a['modified'] = int(time.time())
    # Strip each line BEFORE the length test: with CRLF line endings every
    # fingerprint carries a trailing '\r' and would otherwise be 41 chars.
    fps = [elt for elt in (line.strip() for line in lines)
           if len(elt) == 40 and not elt.startswith('#')]
    a['fps'] = fps
    if not fps:
        LOG.warning(f"Downloaded from {domain} empty for {fp}")
    else:
        LOG.info(f"Downloaded from {domain} {len(fps)} FPs")
    aDOMAIN_FPS[domain] = fps
    return a
def aParseContact(contact, fp):
@ -432,16 +527,18 @@ def aParseContact(contact, fp):
return {}
for elt in lelts:
if ':' not in elt:
if elt == 'DFRI':
# oddball
continue
# hoster:Quintex Alliance Consulting
LOG.warn(f"no : in {elt} for {contact} in {fp}")
continue
return {}
(key , val,) = elt.split(':', 1)
if key == '':
continue
key = key.rstrip(':')
a[key] = val
a = aCleanContact(a)
# LOG.debug(f"{fp} {len(a.keys())} fields")
return a
def aParseContactYaml(contact, fp):
@ -508,8 +605,9 @@ def oMainArgparser(_=None):
default=os.path.join(ETC_DIR, 'badcontacts.yaml'),
help="Yaml file of bad contacts that bad FPs are using")
parser.add_argument('--strict_nodes', type=int, default=0, choices=[0, 1],
help="Set StrictNodes: 1 is less anonymous but more secure, although some sites may be unreachable")
parser.add_argument('--strict_nodes', type=str, default=0,
choices=['0', '1'],
help="Set StrictNodes: 1 is less anonymous but more secure, although some onion sites may be unreachable")
parser.add_argument('--wait_boot', type=int, default=120,
help="Seconds to wait for Tor to booststrap")
parser.add_argument('--points_timeout', type=int, default=0,
@ -528,10 +626,25 @@ def oMainArgparser(_=None):
parser.add_argument('--relays_output', type=str,
default=os.path.join(ETC_DIR, 'relays.json'),
help="Write the download relays in json to a file")
parser.add_argument('--wellknown_output', type=str,
default=os.path.join(ETC_DIR, 'https'),
help="Write the well-known files to a directory")
parser.add_argument('--good_contacts', type=str, default=os.path.join(ETC_DIR, 'goodcontacts.yaml'),
help="Write the proof data of the included nodes to a YAML file")
return parser
def vwrite_good_contacts(oargs):
    """Write the trust database to the ``--good_contacts`` YAML file.

    The global aTRUST_DB is dumped to ``<good_contacts>.tmp`` first and only
    then moved over the real file, so the existing file is not touched until
    the dump has completed.  A previous file is kept as
    ``<good_contacts>.bak``.

    :param oargs: parsed arguments; only ``oargs.good_contacts`` (the
        destination path) is read.
    """
    global aTRUST_DB
    good_contacts_tmp = oargs.good_contacts + '.tmp'
    # the with-block closes the file; no explicit close() is needed
    with open(good_contacts_tmp, 'wt') as oFYaml:
        yaml.dump(aTRUST_DB, oFYaml)
    if os.path.exists(oargs.good_contacts):
        bak = oargs.good_contacts + '.bak'
        # os.replace overwrites a stale .bak on every platform, whereas
        # os.rename raises on Windows when the destination already exists
        os.replace(oargs.good_contacts, bak)
    os.replace(good_contacts_tmp, oargs.good_contacts)
    LOG.info(f"Wrote {len(aTRUST_DB)} good contact details to {oargs.good_contacts}")
def vwrite_badnodes(oargs, oBAD_NODES, slen):
if oargs.bad_nodes:
tmp = oargs.bad_nodes +'.tmp'
@ -560,6 +673,7 @@ def lget_onionoo_relays(oargs):
import requests
adata = {}
if oargs.relays_output and os.path.exists(oargs.relays_output):
# and less than a day old?
LOG.info(f"Getting OO relays from {oargs.relays_output}")
try:
with open(oargs.relays_output, 'rt') as ofd:
@ -581,9 +695,9 @@ def lget_onionoo_relays(oargs):
port=oargs.proxy_port,
content_type='')
if hasattr(o, 'text'):
data = o.text
sdata = o.text
else:
data = str(o.data, 'UTF-8')
sdata = str(o.data, 'UTF-8')
except Exception as e:
# simplejson.errors.JSONDecodeError
# urllib3.exceptions import ConnectTimeoutError, NewConnectionError
@ -592,7 +706,7 @@ def lget_onionoo_relays(oargs):
return []
else:
LOG.debug(f"Downloaded {surl} {len(sdata)} bytes")
adata = json.loads(data)
adata = json.loads(sdata)
else:
odata = requests.get(surl, verify=sCAfile)
try:
@ -675,15 +789,101 @@ def vwritefinale(oargs, lNotInaRELAYS_DB):
# https://onionoo.torproject.org/details
LOG.info(f"although it's often broken")
def iMain(lArgs):
def bProcessContact(b, texclude_set, aBadContacts, iFakeContact=0):
    """Record the outcome of verifying one relay's contact information.

    :param b: contact dict as returned by aVerifyContact(); read keys are
        ``fp`` (the relay fingerprint), ``url`` and ``fps`` (the
        fingerprints published at that url).
    :param texclude_set: set of fingerprints to exclude; mutated in place.
    :param aBadContacts: dict mapping fp -> contact for contacts that failed;
        mutated in place.
    :param iFakeContact: unused here; kept so existing callers still work.
    :returns: True when fp is listed in ``b['fps']`` (the contact is stored
        in aTRUST_DB and every listed fp is indexed in aTRUST_DB_INDEX),
        False when fps were obtained but do not include fp,
        None when the contact could not be verified at all.
    """
    global aTRUST_DB
    global aTRUST_DB_INDEX

    sofar = ''
    # The verification guard below allows for an empty/None b, so the
    # fingerprint must be read defensively rather than with b['fp'].
    fp = b.get('fp') if b else None
    if fp is None:
        LOG.warning(f"contact without a fingerprint did NOT VERIFY {b}")
        return None

    # need to skip urllib3.exceptions.MaxRetryError
    if not b.get('fps') or not b.get('url'):
        LOG.warning(f"{fp} did NOT VERIFY {sofar}")
        LOG.debug(f"{fp} {b} {sofar}")
        # If it's giving contact info that doesn't check out
        # it could be a bad exit with fake contact info
        texclude_set.add(fp)
        aBadContacts[fp] = b
        return None

    if fp not in b['fps']:
        LOG.warning(f"{fp} the FP IS NOT in the list of fps {sofar}")
        # assume a fp is using a bogus contact
        texclude_set.add(fp)
        aBadContacts[fp] = b
        return False

    LOG.info(f"{fp} GOOD {b['url']} {sofar}")
    # add our contact info to the trustdb
    aTRUST_DB[fp] = b
    for elt in b['fps']:
        aTRUST_DB_INDEX[elt] = b
    return True
def bCheckFp(relay, sofar, lConds, texclude_set, lNotInaRELAYS_DB):
    """Decide whether a relay's contact should go on to be parsed and verified.

    :param relay: relay descriptor; ``fingerprint`` and ``contact`` are read,
        and ``contact`` is rewritten as str when it arrives as bytes.
    :param sofar: progress string appended to the log messages.
    :param lConds: condition names ('Empty', 'NoEmail', 'NotGood') under
        which a failing relay is added to texclude_set.
    :param texclude_set: set of fingerprints to exclude; mutated in place.
    :param lNotInaRELAYS_DB: list collecting fingerprints that are missing
        from the global aRELAYS_DB; mutated in place.
    :returns: True when every check passed; None when the relay is to be
        skipped (invalid fingerprint, already indexed in aTRUST_DB_INDEX,
        or a contact that fails one of the checks).
    """
    global aTRUST_DB
    global aTRUST_DB_INDEX

    if not is_valid_fingerprint(relay.fingerprint):
        LOG.warning('Invalid Fingerprint: %s' % relay.fingerprint)
        return None

    fp = relay.fingerprint
    if aRELAYS_DB and fp not in aRELAYS_DB:
        LOG.warning(f"{fp} not in aRELAYS_DB")
        lNotInaRELAYS_DB.append(fp)

    # Non-exit relays are deliberately not skipped: the former branch on
    # relay.exit_policy compared a constant with itself and only held no-ops.

    # great contact had good fps and we are in them
    if fp in aTRUST_DB_INDEX:
        # a cached entry
        return None

    if isinstance(relay.contact, bytes):
        # dunno
        relay.contact = str(relay.contact, 'UTF-8')

    # fail if the contact is empty
    if 'Empty' in lConds and not relay.contact:
        LOG.info(f"{fp} skipping empty contact - Empty {sofar}")
        texclude_set.add(fp)
        return None

    # sCleanEmail() lower-cases its argument, so only hand it a real string;
    # the cleaned value is only consulted below when relay.contact is set.
    contact = sCleanEmail(relay.contact) if relay.contact else ''

    # fail if the contact has no email - unreliable
    if ('NoEmail' in lConds and relay.contact
            and '@' not in contact and 'email:' not in contact):
        LOG.info(f"{fp} skipping contact - NoEmail {contact} {sofar}")
        LOG.debug(f"{fp} {relay.contact} {sofar}")
        texclude_set.add(fp)
        return None

    # fail if the contact does not pass
    if ('NotGood' in lConds and relay.contact
            and 'ciissversion:' not in relay.contact):
        LOG.info(f"{fp} skipping no ciissversion in contact {sofar}")
        LOG.debug(f"{fp} {relay.contact} {sofar}")
        texclude_set.add(fp)
        return None

    # fail if the contact does not have url: to pass
    if relay.contact and 'url' not in relay.contact:
        LOG.info(f"{fp} skipping unfetchable contact - no url {sofar}")
        LOG.debug(f"{fp} {relay.contact} {sofar}")
        if 'NotGood' in lConds:
            texclude_set.add(fp)
        return None

    return True
def oMainPreamble(lArgs):
global aTRUST_DB
global aTRUST_DB_INDEX
parser = oMainArgparser()
oargs = parser.parse_args(lArgs)
@ -691,21 +891,12 @@ def iMain(lArgs):
if bAreWeConnected() is False:
raise SystemExit("we are not connected")
if os.path.exists(oargs.proxy_ctl):
controller = oGetStemController(log_level=oargs.log_level, sock_or_pair=oargs.proxy_ctl)
else:
port =int(oargs.proxy_ctl)
controller = oGetStemController(log_level=oargs.log_level, sock_or_pair=port)
vwait_for_controller(controller, oargs.wait_boot)
sFile = oargs.torrc
if sFile and os.path.exists(sFile):
icheck_torrc(sFile, oargs)
twhitelist_set = set()
sFile = oargs.good_contacts
if False and sFile and os.path.exists(sFile):
if sFile and os.path.exists(sFile):
try:
with open(sFile, 'rt') as oFd:
aTRUST_DB = safe_load(oFd)
@ -726,8 +917,16 @@ def iMain(lArgs):
except Exception as e:
LOG.exception(f"Error reading YAML TrustDB {sFile} {e}")
if oargs.good_contacts:
good_contacts_tmp = oargs.good_contacts + '.tmp'
return oargs
def oStemController(oargs):
if os.path.exists(oargs.proxy_ctl):
controller = oGetStemController(log_level=oargs.log_level, sock_or_pair=oargs.proxy_ctl)
else:
port =int(oargs.proxy_ctl)
controller = oGetStemController(log_level=oargs.log_level, sock_or_pair=port)
vwait_for_controller(controller, oargs.wait_boot)
elt = controller.get_conf('UseMicrodescriptors')
if elt != '0':
@ -740,25 +939,31 @@ def iMain(lArgs):
if elt and elt != '{??}':
LOG.warn(f"{sEXCLUDE_EXIT_KEY} is in use already")
return controller
def tWhitelistSet(oargs, controller):
twhitelist_set = set()
twhitelist_set.update(set(lYamlGoodNodes(oargs.good_nodes)))
LOG.info(f"lYamlGoodNodes {len(twhitelist_set)} GuardNodes from {oargs.good_nodes}")
LOG.info(f"lYamlGoodNodes {len(twhitelist_set)} EntryNodes from {oargs.good_nodes}")
global oGOOD_NODES
t = set()
if 'IntroductionPoints' in oGOOD_NODES[oGOOD_ROOT]['Relays'].keys():
t = set(oGOOD_NODES[oGOOD_ROOT]['Relays']['IntroductionPoints'])
if sGOOD_ROOT in oGOOD_NODES and 'Relays' in oGOOD_NODES[sGOOD_ROOT] and \
'IntroductionPoints' in oGOOD_NODES[sGOOD_ROOT]['Relays'].keys():
t = set(oGOOD_NODES[sGOOD_ROOT]['Relays']['IntroductionPoints'])
w = set()
if 'Services' in oGOOD_NODES[oGOOD_ROOT].keys():
w = set(oGOOD_NODES[oGOOD_ROOT]['Services'])
if sGOOD_ROOT in oGOOD_NODES and 'Services' in oGOOD_NODES[sGOOD_ROOT].keys():
w = set(oGOOD_NODES[sGOOD_ROOT]['Services'])
twhitelist_set.update(w)
if len(w) > 0:
LOG.info(f"Whitelist {len(t)} relays from Services")
w = set()
if 'Onions' in oGOOD_NODES[oGOOD_ROOT].keys():
if 'Onions' in oGOOD_NODES[sGOOD_ROOT].keys():
# Provides the descriptor for a hidden service. The **address** is the
# '.onion' address of the hidden service
w = set(oGOOD_NODES[oGOOD_ROOT]['Onions'])
w = set(oGOOD_NODES[sGOOD_ROOT]['Onions'])
if oargs.white_onions:
w.update(oargs.white_onions.split(','))
if oargs.points_timeout > 0:
@ -768,6 +973,9 @@ def iMain(lArgs):
LOG.info(f"IntroductionPoints {len(t)} relays from {len(w)} IPs for onions")
twhitelist_set.update(t)
return twhitelist_set
def tExcludeSet(oargs):
texclude_set = set()
if oargs.bad_nodes and os.path.exists(oargs.bad_nodes):
if False and oargs.bad_sections:
@ -778,150 +986,117 @@ def iMain(lArgs):
section=sEXCLUDE_EXIT_KEY))
LOG.info(f"Preloaded {len(texclude_set)} bad fps")
return texclude_set
# async
def iMain(lArgs):
global aTRUST_DB
global aTRUST_DB_INDEX
global oBAD_NODES
global oGOOD_NODES
global lKNOWN_NODNS
global aRELAYS_DB
global aRELAYS_DB_INDEX
global tBAD_URLS
oargs = oMainPreamble(lArgs)
controller = oStemController(oargs)
twhitelist_set = tWhitelistSet(oargs, controller)
texclude_set = tExcludeSet(oargs)
ttrust_db_index = aTRUST_DB_INDEX.keys()
tdns_urls = set()
iFakeContact = 0
iTotalContacts = 0
aBadContacts = {}
lNotInaRELAYS_DB = []
aRELAYS_DB = {elt['fingerprint'].upper(): elt for
elt in lget_onionoo_relays(oargs)
if 'fingerprint' in elt}
lConds = oargs.bad_on.split(',')
iR = 0
relays = controller.get_server_descriptors()
lqueue = []
socksu = f"socks5://{oargs.proxy_host}:{oargs.proxy_port}"
for relay in relays:
iR += 1
if not is_valid_fingerprint(relay.fingerprint):
LOG.warn('Invalid Fingerprint: %s' % relay.fingerprint)
continue
relay.fingerprint = relay.fingerprint.upper()
fp = relay.fingerprint = relay.fingerprint.upper()
sofar = f"G:{len(aTRUST_DB.keys())} U:{len(tdns_urls)} F:{iFakeContact} BF:{len(texclude_set)} GF:{len(ttrust_db_index)} TC:{iTotalContacts} #{iR}"
fp = relay.fingerprint
if aRELAYS_DB and fp not in aRELAYS_DB.keys():
LOG.warn(f"{fp} not in aRELAYS_DB")
lNotInaRELAYS_DB += [fp]
if not relay.exit_policy.is_exiting_allowed():
if sEXCLUDE_EXIT_KEY == 'ExcludeNodes':
pass # LOG.debug(f"{relay.fingerprint} not an exit {sofar}")
else:
pass # LOG.warn(f"{relay.fingerprint} not an exit {sofar}")
# continue
# great contact had good fps and we are in them
if relay.fingerprint in aTRUST_DB_INDEX.keys():
# a cached entry
continue
if type(relay.contact) == bytes:
# dunno
relay.contact = str(relay.contact, 'UTF-8')
# fail if the contact is empty
if ('Empty' in lConds and not relay.contact):
LOG.info(f"{fp} skipping empty contact - Empty {sofar}")
texclude_set.add(relay.fingerprint)
continue
contact = sCleanEmail(relay.contact)
# fail if the contact has no email - unreliable
if ('NoEmail' in lConds and relay.contact and
('@' not in contact and 'email:' not in contact)):
LOG.info(f"{fp} skipping contact - NoEmail {contact} {sofar}")
LOG.debug(f"{fp} {relay.contact} {sofar}")
texclude_set.add(relay.fingerprint)
continue
# fail if the contact does not pass
if ('NotGood' in lConds and relay.contact and
('ciissversion:' not in relay.contact)):
LOG.info(f"{fp} skipping no ciissversion in contact {sofar}")
LOG.debug(f"{fp} {relay.contact} {sofar}")
texclude_set.add(relay.fingerprint)
continue
lConds = oargs.bad_on.split(',')
r = bCheckFp(relay, sofar, lConds, texclude_set, lNotInaRELAYS_DB)
if r is not True: continue
# if it has a ciissversion in contact we count it in total
iTotalContacts += 1
# fail if the contact does not have url: to pass
if relay.contact and 'url' not in relay.contact:
LOG.info(f"{fp} skipping unfetchable contact - no url {sofar}")
LOG.debug(f"{fp} {relay.contact} {sofar}")
if ('NotGood' in lConds): texclude_set.add(fp)
continue
# only proceed if 'NotGood' not in lConds:
if 'NotGood' not in lConds: continue
# fail if the contact does not have url: to pass
a = aParseContact(relay.contact, relay.fingerprint)
a = aParseContact(relay.contact, fp)
if not a:
LOG.warn(f"{relay.fingerprint} contact did not parse {sofar}")
LOG.warn(f"{fp} contact did not parse {sofar}")
texclude_set.add(fp)
continue
if 'url' in a and a['url']:
# fail if the contact uses a url we already know is bad
if a['url'] in tBAD_URLS:
LOG.info(f"{relay.fingerprint} skipping in tBAD_URLS {a['url']} {sofar}")
LOG.debug(f"{relay.fingerprint} {a} {sofar}")
# The fp is using a contact with a URL we know is bad
iFakeContact += 1
texclude_set.add(relay.fingerprint)
LOG.info(f"{fp} skipping in tBAD_URLS {a['url']} {sofar}")
LOG.debug(f"{fp} {a} {sofar}")
texclude_set.add(fp)
continue
domain = a['url'].replace('https://', '').replace('http://', '')
# fail if the contact uses a domain we already know does not resolve
if domain in lKNOWN_NODNS:
# The fp is using a contact with a URL we know is bogus
LOG.info(f"{relay.fingerprint} skipping in lKNOWN_NODNS {a} {sofar}")
LOG.debug(f"{relay.fingerprint} {relay} {sofar}")
iFakeContact += 1
texclude_set.add(relay.fingerprint)
LOG.info(f"{fp} skipping in lKNOWN_NODNS {a} {sofar}")
LOG.debug(f"{fp} {relay} {sofar}")
texclude_set.add(fp)
continue
# drop through
if 'dns-rsa' in relay.contact.lower():
# skip if the contact uses a dns-rsa url we dont handle
target = f"{relay.fingerprint}.{domain}"
target = f"{fp}.{domain}"
LOG.info(f"skipping 'dns-rsa' {target} {sofar}")
tdns_urls.add(target)
continue
if 'proof:uri-rsa' in relay.contact.lower():
# list(a.values())[0]
b = aVerifyContact(a,
relay.fingerprint,
oargs.https_cafile,
timeout=oargs.timeout,
host=oargs.proxy_host,
port=oargs.proxy_port)
# need to skip urllib3.exceptions.MaxRetryError
if not b or 'fps' not in b or not b['fps'] or not b['url']:
LOG.warn(f"{relay.fingerprint} did NOT VERIFY {sofar}")
LOG.debug(f"{relay.fingerprint} {b} {sofar}")
# If it's giving contact info that doesnt check out
# it could be a bad exit with fake contact info
texclude_set.add(relay.fingerprint)
aBadContacts[relay.fingerprint] = b
continue
if relay.fingerprint not in b['fps']:
LOG.warn(f"{relay.fingerprint} the FP IS NOT in the list of fps {sofar}")
# assume a fp is using a bogus contact
texclude_set.add(relay.fingerprint)
if domain in aDOMAIN_FPS.keys(): continue
a['fp'] = fp
if httpx:
lqueue.append(asyncio.create_task(
aVerifyContact(a=a,
fp=fp,
https_cafile=oargs.https_cafile,
timeout=oargs.timeout,
host=oargs.proxy_host,
port=oargs.proxy_port,
oargs=oargs)))
else:
b = aVerifyContact(a=a,
fp=fp,
https_cafile=oargs.https_cafile,
timeout=oargs.timeout,
host=oargs.proxy_host,
port=oargs.proxy_port,
oargs=oargs)
r = bProcessContact(b, texclude_set, aBadContacts, iFakeContact)
if r is False:
iFakeContact += 1
if httpx:
# for b in asyncio.as_completed(lqueue):
for b in lqueue:
# r = await b
r = b
r = bProcessContact(r, texclude_set, aBadContacts, iFakeContact)
if r is False:
iFakeContact += 1
aBadContacts[relay.fingerprint] = b
continue
LOG.info(f"{relay.fingerprint} GOOD {b['url']} {sofar}")
# add our contact info to the trustdb
aTRUST_DB[relay.fingerprint] = b
for elt in b['fps']:
aTRUST_DB_INDEX[elt] = b
elif r is True:
# iGoodContact += 1
pass
LOG.info(f"Filtered {len(twhitelist_set)} whitelisted relays")
texclude_set = texclude_set.difference(twhitelist_set)
# accept the dns-rsa urls for now until we test them
@ -932,7 +1107,7 @@ def iMain(lArgs):
with open(oargs.torrc_output, 'wt') as oFTorrc:
oFTorrc.write(f"{sEXCLUDE_EXIT_KEY} {','.join(texclude_set)}\n")
oFTorrc.write(f"{sINCLUDE_EXIT_KEY} {','.join(aTRUST_DB_INDEX.keys())}\n")
oFTorrc.write(f"{sINCLUDE_GUARD_KEY} {','.join(oGOOD_NODES[oGOOD_ROOT]['GuardNodes'])}\n")
oFTorrc.write(f"{sINCLUDE_GUARD_KEY} {','.join(oGOOD_NODES[sGOOD_ROOT]['EntryNodes'])}\n")
LOG.info(f"Wrote tor configuration to {oargs.torrc_output}")
oFTorrc.close()
@ -943,21 +1118,14 @@ def iMain(lArgs):
oFYaml.close()
if oargs.good_contacts != '' and aTRUST_DB:
with open(good_contacts_tmp, 'wt') as oFYaml:
yaml.dump(aTRUST_DB, oFYaml)
oFYaml.close()
if os.path.exists(oargs.good_contacts):
bak = oargs.good_contacts +'.bak'
os.rename(oargs.good_contacts, bak)
os.rename(good_contacts_tmp, oargs.good_contacts)
LOG.info(f"Wrote {len(list(aTRUST_DB.keys()))} good contact details to {oargs.good_contacts}")
vwrite_good_contacts(oargs)
oBAD_NODES[oBAD_ROOT]['ExcludeNodes']['BadExit'] = list(texclude_set)
oBAD_NODES[oBAD_ROOT]['ExcludeDomains'] = lKNOWN_NODNS
oBAD_NODES[oBAD_ROOT][sEXCLUDE_EXIT_KEY]['BadExit'] = list(texclude_set)
oBAD_NODES[oBAD_ROOT][sEXCLUDE_DOMAINS] = lKNOWN_NODNS
vwrite_badnodes(oargs, oBAD_NODES, str(len(texclude_set)))
oGOOD_NODES['GoodNodes']['Relays']['ExitNodes'] = list(aTRUST_DB_INDEX.keys())
# GuardNodes are readonl
# EntryNodes are readony
vwrite_goodnodes(oargs, oGOOD_NODES, len(aTRUST_DB_INDEX.keys()))
vwritefinale(oargs, lNotInaRELAYS_DB)
@ -965,50 +1133,48 @@ def iMain(lArgs):
retval = 0
try:
logging.getLogger('stem').setLevel(30)
try:
if texclude_set:
if texclude_set:
try:
LOG.info(f"{sEXCLUDE_EXIT_KEY} {len(texclude_set)} net bad exit relays")
controller.set_conf(sEXCLUDE_EXIT_KEY, texclude_set)
controller.set_conf(sEXCLUDE_EXIT_KEY, list(texclude_set))
except (Exception, stem.InvalidRequest, stem.SocketClosed,) as e: # noqa
LOG.error(f"Failed setting {sEXCLUDE_EXIT_KEY} bad exit relays in Tor {e}")
LOG.debug(repr(texclude_set))
retval += 1
except stem.SocketClosed as e: # noqa
LOG.error(f"Failed setting {sEXCLUDE_EXIT_KEY} bad exit relays in Tor")
retval += 1
if aTRUST_DB_INDEX.keys():
l = [elt for elt in aTRUST_DB_INDEX.keys() if len (elt) == 40]
try:
LOG.info(f"{sINCLUDE_EXIT_KEY} {len(l)} good relays")
controller.set_conf(sINCLUDE_EXIT_KEY, l)
except (Exception, stem.InvalidRequest, stem.SocketClosed) as e: # noqa
LOG.error(f"Failed setting {sINCLUDE_EXIT_KEY} good exit nodes in Tor {e}")
LOG.debug(repr(l))
retval += 1
try:
if aTRUST_DB_INDEX.keys():
LOG.info(f"{sINCLUDE_EXIT_KEY} {len(aTRUST_DB_INDEX.keys())} good relays")
controller.set_conf(sINCLUDE_EXIT_KEY, aTRUST_DB_INDEX.keys())
except stem.SocketClosed as e: # noqa
LOG.error(f"Failed setting {sINCLUDE_EXIT_KEY} good exit nodes in Tor")
retval += 1
try:
if 'GuardNodes' in oGOOD_NODES[oGOOD_ROOT].keys():
LOG.info(f"{sINCLUDE_GUARD_KEY} {len(oGOOD_NODES[oGOOD_ROOT]['GuardNodes'])} guard nodes")
if 'EntryNodes' in oGOOD_NODES[sGOOD_ROOT].keys():
try:
LOG.info(f"{sINCLUDE_GUARD_KEY} {len(oGOOD_NODES[sGOOD_ROOT]['EntryNodes'])} guard nodes")
# FixMe for now override StrictNodes it may be unusable otherwise
controller.set_conf(sINCLUDE_GUARD_KEY,
oGOOD_NODES[oGOOD_ROOT]['GuardNodes'])
cur = controller.get_conf('StrictNodes')
if oargs.strict_nodes and int(cur) != oargs.strict_nodes:
LOG.info(f"OVERRIDING StrictNodes to {oargs.strict_nodes}")
controller.set_conf('StrictNodes', oargs.strict_nodes)
else:
LOG.info(f"StrictNodes is set to {cur}")
except stem.SocketClosed as e: # noqa
LOG.errro(f"Failed setting {sINCLUDE_EXIT_KEY} good exit nodes in Tor")
retval += 1
oGOOD_NODES[sGOOD_ROOT]['EntryNodes'])
except (Exception, stem.InvalidRequest, stem.SocketClosed,) as e: # noqa
LOG.error(f"Failed setting {sINCLUDE_GUARD_KEY} guard nodes in Tor {e}")
LOG.debug(repr(list(oGOOD_NODES[sGOOD_ROOT]['EntryNodes'])))
retval += 1
cur = controller.get_conf('StrictNodes')
if oargs.strict_nodes and int(cur) != oargs.strict_nodes:
LOG.info(f"OVERRIDING StrictNodes to {oargs.strict_nodes}")
controller.set_conf('StrictNodes', oargs.strict_nodes)
else:
LOG.info(f"StrictNodes is set to {cur}")
except InvalidRequest as e:
# Unacceptable option value: Invalid router list.
LOG.error(str(e))
retval = 1
return retval
except KeyboardInterrupt:
return 0
except Exception as e:
LOG.exception(str(e))
retval = 2
return retval
finally:
# wierd we are getting stem errors during the final return
# with a traceback that doesnt correspond to any real flow
@ -1027,6 +1193,7 @@ def iMain(lArgs):
if __name__ == '__main__':
try:
# i = asyncio.run(iMain(sys.argv[1:]))
i = iMain(sys.argv[1:])
except IncorrectPassword as e:
LOG.error(e)

View file

@ -33,9 +33,12 @@ bHAVE_TORR = shutil.which('tor-resolve')
# in the wild we'll keep a copy here so we can avoid retesting
yKNOWN_NODNS = """
---
- a9.wtf
- heraldonion.org
- linkspartei.org
- pineapple.cx
- privacylayer.xyz
- prsv.ch
- thingtohide.nl
- tor-exit-2.aa78i2efsewr0neeknk.xyz
- tor-exit-3.aa78i2efsewr0neeknk.xyz
@ -44,7 +47,6 @@ yKNOWN_NODNS = """
- verification-for-nusenu.net
"""
# - 0x0.is
# - a9.wtf
# - aklad5.com
# - artikel5ev.de
# - arvanode.net

View file

@ -7,10 +7,15 @@ import datetime
import os
import re
import sys
import ipaddress
import warnings
import urllib3.util
from urllib3.util import parse_url as urlparse
from stem.control import Controller
# from stem.util.tor_tools import *
from urllib3.util import parse_url as urlparse
try:
# unbound is not on pypi
@ -20,11 +25,13 @@ except:
global LOG
import logging
import warnings
warnings.filterwarnings('ignore')
LOG = logging.getLogger()
logging.getLogger("urllib3").setLevel(logging.INFO)
# import urllib3.contrib.pyopenssl
# urllib3.contrib.pyopenssl.inject_into_urllib3()
# download this python library from
# https://github.com/erans/torcontactinfoparser
# sys.path.append('/home/....')
@ -211,7 +218,7 @@ def find_validation_candidates(controller,
result[domain] = {prooftype: [fingerprint]}
return result
def oDownloadUrlRequests(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050):
def oDownloadUrlRequests(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050, content_type='text/plain', session=None):
import requests
# socks proxy used for outbound web requests (for validation of proofs)
proxy = {'https': "socks5h://{host}:{port}"}
@ -225,6 +232,7 @@ def oDownloadUrlRequests(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050):
# urllib3.connection WARNING Certificate did not match expected hostname:
head = requests.head(uri, timeout=timeout, proxies=proxy, headers=headers)
except Exception as e:
LOG.exception(f"{e}")
raise TrustorError(f"HTTP HEAD request failed for {uri} {e}")
if head.status_code >= 300:
@ -234,15 +242,15 @@ def oDownloadUrlRequests(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050):
if not os.path.exists(sCAfile):
raise TrustorError(f"File not found CAfile {sCAfile}")
if session is None: session = requests.sessions.Session()
try:
with requests.sessions.Session() as session:
oReqResp = session.request(method="get", url=uri,
proxies=proxy,
timeout=timeout,
headers=headers,
allow_redirects=False,
verify=True
)
oReqResp = session.request(method="get", url=uri,
proxies=proxy,
timeout=timeout,
headers=headers,
allow_redirects=False,
verify=True
)
except:
LOG.warn("HTTP GET request failed for %s" % uri)
raise
@ -257,13 +265,61 @@ def oDownloadUrlRequests(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050):
raise TrustorError(f'Redirect detected {uri} vs %s (final)' % (oReqResp.url))
return oReqResp
logging.getLogger("urllib3").setLevel(logging.INFO)
# import urllib3.contrib.pyopenssl
# urllib3.contrib.pyopenssl.inject_into_urllib3()
# There's no point in using asyncio because of duplicate urls in the tasks
async def oDownloadUrlHttpx(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050, content_type='text/plain'):
    """Fetch *uri* with httpx through a SOCKS5 proxy, refusing redirects.

    A HEAD request is made first; the body is only fetched with GET when the
    HEAD status is below 300 and its Content-Type matches.

    :param uri: URL to download.
    :param sCAfile: path of the CA bundle used to verify TLS certificates.
    :param timeout: per-request timeout passed to httpx.
    :param host: SOCKS5 proxy host; no proxy is used when host or port is
        empty.
    :param port: SOCKS5 proxy port.
    :param content_type: required Content-Type prefix; an empty value
        disables the check.
    :returns: the httpx response of the GET request.
    :raises TrustorError: when the CA file is missing, the HEAD request
        fails, a status or Content-Type check fails, or a redirect is seen.
        Exceptions raised by the GET request itself are logged and re-raised
        unchanged.
    """
    import asyncio

    import httpcore
    import httpx

    # The CA file is handed to the client constructor, so check it up front.
    if not os.path.exists(sCAfile):
        raise TrustorError(f"File not found CAfile {sCAfile}")

    # socks proxy used for outbound web requests (for validation of proofs)
    proxy = f"socks5://{host}:{port}" if host and port else None

    # we use this UA string when connecting to webservers to fetch rsa-fingerprint.txt proof files
    # https://nusenu.github.io/ContactInfo-Information-Sharing-Specification/#uri-rsa
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0'}

    LOG.debug("fetching %s...." % uri)
    # verify= and max_redirects= are client-level options in httpx; they are
    # not accepted by client.get()/client.head().
    async with httpx.AsyncClient(proxies=proxy,
                                 verify=sCAfile,
                                 max_redirects=0) as client:
        try:
            # https://www.python-httpx.org/advanced/
            head = await client.head(uri, timeout=timeout, headers=headers)
        except Exception as e:
            LOG.exception(f"{e}")
            raise TrustorError(f"HTTP HEAD request failed for {uri} {e}") from e

        if head.status_code >= 300:
            raise TrustorError(f"HTTP Errorcode {head.status_code}")
        if content_type and not head.headers.get('Content-Type', '').startswith(content_type):
            raise TrustorError(f"HTTP Content-Type != {content_type}")

        try:
            oReqResp = await client.get(uri,
                                        timeout=timeout,
                                        headers=headers,
                                        follow_redirects=False)
        except (asyncio.exceptions.CancelledError,
                httpcore.PoolTimeout,
                Exception,) as e:
            LOG.warning(f"HTTP GET request failed for {uri} {e}")
            raise

        # report the status of the GET response, not of the earlier HEAD
        if oReqResp.status_code != 200:
            LOG.warning(f"HTTP Errorcode {oReqResp.status_code}")
            raise TrustorError(f"HTTP Errorcode {oReqResp.status_code}")
        if content_type and not oReqResp.headers.get('Content-Type', '').startswith(content_type):
            LOG.warning(f"HTTP Content-Type != {content_type}")
            raise TrustorError(f"HTTP Content-Type != {content_type}")

        # check for redirects (not allowed as per spec)
        if oReqResp.url != uri:
            LOG.error(f'Redirect detected {uri} vs {oReqResp.url} (final)')
            raise TrustorError(f'Redirect detected {uri} vs {oReqResp.url} (final)')
        return oReqResp
def ballow_subdomain_matching(hostname, dnsnames):
@ -276,7 +332,6 @@ def ballow_subdomain_matching(hostname, dnsnames):
from urllib3.util.ssl_match_hostname import (CertificateError, _dnsname_match,
_ipaddress_match)
def my_match_hostname(cert, hostname):
"""Verify that *cert* (in decoded format as returned by
SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125
@ -370,13 +425,14 @@ urllib3.connection._match_hostname = _my_match_hostname
from urllib3.contrib.socks import SOCKSProxyManager
# from urllib3 import Retry
def oDownloadUrlUrllib3(uri, sCAfile,
timeout=30,
host='127.0.0.1',
port=9050,
content_type=''):
def oDownloadUrlUrllib3Socks(uri,
sCAfile,
timeout=30,
host='127.0.0.1',
port=9050,
session=None,
content_type='text/plain'):
"""Theres no need to use requests here and it
adds too many layers on the SSL to be able to get at things
"""