diff --git a/README.md b/README.md index ebf5c72..5d92ee8 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,3 @@ -# https://github.com/nusenu/noContactInfo_Exit_Excluder -# https://github.com/TheSmashy/TorExitRelayExclude - This extends nusenu's basic idea of using the stem library to dynamically exclude nodes that are likely to be bad by putting them on the ExcludeNodes or ExcludeExitNodes setting of a running Tor. @@ -11,21 +8,23 @@ The basic cut is to exclude Exit nodes that do not have a contact. That can be extended to nodes that do not have an email in the contact etc. But there's a problem, and your Tor notice.log will tell you about it: -you could exclude the nodes needed to access hidden services etc. -So we need to add to the process the concept of a whitelist. -In addition, we may have our own blacklist of nodes we want to exclude. +you could exclude the nodes needed to access hidden services or +directories. So we need to add to the process the concept of a whitelist. +In addition, we may have our own blacklist of nodes we want to exclude, +or use these lists for other applications like selektor. So we make two files that are structured in YAML: ``` -/etc/tor/torrc-goodnodes.yaml -Nodes: - IntroductionPoints: - - $NODEFINGERPRINT +/etc/tor/yaml/torrc-goodnodes.yaml +GoodNodes: + Relays: + IntroductionPoints: + - NODEFINGERPRINT ... By default all sections of the goodnodes.yaml are used as a whitelist. -/etc/tor/torrc-badnodes.yaml -Nodes: +/etc/tor/yaml/torrc-badnodes.yaml +BadNodes: ExcludeExitNodes: BadExit: # $0000000000000000000000000000000000000007 @@ -44,15 +43,31 @@ exclusion: the ```--contact``` commandline arg is a comma sep list of conditions More may be added later. 
Because you don't want to exclude the introduction points to any onion -you want to connect to, ```--white_onions``` should whitelist the -introduction points to a comma sep list of onions, but is +you want to connect to, ```--white_onions``` should whitelist the +introduction points to a comma sep list of onions, but is currently broken in stem 1.8.0: see: * https://github.com/torproject/stem/issues/96 * https://gitlab.torproject.org/legacy/trac/-/issues/25417 -```--bad_output``` will write the torrc configuration to a file. +```--torrc_output``` will write the torrc ExcludeNodes configuration to a file. -```--details_output``` will write the lookup URLs of the excluded nodes to a file +Now for the final part: we look up the Contact info of every server +that is currently in our Tor, and check it for its existence. +If it fails to provide the well-known URL, we assume it's a bogus +relay and add it to a list of nodes that goes on ExcludeNodes - +not just exclude Exit. -For usage, do ```python3 exclude_badExits.py --help` +If the Contact info is good we add the list of fingerprints +to ExitNodes, a whitelist of relays to use as exits. + +```--proof_output``` will write the contact info as a ciiss dictionary +to a YAML file. If the proof is uri-rsa, the well-known file of fingerprints +is downloaded and the fingerprints are added on a 'fps' field we create +of that fingerprint's entry of the YAML dictionary. This file is read at the +beginning of the program to start with a trust database, and only new +contact info from new relays is added to the dictionary. +You can expect it to take an hour or two the first time this is run: +>700 domains. + +For usage, do ```python3 exclude_badExits.py --help``` diff --git a/exclude_badExits.py b/exclude_badExits.py index 8b22c53..9dcb8a4 100644 --- a/exclude_badExits.py +++ b/exclude_badExits.py @@ -12,24 +12,24 @@ on the ExcludeNodes or ExcludeExitNodes setting of a running Tor. 
The basic cut is to exclude Exit nodes that do not have a contact. That can be extended to nodes that do not have an email in the contact etc. """ -""" -But there's a problem, and your Tor notice.log will tell you about it: +"""But there's a problem, and your Tor notice.log will tell you about it: you could exclude the nodes needed to access hidden services or -directorues. So we need to add to the process the concept of a whitelist. +directories. So we need to add to the process the concept of a whitelist. In addition, we may have our own blacklist of nodes we want to exclude, or use these lists for other applications like selektor. So we make two files that are structured in YAML: ``` -/etc/tor/torrc-goodnodes.yaml -Nodes: - IntroductionPoints: - - $NODEFINGERPRINT +/etc/tor/yaml/torrc-goodnodes.yaml +GoodNodes: + Relays: + IntroductionPoints: + - NODEFINGERPRINT ... By default all sections of the goodnodes.yaml are used as a whitelist. -/etc/tor/torrc-badnodes.yaml -Nodes: +/etc/tor/yaml/torrc-badnodes.yaml +BadNodes: ExcludeExitNodes: BadExit: # $0000000000000000000000000000000000000007 @@ -48,24 +48,32 @@ exclusion: the ```--contact``` commandline arg is a comma sep list of conditions More may be added later. Because you don't want to exclude the introduction points to any onion -you want to connect to, ```--white_onions``` should whitelist the -introduction points to a comma sep list of onions, but is +you want to connect to, ```--white_onions``` should whitelist the +introduction points to a comma sep list of onions, but is currently broken in stem 1.8.0: see: * https://github.com/torproject/stem/issues/96 * https://gitlab.torproject.org/legacy/trac/-/issues/25417 -```--bad_output``` will write the torrc ExcludeNodes configuration to a file. +```--torrc_output``` will write the torrc ExcludeNodes configuration to a file. + +Now for the final part: we look up the Contact info of every server +that is currently in our Tor, and check it for its existence. 
+If it fails to provide the well-known URL, we assume it's a bogus +relay and add it to a list of nodes that goes on ExcludeNodes - +not just exclude Exit. -```--details_output``` will write the lookup URLs of the excluded nodes to a file +If the Contact info is good we add the list of fingerprints +to ExitNodes, a whitelist of relays to use as exits. ```--proof_output``` will write the contact info as a ciiss dictionary to a YAML file. If the proof is uri-rsa, the well-known file of fingerprints -is downloaded and the fingerprints are added to the on the 'fps' field - of that fingerprint entry of the YAML dictionary. This file is read at the +is downloaded and the fingerprints are added on a 'fps' field we create +of that fingerprint's entry of the YAML dictionary. This file is read at the beginning of the program to start with a trust database, and only new -relays are added to the dictionary. The 'fps' field is emptied if the -host fails to provide the well-known file. You can expect it to take -an hour or two the first time this is run: >700 domains. +contact info from new relays is added to the dictionary. + +You can expect it to take an hour or two the first time this is run: +>700 domains. 
For usage, do ```python3 exclude_badExits.py --help` @@ -74,12 +82,12 @@ For usage, do ```python3 exclude_badExits.py --help` import sys import os -import getpass import re import time import argparse from io import StringIO +from stem import InvalidRequest from stem.control import Controller from stem.connection import IncorrectPassword from stem.util.tor_tools import is_valid_fingerprint @@ -87,25 +95,35 @@ try: import yaml except: yaml = None - try: - import coloredlogs + from unbound import ub_ctx,RR_TYPE_TXT,RR_CLASS_IN +except: + ub_ctx = RR_TYPE_TXT = RR_CLASS_IN = None + +try: if 'COLOREDLOGS_LEVEL_STYLES' not in os.environ: os.environ['COLOREDLOGS_LEVEL_STYLES'] = 'spam=22;debug=28;verbose=34;notice=220;warning=202;success=118,bold;error=124;critical=background=red' # https://pypi.org/project/coloredlogs/ + import coloredlogs except ImportError as e: coloredlogs = False -from trustor_poc import lDownloadUrlFps +from trustor_poc import lDownloadUrlFps, idns_validate global LOG import logging +import warnings +warnings.filterwarnings('ignore') LOG = logging.getLogger() aTRUST_DB = {} sDETAILS_URL = "https://metrics.torproject.org/rs.html#details/" # You can call this while bootstrapping +sEXCLUDE_EXIT_KEY = 'ExcludeNodes' +sINCLUDE_EXIT_KEY = 'ExitNodes' +sINCLUDE_GUARD_KEY = 'EntryNodes' def oMakeController(sSock='', port=9051): + import getpass if sSock and os.path.exists(sSock): controller = Controller.from_socket_file(path=sSock) else: @@ -115,36 +133,84 @@ def oMakeController(sSock='', port=9051): controller.authenticate(p) return controller -def lYamlBadNodes(sFile='/etc/tor/torrc-badnodes.yaml', - section='ExcludeExitNodes', - lWanted=['Hetzner','BadExit']): +oBAD_NODES = {} +oBAD_ROOT = 'BadNodes' +def lYamlBadNodes(sFile, + section=sEXCLUDE_EXIT_KEY, + lWanted=['BadExit']): + global oBAD_NODES root = 'ExcludeNodes' l = [] if not yaml: return l if os.path.exists(sFile): with open(sFile, 'rt') as oFd: o = yaml.safe_load(oFd) - for elt in 
o[root][section].keys(): - if lWanted and elt not in lWanted: continue - l += o[root][section][elt] - # yq '.ExcludeNodes.Hetzner' < /etc/tor/torrc-badnodes.yaml |sed -e 's/^[[]/ExcludeNodesHetzner = [/' - # yq '.ExcludeNodes.Hetzner|.[]' < /etc/tor/torrc-badnodes.yaml - # yq '.ExcludeNodes.BadExit|.[]' < /etc/tor/torrc-badnodes.yaml + oBAD_NODES = o + + # BROKEN +# for elt in o[oBAD_ROOT][root][section].keys(): +# if lWanted and elt not in lWanted: continue +# # l += o[oBAD_ROOT][root][section][elt] + return l +def icheck_torrc(sFile, oArgs): + l = open(sFile, 'rt').readlines() + a = {} + for elt in l: + k,v = elt.split(' ', 1) + a[k] = v + keys = list(a.keys()) + + if 'HashedControlPassword' not in keys: + LOG.info('Add HashedControlPassword for security') + print('run: tor --hashcontrolpassword ') + if 'ExcludeNodes' in keys: + elt = 'ExcludeNodes.ExcludeExitNodes.BadExit' + LOG.warn(f"Remove ExcludeNodes and move then to {oArgs.bad_nodes}") + print(f"move to the {elt} section as a list") + if 'GuardNodes' in keys: + elt = 'GoodNodes.GuardNodes' + LOG.warn(f"Remove GuardNodes and move then to {oArgs.good_nodes}") + print(f"move to the {elt} section as a list") + if 'ExcludeNodes' in keys: + elt = 'ExcludeNodes.ExcludeExitNodes.BadExit' + LOG.warn(f"Remove ExcludeNodes and move then to {oArgs.bad_nodes}") + print(f"move to the {elt} section as a list") + if 'ControlSocket' not in keys and os.path.exists('/run/tor/control'): + LOG.info('Add ControlSocket /run/tor/control for us') + print('ControlSocket /run/tor/control GroupWritable RelaxDirModeCheck') + if 'UseMicrodescriptors' not in keys or keys['UseMicrodescriptors'] != '1': + LOG.info('Add UseMicrodescriptors 0 for us') + print('UseMicrodescriptors 0') + if 'AutomapHostsSuffixes' not in keys: + LOG.info('Add AutomapHostsSuffixes for onions') + print('AutomapHostsSuffixes .exit,.onion') + if 'AutoMapHostsOnResolve' not in keys: + LOG.info('Add AutoMapHostsOnResolve for onions') + print('AutoMapHostsOnResolve 
1') + if 'VirtualAddrNetworkIPv4' not in keys: + LOG.info('Add VirtualAddrNetworkIPv4 for onions') + print('VirtualAddrNetworkIPv4 172.16.0.0/12') + return 0 + +oGOOD_NODES = {} +oGOOD_ROOT = 'GoodNodes' def lYamlGoodNodes(sFile='/etc/tor/torrc-goodnodes.yaml'): - root='IncludeNodes' + global oGOOD_NODES + root = oGOOD_ROOT l = [] if not yaml: return l if os.path.exists(sFile): with open(sFile, 'rt') as oFd: o = yaml.safe_load(oFd) - for elt in o[root].keys(): - l += o[root][elt] + oGOOD_NODES = o + if 'GuardNodes' in o[root].keys(): + l += o[oGOOD_ROOT]['GuardNodes'] # yq '.Nodes.IntroductionPoints|.[]' < /etc/tor/torrc-goodnodes.yaml return l -def lIntroductionPoints(lOnions): +def lIntroductionPoints(controller, lOnions): """not working in stem 1.8.3""" l = [] for elt in lOnions: @@ -162,11 +228,14 @@ def lIntroductionPoints(lOnions): l += [introduction_point.address] return l -# memory? -lINTS = ['ciissversion', 'uplinkbw', 'signingkeylifetime'] -lBOOLS = ['dnssec', 'dnsqname', 'aesni', 'autoupdate', 'dnslocalrootzone' +lBAD_URLS = [] +lATS = ['abuse', 'email'] +lINTS = ['ciissversion', 'uplinkbw', 'signingkeylifetime', 'memory'] +lBOOLS = ['dnssec', 'dnsqname', 'aesni', 'autoupdate', 'dnslocalrootzone', 'sandbox', 'offlinemasterkey'] -def aVerifyContact(a, fp, timeout=20, host='127.0.0.1', port=9050): +def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050): + global lBAD_URLS + # cleanups for yaml for elt in lINTS: if elt in a: a[elt] = int(a[elt]) @@ -176,43 +245,69 @@ def aVerifyContact(a, fp, timeout=20, host='127.0.0.1', port=9050): a[elt] = True else: a[elt] = False - - # just stick fp in for now - a.update({'fps': [fp]}) + for elt in lATS: + if elt in a: + a[elt] = a[elt].replace('[]', '@') + + a.update({'fps': []}) + # test the url for fps and add it to the array - if 'proof' not in a: - # only support uri for now - LOG.warn(f"{fp} 'proof' not in {list(a.keys())}") - return a - if a['proof'] not in ['uri-rsa']: - # only support 
uri for now - LOG.warn(f"{fp} proof={a['proof']} not supported yet") + if 'proof' not in a: + LOG.warn(f"{fp} 'proof' not in {list(a.keys())}") return a + if 'url' not in a: - LOG.warn(f"{fp} 'proof' is 'uri-rsa' but url not in {list(a.keys())}") - return a + if 'uri' not in a: + a['url'] = '' + LOG.warn(f"{fp} url and uri not in {list(a.keys())}") + return a + a['url'] = a['uri'] + LOG.debug(f"{fp} 'uri' but not 'url' in {list(a.keys())}") + # drop through + if a['url'].startswith('http:'): a['url'] = 'https:' +a['url'][5:] elif not a['url'].startswith('https:'): a['url'] = 'https:' +a['url'] + + # domain should be a unique ket for contacts domain = a['url'][8:] + + if a['proof'] not in ['uri-rsa']: + # only support uri for now + if False and ub_ctx: + fp_domain = fp +'.'+domain + if idns_validate(fp_domain, + libunbound_resolv_file='resolv.conf', + dnssec_DS_file='dnssec-root-trust', + ) == 0: + pass + LOG.warn(f"{fp} proof={a['proof']} not supported yet") + return a + LOG.debug(f"{len(list(a.keys()))} contact fields for {fp}") - LOG.info(f"Downloading from {domain} for {fp}") try: - l = lDownloadUrlFps(domain, timeout=20, host=host, port=port) + LOG.debug(f"Downloading from {domain} for {fp}") + l = lDownloadUrlFps(domain, https_cafile, + timeout=timeout, host=host, port=port) except Exception as e: LOG.exception(f"Error downloading from {domain} for {fp} {e}") - # should we put it's FPs from TRUST_DB on the ExcludeExitNodes? 
- a['fps'] = [] + lBAD_URLS += [a['url']] else: if not l: + # already squacked in lD LOG.warn(f"Downloading from {domain} failed for {fp}") - a['fps'] = [] + lBAD_URLS += [a['url']] else: - a['fps'] = l + a['fps'] = [elt for elt in l if elt and len(elt) == 40 + and not elt.startswith('#')] return a def aParseContact(contact, fp): + """ + See the Tor ContactInfo Information Sharing Specification v2 + https://nusenu.github.io/ContactInfo-Information-Sharing-Specification/ + """ contact = str(contact, 'UTF-8') l = [line for line in contact.strip().replace('"', '').split(' ') if ':' in line] @@ -224,11 +319,93 @@ def aParseContact(contact, fp): a = yaml.safe_load(oFd) return a +def bAreWeConnected(): + # FixMe: Linux only + sFile = f"/proc/{os.getpid()}/net/route" + if not os.path.isfile(sFile): return None + i = 0 + for elt in open(sFile, "r").readlines(): + if elt.startswith('Iface'): continue + if elt.startswith('lo'): continue + i += 1 + return i > 0 + +def vwait_for_controller(controller, wait_boot): + if bAreWeConnected() is False: + raise SystemExit("we are not connected") + percent = i = 0 + # You can call this while boostrapping + while percent < 100 and i < wait_boot: + bootstrap_status = controller.get_info("status/bootstrap-phase") + progress_percent = re.match('.* PROGRESS=([0-9]+).*', bootstrap_status) + percent = int(progress_percent.group(1)) + LOG.info(f"Bootstrapping {percent}%") + time.sleep(5) + i += 5 + +def vsetup_logging(log_level, logfile=''): + global LOG + add = True + + # stem fucks up logging + from stem.util import log + logging.getLogger('stem').setLevel(30) + + logging._defaultFormatter = logging.Formatter(datefmt='%m-%d %H:%M:%S') + logging._defaultFormatter.default_time_format = '%m-%d %H:%M:%S' + logging._defaultFormatter.default_msec_format = '' + + kwargs = dict(level=log_level, + force=True, + format='%(levelname)-4s %(message)s') + + if logfile: + add = logfile.startswith('+') + sub = logfile.startswith('-') + if add or sub: + 
logfile = logfile[1:] + kwargs['filename'] = logfile + + if coloredlogs: + # https://pypi.org/project/coloredlogs/ + aKw = dict(level=log_level, + logger=LOG, + stream=sys.stdout if add else None, + fmt='%(levelname)-4s %(message)s' + ) + coloredlogs.install(**aKw) + if logfile: + oHandler = logging.FileHandler(logfile) + LOG.addHandler(oHandler) + LOG.info(f"CSetting log_level to {log_level!s}") + else: + logging.basicConfig(**kwargs) + if add and logfile: + oHandler = logging.StreamHandler(sys.stdout) + LOG.addHandler(oHandler) + LOG.info(f"SSetting log_level to {log_level!s}") + + def oMainArgparser(_=None): - # 'Mode: 0=chat 1=chat+audio 2=chat+audio+video default: 0' + + try: + from OpenSSL import SSL + lCAfs = SSL._CERTIFICATE_FILE_LOCATIONS + except: + lCAfs = [] + + CAfs = [] + for elt in lCAfs: + if os.path.exists(elt): + CAfs.append(elt) + if not CAfs: + CAfs = [''] parser = argparse.ArgumentParser(add_help=True, epilog=__doc__) + parser.add_argument('--https_cafile', type=str, + help="Certificate Authority file (in PEM)", + default=CAfs[0]) parser.add_argument('--proxy_host', '--proxy-host', type=str, default='127.0.0.1', help='proxy host') @@ -238,164 +415,292 @@ def oMainArgparser(_=None): default='/run/tor/control', type=str, help='control socket - or port') - parser.add_argument('--timeout', default=20, type=int, - help='proxy download timeout') + + parser.add_argument('--torrc', + default='', + type=str, + help='torrc to check for suggestions') + parser.add_argument('--timeout', default=30, type=int, + help='proxy download connect timeout') parser.add_argument('--good_nodes', type=str, - default='/etc/tor/torrc-goodnodes.yaml', + default='/etc/tor/yaml/torrc-goodnodes.yaml', help="Yaml file of good nodes that should not be excluded") parser.add_argument('--bad_nodes', type=str, - default='/etc/tor/torrc-badnodes.yaml', + default='/etc/tor/yaml/torrc-badnodes.yaml', help="Yaml file of bad nodes that should also be excluded") 
parser.add_argument('--contact', type=str, default='Empty,NoEmail', help="comma sep list of conditions - Empty,NoEmail") + parser.add_argument('--bad_contacts', type=str, + default='/tmp/badcontacts.yaml', + help="Yaml file of bad contacts that bad FPs are using") parser.add_argument('--wait_boot', type=int, default=120, help="Seconds to wait for Tor to booststrap") parser.add_argument('--log_level', type=int, default=20, help="10=debug 20=info 30=warn 40=error") parser.add_argument('--bad_sections', type=str, default='Hetzner,BadExit', - help="sections of the badnodes.yaml to use, comma separated, '' defaults to all") + help="sections of the badnodes.yaml to use, comma separated, '' BROKEN") parser.add_argument('--white_onions', type=str, default='', help="comma sep. list of onions to whitelist their introduction points - BROKEN") - parser.add_argument('--bad_output', type=str, default='', + parser.add_argument('--torrc_output', type=str, default='', help="Write the torrc configuration to a file") - parser.add_argument('--details_output', type=str, default='', - help="Write the lookup URLs of the excluded nodes to a file") parser.add_argument('--proof_output', type=str, default='', help="Write the proof data of the included nodes to a YAML file") return parser +def vwrite_badnodes(oArgs): + global oBAD_NODES + if oArgs.bad_nodes: + tmp = oArgs.bad_nodes +'.tmp' + bak = oArgs.bad_nodes +'.bak' + with open(tmp, 'wt') as oFYaml: + yaml.dump(oBAD_NODES, indent=2, stream=oFYaml) + LOG.info(f"Wrote {len(list(exit_excludelist))} proof details to {oArgs.bad_nodes}") + oFYaml.close() + if os.path.exists(oArgs.bad_nodes): + os.rename(oArgs.bad_nodes, bak) + os.rename(tmp, oArgs.bad_nodes) + +def vwrite_goodnodes(oArgs): + global oGOOD_NODES + if oArgs.good_nodes: + tmp = oArgs.good_nodes +'.tmp' + bak = oArgs.good_nodes +'.bak' + with open(tmp, 'wt') as oFYaml: + yaml.dump(oGOOD_NODES, indent=2, stream=oFYaml) + LOG.info(f"Wrote {len(list(exit_excludelist))} proof details 
to {oArgs.good_nodes}") + oFYaml.close() + if os.path.exists(oArgs.good_nodes): + os.rename(oArgs.good_nodes, bak) + os.rename(tmp, oArgs.good_nodes) + def iMain(lArgs): - global oTOX_OARGS - global aTRUST_DB parser = oMainArgparser() oArgs = parser.parse_args(lArgs) - aKw = dict(level=oArgs.log_level, - format='%(name)s %(levelname)-4s %(message)s', - stream=sys.stdout, - force=True) - logging.basicConfig(**aKw) - logging.getLogger('stem').setLevel(oArgs.log_level) + vsetup_logging(oArgs.log_level) + if bAreWeConnected() is False: + raise SystemExit("we are not connected") + + sFile = oArgs.torrc + if sFile and os.path.exists(sFile): + icheck_torrc(sFile, oArgs) + global aTRUST_DB sFile = oArgs.proof_output if sFile and os.path.exists(sFile): - with open(sFile, 'rt') as oFd: - aTRUST_DB = yaml.safe_load(oFd) + try: + with open(sFile, 'rt') as oFd: + aTRUST_DB = yaml.safe_load(oFd) + except: + aTRUST_DB = {} if oArgs.proxy_ctl.startswith('/') or os.path.exists(oArgs.proxy_ctl): controller = oMakeController(sSock=oArgs.proxy_ctl) else: - port =int(oArgs.proxy_ctl) + port =int(oArgs.proxy_ctl) controller = oMakeController(port=port) - + + vwait_for_controller(controller, oArgs.wait_boot) + + if oArgs.proof_output: + proof_output_tmp = oArgs.proof_output + '.tmp' + elt = controller.get_conf('UseMicrodescriptors') if elt != '0' : - LOG.warn('"UseMicrodescriptors 0" is required in your /etc/tor/torrc. Exiting.') + LOG.error('"UseMicrodescriptors 0" is required in your /etc/tor/torrc. Exiting.') controller.set_conf('UseMicrodescriptors', 0) # does it work dynamically? 
- # return 2 - - percent = i = 0 - # You can call this while boostrapping - while percent < 100 and i < oArgs.wait_boot: - bootstrap_status = controller.get_info("status/bootstrap-phase") - progress_percent = re.match('.* PROGRESS=([0-9]+).*', bootstrap_status) - percent = int(progress_percent.group(1)) - LOG.info(f"Bootstrapping {percent}%") - time.sleep(5) - i += 5 - elt = controller.get_conf('ExcludeExitNodes') + return 2 + + elt = controller.get_conf(sEXCLUDE_EXIT_KEY) if elt and elt != '{??}': - LOG.warn(f'ExcludeExitNodes is in use already') + LOG.warn(f"{sEXCLUDE_EXIT_KEY} is in use already") - lGood = lYamlGoodNodes(oArgs.good_nodes) - LOG.info(f'lYamlGoodNodes {len(lGood)}') + lGoodOverrideSet = lYamlGoodNodes(oArgs.good_nodes) + LOG.info(f"lYamlGoodNodes {len(lGoodOverrideSet)} from {oArgs.good_nodes}") if oArgs.white_onions: - l = lIntroductionPoints(oArgs.white_onions.split(',')) - lGood += l - - relays = controller.get_server_descriptors() + l = lIntroductionPoints(controller, oArgs.white_onions.split(',')) + lGoodOverrideSet += l + + exit_excludelist = [] + if oArgs.bad_nodes and os.path.exists(oArgs.bad_nodes): + if False and oArgs.bad_sections: + # BROKEN + sections = oArgs.bad_sections.split(',') + exit_excludelist = lYamlBadNodes(oArgs.bad_nodes, + lWanted=sections, + section=sEXCLUDE_EXIT_KEY) + else: + exit_excludelist = lYamlBadNodes(oArgs.bad_nodes) - if oArgs.bad_sections: - sections = oArgs.bad_sections.split(',') - exit_excludelist = lYamlBadNodes(lWanted=sections) - else: - exit_excludelist = lYamlBadNodes() - - LOG.info(f'lYamlBadNodes {len(exit_excludelist)}') + LOG.info(f"lYamlBadNodes {len(exit_excludelist)}") - if oArgs.details_output: - oFd = open(oArgs.details_output, 'wt') - else: - oFd = None + relays = controller.get_server_descriptors() - lProofUriFps = [] + lProofGoodFps = [] + iDnsContact = 0 + iBadContact = 0 + iFakeContact = 0 + aBadContacts = {} aProofUri = {} lConds = oArgs.contact.split(',') for relay in relays: - if not 
relay.exit_policy.is_exiting_allowed(): continue if not is_valid_fingerprint(relay.fingerprint): LOG.warn('Invalid Fingerprint: %s' % relay.fingerprint) continue + relay.fingerprint = relay.fingerprint.upper() + sofar = f"G:{len(list(aProofUri.keys()))} U:{iDnsContact} F:{iFakeContact} BF:{len(exit_excludelist)} GF:{len(lProofGoodFps)}" + if not relay.exit_policy.is_exiting_allowed(): + if sEXCLUDE_EXIT_KEY == 'ExcludeNodes': + LOG.debug(f"{relay.fingerprint} not an exit {sofar}") + else: + LOG.warn(f"{relay.fingerprint} not an exit {sofar}") + # continue + + if relay.fingerprint in lProofGoodFps: + # we already have it. + continue + if relay.fingerprint in aTRUST_DB: - if aTRUST_DB[relay.fingerprint]['fps']: - lProofUriFps += aTRUST_DB[relay.fingerprint]['fps'] + if aTRUST_DB[relay.fingerprint]['fps'] and \ + relay.fingerprint in aTRUST_DB[relay.fingerprint]['fps']: + lProofGoodFps += relay.fingerprint + continue - if relay.fingerprint in lProofUriFps: - # we already have it. + if relay.contact and b'dns-rsa' in relay.contact.lower(): + LOG.info(f"{relay.fingerprint} skipping 'dns-rsa' {sofar}") + iDnsContact += 1 continue + if relay.contact and b'proof:uri-rsa' in relay.contact.lower(): a = aParseContact(relay.contact, relay.fingerprint) - if not a: continue - b = aVerifyContact(list(a.values())[0], relay.fingerprint, + if not a: + LOG.warn(f"{relay.fingerprint} did not parse {sofar}") + exit_excludelist.append(relay.fingerprint) + continue + if 'url' in a and a['url'] and a['url'] in lBAD_URLS: + # The fp is using a contact with a URL we know is bad + LOG.info(f"{relay.fingerprint} skipping in lBAD_URLS {a['url']} {sofar}") + exit_excludelist.append(relay.fingerprint) + continue + + b = aVerifyContact(list(a.values())[0], + relay.fingerprint, + oArgs.https_cafile, timeout=oArgs.timeout, host=oArgs.proxy_host, port=oArgs.proxy_port) - if not b: + + if not b['fps'] or not b['url']: + LOG.warn(f"{relay.fingerprint} did not verify {sofar}") + # If it's giving 
contact info that doesnt check out + # it could be a bad exit with fake contact info + exit_excludelist.append(relay.fingerprint) + aBadContacts[relay.fingerprint] = b continue - if 'fps' in b and b['fps'] and relay.fingerprint in b['fps']: - lProofUriFps += b['fps'] - aProofUri[relay.fingerprint] = b + + if relay.fingerprint not in b['fps']: + LOG.warn(f"{relay.fingerprint} the fp is not in the list of fps {sofar}") + # assume a fp is using a bogus contact + exit_excludelist.append(relay.fingerprint) + iFakeContact += 1 + aBadContacts[relay.fingerprint] = b + continue + + # great contact had good fps and we are in them + lProofGoodFps += b['fps'] + LOG.info(f"{relay.fingerprint} verified {b['url']} {sofar}") + # add our contact info to the trustdb + aProofUri[relay.fingerprint] = b + if oArgs.proof_output and oArgs.log_level <= 20: + # as we go along then clobber + with open(proof_output_tmp, 'wt') as oFYaml: + yaml.dump(aProofUri, indent=2, stream=oFYaml) + oFYaml.close() + continue + if ('Empty' in lConds and not relay.contact) or \ ('NoEmail' in lConds and relay.contact and not b'@' in relay.contact): exit_excludelist.append(relay.fingerprint) - if oFd: - oFd.write(sDETAILS_URL +relay.fingerprint +"\n") - - exit_excludelist = list(set(exit_excludelist).difference(set(lGood))) - LOG.info(f'ExcludeExitNodes {len(exit_excludelist)} net bad exit nodes') - controller.set_conf('ExcludeExitNodes', exit_excludelist) - elt = controller.get_conf('ExcludeExitNodes') - if oArgs.bad_output: - with open(oArgs.bad_output, 'wt') as oFdE: - oFdE.write(f"ExcludeExitNodes {','.join(exit_excludelist)}\n") - LOG.info(f"Wrote tor configuration to {oArgs.bad_output}") - if lProofUriFps: - LOG.info(f'ExitNodes {len(lProofUriFps)} good exit nodes') - controller.set_conf('ExitNodes', lProofUriFps) - - if oFd: - LOG.info(f"Wrote details URLs to {oArgs.details_output}") - oFd.close() + + exit_excludelist = list(set(exit_excludelist).difference(set(lGoodOverrideSet))) + + if 
oArgs.proof_output and aProofUri: + with open(proof_output_tmp, 'wt') as oFYaml: + yaml.dump(aProofUri, indent=2, stream=oFYaml) + LOG.info(f"Wrote {len(list(aProofUri))} proof details to {oArgs.proof_output}") + oFYaml.close() + if os.path.exists(oArgs.proof_output): + bak = oArgs.proof_output +'.bak' + os.rename(oArgs.proof_output, bak) + os.rename(proof_output_tmp, oArgs.proof_output) + + if oArgs.torrc_output and exit_excludelist: + with open(oArgs.torrc_output, 'wt') as oFTorrc: + oFTorrc.write(f"{sEXCLUDE_EXIT_KEY} {','.join(exit_excludelist)}\n") + oFTorrc.write(f"{sINCLUDE_EXIT_KEY} {','.join(lProofGoodFps)}\n") + oFTorrc.write(f"{sINCLUDE_GUARD_KEY} {','.join(o[oGOOD_ROOT]['GuardNodes'])}\n") + LOG.info(f"Wrote tor configuration to {oArgs.torrc_output}") + oFTorrc.close() + + if oArgs.bad_contacts and aBadContacts: + # for later analysis + with open(oArgs.bad_contacts, 'wt') as oFYaml: + yaml.dump(aBadContacts, indent=2, stream=oFYaml) + oFYaml.close() + + global oBAD_NODES + oBAD_NODES['BadNodes']['ExcludeNodes']['BadExit'] = exit_excludelist + vwrite_badnodes(oArgs) + # nothing changed vwrite_goodnodes(oArgs) - if oArgs.proof_output: - with open(oArgs.proof_output, 'wt') as oFdD: - s = yaml.dump_all(aProofUri, indent=2, stream=None) - oFdD.write(s +'\n') - LOG.info(f"Wrote proof details to {oArgs.proof_output}") - oFdD.close() - - logging.getLogger('stem').setLevel(40) - for elt in controller._event_listeners: - controller.remove_event_listener(elt) - controller.close() - - return(0) + retval = 0 + try: + logging.getLogger('stem').setLevel(30) + if exit_excludelist: + LOG.info(f"{sEXCLUDE_EXIT_KEY} {len(exit_excludelist)} net bad exit nodes") + controller.set_conf(sEXCLUDE_EXIT_KEY, exit_excludelist) + + if lProofGoodFps: + LOG.info(f"{sINCLUDE_EXIT_KEY} {len(lProofGoodFps)} good nodes") + controller.set_conf(sINCLUDE_EXIT_KEY, lProofGoodFps) + + o = oGOOD_NODES + if 'GuardNodes' in o[oGOOD_ROOT].keys(): + LOG.info(f"{sINCLUDE_GUARD_KEY} 
{len(o[oGOOD_ROOT]['GuardNodes'])} guard nodes") + controller.set_conf(sINCLUDE_GUARD_KEY, o[oGOOD_ROOT]['GuardNodes']) + return retval + + except InvalidRequest as e: + # Unacceptable option value: Invalid router list. + LOG.error(str(e)) + LOG.warn(f"lProofGoodFps: {lProofGoodFps}") + LOG.warn(f"{sEXCLUDE_EXIT_KEY}: {exit_excludelist}") + retval = 1 + return retval + except KeyboardInterrupt: + return 0 + except Exception as e: + LOG.exception(str(e)) + retval = 2 + return retval + finally: + # wierd we are getting stem errors during the final return + # with a traceback that doesnt correspond to any real flow + # File "/usr/lib/python3.9/site-packages/stem/control.py", line 2474, in set_conf + # self.set_options({param: value}, False) + logging.getLogger('stem').setLevel(40) + try: + for elt in controller._event_listeners: + controller.remove_event_listener(elt) + controller.close() + except Exception as e: + LOG.warn(str(e)) if __name__ == '__main__': try: @@ -403,8 +708,9 @@ if __name__ == '__main__': except IncorrectPassword as e: LOG.error(e) i = 1 + except KeyboardInterrupt: + i = 0 except Exception as e: LOG.exception(e) - i = 1 + i = 2 sys.exit(i) - diff --git a/trustor_poc.py b/trustor_poc.py index ccb15b1..be36a90 100644 --- a/trustor_poc.py +++ b/trustor_poc.py @@ -1,22 +1,26 @@ -# -*- mode: python; indent-tabs-mode: nil; py-indent-offset: 4; coding: utf-8 -# -*- coding: utf-8 -*- +# -*- mode: python; indent-tabs-mode: nil; py-indent-offset: 4; coding: utf-8 - import os import sys +import datetime + +import requests from stem.control import Controller from stem.util.tor_tools import * from urllib.parse import urlparse -import requests -import datetime try: + # unbound is not on pypi from unbound import ub_ctx,RR_TYPE_TXT,RR_CLASS_IN except: ub_ctx = RR_TYPE_TXT = RR_CLASS_IN = None global LOG import logging +import warnings +warnings.filterwarnings('ignore') LOG = logging.getLogger() - + # download this python library from # 
https://github.com/erans/torcontactinfoparser #sys.path.append('/home/....') @@ -24,15 +28,6 @@ try: from torcontactinfo import TorContactInfoParser except: TorContactInfoParser = None - -# tor ControlPort IP -controller_address = '127.0.0.1' - -dnssec_DS_file = 'dnssec-root-trust' - -# this is not the system wide /etc/resolv.conf -# use dnscrypt-proxy to encrypt your DNS and route it via tor's SOCKSPort -libunbound_resolv_file = 'resolv.conf' # for now we support max_depth = 0 only # this PoC version has no support for recursion @@ -42,9 +37,6 @@ supported_max_depths = ['0'] # https://github.com/nusenu/ContactInfo-Information-Sharing-Specification#ciissversion accepted_ciissversions = ['2'] -# https://github.com/nusenu/ContactInfo-Information-Sharing-Specification#proof -accepted_proof_types = ['uri-rsa','dns-rsa'] - # https://stackoverflow.com/questions/2532053/validate-a-hostname-string # FIXME this check allows non-fqdn names def is_valid_hostname(hostname): @@ -56,7 +48,7 @@ def is_valid_hostname(hostname): return all(allowed.match(x) for x in hostname.split(".")) -def read_local_trust_config(trust_config='trust_config'): +def read_local_trust_config(trust_config): ''' reads a local configuration file containing trusted domains and returns them in an array @@ -103,28 +95,28 @@ def read_local_validation_cache(validation_cache_file, trusted_domains=[]): result = [] if trusted_domains == []: return result - if (os.path.isfile(validation_cache_file)): - f = open(validation_cache_file) - for line in f: - line = line.strip() - if line[0] == '#': - continue - try: - domain, fingerprint, prooftype, dt = line.split(':') - except: - LOG.error('invalid trust cache entry detected: %s aborting!' 
% line) - sys.exit(12) - - if domain in trusted_domains: - result.append(fingerprint) - else: - print('ignoring cached entry for untrusted domain %s' % domain) + if os.path.isfile(validation_cache_file): + with open(validation_cache_file, 'rt') as f: + for line in f: + line = line.strip() + if line[0] == '#': + continue + try: + domain, fingerprint, prooftype, dt = line.split(':') + except: + LOG.error('invalid trust cache entry detected: %s aborting!' % line) + sys.exit(12) + + if domain in trusted_domains: + result.append(fingerprint) + else: + LOG.warn('ignoring cached entry for untrusted domain %s' % domain) else: - print("Validation cache file not present. It will be created.") + LOG.info("Validation cache file not present. It will be created.") return result -def get_controller(address='127.0.0.1',port=9151,password=''): +def get_controller(address='127.0.0.1', port=9151, password=''): ''' connects to a local tor client via the tor ControlPort and returns a controller that allows us to easily set specific tor @@ -136,7 +128,7 @@ def get_controller(address='127.0.0.1',port=9151,password=''): controller = Controller.from_port(address=address, port=port) controller.authenticate(password=password) except Exception as e: - LOG.error(f'Failed to connect to the tor process, {e}') + LOG.error(f"Failed to connect to the tor process, {e}") sys.exit(1) if not controller.is_set('UseMicrodescriptors'): @@ -155,6 +147,9 @@ def find_validation_candidates(controller, trusted_domains=[],validation_cache=[ example content: { 'emeraldonion.org' : { 'uri-rsa': ['044600FD968728A6F220D5347AD897F421B757C0', '09DCA3360179C6C8A5A20DDDE1C54662965EF1BA']}} ''' + # https://github.com/nusenu/ContactInfo-Information-Sharing-Specification#proof + accepted_proof_types = ['uri-rsa','dns-rsa'] + result = {} @@ -207,7 +202,7 @@ def find_validation_candidates(controller, trusted_domains=[],validation_cache=[ result[domain] = {prooftype : [fingerprint]} return result -def lDownloadUrlFps(domain, 
timeout=20, host='127.0.0.1', port=9050): +def lDownloadUrlFps(domain, sCAfile, timeout=30, host='127.0.0.1', port=9050): uri="https://"+domain+"/.well-known/tor-relay/rsa-fingerprint.txt" # socks proxy used for outbound web requests (for validation of proofs) proxy = {'https': 'socks5h://' +host +':' +str(port)} @@ -217,33 +212,53 @@ def lDownloadUrlFps(domain, timeout=20, host='127.0.0.1', port=9050): LOG.debug("fetching %s...." % uri) try: + # grr. fix urllib3 + # urllib3.connection WARNING Certificate did not match expected hostname: head = requests.head(uri, timeout=timeout, proxies=proxy, headers=headers) except Exception as e: - print("HTTP HEAD request failed for %s" % uri) - print(e) + LOG.warn(f"HTTP HEAD request failed for {uri} {e}") head = None return [] if head.status_code != 200: return [] if not head.headers['Content-Type'].startswith('text/plain'): return [] + + assert os.path.exists(sCAfile), sCAfile try: - fullfile = requests.get(uri, proxies=proxy, timeout=10, headers=headers) + from https_adapter import HTTPSAdapter + except Exception as e: + LOG.warn(f"Could not import HTTPSAdapter {e}") + HTTPSAdapter = None + HTTPSAdapter = None + try: + with requests.sessions.Session() as session: + if HTTPSAdapter: + # FixMe: upgrade to TLS1.3 + session.mount("https://", HTTPSAdapter(pool_maxsize=1, + max_retries=3,)) + fullfile = session.request(method="get", url=uri, + proxies=proxy, timeout=timeout, + headers=headers, + allow_redirects=False, + verify=True + ) except: - print("HTTP GET request failed for %s" % uri) + LOG.warn("HTTP GET request failed for %s" % uri) return [] if fullfile.status_code != 200 or not fullfile.headers['Content-Type'].startswith('text/plain'): return [] - + #check for redirects (not allowed as per spec) if fullfile.url != uri: LOG.error('Redirect detected %s vs %s (final)' % (uri, fullfile.url)) return [] - - well_known_content = [i.strip() for i in fullfile.text.upper().split('\n')] + + well_known_content = 
fullfile.text.upper().strip().split('\n') + well_known_content = [i for i in well_known_content if i and len(i) == 40] return well_known_content -def validate_proofs(candidates, validation_cache_file): +def validate_proofs(candidates, validation_cache_file, timeout=20, host='127.0.0.1', port=9050): ''' This function takes the return value of find_validation_candidates() and validated them according to their proof type (uri-rsa, dns-rsa) @@ -257,7 +272,7 @@ def validate_proofs(candidates, validation_cache_file): for domain in candidates.keys(): for prooftype in candidates[domain].keys(): if prooftype == 'uri-rsa': - well_known_content = lDownloadUrlFps(domain, timeout=20, host='127.0.0.1', port=9050) + well_known_content = lDownloadUrlFps(domain, timeout=timeout, host=host, port=port) for fingerprint in candidates[domain][prooftype]: if fingerprint in well_known_content: # write cache entry @@ -268,7 +283,10 @@ def validate_proofs(candidates, validation_cache_file): elif prooftype == 'dns-rsa' and ub_ctx: for fingerprint in candidates[domain][prooftype]: fp_domain = fingerprint+'.'+domain - if dns_validate(fp_domain): + if idns_validate(fp_domain, + libunbound_resolv_file='resolv.conf', + dnssec_DS_file='dnssec-root-trust', + ) == 0: count += 1 f.write('%s:%s:%s:%s\n' % (domain, fingerprint, prooftype, dt_utc)) else: @@ -276,7 +294,10 @@ def validate_proofs(candidates, validation_cache_file): f.close() LOG.info('successfully validated %s new (not yet validated before) relays' % count) -def dns_validate(domain): +def idns_validate(domain, + libunbound_resolv_file='resolv.conf', + dnssec_DS_file='dnssec-root-trust', + ): ''' performs DNS TXT lookups and verifies the reply - is DNSSEC valid and @@ -284,27 +305,31 @@ def dns_validate(domain): - the DNS record contains a hardcoded string as per specification https://nusenu.github.io/ContactInfo-Information-Sharing-Specification/#dns-rsa ''' - if not ub_ctx: return False + if not ub_ctx: return -1 + + # this is not the 
system wide /etc/resolv.conf + # use dnscrypt-proxy to encrypt your DNS and route it via tor's SOCKSPort + ctx = ub_ctx() if (os.path.isfile(libunbound_resolv_file)): ctx.resolvconf(libunbound_resolv_file) else: LOG.error('libunbound resolv config file: "%s" is missing, aborting!' % libunbound_resolv_file) - sys.exit(5) + return 5 if (os.path.isfile(dnssec_DS_file)): ctx.add_ta_file(dnssec_DS_file) else: LOG.error('DNSSEC trust anchor file "%s" is missing, aborting!' % dnssec_DS_file) - sys.exit(6) + return 6 status, result = ctx.resolve(domain, RR_TYPE_TXT, RR_CLASS_IN) if status == 0 and result.havedata: if len(result.rawdata) == 1 and result.secure: # ignore the first byte, it is the TXT length if result.data.as_raw_data()[0][1:] == b'we-run-this-tor-relay': - return True - return False + return 0 + return 1 def configure_tor(controller, trusted_fingerprints, exitonly=True): ''' @@ -317,32 +342,41 @@ def configure_tor(controller, trusted_fingerprints, exitonly=True): relay_count = len(trusted_fingerprints) if relay_count < 41: - print('Too few trusted relays (%s), aborting!' % relay_count) + LOG.error('Too few trusted relays (%s), aborting!' 
% relay_count) sys.exit(15) try: controller.set_conf('ExitNodes', trusted_fingerprints) - print('limited exits to %s relays' % relay_count) + LOG.error('limited exits to %s relays' % relay_count) except Exception as e: - print('Failed to set ExitNodes tor config to trusted relays') - print(e) + LOG.exception('Failed to set ExitNodes tor config to trusted relays') sys.exit(20) if __name__ == '__main__': trust_config = 'trust_config' assert os.path.exists(trust_config) trusted_domains = read_local_trust_config(trust_config) - + validation_cache_file = 'validation_cache' trusted_fingerprints = read_local_validation_cache(validation_cache_file, trusted_domains=trusted_domains) # tor ControlPort password controller_password='' + # tor ControlPort IP + controller_address = '127.0.0.1' + timeout = 20 + port = 9050 controller = get_controller(address=controller_address,password=controller_password) - - r = find_validation_candidates(controller,validation_cache=trusted_fingerprints,trusted_domains=trusted_domains) - validate_proofs(r, validation_cache_file) + + r = find_validation_candidates(controller, + validation_cache=trusted_fingerprints, + trusted_domains=trusted_domains) + validate_proofs(r, validation_cache_file, + timeout=timeout, + host=controller_address, + port=port) # refresh list with newly validated fingerprints - trusted_fingerprints = read_local_validation_cache(trusted_domains=trusted_domains) + trusted_fingerprints = read_local_validation_cache(validation_cache_file, + trusted_domains=trusted_domains) configure_tor(controller, trusted_fingerprints)