From 2be5e6e66e8ce4c30400d927b0b9d9affc1bade3 Mon Sep 17 00:00:00 2001 From: emdee Date: Mon, 7 Nov 2022 11:38:22 +0000 Subject: [PATCH] Added well-known downloading --- README.md | 58 +++++++- exclude_badExits.py | 192 +++++++++++++++++++----- trustor_poc.py | 348 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 564 insertions(+), 34 deletions(-) create mode 100644 trustor_poc.py diff --git a/README.md b/README.md index ead7d27..ebf5c72 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,58 @@ -# exclude_badExits +# https://github.com/nusenu/noContactInfo_Exit_Excluder +# https://github.com/TheSmashy/TorExitRelayExclude + +This extends nusenu's basic idea of using the stem library to +dynamically exclude nodes that are likely to be bad by putting them +on the ExcludeNodes or ExcludeExitNodes setting of a running Tor. +* https://github.com/nusenu/noContactInfo_Exit_Excluder +* https://github.com/TheSmashy/TorExitRelayExclude + +The basic cut is to exclude Exit nodes that do not have a contact. +That can be extended to nodes that do not have an email in the contact etc. + +But there's a problem, and your Tor notice.log will tell you about it: +you could exclude the nodes needed to access hidden services etc. +So we need to add to the process the concept of a whitelist. +In addition, we may have our own blacklist of nodes we want to exclude. + +So we make two files that are structured in YAML: +``` +/etc/tor/torrc-goodnodes.yaml +Nodes: + IntroductionPoints: + - $NODEFINGERPRINT + ... +By default all sections of the goodnodes.yaml are used as a whitelist. 
+ +/etc/tor/torrc-badnodes.yaml +Nodes: + ExcludeExitNodes: + BadExit: + # $0000000000000000000000000000000000000007 +``` +That part requires [PyYAML](https://pyyaml.org/wiki/PyYAML) +https://github.com/yaml/pyyaml/ + +Right now only the ExcludeExitNodes section is used but we may add ExcludeNodes +later, and by default all sub-sections of the badnodes.yaml are used as an +ExcludeExitNodes but it can be customized with the lWanted commandline arg. + +The original idea has also been extended to add different conditions for +exclusion: the ```--contact``` commandline arg is a comma sep list of conditions: +* Empty - no contact info +* NoEmail - no @ sign in the contact. +More may be added later. + +Because you don't want to exclude the introduction points to any onion +you want to connect to, ```--white_onions``` should whitelist the +introduction points to a comma sep list of onions, but is +currently broken in stem 1.8.0: see: +* https://github.com/torproject/stem/issues/96 +* https://gitlab.torproject.org/legacy/trac/-/issues/25417 + +```--bad_output``` will write the torrc configuration to a file. + +```--details_output``` will write the lookup URLs of the excluded nodes to a file + +For usage, do ```python3 exclude_badExits.py --help``` diff --git a/exclude_badExits.py b/exclude_badExits.py index 32992ad..8b22c53 100644 --- a/exclude_badExits.py +++ b/exclude_badExits.py @@ -11,7 +11,8 @@ on the ExcludeNodes or ExcludeExitNodes setting of a running Tor. The basic cut is to exclude Exit nodes that do not have a contact. That can be extended to nodes that do not have an email in the contact etc. - +""" +""" But there's a problem, and your Tor notice.log will tell you about it: you could exclude the nodes needed to access hidden services or directorues. So we need to add to the process the concept of a whitelist. 
@@ -53,24 +54,35 @@ currently broken in stem 1.8.0: see: * https://github.com/torproject/stem/issues/96 * https://gitlab.torproject.org/legacy/trac/-/issues/25417 -```--bad_output``` will write the torrc configuration to a file. +```--bad_output``` will write the torrc ExcludeNodes configuration to a file. ```--details_output``` will write the lookup URLs of the excluded nodes to a file +```--proof_output``` will write the contact info as a ciiss dictionary +to a YAML file. If the proof is uri-rsa, the well-known file of fingerprints +is downloaded and the fingerprints are added to the 'fps' field + of that fingerprint entry of the YAML dictionary. This file is read at the +beginning of the program to start with a trust database, and only new +relays are added to the dictionary. The 'fps' field is emptied if the +host fails to provide the well-known file. You can expect it to take +an hour or two the first time this is run: >700 domains. + For usage, do ```python3 exclude_badExits.py --help``` """ import sys -from stem.control import Controller -from stem.util.tor_tools import is_valid_fingerprint + import os import getpass import re import time import argparse +from io import StringIO from stem.control import Controller +from stem.connection import IncorrectPassword +from stem.util.tor_tools import is_valid_fingerprint try: import yaml except: @@ -83,16 +95,18 @@ try: # https://pypi.org/project/coloredlogs/ except ImportError as e: coloredlogs = False +from trustor_poc import lDownloadUrlFps global LOG import logging LOG = logging.getLogger() +aTRUST_DB = {} sDETAILS_URL = "https://metrics.torproject.org/rs.html#details/" # You can call this while bootstrapping -def oMakeController(sSock='/run/tor/control', port=9051): - if os.path.exists(sSock): +def oMakeController(sSock='', port=9051): + if sSock and os.path.exists(sSock): controller = Controller.from_socket_file(path=sSock) else: controller = Controller.from_port(port=port) @@ -148,23 +162,85 @@ def 
lIntroductionPoints(lOnions): l += [introduction_point.address] return l +# memory? +lINTS = ['ciissversion', 'uplinkbw', 'signingkeylifetime'] +lBOOLS = ['dnssec', 'dnsqname', 'aesni', 'autoupdate', 'dnslocalrootzone', + 'sandbox', 'offlinemasterkey'] +def aVerifyContact(a, fp, timeout=20, host='127.0.0.1', port=9050): + for elt in lINTS: + if elt in a: + a[elt] = int(a[elt]) + for elt in lBOOLS: + if elt in a: + if a[elt] in ['y','yes', 'true', 'True']: + a[elt] = True + else: + a[elt] = False + + # just stick fp in for now + a.update({'fps': [fp]}) + # test the url for fps and add it to the array + if 'proof' not in a: + # only support uri for now + LOG.warn(f"{fp} 'proof' not in {list(a.keys())}") + return a + if a['proof'] not in ['uri-rsa']: + # only support uri for now + LOG.warn(f"{fp} proof={a['proof']} not supported yet") + return a + if 'url' not in a: + LOG.warn(f"{fp} 'proof' is 'uri-rsa' but url not in {list(a.keys())}") + return a + if a['url'].startswith('http:'): + a['url'] = 'https:' +a['url'][5:] + elif not a['url'].startswith('https:'): + a['url'] = 'https:' +a['url'] + domain = a['url'][8:] + LOG.debug(f"{len(list(a.keys()))} contact fields for {fp}") + LOG.info(f"Downloading from {domain} for {fp}") + try: + l = lDownloadUrlFps(domain, timeout=timeout, host=host, port=port) + except Exception as e: + LOG.exception(f"Error downloading from {domain} for {fp} {e}") + # should we put it's FPs from TRUST_DB on the ExcludeExitNodes? 
+ a['fps'] = [] + else: + if not l: + LOG.warn(f"Downloading from {domain} failed for {fp}") + a['fps'] = [] + else: + a['fps'] = l + return a + +def aParseContact(contact, fp): + contact = str(contact, 'UTF-8') + l = [line for line in contact.strip().replace('"', '').split(' ') + if ':' in line] + LOG.debug(f"{fp} {len(l)} fields") + s = f'"{fp}":\n' + s += '\n'.join([f" {line}\"".replace(':',': \"', 1) + for line in l]) + oFd = StringIO(s) + a = yaml.safe_load(oFd) + return a + def oMainArgparser(_=None): # 'Mode: 0=chat 1=chat+audio 2=chat+audio+video default: 0' - if not os.path.exists('/proc/sys/net/ipv6'): - bIpV6 = 'False' - else: - bIpV6 = 'True' - lIpV6Choices=[bIpV6, 'False'] - parser = argparse.ArgumentParser(add_help=True) + parser = argparse.ArgumentParser(add_help=True, + epilog=__doc__) parser.add_argument('--proxy_host', '--proxy-host', type=str, default='127.0.0.1', help='proxy host') - parser.add_argument('--proxy_port', '--proxy-port', default=9051, type=int, + parser.add_argument('--proxy_port', '--proxy-port', default=9050, type=int, help='proxy control port') parser.add_argument('--proxy_ctl', '--proxy-ctl', - default='/run/tor/control', type=str, - help='control socket - takes precedence over proxy_port') + default='/run/tor/control', + type=str, + help='control socket - or port') + parser.add_argument('--timeout', default=20, type=int, + help='proxy download timeout') + parser.add_argument('--good_nodes', type=str, default='/etc/tor/torrc-goodnodes.yaml', help="Yaml file of good nodes that should not be excluded") @@ -187,10 +263,13 @@ def oMainArgparser(_=None): help="Write the torrc configuration to a file") parser.add_argument('--details_output', type=str, default='', help="Write the lookup URLs of the excluded nodes to a file") + parser.add_argument('--proof_output', type=str, default='', + help="Write the proof data of the included nodes to a YAML file") return parser def iMain(lArgs): global oTOX_OARGS + global aTRUST_DB parser = 
oMainArgparser() oArgs = parser.parse_args(lArgs) @@ -200,14 +279,25 @@ def iMain(lArgs): force=True) logging.basicConfig(**aKw) logging.getLogger('stem').setLevel(oArgs.log_level) - - controller = oMakeController(oArgs.proxy_ctl, oArgs.proxy_port) + + sFile = oArgs.proof_output + if sFile and os.path.exists(sFile): + with open(sFile, 'rt') as oFd: + aTRUST_DB = yaml.safe_load(oFd) + + if oArgs.proxy_ctl.startswith('/') or os.path.exists(oArgs.proxy_ctl): + controller = oMakeController(sSock=oArgs.proxy_ctl) + else: + port =int(oArgs.proxy_ctl) + controller = oMakeController(port=port) elt = controller.get_conf('UseMicrodescriptors') if elt != '0' : - LOG.error('"UseMicrodescriptors 0" is required in your /etc/tor/torrc. Exiting.') - return 2 - + LOG.warn('"UseMicrodescriptors 0" is required in your /etc/tor/torrc. Exiting.') + controller.set_conf('UseMicrodescriptors', 0) + # does it work dynamically? + # return 2 + percent = i = 0 # You can call this while boostrapping while percent < 100 and i < oArgs.wait_boot: @@ -225,7 +315,7 @@ def iMain(lArgs): LOG.info(f'lYamlGoodNodes {len(lGood)}') if oArgs.white_onions: - l = lIntroductionPoints(oArgs.white_onions.split(,)) + l = lIntroductionPoints(oArgs.white_onions.split(',')) lGood += l relays = controller.get_server_descriptors() @@ -243,30 +333,63 @@ def iMain(lArgs): else: oFd = None + lProofUriFps = [] + aProofUri = {} lConds = oArgs.contact.split(',') for relay in relays: if not relay.exit_policy.is_exiting_allowed(): continue + if not is_valid_fingerprint(relay.fingerprint): + LOG.warn('Invalid Fingerprint: %s' % relay.fingerprint) + continue + + if relay.fingerprint in aTRUST_DB: + if aTRUST_DB[relay.fingerprint]['fps']: + lProofUriFps += aTRUST_DB[relay.fingerprint]['fps'] + + if relay.fingerprint in lProofUriFps: + # we already have it. 
+ continue + if relay.contact and b'proof:uri-rsa' in relay.contact.lower(): + a = aParseContact(relay.contact, relay.fingerprint) + if not a: continue + b = aVerifyContact(list(a.values())[0], relay.fingerprint, + timeout=oArgs.timeout, + host=oArgs.proxy_host, + port=oArgs.proxy_port) + if not b: + continue + if 'fps' in b and b['fps'] and relay.fingerprint in b['fps']: + lProofUriFps += b['fps'] + aProofUri[relay.fingerprint] = b if ('Empty' in lConds and not relay.contact) or \ ('NoEmail' in lConds and relay.contact and not b'@' in relay.contact): - if is_valid_fingerprint(relay.fingerprint): - exit_excludelist.append(relay.fingerprint) - if oFd: - oFd.write(sDETAILS_URL +relay.fingerprint +"\n") - else: - LOG.warn('Invalid Fingerprint: %s' % relay.fingerprint) - if oFd: - LOG.info(f"Wrote details URLs to {oArgs.details_output}") - oFd.close() - + exit_excludelist.append(relay.fingerprint) + if oFd: + oFd.write(sDETAILS_URL +relay.fingerprint +"\n") + exit_excludelist = list(set(exit_excludelist).difference(set(lGood))) LOG.info(f'ExcludeExitNodes {len(exit_excludelist)} net bad exit nodes') controller.set_conf('ExcludeExitNodes', exit_excludelist) elt = controller.get_conf('ExcludeExitNodes') if oArgs.bad_output: - with open(oArgs.bad_output, 'wt') as oFd: - oFd.write(f"ExcludeExitNodes {','.join(exit_excludelist)}\n") + with open(oArgs.bad_output, 'wt') as oFdE: + oFdE.write(f"ExcludeExitNodes {','.join(exit_excludelist)}\n") LOG.info(f"Wrote tor configuration to {oArgs.bad_output}") + if lProofUriFps: + LOG.info(f'ExitNodes {len(lProofUriFps)} good exit nodes') + controller.set_conf('ExitNodes', lProofUriFps) + if oFd: + LOG.info(f"Wrote details URLs to {oArgs.details_output}") + oFd.close() + + if oArgs.proof_output: + with open(oArgs.proof_output, 'wt') as oFdD: + s = yaml.dump(aProofUri, indent=2, stream=None) + oFdD.write(s +'\n') + LOG.info(f"Wrote proof details to {oArgs.proof_output}") + oFdD.close() + logging.getLogger('stem').setLevel(40) for 
elt in controller._event_listeners: controller.remove_event_listener(elt) @@ -277,6 +400,9 @@ def iMain(lArgs): if __name__ == '__main__': try: i = iMain(sys.argv[1:]) + except IncorrectPassword as e: + LOG.error(e) + i = 1 except Exception as e: LOG.exception(e) i = 1 diff --git a/trustor_poc.py b/trustor_poc.py new file mode 100644 index 0000000..ccb15b1 --- /dev/null +++ b/trustor_poc.py @@ -0,0 +1,348 @@ +# -*- mode: python; indent-tabs-mode: nil; py-indent-offset: 4; coding: utf-8 -# -*- coding: utf-8 -*- + +import os +import sys +from stem.control import Controller +from stem.util.tor_tools import * +from urllib.parse import urlparse +import requests +import datetime + +try: + from unbound import ub_ctx,RR_TYPE_TXT,RR_CLASS_IN +except: + ub_ctx = RR_TYPE_TXT = RR_CLASS_IN = None + +global LOG +import logging +LOG = logging.getLogger() + +# download this python library from +# https://github.com/erans/torcontactinfoparser +#sys.path.append('/home/....') +try: + from torcontactinfo import TorContactInfoParser +except: + TorContactInfoParser = None + +# tor ControlPort IP +controller_address = '127.0.0.1' + +dnssec_DS_file = 'dnssec-root-trust' + +# this is not the system wide /etc/resolv.conf +# use dnscrypt-proxy to encrypt your DNS and route it via tor's SOCKSPort +libunbound_resolv_file = 'resolv.conf' + +# for now we support max_depth = 0 only +# this PoC version has no support for recursion +# https://github.com/nusenu/tor-relay-operator-ids-trust-information#trust-information-consumers +supported_max_depths = ['0'] + +# https://github.com/nusenu/ContactInfo-Information-Sharing-Specification#ciissversion +accepted_ciissversions = ['2'] + +# https://github.com/nusenu/ContactInfo-Information-Sharing-Specification#proof +accepted_proof_types = ['uri-rsa','dns-rsa'] + +# https://stackoverflow.com/questions/2532053/validate-a-hostname-string +# FIXME this check allows non-fqdn names +def is_valid_hostname(hostname): + if len(hostname) > 255: + return False + if 
hostname[-1] == ".": + hostname = hostname[:-1] # strip exactly one dot from the right, if present + allowed = re.compile("(?!-)[A-Z\d-]{1,63}(? 0: + if 'ciissversion' in parsed_ci and 'proof' in parsed_ci and 'url' in parsed_ci: + prooftype = parsed_ci['proof'] + ciurl = parsed_ci['url'] + if parsed_ci['ciissversion'] in accepted_ciissversions and prooftype in accepted_proof_types: + if ciurl.startswith('http://') or ciurl.startswith('https://'): + try: + domain=urlparse(ciurl).netloc + except: + LOG.warning('failed to parse domain %s' % ciurl) + domain='error' + continue + else: + domain=ciurl + if not is_valid_hostname(domain): + domain='error' + continue + # we can ignore relays that do not claim to be operated by a trusted operator + # if we do not accept all + if domain not in trusted_domains and not accept_all: + continue + if domain in result.keys(): + if prooftype in result[domain].keys(): + result[domain][prooftype].append(fingerprint) + else: + result[domain] = { prooftype : [fingerprint] } + # mixed proof types are not allowd as per spec but we are not strict here + LOG.warning('%s is using mixed prooftypes %s' % (domain, prooftype)) + else: + result[domain] = {prooftype : [fingerprint]} + return result + +def lDownloadUrlFps(domain, timeout=20, host='127.0.0.1', port=9050): + uri="https://"+domain+"/.well-known/tor-relay/rsa-fingerprint.txt" + # socks proxy used for outbound web requests (for validation of proofs) + proxy = {'https': 'socks5h://' +host +':' +str(port)} + # we use this UA string when connecting to webservers to fetch rsa-fingerprint.txt proof files + # https://nusenu.github.io/ContactInfo-Information-Sharing-Specification/#uri-rsa + headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0'} + + LOG.debug("fetching %s...." 
% uri) + try: + head = requests.head(uri, timeout=timeout, proxies=proxy, headers=headers) + except Exception as e: + print("HTTP HEAD request failed for %s" % uri) + print(e) + head = None + return [] + if head.status_code != 200: + return [] + if not head.headers['Content-Type'].startswith('text/plain'): + return [] + try: + fullfile = requests.get(uri, proxies=proxy, timeout=10, headers=headers) + except: + print("HTTP GET request failed for %s" % uri) + return [] + if fullfile.status_code != 200 or not fullfile.headers['Content-Type'].startswith('text/plain'): + return [] + + #check for redirects (not allowed as per spec) + if fullfile.url != uri: + LOG.error('Redirect detected %s vs %s (final)' % (uri, fullfile.url)) + return [] + + well_known_content = [i.strip() for i in fullfile.text.upper().split('\n')] + return well_known_content + +def validate_proofs(candidates, validation_cache_file): + ''' + This function takes the return value of find_validation_candidates() + and validated them according to their proof type (uri-rsa, dns-rsa) + and writes properly validated relay fingerprints to the local validation cache + ''' + dt_utc = datetime.datetime.now(datetime.timezone.utc).date() + + f = open(validation_cache_file, mode='a') + count = 0 + + for domain in candidates.keys(): + for prooftype in candidates[domain].keys(): + if prooftype == 'uri-rsa': + well_known_content = lDownloadUrlFps(domain, timeout=20, host='127.0.0.1', port=9050) + for fingerprint in candidates[domain][prooftype]: + if fingerprint in well_known_content: + # write cache entry + count += 1 + f.write('%s:%s:%s:%s\n' % (domain, fingerprint, prooftype, dt_utc)) + else: + LOG.error('%s:%s:%s' % (fingerprint, domain, prooftype)) + elif prooftype == 'dns-rsa' and ub_ctx: + for fingerprint in candidates[domain][prooftype]: + fp_domain = fingerprint+'.'+domain + if dns_validate(fp_domain): + count += 1 + f.write('%s:%s:%s:%s\n' % (domain, fingerprint, prooftype, dt_utc)) + else: + 
LOG.error('%s:%s:%s' % (fingerprint, domain, prooftype)) + f.close() + LOG.info('successfully validated %s new (not yet validated before) relays' % count) + +def dns_validate(domain): + ''' + performs DNS TXT lookups and verifies the reply + - is DNSSEC valid and + - contains only a single TXT record + - the DNS record contains a hardcoded string as per specification + https://nusenu.github.io/ContactInfo-Information-Sharing-Specification/#dns-rsa + ''' + if not ub_ctx: return False + + ctx = ub_ctx() + if (os.path.isfile(libunbound_resolv_file)): + ctx.resolvconf(libunbound_resolv_file) + else: + LOG.error('libunbound resolv config file: "%s" is missing, aborting!' % libunbound_resolv_file) + sys.exit(5) + if (os.path.isfile(dnssec_DS_file)): + ctx.add_ta_file(dnssec_DS_file) + else: + LOG.error('DNSSEC trust anchor file "%s" is missing, aborting!' % dnssec_DS_file) + sys.exit(6) + + status, result = ctx.resolve(domain, RR_TYPE_TXT, RR_CLASS_IN) + if status == 0 and result.havedata: + if len(result.rawdata) == 1 and result.secure: + # ignore the first byte, it is the TXT length + if result.data.as_raw_data()[0][1:] == b'we-run-this-tor-relay': + return True + return False + +def configure_tor(controller, trusted_fingerprints, exitonly=True): + ''' + takes the list of trusted fingerprints and configures a tor client + to only use trusted relays in a certain position + for now we only set exits. + we refuse to set the configuration if there are less then 40 trusted relays + ''' + + relay_count = len(trusted_fingerprints) + + if relay_count < 41: + print('Too few trusted relays (%s), aborting!' 
% relay_count) + sys.exit(15) + + try: + controller.set_conf('ExitNodes', trusted_fingerprints) + print('limited exits to %s relays' % relay_count) + except Exception as e: + print('Failed to set ExitNodes tor config to trusted relays') + print(e) + sys.exit(20) + +if __name__ == '__main__': + trust_config = 'trust_config' + assert os.path.exists(trust_config) + trusted_domains = read_local_trust_config(trust_config) + + validation_cache_file = 'validation_cache' + trusted_fingerprints = read_local_validation_cache(validation_cache_file, + trusted_domains=trusted_domains) + # tor ControlPort password + controller_password='' + controller = get_controller(address=controller_address,password=controller_password) + + r = find_validation_candidates(controller,validation_cache=trusted_fingerprints,trusted_domains=trusted_domains) + validate_proofs(r, validation_cache_file) + + # refresh list with newly validated fingerprints + trusted_fingerprints = read_local_validation_cache(trusted_domains=trusted_domains) + configure_tor(controller, trusted_fingerprints)