From 7d601ea4c32e8694363063cb977a3894374e21f9 Mon Sep 17 00:00:00 2001 From: "emdee@spm.plastiras.org" Date: Mon, 15 Jan 2024 11:09:20 +0000 Subject: [PATCH] update --- .gitignore | 3 + Makefile | 25 + exclude_badExits-installer.bash | 23 + exclude_badExits-pki.bash | 24 + exclude_badExits.bash | 8 +- exclude_badExits.md | 1 + exclude_badExits.txt | 2 + pyproject.toml | 17 +- src/__init__.py | 0 src/exclude_badExits/exclude_badExits.py | 549 ++++++---------------- src/exclude_badExits/exclude_utils.py | 412 ++++++++++++++++ src/exclude_badExits/support_onions.py | 68 ++- src/exclude_badExits/torcontactinfo.py | 567 +++++++++++++++++++++++ src/exclude_badExits/trustor_poc.py | 4 +- 14 files changed, 1256 insertions(+), 447 deletions(-) create mode 100644 Makefile create mode 100644 exclude_badExits-installer.bash create mode 100644 exclude_badExits-pki.bash create mode 100644 exclude_badExits.md create mode 100644 src/__init__.py mode change 100644 => 100755 src/exclude_badExits/exclude_badExits.py create mode 100644 src/exclude_badExits/exclude_utils.py create mode 100644 src/exclude_badExits/torcontactinfo.py diff --git a/.gitignore b/.gitignore index 7e8ec1f..67cb84b 100644 --- a/.gitignore +++ b/.gitignore @@ -3,7 +3,10 @@ __pycache__/ *.py[cod] *$py.class + +*~ *.junk +*.dst # C extensions *.so diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..2680d38 --- /dev/null +++ b/Makefile @@ -0,0 +1,25 @@ +PREFIX=/usr/local +LOCAL_DOCTEST=${PREFIX}/bin/toxcore_run_doctest3.bash +DOCTEST=${LOCAL_DOCTEST} +MOD=exclude_badExits + + +check:: + sh python3.sh -c "import ${MOD}" + +lint:: + sh .pylint.sh + +install:: + pip3.sh install --target ${PREFIX}/lib/python3.11/site-packages/ --upgrade . 
+ +rsync:: + bash .rsync.sh + +test:: doctest +doctest:: + export PYTHONPATH=${PWD}/src/${MOD} + ${DOCTEST} ${MOD}.txt + +clean:: + find * -name \*~ -delete diff --git a/exclude_badExits-installer.bash b/exclude_badExits-installer.bash new file mode 100644 index 0000000..6c0862c --- /dev/null +++ b/exclude_badExits-installer.bash @@ -0,0 +1,23 @@ +#!/bin/sh + +PROG=exclude_badExits +build=build +dist=dist +# pyinstaller +if [ ! -e ${dist}/${PROG}.pyi -o ! ${dist}/${PROG}.pyi -nt ./${PROG}.py ] ; then + [ -f ${PROG}.spec ] || pyi-makespec ./${PROG}.py -F -c + [ -d ${build} ] || mkdir -p ${build} + [ -d ${dist} ] || mkdir -p ${dist} + [ -e ${dist}/${PROG}.pyi -a ${dist}/${PROG}.pyi -nt ./${PROG}.py ] || \ + pyinstaller --distpath ${dist} --workpath ${build} \ + --exclude tkinter --exclude matplotlib \ + --exclude twisted --exclude jedi --exclude jaraco \ + --exclude sphinx --exclude coverage --exclude nose \ + --exclude PIL --exclude numpy --exclude OpenGL \ + --exclude PySide2 --exclude PyQt5 --exclude IPython \ + --onefile -c --ascii \ + $PROG.py + # AttributeError: 'NoneType' object has no attribute 'groups' + # utils.py #400 +fi +# cx_Freeze exclude_badExits.py diff --git a/exclude_badExits-pki.bash b/exclude_badExits-pki.bash new file mode 100644 index 0000000..3f5a909 --- /dev/null +++ b/exclude_badExits-pki.bash @@ -0,0 +1,24 @@ +#!/bin/sh +# -*- mode: sh; fill-column: 75; tab-width: 8; coding: utf-8-unix -*- + +PROG=exclude_badExits +build=build +dist=dist +# pyinstaller +if [ ! -e ${dist}/${PROG}.pyi -o ! 
${dist}/${PROG}.pyi -nt ./${PROG}.py ] ; then + [ -f ${PROG}.spec ] || pyi-makespec ./${PROG}.py -F -c + [ -d ${build} ] || mkdir -p ${build} + [ -d ${dist} ] || mkdir -p ${dist} + [ -e ${dist}/${PROG}.pyi -a ${dist}/${PROG}.pyi -nt ./${PROG}.py ] || \ + pyinstaller --distpath ${dist} --workpath ${build} \ + --exclude tkinter --exclude matplotlib \ + --exclude twisted --exclude jedi --exclude jaraco \ + --exclude sphinx --exclude coverage --exclude nose \ + --exclude PIL --exclude numpy --exclude OpenGL \ + --exclude PySide2 --exclude PyQt5 --exclude IPython \ + --onefile -c --ascii \ + $PROG.py + # AttributeError: 'NoneType' object has no attribute 'groups' + # utils.py #400 +fi +# cx_Freeze exclude_badExits.py diff --git a/exclude_badExits.bash b/exclude_badExits.bash index 4d6ae74..a6c9b0f 100644 --- a/exclude_badExits.bash +++ b/exclude_badExits.bash @@ -8,8 +8,10 @@ CAFILE=/etc/ssl/certs/ca-certificates.crt # you may have a special python for installed packages EXE=`which python3.bash` -$EXE exclude_badExits.py --help > exclude_badExits.hlp & -$EXE -c 'from exclude_badExits import __doc__; print(__doc__)' >exclude_badExits.md +[ -f exclude_badExits.hlp ] || \ + $EXE exclude_badExits.py --help > exclude_badExits.hlp +[ -f README.md ] || \ + $EXE -c 'from exclude_badExits import __doc__; print(__doc__)' > README.md # an example of running exclude_badExits with full debugging # expected to 20 minutes or so declare -a LARGS @@ -49,7 +51,7 @@ grep -q ^debian-tor /etc/group && TORU=debian-tor || { grep -q ^tor /etc/group && TORU=tor } # --saved_only -sudo -u $TORU $EXE exclude_badExits.py "${LARGS[@]}" "$@" \ +sudo -u $TORU $EXE src/exclude_badExits/exclude_badExits.py "${LARGS[@]}" "$@" \ 2>&1|tee exclude_badExits6.log # The DEBUG statements contain the detail of why the relay was considered bad. 
diff --git a/exclude_badExits.md b/exclude_badExits.md new file mode 100644 index 0000000..b0047fa --- /dev/null +++ b/exclude_badExits.md @@ -0,0 +1 @@ +None diff --git a/exclude_badExits.txt b/exclude_badExits.txt index 67ec8a1..3e82530 100644 --- a/exclude_badExits.txt +++ b/exclude_badExits.txt @@ -56,6 +56,8 @@ Read the usage: Torrc to check for suggestions: >>> lArgs = ['--torrc', '/etc/tor/torrc-defaults'] >>> exclude_badExits.iMain(lArgs) + INFO ... + This may take a while: diff --git a/pyproject.toml b/pyproject.toml index fa83935..1004d82 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,9 +1,9 @@ [project] name = "exclude_badExits" -description = "A Python3 ctypes wrapping of c-toxcore into Python." +description = "Set the ExcludeNodes or ExcludeExitNodes setting of a running Tor." authors = [{ name = "emdee", email = "emdee@spm.plastiras.org" } ] requires-python = ">=3.6" -keywords = ["tox", "python3", "bad exits"] +keywords = ["tor", "python3", "bad exits"] classifiers = [ "License :: OSI Approved", "Operating System :: POSIX :: BSD :: FreeBSD", @@ -19,8 +19,12 @@ classifiers = [ ] dynamic = ["version", "readme", ] # cannot be dynamic ['license'] scripts = { exclude_badExits = "exclude_badExits.exclude_badExits:iMain" } +dependencies = [ + 'qasync >= 0.27.1', + 'cryptography >= 41.0.7', + 'rsa >= 4.9', + 'stem >= 1.8.2'] -# ... 
[tool.setuptools.dynamic] version = {attr = "exclude_badExits.__version__"} readme = {file = ["README.md"]} @@ -35,8 +39,9 @@ repository = "https://git.plastiras.org/emdee/exclude_badExits" requires = ["setuptools >= 61.0"] build-backend = "setuptools.build_meta" -[tool.setuptools] -packages = ["exclude_badExits"] +# Either or both of these don't work +#[tool.setuptools] +#packages = ["exclude_badExits"] #[tool.setuptools.packages.find] -#where = "src" +#include = ["src"] diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/exclude_badExits/exclude_badExits.py b/src/exclude_badExits/exclude_badExits.py old mode 100644 new mode 100755 index e854ba2..a1fbffa --- a/src/exclude_badExits/exclude_badExits.py +++ b/src/exclude_badExits/exclude_badExits.py @@ -1,3 +1,4 @@ +#!/usr/local/bin/python3.sh # -*- mode: python; indent-tabs-mode: nil; py-indent-offset: 4; coding: utf-8 -*- # https://github.com/nusenu/noContactInfo_Exit_Excluder @@ -64,8 +65,8 @@ Use the GoodNodes/Onions list to list onion services you want the Introduction Points whitelisted - these points may change daily Look in tor's notice.log for warnings of 'Every introduction point for service' -```--hs_dir``` ```default='/var/lib/tor'``` will make the program -parse the files named ```hostname``` below this dir to find +```--hs_dir``` ```default='/var/lib/tor'``` will make the program +parse the files named ```hostname``` below this dir to find Hidden Services to whitelist. The Introduction Points can change during the day, so you may want to @@ -97,7 +98,7 @@ Use the GoodNodes/Onions list in goodnodes.yaml to list onion services you want the Introduction Points whitelisted - these points may change daily. 
Look in tor's notice.log for 'Every introduction point for service' -```notice_log``` will parse the notice log for warnings about relays and +```notice_log``` will parse the notice log for warnings about relays and services that will then be whitelisted. ```--torrc``` will read a file like /etc/tor/torrc and make some @@ -154,19 +155,20 @@ See [exclude_badExits.txt](./exclude_badExits.txt) # https://github.com/nusenu/trustor-example-trust-config/blob/main/trust_config # https://github.com/nusenu/tor-relay-operator-ids-trust-information -import argparse import os import json -import re import sys import tempfile import time from io import StringIO +import logging +import warnings import stem from stem import InvalidRequest from stem.connection import IncorrectPassword from stem.util.tor_tools import is_valid_fingerprint + import urllib3 from urllib3.util.ssl_match_hostname import CertificateError @@ -192,7 +194,6 @@ except: ub_ctx = RR_TYPE_TXT = RR_CLASS_IN = None from support_onions import (bAreWeConnected, icheck_torrc, lIntroductionPoints, - oGetStemController, vwait_for_controller, yKNOWN_NODNS, zResolveDomain) from trustor_poc import TrustorError, idns_validate @@ -203,13 +204,6 @@ try: except: httpx = None from trustor_poc import oDownloadUrlUrllib3Socks as oDownloadUrl - -global LOG -import logging -import warnings - -warnings.filterwarnings('ignore') -LOG = logging.getLogger() try: from torcontactinfo import TorContactInfoParser @@ -217,9 +211,15 @@ try: except ImportError: oPARSER = None -oCONTACT_RE = re.compile(r'([^:]*)(\s+)(email|url|proof|ciissversion|abuse|gpg):') - -ETC_DIR = '/usr/local/etc/tor/yaml' +from exclude_utils import (aCleanContact, sCleanEmail, aParseContact, + oStemController, oMainArgparser, + vwrite_goodnodes, vwrite_badnodes, vwrite_good_contacts, + vwritefinale, vsetup_logging) + +warnings.filterwarnings('ignore') +global LOG +LOG = logging.getLogger() + aGOOD_CONTACTS_DB = {} aGOOD_CONTACTS_FPS = {} aBAD_CONTACTS_DB = {} @@ -242,6 
+242,21 @@ aGOOD_NODES = safe_load(sGOOD_NODES) lKNOWN_NODNS = [] tMAYBE_NODNS = set() + +def tExcludeSet(oargs, sEXCLUDE_EXIT_GROUP): + texclude_set = set() + sections = {'BadExit'} + if oargs.bad_nodes and os.path.exists(oargs.bad_nodes): + if oargs.bad_sections: + sections.update(oargs.bad_sections.split(',')) + texclude_set = set(lYamlBadNodes(oargs.bad_nodes, + tWanted=sections, + section=sEXCLUDE_EXIT_GROUP)) + LOG.info(f"Preloaded {len(texclude_set)} bad fps") + + return texclude_set + + def lYamlBadNodes(sFile, section=sEXCLUDE_EXIT_GROUP, tWanted=None): @@ -314,7 +329,7 @@ lAT_REPS = ['[]', ' at ', '(at)', '[at]', '', '(att)', '_at_', ] lDOT_REPS = [' point ', ' dot ', '[dot]', '(dot)', '_dot_', '!dot!', '<.>', '<:dot:>', '|dot--|', ' d07 ', '', '(dot]', '{dot)', - 'd.t', "'dot'", '(d)', '-dot-', ' adot ', + 'd.t', "'dot'", '(d)', '-dot-', ' adot ', '(d)', ' . ', '[punto]', '(point)', '"dot"', '{.}', '--separator--', '|=dot|', ' period ', ')dot(', ] @@ -342,55 +357,7 @@ lNO_EMAIL = [ r'', ] # -lMORONS = ['hoster:Quintex Alliance Consulting '] - -def sCleanEmail(s): - s = s.lower() - for elt in lAT_REPS: - if not elt.startswith(' '): - s = s.replace(' ' + elt + ' ', '@') - s = s.replace(elt, '@') - for elt in lDOT_REPS: - if not elt.startswith(' '): - s = s.replace(' ' + elt + ' ', '.') - s = s.replace(elt, '.') - s = s.replace('(dash)', '-') - s = s.replace('hyphen ', '-') - for elt in lNO_EMAIL: - s = s.replace(elt, '?') - return s - -lEMAILS = ['abuse', 'email'] -lINTS = ['ciissversion', 'uplinkbw', 'signingkeylifetime', 'memory'] -lBOOLS = ['dnssec', 'dnsqname', 'aesni', 'autoupdate', 'dnslocalrootzone', - 'sandbox', 'offlinemasterkey'] -def aCleanContact(a): - # cleanups - for elt in lINTS: - if elt in a: - a[elt] = int(a[elt]) - for elt in lBOOLS: - if elt not in a: continue - if a[elt] in ['y', 'yes', 'true', 'True']: - a[elt] = True - else: - a[elt] = False - for elt in lEMAILS: - if elt not in a: continue - a[elt] = sCleanEmail(a[elt]) - if 'url' 
in a.keys(): - a['url'] = a['url'].rstrip('/') - if a['url'].startswith('http://'): - domain = a['url'].replace('http://', '') - elif a['url'].startswith('https://'): - domain = a['url'].replace('https://', '') - else: - domain = a['url'] - a['url'] = 'https://' + domain - a.update({'fps': []}) - return a - -def bVerifyContact(a=None, fp=None, https_cafile=None): +def bVerifyContact(lAT_REPS, lDOT_REPS, lNO_EMAIL, a, fp, https_cafile=None): global aFP_EMAIL global tBAD_URLS global lKNOWN_NODNS @@ -399,9 +366,9 @@ def bVerifyContact(a=None, fp=None, https_cafile=None): assert a assert fp assert https_cafile - + keys = list(a.keys()) - a = aCleanContact(a) + a = aCleanContact(a, lAT_REPS, lDOT_REPS, lNO_EMAIL) a['fp'] = fp if 'email' not in keys: a['email'] = '' @@ -455,7 +422,7 @@ def oVerifyUrl(url, domain, fp=None, https_cafile=None, timeout=20, host='127.0. if url in tBAD_URLS: LOG.debug(f"BC Known bad url from {domain} for {fp}") return None - + o = None try: if httpx: @@ -464,7 +431,7 @@ def oVerifyUrl(url, domain, fp=None, https_cafile=None, timeout=20, host='127.0. o = oDownloadUrl(url, https_cafile, timeout=timeout, host=host, port=port, content_type='text/plain') - else: + else: LOG.debug(f"Downloading from {domain} for {fp}") o = oDownloadUrl(url, https_cafile, timeout=timeout, host=host, port=port, @@ -478,7 +445,7 @@ def oVerifyUrl(url, domain, fp=None, https_cafile=None, timeout=20, host='127.0. tBAD_URLS.add(url) except TrustorError as e: if e.args == "HTTP Errorcode 404": - aFP_EMAIL[fp] = a['email'] + #? aFP_EMAIL[fp] = a['email'] LOG.warn(f"BC TrustorError 404 from {domain} {e.args}") else: LOG.warn(f"BC TrustorError downloading from {domain} {e.args}") @@ -488,7 +455,7 @@ def oVerifyUrl(url, domain, fp=None, https_cafile=None, timeout=20, host='127.0. 
# maybe offline - not bad LOG.warn(f"BC MaxRetryError downloading from {domain} {e}") except (BaseException) as e: - LOG.error(f"BC Exception {type(e)} downloading from {domain} {e}") + LOG.warn(f"BC Exception {type(e)} downloading from {domain} {e}") else: return o return None @@ -499,13 +466,13 @@ def oVerifyUrl(url, domain, fp=None, https_cafile=None, timeout=20, host='127.0. # If we paralelize the gathering of the URLs, we may have simultaneous # gathers of the same URL from different relays, defeating the advantage # of going parallel. The cache is global aDOMAIN_FPS. -def aVerifyContact(a=None, fp=None, https_cafile=None, timeout=20, host='127.0.0.1', port=9050, oargs=None): +def aVerifyContact(a, fp, https_cafile=None, timeout=20, host='127.0.0.1', port=9050, oargs=None): global aFP_EMAIL global tBAD_URLS global lKNOWN_NODNS global aDOMAIN_FPS global aBAD_CONTACTS_DB - + assert a assert fp assert https_cafile @@ -515,10 +482,11 @@ def aVerifyContact(a=None, fp=None, https_cafile=None, timeout=20, host='127.0.0 if domain in aDOMAIN_FPS.keys(): a['fps'] = aDOMAIN_FPS[domain] return a - - r = bVerifyContact(a=a, fp=fp, https_cafile=https_cafile) + + r = bVerifyContact(lAT_REPS, lDOT_REPS, lNO_EMAIL, a, fp, https_cafile=https_cafile) if r is not True: return r + if a['url'] in tBAD_URLS: a['fps'] = [] return a @@ -549,10 +517,10 @@ def aVerifyContact(a=None, fp=None, https_cafile=None, timeout=20, host='127.0.0 elif a['proof'] == 'dns-rsa': # well let the test of the URL be enough for now LOG.debug(f"Downloaded from {url} ") - a['fps'] = [fp] + a['fps'] = [fp] aDOMAIN_FPS[domain] = a['fps'] elif a['proof'] == 'uri-rsa': - a = aContactFps(oargs, a, o, domain) + a = aContactFps(oargs, a, fp, o, domain) if a['fps']: LOG.debug(f"Downloaded from {url} {len(a['fps'])} FPs for {fp}") else: @@ -561,7 +529,7 @@ def aVerifyContact(a=None, fp=None, https_cafile=None, timeout=20, host='127.0.0 aDOMAIN_FPS[domain] = a['fps'] return a -def aContactFps(oargs, a, o, domain): 
+def aContactFps(oargs, a, fp, o, domain): global aFP_EMAIL global tBAD_URLS global aDOMAIN_FPS @@ -594,7 +562,7 @@ def aContactFps(oargs, a, o, domain): oFd.write(data) except Exception as e: LOG.warn(f"Error writing {sfile} {e}") - + a['modified'] = int(time.time()) if not l: LOG.warn(f"Downloaded from {domain} empty for {fp}") @@ -605,187 +573,6 @@ def aContactFps(oargs, a, o, domain): LOG.info(f"Downloaded from {domain} {len(a['fps'])} FPs") return a -def aParseContact(contact, fp): - """ - See the Tor ContactInfo Information Sharing Specification v2 - https://nusenu.github.io/ContactInfo-Information-Sharing-Specification/ - """ - a = {} - if not contact: - LOG.warn(f"BC null contact for {fp}") - LOG.debug(f"{fp} {contact}") - return {} - - contact = contact.split(r'\n')[0] - for elt in lMORONS: - contact = contact.replace(elt, '') - m = oCONTACT_RE.match(contact) - # 450 matches! - if m and m.groups and len(m.groups(0)) > 2 and m.span()[1] > 0: - i = len(m.groups(0)[0]) + len(m.groups(0)[1]) - contact = contact[i:] - - # shlex? 
- lelts = contact.split(' ') - if not lelts: - LOG.warn(f"BC empty contact for {fp}") - LOG.debug(f"{fp} {contact}") - return {} - - for elt in lelts: - if ':' not in elt: - # hoster:Quintex Alliance Consulting - LOG.warn(f"BC no : in {elt} for {contact} in {fp}") - # return {} - # try going with what we have - break - (key , val,) = elt.split(':', 1) - if key == '': - continue - key = key.rstrip(':') - a[key] = val - a = aCleanContact(a) - return a - -def aParseContactYaml(contact, fp): - """ - See the Tor ContactInfo Information Sharing Specification v2 - https://nusenu.github.io/ContactInfo-Information-Sharing-Specification/ - """ - l = [line for line in contact.strip().replace('"', '').split(' ') - if ':' in line] - LOG.debug(f"{fp} {len(l)} fields") - s = f'"{fp}":\n' - s += '\n'.join([f" {line}\"".replace(':', ': \"', 1) - for line in l]) - oFd = StringIO(s) - a = safe_load(oFd) - return a - -def oMainArgparser(_=None): - - try: - from OpenSSL import SSL - lCAfs = SSL._CERTIFICATE_FILE_LOCATIONS - except: - lCAfs = [] - - CAfs = [] - for elt in lCAfs: - if os.path.exists(elt): - CAfs.append(elt) - if not CAfs: - CAfs = [''] - - parser = argparse.ArgumentParser(add_help=True, - epilog=__prolog__) - parser.add_argument('--https_cafile', type=str, - help="Certificate Authority file (in PEM)", - default=CAfs[0]) - parser.add_argument('--proxy_host', '--proxy-host', type=str, - default='127.0.0.1', - help='proxy host') - parser.add_argument('--proxy_port', '--proxy-port', default=9050, type=int, - help='proxy socks port') - parser.add_argument('--proxy_ctl', '--proxy-ctl', - default='/run/tor/control' if os.path.exists('/run/tor/control') else '9051', - type=str, - help='control socket - or port') - - parser.add_argument('--torrc', - default='/etc/tor/torrc-defaults', - type=str, - help='torrc to check for suggestions') - parser.add_argument('--timeout', default=60, type=int, - help='proxy download connect timeout') - - parser.add_argument('--good_nodes', 
type=str, - default=os.path.join(ETC_DIR, 'goodnodes.yaml'), - help="Yaml file of good info that should not be excluded") - parser.add_argument('--bad_nodes', type=str, - default=os.path.join(ETC_DIR, 'badnodes.yaml'), - help="Yaml file of bad nodes that should also be excluded") - parser.add_argument('--bad_on', type=str, default='Empty,NoEmail,NotGood', - help="comma sep list of conditions - Empty,NoEmail,NotGood") - parser.add_argument('--bad_contacts', type=str, - default=os.path.join(ETC_DIR, 'badcontacts.yaml'), - help="Yaml file of bad contacts that bad FPs are using") - parser.add_argument('--saved_only', default=False, - action='store_true', - help="Just use the info in the last *.yaml files without querying the Tor controller") - parser.add_argument('--strict_nodes', type=str, default=0, - choices=['0', '1'], - help="Set StrictNodes: 1 is less anonymous but more secure, although some onion sites may be unreachable") - parser.add_argument('--wait_boot', type=int, default=120, - help="Seconds to wait for Tor to booststrap") - parser.add_argument('--points_timeout', type=int, default=0, - help="Timeout for getting introduction points - must be long >120sec. 0 means disabled looking for IPs") - parser.add_argument('--log_level', type=int, default=20, - help="10=debug 20=info 30=warn 40=error") - parser.add_argument('--bad_sections', type=str, - default='BadExit', - help="sections of the badnodes.yaml to use, in addition to BadExit, comma separated") - parser.add_argument('--white_onions', type=str, - default='', - help="comma sep. 
list of onions to whitelist their introduction points - BROKEN") - parser.add_argument('--torrc_output', type=str, - default=os.path.join(ETC_DIR, 'torrc.new'), - help="Write the torrc configuration to a file") - parser.add_argument('--hs_dir', type=str, - default='/var/lib/tor', - help="Parse the files name hostname below this dir to find Hidden Services to whitelist") - parser.add_argument('--notice_log', type=str, - default='', - help="Parse the notice log for relays and services") - parser.add_argument('--relays_output', type=str, - default=os.path.join(ETC_DIR, 'relays.json'), - help="Write the download relays in json to a file") - parser.add_argument('--wellknown_output', type=str, - default=os.path.join(ETC_DIR, 'https'), - help="Write the well-known files to a directory") - parser.add_argument('--good_contacts', type=str, default=os.path.join(ETC_DIR, 'goodcontacts.yaml'), - help="Write the proof data of the included nodes to a YAML file") - return parser - -def vwrite_good_contacts(oargs): - global aGOOD_CONTACTS_DB - good_contacts_tmp = oargs.good_contacts + '.tmp' - with open(good_contacts_tmp, 'wt') as oFYaml: - yaml.dump(aGOOD_CONTACTS_DB, oFYaml) - oFYaml.close() - if os.path.exists(oargs.good_contacts): - bak = oargs.good_contacts +'.bak' - os.rename(oargs.good_contacts, bak) - os.rename(good_contacts_tmp, oargs.good_contacts) - LOG.info(f"Wrote {len(list(aGOOD_CONTACTS_DB.keys()))} good contact details to {oargs.good_contacts}") - bad_contacts_tmp = good_contacts_tmp.replace('.tmp', '.bad') - with open(bad_contacts_tmp, 'wt') as oFYaml: - yaml.dump(aBAD_CONTACTS_DB, oFYaml) - oFYaml.close() - -def vwrite_badnodes(oargs, aBAD_NODES, slen, stag): - if not aBAD_NODES: return - tmp = oargs.bad_nodes +'.tmp' - bak = oargs.bad_nodes +'.bak' - with open(tmp, 'wt') as oFYaml: - yaml.dump(aBAD_NODES, oFYaml) - LOG.info(f"Wrote {slen} to {stag} in {oargs.bad_nodes}") - oFYaml.close() - if os.path.exists(oargs.bad_nodes): - os.rename(oargs.bad_nodes, bak) - 
os.rename(tmp, oargs.bad_nodes) - -def vwrite_goodnodes(oargs, aGOOD_NODES, ilen): - tmp = oargs.good_nodes +'.tmp' - bak = oargs.good_nodes +'.bak' - with open(tmp, 'wt') as oFYaml: - yaml.dump(aGOOD_NODES, oFYaml) - LOG.info(f"Wrote {ilen} good relays to {oargs.good_nodes}") - oFYaml.close() - if os.path.exists(oargs.good_nodes): - os.rename(oargs.good_nodes, bak) - os.rename(tmp, oargs.good_nodes) - def lget_onionoo_relays(oargs): import requests adata = {} @@ -847,66 +634,6 @@ def lget_onionoo_relays(oargs): lonionoo_relays = [r for r in adata["relays"] if 'fingerprint' in r.keys()] return lonionoo_relays -def vsetup_logging(log_level, logfile='', stream=sys.stdout): - global LOG - add = True - - try: - if 'COLOREDLOGS_LEVEL_STYLES' not in os.environ: - os.environ['COLOREDLOGS_LEVEL_STYLES'] = 'spam=22;debug=28;verbose=34;notice=220;warning=202;success=118,bold;error=124;critical=background=red' - # https://pypi.org/project/coloredlogs/ - import coloredlogs - except ImportError: - coloredlogs = False - - # stem fucks up logging - # from stem.util import log - logging.getLogger('stem').setLevel(30) - - logging._defaultFormatter = logging.Formatter(datefmt='%m-%d %H:%M:%S') - logging._defaultFormatter.default_time_format = '%m-%d %H:%M:%S' - logging._defaultFormatter.default_msec_format = '' - - kwargs = dict(level=log_level, - force=True, - format='%(levelname)s %(message)s') - - if logfile: - add = logfile.startswith('+') - sub = logfile.startswith('-') - if add or sub: - logfile = logfile[1:] - kwargs['filename'] = logfile - - if coloredlogs: - # https://pypi.org/project/coloredlogs/ - aKw = dict(level=log_level, - logger=LOG, - stream=stream, - fmt='%(levelname)s %(message)s' - ) - coloredlogs.install(**aKw) - if logfile: - oHandler = logging.FileHandler(logfile) - LOG.addHandler(oHandler) - LOG.info(f"CSetting log_level to {log_level} {stream}") - else: - logging.basicConfig(**kwargs) - if add and logfile: - oHandler = logging.StreamHandler(stream) - 
LOG.addHandler(oHandler) - LOG.info(f"SSetting log_level to {log_level!s}") - -def vwritefinale(oargs): - global lNOT_IN_RELAYS_DB - - if len(lNOT_IN_RELAYS_DB): - LOG.warn(f"{len(lNOT_IN_RELAYS_DB)} relays from stem were not in onionoo.torproject.org") - - LOG.info(f"For info on a FP, use: https://nusenu.github.io/OrNetStats/w/relay/.html") - LOG.info(f"For info on relays, try: https://onionoo.torproject.org/details") - # https://onionoo.torproject.org/details - def bProcessContact(b, texclude_set, aBadContacts, iFakeContact=0): global aGOOD_CONTACTS_DB global aGOOD_CONTACTS_FPS @@ -936,16 +663,16 @@ def bProcessContact(b, texclude_set, aBadContacts, iFakeContact=0): aGOOD_CONTACTS_FPS[elt] = b return True - + def bCheckFp(relay, sofar, lConds, texclude_set): global aGOOD_CONTACTS_DB global aGOOD_CONTACTS_FPS global lNOT_IN_RELAYS_DB - + if not is_valid_fingerprint(relay.fingerprint): LOG.warn('Invalid Fingerprint: %s' % relay.fingerprint) return None - + fp = relay.fingerprint if aRELAYS_DB and fp not in aRELAYS_DB.keys(): LOG.warn(f"{fp} not in aRELAYS_DB") @@ -969,44 +696,45 @@ def bCheckFp(relay, sofar, lConds, texclude_set): # fail if the contact is empty if ('Empty' in lConds and not relay.contact): - LOG.info(f"{fp} skipping empty contact - Empty {sofar}") + LOG.debug(f"{fp} skipping empty contact - Empty {sofar}") texclude_set.add(fp) return None - contact = sCleanEmail(relay.contact) + contact = sCleanEmail(relay.contact, lAT_REPS, lDOT_REPS, lNO_EMAIL) # fail if the contact has no email - unreliable if 'NoEmail' in lConds and relay.contact and \ ('@' not in contact): - LOG.info(f"{fp} skipping contact - NoEmail {contact} {sofar}") - LOG.debug(f"{fp} {relay.contact} {sofar}") + LOG.debug(f"{fp} skipping contact - NoEmail {contact} {sofar}") + # LOG.spam(f"{fp} {relay.contact} {sofar}") texclude_set.add(fp) return None # fail if the contact does not pass if ('NotGood' in lConds and relay.contact and ('ciissversion:' not in relay.contact)): - 
LOG.info(f"{fp} skipping no ciissversion in contact {sofar}") - LOG.debug(f"{fp} {relay.contact} {sofar}") + LOG.debug(f"{fp} skipping no ciissversion in contact {sofar}") + # LOG.spam(f"{fp} {relay.contact} {sofar}") texclude_set.add(fp) return None # fail if the contact does not have url: to pass if relay.contact and 'url' not in relay.contact: - LOG.info(f"{fp} skipping unfetchable contact - no url {sofar}") - LOG.debug(f"{fp} {relay.contact} {sofar}") + LOG.debug(f"{fp} skipping unfetchable contact - no url {sofar}") + # LOG.spam(f"{fp} {relay.contact} {sofar}") if ('NotGood' in lConds): texclude_set.add(fp) return None - + return True - + def oMainPreamble(lArgs): + global LOG global aGOOD_CONTACTS_DB global aGOOD_CONTACTS_FPS - - parser = oMainArgparser() + + parser = oMainArgparser( __prolog__= __prolog__) oargs = parser.parse_args(lArgs) - vsetup_logging(oargs.log_level) + vsetup_logging(LOG, oargs.log_level, stream=sys.stdout) if bAreWeConnected() is False: raise SystemExit("we are not connected") @@ -1038,31 +766,9 @@ def oMainPreamble(lArgs): return oargs -def oStemController(oargs): - if os.path.exists(oargs.proxy_ctl): - controller = oGetStemController(log_level=oargs.log_level, sock_or_pair=oargs.proxy_ctl) - else: - port =int(oargs.proxy_ctl) - controller = oGetStemController(log_level=oargs.log_level, sock_or_pair=port) - - vwait_for_controller(controller, oargs.wait_boot) - - elt = controller.get_conf('UseMicrodescriptors') - if elt != '0': - LOG.error('"UseMicrodescriptors 0" is required in your /etc/tor/torrc. Exiting.') - controller.set_conf('UseMicrodescriptors', 0) - # does it work dynamically? 
- return 2 - - elt = controller.get_conf(sEXCLUDE_EXIT_GROUP) - if elt and elt != '{??}': - LOG.warn(f"{sEXCLUDE_EXIT_GROUP} is in use already") - - return controller - def tWhitelistSet(oargs, controller): twhitelist_set = set() - + twhitelist_set.update(set(lYamlGoodNodes(oargs.good_nodes))) LOG.info(f"lYamlGoodNodes {len(twhitelist_set)} EntryNodes from {oargs.good_nodes}") @@ -1077,7 +783,7 @@ def tWhitelistSet(oargs, controller): with open(os.path.join(dirpath, f), 'rt') as oFd: son = oFd.read() t.update(son) - LOG.info(f"Added {son} to the list for Introduction Points") + LOG.debug(f"Added {son} to the list for Introduction Points") if oargs.notice_log and os.path.exists(oargs.notice_log): tmp = tempfile.mktemp() @@ -1086,7 +792,7 @@ def tWhitelistSet(oargs, controller): with open(tmp, 'rt') as oFd: tnew = {elt.strip() for elt in oFd.readlines()} t.update(tnew) - LOG.info(f"Whitelist {len(lnew)} services from {oargs.notice_log}") + LOG.info(f"Whitelist {len(tnew)} services to {oargs.notice_log}") os.remove(tmp) w = set() @@ -1105,7 +811,7 @@ def tWhitelistSet(oargs, controller): LOG.info(f"Whitelist {len(lnew)} relays from {oargs.notice_log}") os.remove(tmp) twhitelist_set.update(w) - + w = set() if 'Onions' in aGOOD_NODES[sGOOD_ROOT].keys(): # Provides the descriptor for a hidden service. 
The **address** is the @@ -1115,27 +821,16 @@ def tWhitelistSet(oargs, controller): w.update(oargs.white_onions.split(',')) if oargs.points_timeout > 0: LOG.info(f"{len(w)} services will be checked from IntroductionPoints") - t.update(lIntroductionPoints(controller, w, itimeout=oargs.points_timeout)) + t.update(lIntroductionPoints(controller, w, itimeout=oargs.points_timeout, + password=oargs.torctl_pass)) if len(t) > 0: LOG.info(f"IntroductionPoints {len(t)} relays from {len(w)} IPs for onions") twhitelist_set.update(t) return twhitelist_set -def tExcludeSet(oargs): - texclude_set = set() - sections = {'BadExit'} - if oargs.bad_nodes and os.path.exists(oargs.bad_nodes): - if oargs.bad_sections: - sections.update(oargs.bad_sections.split(',')) - texclude_set = set(lYamlBadNodes(oargs.bad_nodes, - tWanted=sections, - section=sEXCLUDE_EXIT_GROUP)) - LOG.info(f"Preloaded {len(texclude_set)} bad fps") - - return texclude_set -# async +# async def iMain(lArgs): global aGOOD_CONTACTS_DB global aGOOD_CONTACTS_FPS @@ -1147,12 +842,12 @@ def iMain(lArgs): global aRELAYS_DB_INDEX global tBAD_URLS global lNOT_IN_RELAYS_DB - + oargs = oMainPreamble(lArgs) - controller = oStemController(oargs) + controller = oStemController(oargs, sEXCLUDE_EXIT_GROUP) twhitelist_set = tWhitelistSet(oargs, controller) - texclude_set = tExcludeSet(oargs) - + texclude_set = tExcludeSet(oargs, sEXCLUDE_EXIT_GROUP) + ttrust_db_index = aGOOD_CONTACTS_FPS.keys() iFakeContact = 0 iTotalContacts = 0 @@ -1175,13 +870,13 @@ def iMain(lArgs): if r is not True: continue # if it has a ciissversion in contact we count it in total iTotalContacts += 1 - + # only proceed if 'NotGood' not in lConds: if 'NotGood' not in lConds: continue # fail if the contact does not have url: to pass - a = aParseContact(relay.contact, fp) + a = aParseContact(relay.contact, fp, lAT_REPS, lDOT_REPS, lNO_EMAIL) if not a: LOG.warn(f"{fp} BC contact did not parse {sofar}") texclude_set.add(fp) @@ -1191,8 +886,8 @@ def iMain(lArgs): if 
'url' in a and a['url']: # fail if the contact uses a url we already know is bad if a['url'] in tBAD_URLS: - LOG.info(f"{fp} skipping in tBAD_URLS {a['url']} {sofar}") - LOG.debug(f"{fp} {a} {sofar}") + LOG.debug(f"{fp} skipping in tBAD_URLS {a['url']} {sofar}") + # LOG.spam(f"{fp} {a} {sofar}") texclude_set.add(fp) continue @@ -1200,28 +895,28 @@ def iMain(lArgs): # fail if the contact uses a domain we already know does not resolve if domain in lKNOWN_NODNS: # The fp is using a contact with a URL we know is bogus - LOG.info(f"{fp} BC skipping in lKNOWN_NODNS {a} {sofar}") - LOG.debug(f"{fp} {relay} {sofar}") + LOG.debug(f"{fp} BC skipping in lKNOWN_NODNS {a} {sofar}") + # LOG.spam(f"{fp} {relay} {sofar}") texclude_set.add(fp) aBAD_CONTACTS_DB[fp] = a continue # drop through - + if 'proof' in a and a['proof'] in ['uri-rsa', 'dns-rsa']: if domain in aDOMAIN_FPS.keys(): continue if httpx: a['fp'] = fp lqueue.append(asyncio.create_task( - aVerifyContact(a=a, - fp=fp, + aVerifyContact(a, + fp, https_cafile=oargs.https_cafile, timeout=oargs.timeout, host=oargs.proxy_host, port=oargs.proxy_port, oargs=oargs))) else: - b = aVerifyContact(a=a, - fp=fp, + b = aVerifyContact(a, + fp, https_cafile=oargs.https_cafile, timeout=oargs.timeout, host=oargs.proxy_host, @@ -1230,7 +925,7 @@ def iMain(lArgs): r = bProcessContact(b, texclude_set, aBadContacts, iFakeContact) if r is False: iFakeContact += 1 - + if httpx: # for b in asyncio.as_completed(lqueue): for b in lqueue: @@ -1242,40 +937,61 @@ def iMain(lArgs): elif r is True: # iGoodContact += 1 pass - - LOG.info(f"Filtered {len(twhitelist_set)} whitelisted relays") - texclude_set = texclude_set.difference(twhitelist_set) - LOG.info(f"{len(list(aGOOD_CONTACTS_DB.keys()))} good contacts out of {iTotalContacts}") + texclude_set = texclude_set.difference(twhitelist_set) + louts = [] if oargs.torrc_output and texclude_set: - with open(oargs.torrc_output, 'wt') as oFTorrc: - oFTorrc.write(f"{sEXCLUDE_EXIT_GROUP} 
{','.join(texclude_set)}\n") - oFTorrc.write(f"{sINCLUDE_EXIT_KEY} {','.join(aGOOD_CONTACTS_FPS.keys())}\n") - oFTorrc.write(f"{sINCLUDE_GUARD_KEY} {','.join(aGOOD_NODES[sGOOD_ROOT]['EntryNodes'])}\n") - LOG.info(f"Wrote tor configuration to {oargs.torrc_output}") - oFTorrc.close() + try: + with open(oargs.torrc_output, 'wt') as oFTorrc: + oFTorrc.write(f"{sEXCLUDE_EXIT_GROUP} {','.join(texclude_set)}\n") + oFTorrc.write(f"{sINCLUDE_EXIT_KEY} {','.join(aGOOD_CONTACTS_FPS.keys())}\n") + oFTorrc.write(f"{sINCLUDE_GUARD_KEY} {','.join(aGOOD_NODES[sGOOD_ROOT]['EntryNodes'])}\n") + LOG.info(f"Wrote tor configuration to {oargs.torrc_output}") + oFTorrc.close() + louts += [oargs.torrc_output] + except Exception as e: + LOG.warn(f"ERROR writing {oargs.torrc_output} {e}") + # drop through if oargs.bad_contacts and aBadContacts: - # for later analysis - with open(oargs.bad_contacts, 'wt') as oFYaml: - yaml.dump(aBadContacts, oFYaml) - oFYaml.close() + try: + # for later analysis + with open(oargs.bad_contacts, 'wt') as oFYaml: + yaml.dump(aBadContacts, oFYaml) + oFYaml.close() + louts += [oargs.bad_contacts] + except Exception as e: + LOG.warn(f"ERROR writing {oargs.bad_contacts} {e}") + # drop through if oargs.good_contacts != '' and aGOOD_CONTACTS_DB: - vwrite_good_contacts(oargs) + try: + vwrite_good_contacts(oargs, aGOOD_CONTACTS_DB) + louts += [oargs.good_contacts] + except Exception as e: + LOG.warn(f"ERROR writing vwrite_good_contacts {e}") + # drop through aBAD_NODES[oBAD_ROOT][sEXCLUDE_EXIT_GROUP]['BadExit'] = list(texclude_set) aBAD_NODES[oBAD_ROOT][sEXCLUDE_DOMAINS] = lKNOWN_NODNS if oargs.bad_nodes: stag = sEXCLUDE_EXIT_GROUP + '/BadExit' - vwrite_badnodes(oargs, aBAD_NODES, str(len(texclude_set)), stag) + try: + vwrite_badnodes(oargs, aBAD_NODES, str(len(texclude_set)), stag) + louts += [oargs.bad_nodes] + except Exception as e: + LOG.warn(f"ERROR writing vwrite_badnodes {e}") + # drop through aGOOD_NODES['GoodNodes']['Relays']['ExitNodes'] = 
list(aGOOD_CONTACTS_FPS.keys()) # EntryNodes are readony if oargs.good_nodes: - vwrite_goodnodes(oargs, aGOOD_NODES, len(aGOOD_CONTACTS_FPS.keys())) - - vwritefinale(oargs) + try: + vwrite_goodnodes(oargs, aGOOD_NODES, len(aGOOD_CONTACTS_FPS.keys())) + louts += [oargs.good_nodes] + except Exception as e: + LOG.warn(f"ERROR writing vwrite_goodnodes {e}") + # drop through retval = 0 try: @@ -1318,10 +1034,17 @@ def iMain(lArgs): LOG.warn(f"controller failed StrictNodes NOT {oargs.strict_nodes}") else: LOG.info(f"controller OVERRODE StrictNodes to {oargs.strict_nodes}") - + else: LOG.info(f"controller StrictNodes is set to {cur}") + # final + LOG.info(f"Filtered {len(twhitelist_set)} whitelisted relays") + LOG.info(f"{len(list(aGOOD_CONTACTS_DB.keys()))} good contacts out of {iTotalContacts}") + vwritefinale(oargs, lNOT_IN_RELAYS_DB) + elts='\n' + '\n'.join(louts) + LOG.info(f"The following files were written:{elts}") + except KeyboardInterrupt: return 0 except Exception as e: diff --git a/src/exclude_badExits/exclude_utils.py b/src/exclude_badExits/exclude_utils.py new file mode 100644 index 0000000..dcaa109 --- /dev/null +++ b/src/exclude_badExits/exclude_utils.py @@ -0,0 +1,412 @@ +#!/usr/local/bin/python3.sh +# -*- mode: python; indent-tabs-mode: nil; py-indent-offset: 4; coding: utf-8 -*- + +import argparse +from argparse import Namespace +import os +import sys +import re +from io import StringIO +import logging +import warnings +global LOG + +from support_onions import (oGetStemController, + vwait_for_controller,) + +try: + from ruamel.yaml import YAML + yaml = YAML(typ='rt') + yaml.indent(mapping=2, sequence=2) + safe_load = yaml.load +except: + yaml = None +if yaml is None: + try: + import yaml + safe_load = yaml.safe_load + except: + yaml = None + +try: +# if 'COLOREDLOGS_LEVEL_STYLES' not in os.environ: +# os.environ['COLOREDLOGS_LEVEL_STYLES'] = 'spam=22;debug=28;verbose=34;notice=220;warning=202;success=118,bold;error=124;critical=background=red' + # 
https://pypi.org/project/coloredlogs/ + import coloredlogs +except ImportError: + coloredlogs = False + +def aCleanContact(a, lAT_REPS, lDOT_REPS, lNO_EMAIL) -> dict: + # cleanups + for elt in lINTS: + if elt in a: + a[elt] = int(a[elt]) + for elt in lBOOLS: + if elt not in a: continue + if a[elt] in ['y', 'yes', 'true', 'True']: + a[elt] = True + else: + a[elt] = False + for elt in lEMAILS: + if elt not in a: continue + a[elt] = sCleanEmail(a[elt], lAT_REPS, lDOT_REPS, lNO_EMAIL) + if 'url' in a.keys(): + a['url'] = a['url'].rstrip('/') + if a['url'].startswith('http://'): + domain = a['url'].replace('http://', '') + elif a['url'].startswith('https://'): + domain = a['url'].replace('https://', '') + else: + domain = a['url'] + a['url'] = 'https://' + domain + a.update({'fps': []}) + return a + +def sCleanEmail(s, lAT_REPS, lDOT_REPS, lNO_EMAIL) -> str: + s = s.lower() + for elt in lAT_REPS: + if not elt.startswith(' '): + s = s.replace(' ' + elt + ' ', '@') + s = s.replace(elt, '@') + for elt in lDOT_REPS: + if not elt.startswith(' '): + s = s.replace(' ' + elt + ' ', '.') + s = s.replace(elt, '.') + s = s.replace('(dash)', '-') + s = s.replace('hyphen ', '-') + for elt in lNO_EMAIL: + s = s.replace(elt, '?') + return s + +lMORONS = ['hoster:Quintex Alliance Consulting '] +oCONTACT_RE = re.compile(r'([^:]*)(\s+)(email|url|proof|ciissversion|abuse|gpg):') +lINTS = ['ciissversion', 'uplinkbw', 'signingkeylifetime', 'memory'] +lBOOLS = ['dnssec', 'dnsqname', 'aesni', 'autoupdate', 'dnslocalrootzone', + 'sandbox', 'offlinemasterkey'] +lEMAILS = ['abuse', 'email'] + +ETC_DIR = '/usr/local/etc/tor/yaml' + +def oStemController(oargs, sEXCLUDE_EXIT_GROUP): + if os.path.exists(oargs.proxy_ctl): + controller = oGetStemController(log_level=oargs.log_level, + sock_or_pair=oargs.proxy_ctl, + password=oargs.torctl_pass) + else: + port =int(oargs.proxy_ctl) + controller = oGetStemController(log_level=oargs.log_level, + sock_or_pair=port, + password=oargs.torctl_pass) + + 
vwait_for_controller(controller, oargs.wait_boot) + + elt = controller.get_conf('UseMicrodescriptors') + if elt != '0': + LOG.error('"UseMicrodescriptors 0" is required in your /etc/tor/torrc. Exiting.') + controller.set_conf('UseMicrodescriptors', 0) + # does it work dynamically? + return 2 + + elt = controller.get_conf(sEXCLUDE_EXIT_GROUP) + if elt and elt != '{??}': + LOG.warn(f"{sEXCLUDE_EXIT_GROUP} is in use already") + + return controller + +def aParseContactYaml(contact, fp) -> dict: + """ + See the Tor ContactInfo Information Sharing Specification v2 + https://nusenu.github.io/ContactInfo-Information-Sharing-Specification/ + """ + l = [line for line in contact.strip().replace('"', '').split(' ') + if ':' in line] + LOG.debug(f"{fp} {len(l)} fields") + s = f'"{fp}":\n' + s += '\n'.join([f" {line}\"".replace(':', ': \"', 1) + for line in l]) + oFd = StringIO(s) + a = safe_load(oFd) + return a + +def aParseContact(contact, fp, lAT_REPS, lDOT_REPS, lNO_EMAIL) -> dict: + """ + See the Tor ContactInfo Information Sharing Specification v2 + https://nusenu.github.io/ContactInfo-Information-Sharing-Specification/ + """ + a = {} + if not contact: + LOG.warn(f"BC null contact for {fp}") + LOG.debug(f"{fp} {contact}") + return {} + + contact = contact.split(r'\n')[0] + for elt in lMORONS: + contact = contact.replace(elt, '') + m = oCONTACT_RE.match(contact) + # 450 matches! + if m and m.groups and len(m.groups(0)) > 2 and m.span()[1] > 0: + i = len(m.groups(0)[0]) + len(m.groups(0)[1]) + contact = contact[i:] + + # shlex? 
+ lelts = contact.split(' ') + if not lelts: + LOG.warn(f"BC empty contact for {fp}") + LOG.debug(f"{fp} {contact}") + return {} + + for elt in lelts: + if ':' not in elt: + # hoster:Quintex Alliance Consulting + LOG.warn(f"BC no : in {elt} for {contact} in {fp}") + # return {} + # try going with what we have + break + (key , val,) = elt.split(':', 1) + if key == '': + continue + key = key.rstrip(':') + a[key] = val + a = aCleanContact(a, lAT_REPS, lDOT_REPS, lNO_EMAIL) + return a + +def vwrite_good_contacts(oargs, aGOOD_CONTACTS_DB) -> None: + good_contacts_tmp = oargs.good_contacts + '.tmp' + with open(good_contacts_tmp, 'wt') as oFYaml: + yaml.dump(aGOOD_CONTACTS_DB, oFYaml) + oFYaml.close() + if os.path.exists(oargs.good_contacts): + bak = oargs.good_contacts +'.bak' + os.rename(oargs.good_contacts, bak) + os.rename(good_contacts_tmp, oargs.good_contacts) + LOG.info(f"Wrote {len(list(aGOOD_CONTACTS_DB.keys()))} good contact details to {oargs.good_contacts}") + +def vwrite_bad_contacts(oargs, aBAD_CONTACTS_DB) -> None: + bad_contacts_tmp = oargs.bad_contacts + '.tmp' + with open(bad_contacts_tmp, 'wt') as oFYaml: + yaml.dump(aBAD_CONTACTS_DB, oFYaml) + oFYaml.close() + if os.path.exists(oargs.bad_contacts): + bak = oargs.bad_contacts +'.bak' + os.rename(oargs.bad_contacts, bak) + os.rename(bad_contacts_tmp, oargs.bad_contacts) + LOG.info(f"Wrote {len(list(aBAD_CONTACTS_DB.keys()))} bad contact details to {oargs.bad_contacts}") + +def vwrite_badnodes(oargs, aBAD_NODES, slen, stag) -> None: + if not aBAD_NODES: return + tmp = oargs.bad_nodes +'.tmp' + bak = oargs.bad_nodes +'.bak' + with open(tmp, 'wt') as oFYaml: + yaml.dump(aBAD_NODES, oFYaml) + LOG.info(f"Wrote to {oargs.bad_nodes}") + oFYaml.close() + if os.path.exists(oargs.bad_nodes): + os.rename(oargs.bad_nodes, bak) + os.rename(tmp, oargs.bad_nodes) + +def vwrite_goodnodes(oargs, aGOOD_NODES, ilen) -> None: + tmp = oargs.good_nodes +'.tmp' + bak = oargs.good_nodes +'.bak' + with open(tmp, 'wt') as oFYaml: 
+ yaml.dump(aGOOD_NODES, oFYaml) + LOG.info(f"Wrote good relays to {oargs.good_nodes}") + oFYaml.close() + if os.path.exists(oargs.good_nodes): + os.rename(oargs.good_nodes, bak) + os.rename(tmp, oargs.good_nodes) + +def vwritefinale(oargs, lNOT_IN_RELAYS_DB) -> None: + + if len(lNOT_IN_RELAYS_DB): + LOG.warn(f"{len(lNOT_IN_RELAYS_DB)} relays from stem were not in onionoo.torproject.org") + + LOG.info(f"For info on a FP, use: https://nusenu.github.io/OrNetStats/w/relay/.html") + LOG.info(f"For info on relays, try: https://onionoo.torproject.org/details") + # https://onionoo.torproject.org/details + +def alt_vsetup_logging(theLOG, log_level, logfile='', stream=sys.stderr) -> None: + global LOG + LOG = theLOG + add = True + + logging._defaultFormatter = logging.Formatter(datefmt='%m-%d %H:%M:%S') + logging._defaultFormatter.default_time_format = '%m-%d %H:%M:%S' + logging._defaultFormatter.default_msec_format = '' + + if logfile: + add = logfile.startswith('+') + sub = logfile.startswith('-') + if add or sub: + logfile = logfile[1:] + kwargs['filename'] = logfile + + if coloredlogs: + coloredlogs.DEFAULT_LEVEL_STYLES['info']=dict(color='white',bold=True) + coloredlogs.DEFAULT_LEVEL_STYLES['debug']=dict(color='cyan') + coloredlogs.DEFAULT_LEVEL_STYLES['warn']=dict(color='yellow',bold=True) + coloredlogs.DEFAULT_LEVEL_STYLES['error']=dict(color='red',bold=True) + coloredlogs.DEFAULT_FIELD_STYLES['levelname=']=dict(color='green', bold=True), + # https://pypi.org/project/coloredlogs/ + aKw = dict(level=log_level, + logger=LOG, + stream=stream, + fmt='%(levelname)s %(message)s', + isatty=True, + milliseconds=False, + ) + coloredlogs.install(**aKw) + if logfile: + oHandler = logging.FileHandler(logfile) + LOG.addHandler(oHandler) + LOG.info(f"Setting coloured log_level to {log_level} {stream}") + else: + kwargs = dict(level=log_level, + force=True, + format='%(levelname)s %(message)s') + logging.basicConfig(**kwargs) + if add and logfile: + oHandler = 
logging.StreamHandler(stream) + LOG.addHandler(oHandler) + LOG.info(f"SSetting log_level to {log_level!s}") + +def vsetup_logging(theLOG, log_level, logfile='', stream=sys.stdout) -> None: + global LOG + LOG = theLOG + add = True + # stem fucks up logging + # from stem.util import log + logging.getLogger('stem').setLevel(30) + + logging._defaultFormatter = logging.Formatter(datefmt='%m-%d %H:%M:%S') + logging._defaultFormatter.default_time_format = '%m-%d %H:%M:%S' + logging._defaultFormatter.default_msec_format = '' + + if logfile: + add = logfile.startswith('+') + sub = logfile.startswith('-') + if add or sub: + logfile = logfile[1:] + kwargs['filename'] = logfile + + if coloredlogs: + # https://pypi.org/project/coloredlogs/ + coloredlogs.install( + level=log_level, + logger=LOG, + stream=stream, + fmt='%(levelname)s %(message)s', + isatty=True, # required! + milliseconds=False, + ) + if logfile: + oHandler = logging.FileHandler(logfile) + LOG.addHandler(oHandler) + LOG.info(f"Setting coloured log_level to {log_level} {stream}") + else: + kwargs = dict(level=log_level, + force=True, + format='%(levelname)s %(message)s') + + logging.basicConfig(**kwargs) + if add and logfile: + oHandler = logging.StreamHandler(stream) + LOG.addHandler(oHandler) + LOG.info(f"Setting log_level to {log_level}") + +def oMainArgparser(_=None, __prolog__='') -> Namespace: + + try: + from OpenSSL import SSL + lCAfs = SSL._CERTIFICATE_FILE_LOCATIONS + except: + lCAfs = [] + + CAfs = [] + for elt in lCAfs: + if os.path.exists(elt): + CAfs.append(elt) + if not CAfs: + CAfs = [''] + + parser = argparse.ArgumentParser(add_help=True, + epilog=__prolog__) + + # important settings + parser.add_argument('--bad_on', type=str, default='Empty,NoEmail,NotGood', + help="comma sep list of conditions - Empty,NoEmail,NotGood") + parser.add_argument('--points_timeout', type=int, default=0, + help="Timeout for getting introduction points - must be long >120sec. 
0 means disabled looking for IPs") + parser.add_argument('--saved_only', default=False, + action='store_true', + help="Just use the info in the last *.yaml files without querying the Tor controller") + parser.add_argument('--hs_dir', type=str, + default='/var/lib/tor', + help="Parse the files name hostname below this dir to find Hidden Services to whitelist") + parser.add_argument('--notice_log', type=str, + default='', + help="Parse the notice log for relays and services") + parser.add_argument('--strict_nodes', type=str, default=0, + choices=['0', '1'], + help="Set StrictNodes: 1 is less anonymous but more secure, although some onion sites may be unreachable") + + # proxy + parser.add_argument('--proxy_host', '--proxy-host', type=str, + default='127.0.0.1', + help='proxy host') + parser.add_argument('--proxy_port', '--proxy-port', default=9050, type=int, + help='proxy socks port') + parser.add_argument('--proxy_ctl', '--proxy-ctl', + default='/run/tor/control' if os.path.exists('/run/tor/control') else '9051', + type=str, + help='control socket - or port') + parser.add_argument('--torctl_pass', + default=os.environ.get('TOR_CONTROLLER_PASSWORD', ''), + type=str, + help='password for the tor controller') + + parser.add_argument('--torrc', + default='/etc/tor/torrc-defaults', + type=str, + help='torrc to check for suggestions') + + # output + parser.add_argument('--torrc_output', type=str, + default=os.path.join(ETC_DIR, 'torrc.new'), + help="Write the torrc configuration to a file") + parser.add_argument('--good_nodes', type=str, + default=os.path.join(ETC_DIR, 'goodnodes.yaml'), + help="Yaml file of good info that should not be excluded") + parser.add_argument('--bad_nodes', type=str, + default=os.path.join(ETC_DIR, 'badnodes.yaml'), + help="Yaml file of bad nodes that should also be excluded") + parser.add_argument('--bad_contacts', type=str, + default=os.path.join(ETC_DIR, 'badcontacts.yaml'), + help="Yaml file of bad contacts that bad FPs are using") + 
parser.add_argument('--relays_output', type=str, + default=os.path.join(ETC_DIR, 'relays.json'), + help="Write the download relays in json to a file") + parser.add_argument('--wellknown_output', type=str, + default=os.path.join(ETC_DIR, 'https'), + help="Write the well-known files to a directory") + parser.add_argument('--good_contacts', type=str, default=os.path.join(ETC_DIR, 'goodcontacts.yaml'), + help="Write the proof data of the included nodes to a YAML file") + + # timeouts + parser.add_argument('--timeout', default=60, type=int, + help='proxy download connect timeout') + parser.add_argument('--wait_boot', type=int, default=120, + help="Seconds to wait for Tor to booststrap") + parser.add_argument('--https_cafile', type=str, + help="Certificate Authority file (in PEM)", + default=CAfs[0]) + + parser.add_argument('--log_level', type=int, default=20, + help="10=debug 20=info 30=warn 40=error") + parser.add_argument('--bad_sections', type=str, + default='BadExit', + help="sections of the badnodes.yaml to use, in addition to BadExit, comma separated") + parser.add_argument('--white_onions', type=str, + default='', + help="comma sep. 
list of onions to whitelist their introduction points - BROKEN") + + return parser diff --git a/src/exclude_badExits/support_onions.py b/src/exclude_badExits/support_onions.py index ba1d182..52acaf3 100644 --- a/src/exclude_badExits/support_onions.py +++ b/src/exclude_badExits/support_onions.py @@ -236,7 +236,7 @@ def oMakeController(sSock='', port=9051): return controller oSTEM_CONTROLER = None -def oGetStemController(log_level=10, sock_or_pair='/run/tor/control'): +def oGetStemController(log_level=10, sock_or_pair='/run/tor/control', password=None): global oSTEM_CONTROLER if oSTEM_CONTROLER: return oSTEM_CONTROLER @@ -258,8 +258,28 @@ def oGetStemController(log_level=10, sock_or_pair='/run/tor/control'): except: port = 9051 LOG.info(f"controller from port {port}") controller = Controller.from_port(port=port) + if password is None: + password = os.environ.get('TOR_CONTROLLER_PASSWORD', '') + print(f"DBUG: trying TOR_CONTROLLER_PASSWORD {len(password)}") + else: +# print(f"DBUG: using a password {len(password)}") + pass + if not password: +# print("DBUG: trying without a password") + try: + controller.authenticate() + oSTEM_CONTROLER = controller + return controller + except MissingPassword as e: + pass # drop throuhgh + except Exception as e: + print(f"WARN: error trying to authenticate {e}") + #? 
return None + sys.stdout.flush() + password = getpass.unix_getpass(prompt='Controller Password: ', stream=sys.stderr) + try: - controller.authenticate() + controller.authenticate(password) except (Exception, MissingPassword): sys.stdout.flush() p = getpass.unix_getpass(prompt='Controller Password: ', stream=sys.stderr) @@ -279,22 +299,6 @@ def bAreWeConnected(): i += 1 return i > 0 -def sMapaddressResolv(target, iPort=9051, log_level=10): - if not stem: - LOG.warn('please install the stem Python package') - return '' - - try: - controller = oGetStemController(log_level=log_level) - - map_dict = {"0.0.0.0": target} - map_ret = controller.map_address(map_dict) - - return map_ret - except Exception as e: - LOG.exception(e) - return '' - def vwait_for_controller(controller, wait_boot=10): if bAreWeConnected() is False: raise SystemExit("we are not connected") @@ -313,7 +317,8 @@ def bin_to_hex(raw_id, length=None): res = ''.join('{:02x}'.format(raw_id[i]) for i in range(length)) return res.upper() -def lIntroductionPoints(controller=None, lOnions=[], itimeout=120, log_level=10): +def lIntroductionPoints(controller=None, lOnions=[], itimeout=120, log_level=10, + password=None): """now working !!! stem 1.8.x timeout must be huge >120 'Provides the descriptor for a hidden service. The **address** is the '.onion' address of the hidden service ' @@ -340,7 +345,7 @@ def lIntroductionPoints(controller=None, lOnions=[], itimeout=120, log_level=10) if type(lOnions) not in [set, tuple, list]: lOnions = list(lOnions) if controller is None: - controller = oGetStemController(log_level=log_level) + controller = oGetStemController(log_level=log_level, password=password) l = [] for elt in lOnions: LOG.info(f"controller.get_hidden_service_descriptor {elt}") @@ -486,6 +491,23 @@ def getaddrinfo(sHost, sPort): return None return lPair +# unused? 
+def sMapaddressResolv(target, iPort=9051, log_level=10, password=None): + if not stem: + LOG.warn('please install the stem Python package') + return '' + + try: + controller = oGetStemController(log_level=log_level, password=password) + + map_dict = {"0.0.0.0": target} + map_ret = controller.map_address(map_dict) + + return map_ret + except Exception as e: + LOG.exception(e) + return '' + def icheck_torrc(sFile, oArgs): l = open(sFile, 'rt').readlines() a = {} @@ -528,7 +550,7 @@ def icheck_torrc(sFile, oArgs): print('VirtualAddrNetworkIPv4 172.16.0.0/12') return 0 -def lExitExcluder(oArgs, iPort=9051, log_level=10): +def lExitExcluder(oArgs, iPort=9051, log_level=10, password=None): """ https://raw.githubusercontent.com/nusenu/noContactInfo_Exit_Excluder/main/exclude_noContactInfo_Exits.py """ @@ -538,7 +560,7 @@ def lExitExcluder(oArgs, iPort=9051, log_level=10): LOG.debug('lExcludeExitNodes') try: - controller = oGetStemController(log_level=log_level) + controller = oGetStemController(log_level=log_level, password=password) # generator relays = controller.get_server_descriptors() except Exception as e: @@ -568,5 +590,5 @@ def lExitExcluder(oArgs, iPort=9051, log_level=10): if __name__ == '__main__': target = 'duckduckgogg42xjoc72x3sjasowoarfbgcmvfimaftt6twagswzczad' - controller = oGetStemController(log_level=10) + controller = oGetStemController(log_level=10, password=None) lIntroductionPoints(controller, [target], itimeout=120) diff --git a/src/exclude_badExits/torcontactinfo.py b/src/exclude_badExits/torcontactinfo.py new file mode 100644 index 0000000..e098543 --- /dev/null +++ b/src/exclude_badExits/torcontactinfo.py @@ -0,0 +1,567 @@ +#!/usr/bin/env python3 +""" +Tor Contact Info Parser - A tool/Python Class for parsing Tor ContactInfo Information Sharing v2 specification contacts +Written by Eran Sandler (https://twitter.com/erans) (C) 2018 + +Turned into a proper command-line tool with sub-commands and flags by @Someguy123 at Privex Inc. 
(C) 2021 +(https://www.privex.io) (https://github.com/PrivexInc) + +This is a parser for the Tor ContactInfo Information Sharing Specification v2 (https://nusenu.github.io/ContactInfo-Information-Sharing-Specification/). + +The parser can parse the ContactInfo field of Tor relays based on the specification. + +Official Repo: https://github.com/erans/torcontactinfoparser +Privex Fork: https://github.com/Privex/torcontactinfoparser + +Released under the MIT License. +""" +import argparse +import os +import re +import sys +import json +import requests +import textwrap +try: + from rich import print as rprint + HAS_RICH = True +except ImportError: + def rprint(value='', *args, **kwargs): + if value not in [None, False, True] and isinstance(value, (dict, list, set, tuple)): + value = json.dumps(value, indent=4) + return print(value, *args, **kwargs) + # rprint = print + HAS_RICH = False + +import logging +import warnings +warnings.filterwarnings('ignore') + +from exclude_utils import vsetup_logging + +class TorContactInfoParser(object): + email_regex = "^[a-zA-Z0-9.!#$%&’*+/=?^_`{|}~-]+@[a-zA-Z0-9-]+(?:\\.[a-zA-Z0-9-]+)*$" + + def _parse_string_value(self, value, min_length, max_length, valid_chars, raise_exception=False, field_name=None, deobfuscate_email=False): + value_length = len(value) + if value_length < min_length: + if raise_exception: + raise ValueError("value of field '{0}' is too short".format(field_name)) + return None + + if value_length > max_length: + if raise_exception: + raise ValueError("value of field '{0}' is too long".format(field_name)) + return None + + if valid_chars != "*": + m = re.search(valid_chars, value) + if not m: + if raise_exception: + raise ValueError("value of field '{0}' doesn't match valid chars restrictions".format(field_name)) + else: + return None + + return value + + def _parse_email_value(self, value, field_name, raise_exception, deobfuscate_email): + if value: + v = value.replace("[]", "@") + if re.search(self.email_regex, 
v): + if not deobfuscate_email: + return v.replace("@", "[]") + + return v + + return None + + _supported_fields_parsers = { + "email" : { + "fn": _parse_email_value, + "args": {} + }, + "url" : { + "fn" : _parse_string_value, + "args" : { + "min_length" : 4, + "max_length" : 399, + "valid_chars" : "[_%/:a-zA-Z0-9.-]+" + } + }, + "proof" : { + "fn" : _parse_string_value, + "args" : { + "min_length" : 7, + "max_length" : 7, + "valid_chars" : "[adinrsu-]+" + } + }, + "ciissversion" : { + "fn" : _parse_string_value, + "args" : { + "min_length" : 1, + "max_length" : 1, + "valid_chars" : "[12]+" + } + }, + "pgp" : { + "fn" : _parse_string_value, + "args" : { + "min_length" : 40, + "max_length" : 40, + "valid_chars" : "[a-zA-Z0-9]+" + } + }, + "abuse" : { + "fn": _parse_email_value, + "args": {} + }, + "keybase" : { + "fn" : _parse_string_value, + "args" : { + "min_length" : 0, + "max_length" : 50, + "valid_chars" : "[a-zA-Z0-9]+" + } + }, + "twitter" : { + "fn" : _parse_string_value, + "args" : { + "min_length" : 1, + "max_length" : 15, + "valid_chars" : "[a-zA-Z0-9_]+" + } + }, + "mastodon" : { + "fn" : _parse_string_value, + "args" : { + "min_length" : 0, + "max_length" : 254, + "valid_chars" : "*" + } + }, + "matrix" : { + "fn" : _parse_string_value, + "args" : { + "min_length" : 0, + "max_length" : 254, + "valid_chars" : "*" + } + }, + "xmpp" : { + "fn": _parse_email_value, + "args": {} + }, + "otr3" : { + "fn" : _parse_string_value, + "args" : { + "min_length" : 40, + "max_length" : 40, + "valid_chars" : "[a-z0-9]+" + } + }, + "hoster" : { + "fn" : _parse_string_value, + "args" : { + "min_length" : 0, + "max_length" : 254, + "valid_chars" : "[a-zA-Z0-9.-]+" + } + }, + "cost" : { + "fn" : _parse_string_value, + "args" : { + "min_length" : 0, + "max_length" : 13, + "valid_chars" : "[A-Z0-9.]+" + } + }, + "uplinkbw" : { + "fn" : _parse_string_value, + "args" : { + "min_length" : 0, + "max_length" : 7, + "valid_chars" : "[0-9]+" + } + }, + "trafficacct" : { + "fn" : 
_parse_string_value, + "args" : { + "min_length" : 0, + "max_length" : 9, + "valid_chars" : "[unmetrd0-9]+" + } + }, + "memory" : { + "fn" : _parse_string_value, + "args" : { + "min_length" : 0, + "max_length" : 10, + "valid_chars" : "[0-9]+" + } + }, + "cpu" : { + "fn" : _parse_string_value, + "args" : { + "min_length" : 0, + "max_length" : 50, + "valid_chars" : "[a-zA-Z0-9_-]+" + } + }, + "virtualization" : { + "fn" : _parse_string_value, + "args" : { + "min_length" : 0, + "max_length" : 15, + "valid_chars" : "[a-z-]+" + } + }, + "donationurl" : { + "fn" : _parse_string_value, + "args" : { + "min_length" : 0, + "max_length" : 254, + "valid_chars" : "*" + } + }, + "btc" : { + "fn" : _parse_string_value, + "args" : { + "min_length" : 26, + "max_length" : 99, + "valid_chars" : "[a-zA-Z0-9]+" + } + }, + "zec" : { + "fn" : _parse_string_value, + "args" : { + "min_length" : 0, + "max_length" : 95, + "valid_chars" : "[a-zA-Z0-9]+" + } + }, + "xmr" : { + "fn" : _parse_string_value, + "args" : { + "min_length" : 0, + "max_length" : 99, + "valid_chars" : "[a-zA-Z0-9]+" + } + }, + "offlinemasterkey" : { + "fn" : _parse_string_value, + "args" : { + "min_length" : 1, + "max_length" : 1, + "valid_chars" : "[yn]" + } + }, + "signingkeylifetime" : { + "fn" : _parse_string_value, + "args" : { + "min_length" : 0, + "max_length" : 6, + "valid_chars" : "[0-9]+" + } + }, + "sandbox" : { + "fn" : _parse_string_value, + "args" : { + "min_length" : 1, + "max_length" : 2, + "valid_chars" : "[yn]" + } + }, + "os" : { + "fn" : _parse_string_value, + "args" : { + "min_length" : 0, + "max_length" : 20, + "valid_chars" : "[A-Za-z0-9/.]+" + } + }, + "tls" : { + "fn" : _parse_string_value, + "args" : { + "min_length" : 0, + "max_length" : 14, + "valid_chars" : "[a-z]+" + } + }, + "aesni" : { + "fn" : _parse_string_value, + "args" : { + "min_length" : 1, + "max_length" : 1, + "valid_chars" : "[yn]" + } + }, + "autoupdate" : { + "fn" : _parse_string_value, + "args" : { + "min_length" : 1, + 
"max_length" : 1, + "valid_chars" : "[yn]" + } + }, + "confmgmt" : { + "fn" : _parse_string_value, + "args" : { + "min_length" : 1, + "max_length" : 15, + "valid_chars" : "[a-zA-Z-]" + } + }, + "dnslocation" : { + "fn" : _parse_string_value, + "args" : { + "min_length" : 5, + "max_length" : 100, + "valid_chars" : "[a-z,]" + } + }, + "dnsqname" : { + "fn" : _parse_string_value, + "args" : { + "min_length" : 1, + "max_length" : 1, + "valid_chars" : "[yn]" + } + }, + "dnssec" : { + "fn" : _parse_string_value, + "args" : { + "min_length" : 1, + "max_length" : 1, + "valid_chars" : "[yn]" + } + }, + "dnslocalrootzone" : { + "fn" : _parse_string_value, + "args" : { + "min_length" : 1, + "max_length" : 1, + "valid_chars" : "[yn]" + } + } + } + + def __init__(self): + pass + + def parse(self, value: str, raise_exception_on_invalid_value=False, deobfuscate_email=True) -> dict: + # the ciissversion field is mandatory + if not 'ciissversion:' in value: + return None + + result = {} + parts = value.split(" ") + for p in parts: + field_parts = p.split(":", 1) + if len(field_parts) <= 1: + continue + name, data = field_parts + if name in self._supported_fields_parsers: + field_parser = self._supported_fields_parsers[name] + if field_parser is None: + result[name] = data + continue + if callable(field_parser): + value = field_parser(self, data) + else: + field_parser["args"]["field_name"] = name + field_parser["args"]["value"] = data + field_parser["args"]["raise_exception"] = raise_exception_on_invalid_value + field_parser["args"]["deobfuscate_email"] = deobfuscate_email + + value = field_parser["fn"](self, **field_parser["args"]) + + if not result.get(name, None): + result[name] = value + + return result + +def cmd_parse(opts: argparse.Namespace): + """ + ArgParser function for parsing a single ContactInfo string, and outputting it as JSON (or python-style dict's) + """ + + if opts.contact is None or len(opts.contact) == 0 or opts.contact[0] == '-': + contact = sys.stdin.read() 
+ else: + contact = ' '.join(opts.contact).strip() + + tparser = TorContactInfoParser() + res = tparser.parse(contact) + if not opts.pretty: + return print(json.dumps(res)) + if opts.json: + res = json.dumps(res, indent=4) if opts.pretty else json.dumps(res) + # if not HAS_RICH: res = json.dumps(res, indent=4) + rprint(res) + +def cmd_scan(opts: argparse.Namespace, adata=None) -> int: + """ + ArgParser function for scanning all ContactInfo strings from ``https://onionoo.torproject.org/details`` , + and outputting each one as a Python-style Dict, or JSON. + """ + parser = TorContactInfoParser() + surl = "https://onionoo.torproject.org/details" + + if not adata: + LOG.info(f"Getting relays from {surl}") + jdata = requests.get(surl) + try: + adata = jdata.json() + except Exception as e: + # simplejson.errors.JSONDecodeError + LOG.exception(f"JSON error {e}") + return + elts = adata["relays"] + else: + elts = json.loads(adata)['relays'] + + if not elts: + LOG.warn(f"NO relays - are we connected?") + return + LOG.info(f"{len(elts)} relays") + for relay in elts: + if 'fingerprint' not in relay.keys(): + LOG.warn(f"fingerprint not in relay for {relay}") + continue + fp = relay['fingerprint'] + verified_host_names = relay.get('verified_host_names', []) + contact = relay.get("contact", None) + if not contact: + LOG.warn(f"No contact for {fp} {verified_host_names}") + continue + if 'ciissversion' not in contact: + LOG.debug(f"No ciissversion in contact in {fp}") + continue + LOG.debug(f"parsing {fp}") + result = parser.parse(contact, False) + if not result: + LOG.warn(f"No result for {contact} in {fp}") + continue + if len(result) > 0: + if opts.json: result = json.dumps(result, indent=4) if opts.pretty else json.dumps(result) + if opts.pretty: + rprint(result) + else: + print(result) + return 0 + +ETC_DIR = '/etc/tor/yaml' +def oparser(): + cparser = argparse.ArgumentParser( + formatter_class=argparse.RawDescriptionHelpFormatter, + description=textwrap.dedent(f""" + 
ETC_DIR = '/etc/tor/yaml'


def oparser() -> argparse.ArgumentParser:
    """
    Build the command line parser with its ``parse`` and ``scan`` sub-commands.

    Without a sub-command the defaults of ``scan`` apply. Every resulting
    namespace carries ``func``, ``json``, ``pretty`` and ``relays_output``, so
    callers can read those attributes whichever sub-command was given.

    :return: the configured :class:`argparse.ArgumentParser`.
    """
    relays_default = os.path.join(ETC_DIR, 'relays.json')
    relays_help = "Write the download relays in json to a file"

    # NOTE: the sample output below spells the trailing newline as '\\n'. A real
    # newline inside the text would put a line at column 0 and stop
    # textwrap.dedent() from removing the common indentation.
    cparser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent(f"""
            Examples:

                # 'scan' is the original behaviour of this script. It iterates over the data
                # from https://onionoo.torproject.org/details , parses each contact, and prints it as Python dict-style JSON.
                {sys.argv[0]} scan

                # Same as previous. With no arguments, it's equivalent to running 'scan'.
                {sys.argv[0]}

                # If you pass '-p' after scan, it will enable pretty printing. For best pretty printing,
                # make sure you have 'rich' installed from pypi.
                {sys.argv[0]} scan -p

                # If you need real JSON with double quotes, rather than Python dict-style JSON, you can
                # use the '-j' flag to enable "real JSON" mode (you can combine with '-p' if you want pretty printed real json)
                {sys.argv[0]} scan -j

                # Using 'parse', you can parse an arbitrary ContactInfo string, and it will output the parsed result
                # with pretty printing by default.

                {sys.argv[0]} parse "contact Privex Inc. email:noc[]privex.io url:https://www.privex.io " \\
                "proof:uri-rsa pgp:288DD1632F6E8951 keybase:privexinc twitter:PrivexInc hoster:www.privex.io " \\
                "uplinkbw:500 memory:4096 virtualization:kvm btc:bc1qpst9uscvd8rpjjhzz9rau3trylh6e0wh76qrlhw3q9nj89ua728sn3t6a2 " \\
                "xmr:89tukP3wfpH4FZAmC1D2GfArWwfPTz8Ap46NZc54Vyhy9YxEUYoFQ7HGQ74LrCMQTD3zxvwM1ewmGjH9WVmeffwR72m1Pps"

                {{
                    'email': 'noc@privex.io',
                    'url': 'https://www.privex.io',
                    'proof': 'uri-rsa',
                    'pgp': None,
                    'keybase': 'privexinc',
                    'twitter': 'PrivexInc',
                    'hoster': 'www.privex.io',
                    'uplinkbw': '500',
                    'memory': '4096',
                    'virtualization': 'kvm',
                    'btc': 'bc1qpst9uscvd8rpjjhzz9rau3trylh6e0wh76qrlhw3q9nj89ua728sn3t6a2',
                    'xmr': '89tukP3wfpH4FZAmC1D2GfArWwfPTz8Ap46NZc54Vyhy9YxEUYoFQ7HGQ74LrCMQTD3zxvwM1ewmGjH9WVmeffwR72m1Pps'
                }}

                # You can also pipe a contact string into 'parse', and it will work just the same.

                echo "Privex Inc. email:noc[]privex.io url:https://www.privex.io proof:uri-rsa pgp:288DD1632F6E8951 keybase:privexinc twitter:PrivexInc" | {sys.argv[0]} parse
                {{'email': 'noc@privex.io', 'url': 'https://www.privex.io', 'proof': 'uri-rsa', 'pgp': None, 'keybase': 'privexinc', 'twitter': 'PrivexInc\\n'}}

                # If you need real JSON outputted, rather than Python dict-style output, you can pass -j to either 'parse' or 'scan'

                {sys.argv[0]} parse -j "Privex Inc. email:noc[]privex.io url:https://www.privex.io proof:uri-rsa pgp:288DD1632F6E8951 keybase:privexinc twitter:PrivexInc"
                {{
                    "email": "noc@privex.io",
                    "url": "https://www.privex.io",
                    "proof": "uri-rsa",
                    "pgp": null,
                    "keybase": "privexinc",
                    "twitter": "PrivexInc"
                }}

                # You can use '-np' to disable pretty printing for 'parse' - you can combine it with '-j' to get flat, plain JSON.

                {sys.argv[0]} parse -np -j "Privex Inc. email:noc[]privex.io url:https://www.privex.io proof:uri-rsa pgp:288DD1632F6E8951 keybase:privexinc twitter:PrivexInc"
                {{"email": "noc@privex.io", "url": "https://www.privex.io", "proof": "uri-rsa", "pgp": null, "keybase": "privexinc", "twitter": "PrivexInc"}}
        """))
    # Top-level defaults make a bare invocation behave like 'scan' and guarantee
    # that 'relays_output' exists on the namespace for every invocation.
    cparser.set_defaults(func=cmd_scan, json=False, pretty=False,
                         relays_output=relays_default)
    subparse = cparser.add_subparsers()
    subparse.required = False

    sp_parse = subparse.add_parser('parse',
                                   help="Parse a single contact string, either as an argument, or piped into stdin")
    sp_parse.add_argument('contact', nargs='*')
    # 'store_false' needs a True default, otherwise the flag can never change
    # anything and 'parse' would never pretty print.
    sp_parse.add_argument('-np', '--no-pretty',
                          action='store_false', default=True, dest='pretty',
                          help="Disable pretty printing JSON")
    sp_parse.add_argument('--relays_output', type=str,
                          dest='relays_output',
                          default=relays_default,
                          help=relays_help)
    sp_parse.add_argument('-j', '--json', action='store_true',
                          default=False, dest='json',
                          help="Output real JSON, not Python dict format.")
    sp_parse.set_defaults(func=cmd_parse)

    sp_scan = subparse.add_parser('scan', help="Parse all contacts from https://onionoo.torproject.org/details")
    sp_scan.add_argument('-p', action='store_true', default=False, dest='pretty',
                         help="Enable pretty printing JSON")
    sp_scan.add_argument('--relays_output', type=str,
                         dest='relays_output',
                         default=relays_default,
                         help=relays_help)
    sp_scan.add_argument('-j', '--json', action='store_true', default=False, dest='json',
                         help="Output real JSON, not Python dict format.")
    # 'scan' inherits func=cmd_scan from the top-level defaults set above.

    return cparser
if __name__ == "__main__":
    # Any non-empty DEBUG environment variable switches logging to DEBUG (10);
    # otherwise INFO (20) is used.
    log_level = logging.DEBUG if os.environ.get('DEBUG', '') else logging.INFO
    LOG = logging.getLogger()
    vsetup_logging(LOG, log_level)
    try:
        cparser = oparser()
        opts = cparser.parse_args(sys.argv[1:])
        data = None
        # 'relays_output' may be missing from the namespace when the chosen
        # sub-command does not define it, so it must not be read unconditionally.
        relays_file = getattr(opts, 'relays_output', None)
        if relays_file and os.path.exists(relays_file):
            with open(relays_file, 'rt') as fd:
                data = fd.read()
        # NOTE(review): the scan is run whichever sub-command was selected;
        # opts.func is never consulted here - confirm that this is intended.
        i = cmd_scan(opts, data)
    except KeyboardInterrupt:
        i = 0
    except Exception as e:
        # Exception already covers requests.exceptions.ProxyError.
        LOG.error(f"{e}")
        # Report the failure to the calling shell instead of exiting with 0.
        i = 1

    sys.exit(i)