This commit is contained in:
emdee 2022-11-08 14:15:05 +00:00
parent 2be5e6e66e
commit 0198994486
3 changed files with 588 additions and 233 deletions

View file

@ -1,6 +1,3 @@
# https://github.com/nusenu/noContactInfo_Exit_Excluder
# https://github.com/TheSmashy/TorExitRelayExclude
This extends nusenu's basic idea of using the stem library to
dynamically exclude nodes that are likely to be bad by putting them
on the ExcludeNodes or ExcludeExitNodes setting of a running Tor.
@ -11,21 +8,23 @@ The basic cut is to exclude Exit nodes that do not have a contact.
That can be extended to nodes that do not have an email in the contact etc.
But there's a problem, and your Tor notice.log will tell you about it:
you could exclude the nodes needed to access hidden services etc.
So we need to add to the process the concept of a whitelist.
In addition, we may have our own blacklist of nodes we want to exclude.
you could exclude the nodes needed to access hidden services or
directories. So we need to add to the process the concept of a whitelist.
In addition, we may have our own blacklist of nodes we want to exclude,
or use these lists for other applications like selektor.
So we make two files that are structured in YAML:
```
/etc/tor/torrc-goodnodes.yaml
Nodes:
/etc/tor/yaml/torrc-goodnodes.yaml
GoodNodes:
Relays:
IntroductionPoints:
- $NODEFINGERPRINT
- NODEFINGERPRINT
...
By default all sections of the goodnodes.yaml are used as a whitelist.
/etc/tor/torrc-badnodes.yaml
Nodes:
/etc/tor/yaml/torrc-badnodes.yaml
BadNodes:
ExcludeExitNodes:
BadExit:
# $0000000000000000000000000000000000000007
@ -50,9 +49,25 @@ currently broken in stem 1.8.0: see:
* https://github.com/torproject/stem/issues/96
* https://gitlab.torproject.org/legacy/trac/-/issues/25417
```--bad_output``` will write the torrc configuration to a file.
```--torrc_output``` will write the torrc ExcludeNodes configuration to a file.
```--details_output``` will write the lookup URLs of the excluded nodes to a file
Now for the final part: we lookup the Contact info of every server
that is currently in our Tor, and check it for its existence.
If it fails to provide the well-known URL, we assume it's a bogus
relay and add it to a list of nodes that goes on ExcludeNodes -
not just exclude Exit.
If the Contact info is good we add the list of fingerprints to add
to ExitNodes, a whitelist of relays to use as exits.
```--proof_output``` will write the contact info as a ciiss dictionary
to a YAML file. If the proof is uri-rsa, the well-known file of fingerprints
is downloaded and the fingerprints are added on a 'fps' field we create
of that fingerprint's entry of the YAML dictionary. This file is read at the
beginning of the program to start with a trust database, and only new
contact info from new relays are added to the dictionary.
You can expect it to take an hour or two the first time this is run:
>700 domains.
For usage, do ```python3 exclude_badExits.py --help```

View file

@ -12,8 +12,7 @@ on the ExcludeNodes or ExcludeExitNodes setting of a running Tor.
The basic cut is to exclude Exit nodes that do not have a contact.
That can be extended to nodes that do not have an email in the contact etc.
"""
"""
But there's a problem, and your Tor notice.log will tell you about it:
"""But there's a problem, and your Tor notice.log will tell you about it:
you could exclude the nodes needed to access hidden services or
directories. So we need to add to the process the concept of a whitelist.
In addition, we may have our own blacklist of nodes we want to exclude,
@ -21,15 +20,16 @@ or use these lists for other applications like selektor.
So we make two files that are structured in YAML:
```
/etc/tor/torrc-goodnodes.yaml
Nodes:
/etc/tor/yaml/torrc-goodnodes.yaml
GoodNodes:
Relays:
IntroductionPoints:
- $NODEFINGERPRINT
- NODEFINGERPRINT
...
By default all sections of the goodnodes.yaml are used as a whitelist.
/etc/tor/torrc-badnodes.yaml
Nodes:
/etc/tor/yaml/torrc-badnodes.yaml
BadNodes:
ExcludeExitNodes:
BadExit:
# $0000000000000000000000000000000000000007
@ -54,18 +54,26 @@ currently broken in stem 1.8.0: see:
* https://github.com/torproject/stem/issues/96
* https://gitlab.torproject.org/legacy/trac/-/issues/25417
```--bad_output``` will write the torrc ExcludeNodes configuration to a file.
```--torrc_output``` will write the torrc ExcludeNodes configuration to a file.
```--details_output``` will write the lookup URLs of the excluded nodes to a file
Now for the final part: we lookup the Contact info of every server
that is currently in our Tor, and check it for its existence.
If it fails to provide the well-known URL, we assume it's a bogus
relay and add it to a list of nodes that goes on ExcludeNodes -
not just exclude Exit.
If the Contact info is good we add the list of fingerprints to add
to ExitNodes, a whitelist of relays to use as exits.
```--proof_output``` will write the contact info as a ciiss dictionary
to a YAML file. If the proof is uri-rsa, the well-known file of fingerprints
is downloaded and the fingerprints are added to the on the 'fps' field
of that fingerprint entry of the YAML dictionary. This file is read at the
is downloaded and the fingerprints are added on a 'fps' field we create
of that fingerprint's entry of the YAML dictionary. This file is read at the
beginning of the program to start with a trust database, and only new
relays are added to the dictionary. The 'fps' field is emptied if the
host fails to provide the well-known file. You can expect it to take
an hour or two the first time this is run: >700 domains.
contact info from new relays are added to the dictionary.
You can expect it to take an hour or two the first time this is run:
>700 domains.
For usage, do ```python3 exclude_badExits.py --help```
@ -74,12 +82,12 @@ For usage, do ```python3 exclude_badExits.py --help`
import sys
import os
import getpass
import re
import time
import argparse
from io import StringIO
from stem import InvalidRequest
from stem.control import Controller
from stem.connection import IncorrectPassword
from stem.util.tor_tools import is_valid_fingerprint
@ -87,25 +95,35 @@ try:
import yaml
except:
yaml = None
try:
from unbound import ub_ctx,RR_TYPE_TXT,RR_CLASS_IN
except:
ub_ctx = RR_TYPE_TXT = RR_CLASS_IN = None
try:
import coloredlogs
if 'COLOREDLOGS_LEVEL_STYLES' not in os.environ:
os.environ['COLOREDLOGS_LEVEL_STYLES'] = 'spam=22;debug=28;verbose=34;notice=220;warning=202;success=118,bold;error=124;critical=background=red'
# https://pypi.org/project/coloredlogs/
import coloredlogs
except ImportError as e:
coloredlogs = False
from trustor_poc import lDownloadUrlFps
from trustor_poc import lDownloadUrlFps, idns_validate
global LOG
import logging
import warnings
warnings.filterwarnings('ignore')
LOG = logging.getLogger()
aTRUST_DB = {}
sDETAILS_URL = "https://metrics.torproject.org/rs.html#details/"
# You can call this while bootstrapping
sEXCLUDE_EXIT_KEY = 'ExcludeNodes'
sINCLUDE_EXIT_KEY = 'ExitNodes'
sINCLUDE_GUARD_KEY = 'EntryNodes'
def oMakeController(sSock='', port=9051):
import getpass
if sSock and os.path.exists(sSock):
controller = Controller.from_socket_file(path=sSock)
else:
@ -115,36 +133,84 @@ def oMakeController(sSock='', port=9051):
controller.authenticate(p)
return controller
def lYamlBadNodes(sFile='/etc/tor/torrc-badnodes.yaml',
section='ExcludeExitNodes',
lWanted=['Hetzner','BadExit']):
oBAD_NODES = {}
oBAD_ROOT = 'BadNodes'
def lYamlBadNodes(sFile,
section=sEXCLUDE_EXIT_KEY,
lWanted=['BadExit']):
global oBAD_NODES
root = 'ExcludeNodes'
l = []
if not yaml: return l
if os.path.exists(sFile):
with open(sFile, 'rt') as oFd:
o = yaml.safe_load(oFd)
for elt in o[root][section].keys():
if lWanted and elt not in lWanted: continue
l += o[root][section][elt]
# yq '.ExcludeNodes.Hetzner' < /etc/tor/torrc-badnodes.yaml |sed -e 's/^[[]/ExcludeNodesHetzner = [/'
# yq '.ExcludeNodes.Hetzner|.[]' < /etc/tor/torrc-badnodes.yaml
# yq '.ExcludeNodes.BadExit|.[]' < /etc/tor/torrc-badnodes.yaml
oBAD_NODES = o
# BROKEN
# for elt in o[oBAD_ROOT][root][section].keys():
# if lWanted and elt not in lWanted: continue
# # l += o[oBAD_ROOT][root][section][elt]
return l
def icheck_torrc(sFile, oArgs):
    """Read a torrc file and suggest changes that suit this script.

    sFile: path of the torrc file to inspect.
    oArgs: parsed command-line arguments; only bad_nodes and good_nodes
        are read, for the messages about moving ExcludeNodes/GuardNodes
        into the YAML files.

    Returns 0 always. Each suggestion is logged and the line to add (or
    the place to move a setting to) is printed on stdout.
    """
    a = {}
    with open(sFile, 'rt') as oFd:
        for elt in oFd:
            elt = elt.strip()
            # blank lines and comments carry no option
            if not elt or elt.startswith('#'):
                continue
            # an option may have no value, so do not insist on two fields
            lParts = elt.split(None, 1)
            a[lParts[0]] = lParts[1] if len(lParts) > 1 else ''

    if 'HashedControlPassword' not in a:
        LOG.info('Add HashedControlPassword for security')
        print('run: tor --hashcontrolpassword <TopSecretWord>')
    if 'ExcludeNodes' in a:
        elt = 'ExcludeNodes.ExcludeExitNodes.BadExit'
        LOG.warning(f"Remove ExcludeNodes and move then to {oArgs.bad_nodes}")
        print(f"move to the {elt} section as a list")
    if 'GuardNodes' in a:
        elt = 'GoodNodes.GuardNodes'
        LOG.warning(f"Remove GuardNodes and move then to {oArgs.good_nodes}")
        print(f"move to the {elt} section as a list")
    if 'ControlSocket' not in a and os.path.exists('/run/tor/control'):
        LOG.info('Add ControlSocket /run/tor/control for us')
        print('ControlSocket /run/tor/control GroupWritable RelaxDirModeCheck')
    # the advice is to set it to 0, so anything else (or unset) is flagged
    if a.get('UseMicrodescriptors') != '0':
        LOG.info('Add UseMicrodescriptors 0 for us')
        print('UseMicrodescriptors 0')
    if 'AutomapHostsSuffixes' not in a:
        LOG.info('Add AutomapHostsSuffixes for onions')
        print('AutomapHostsSuffixes .exit,.onion')
    if 'AutoMapHostsOnResolve' not in a:
        LOG.info('Add AutoMapHostsOnResolve for onions')
        print('AutoMapHostsOnResolve 1')
    if 'VirtualAddrNetworkIPv4' not in a:
        LOG.info('Add VirtualAddrNetworkIPv4 for onions')
        print('VirtualAddrNetworkIPv4 172.16.0.0/12')
    return 0
oGOOD_NODES = {}
oGOOD_ROOT = 'GoodNodes'
def lYamlGoodNodes(sFile='/etc/tor/torrc-goodnodes.yaml'):
root='IncludeNodes'
global oGOOD_NODES
root = oGOOD_ROOT
l = []
if not yaml: return l
if os.path.exists(sFile):
with open(sFile, 'rt') as oFd:
o = yaml.safe_load(oFd)
for elt in o[root].keys():
l += o[root][elt]
oGOOD_NODES = o
if 'GuardNodes' in o[root].keys():
l += o[oGOOD_ROOT]['GuardNodes']
# yq '.Nodes.IntroductionPoints|.[]' < /etc/tor/torrc-goodnodes.yaml
return l
def lIntroductionPoints(lOnions):
def lIntroductionPoints(controller, lOnions):
"""not working in stem 1.8.3"""
l = []
for elt in lOnions:
@ -162,11 +228,14 @@ def lIntroductionPoints(lOnions):
l += [introduction_point.address]
return l
# memory?
lINTS = ['ciissversion', 'uplinkbw', 'signingkeylifetime']
lBOOLS = ['dnssec', 'dnsqname', 'aesni', 'autoupdate', 'dnslocalrootzone'
lBAD_URLS = []
lATS = ['abuse', 'email']
lINTS = ['ciissversion', 'uplinkbw', 'signingkeylifetime', 'memory']
lBOOLS = ['dnssec', 'dnsqname', 'aesni', 'autoupdate', 'dnslocalrootzone',
'sandbox', 'offlinemasterkey']
def aVerifyContact(a, fp, timeout=20, host='127.0.0.1', port=9050):
def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050):
global lBAD_URLS
# cleanups for yaml
for elt in lINTS:
if elt in a:
a[elt] = int(a[elt])
@ -176,43 +245,69 @@ def aVerifyContact(a, fp, timeout=20, host='127.0.0.1', port=9050):
a[elt] = True
else:
a[elt] = False
for elt in lATS:
if elt in a:
a[elt] = a[elt].replace('[]', '@')
a.update({'fps': []})
# just stick fp in for now
a.update({'fps': [fp]})
# test the url for fps and add it to the array
if 'proof' not in a:
# only support uri for now
LOG.warn(f"{fp} 'proof' not in {list(a.keys())}")
return a
if a['proof'] not in ['uri-rsa']:
# only support uri for now
LOG.warn(f"{fp} proof={a['proof']} not supported yet")
return a
if 'url' not in a:
LOG.warn(f"{fp} 'proof' is 'uri-rsa' but url not in {list(a.keys())}")
if 'uri' not in a:
a['url'] = ''
LOG.warn(f"{fp} url and uri not in {list(a.keys())}")
return a
a['url'] = a['uri']
LOG.debug(f"{fp} 'uri' but not 'url' in {list(a.keys())}")
# drop through
if a['url'].startswith('http:'):
a['url'] = 'https:' +a['url'][5:]
elif not a['url'].startswith('https:'):
a['url'] = 'https:' +a['url']
# domain should be a unique ket for contacts
domain = a['url'][8:]
if a['proof'] not in ['uri-rsa']:
# only support uri for now
if False and ub_ctx:
fp_domain = fp +'.'+domain
if idns_validate(fp_domain,
libunbound_resolv_file='resolv.conf',
dnssec_DS_file='dnssec-root-trust',
) == 0:
pass
LOG.warn(f"{fp} proof={a['proof']} not supported yet")
return a
LOG.debug(f"{len(list(a.keys()))} contact fields for {fp}")
LOG.info(f"Downloading from {domain} for {fp}")
try:
l = lDownloadUrlFps(domain, timeout=20, host=host, port=port)
LOG.debug(f"Downloading from {domain} for {fp}")
l = lDownloadUrlFps(domain, https_cafile,
timeout=timeout, host=host, port=port)
except Exception as e:
LOG.exception(f"Error downloading from {domain} for {fp} {e}")
# should we put it's FPs from TRUST_DB on the ExcludeExitNodes?
a['fps'] = []
lBAD_URLS += [a['url']]
else:
if not l:
# already squacked in lD
LOG.warn(f"Downloading from {domain} failed for {fp}")
a['fps'] = []
lBAD_URLS += [a['url']]
else:
a['fps'] = l
a['fps'] = [elt for elt in l if elt and len(elt) == 40
and not elt.startswith('#')]
return a
def aParseContact(contact, fp):
"""
See the Tor ContactInfo Information Sharing Specification v2
https://nusenu.github.io/ContactInfo-Information-Sharing-Specification/
"""
contact = str(contact, 'UTF-8')
l = [line for line in contact.strip().replace('"', '').split(' ')
if ':' in line]
@ -224,11 +319,93 @@ def aParseContact(contact, fp):
a = yaml.safe_load(oFd)
return a
def bAreWeConnected():
    """Guess whether this host has a non-loopback network route.

    Reads /proc/<pid>/net/route, so it only works on Linux.

    Returns None when that file does not exist, otherwise True if the
    file has at least one line that is neither the 'Iface' header nor
    starts with 'lo', and False if it has none.
    """
    # FixMe: Linux only
    sFile = f"/proc/{os.getpid()}/net/route"
    if not os.path.isfile(sFile):
        return None
    # the context manager closes the handle, which the bare open() did not
    with open(sFile, "r") as oFd:
        return any(not elt.startswith(('Iface', 'lo')) for elt in oFd)
def vwait_for_controller(controller, wait_boot):
    """Poll the controller until Tor reports 100% bootstrap or time runs out.

    controller: object whose get_info("status/bootstrap-phase") returns
        the bootstrap status string.
    wait_boot: approximate maximum number of seconds to wait; the status
        is polled every 5 seconds.

    Raises SystemExit if bAreWeConnected() reports False.
    """
    if bAreWeConnected() is False:
        raise SystemExit("we are not connected")
    percent = i = 0
    # You can call this while bootstrapping
    while percent < 100 and i < wait_boot:
        bootstrap_status = controller.get_info("status/bootstrap-phase")
        progress_percent = re.match('.* PROGRESS=([0-9]+).*', bootstrap_status)
        if progress_percent is None:
            # a status line without PROGRESS= used to raise AttributeError
            LOG.warning(f"No PROGRESS in bootstrap status: {bootstrap_status}")
        else:
            percent = int(progress_percent.group(1))
            LOG.info(f"Bootstrapping {percent}%")
            if percent >= 100:
                # done: no point sleeping another 5 seconds
                break
        time.sleep(5)
        i += 5
def vsetup_logging(log_level, logfile=''):
# Configure logging for LOG at log_level, using coloredlogs when it was
# imported and logging.basicConfig otherwise.
# logfile: optional path of a log file; a leading '+' or '-' is stripped
# from it. With '+' (or with no logfile at all) output also goes to stdout.
global LOG
# 'add' means "also write to stdout"; it stays True when no logfile is given
add = True
# stem fucks up logging
from stem.util import log
# 30 == logging.WARNING
logging.getLogger('stem').setLevel(30)
# replace the logging module's default formatter so timestamps are short
# and carry no milliseconds
logging._defaultFormatter = logging.Formatter(datefmt='%m-%d %H:%M:%S')
logging._defaultFormatter.default_time_format = '%m-%d %H:%M:%S'
logging._defaultFormatter.default_msec_format = ''
# arguments for logging.basicConfig(), used only in the non-coloredlogs branch
kwargs = dict(level=log_level,
force=True,
format='%(levelname)-4s %(message)s')
if logfile:
add = logfile.startswith('+')
sub = logfile.startswith('-')
if add or sub:
# drop the one-character prefix to get the real path
logfile = logfile[1:]
kwargs['filename'] = logfile
if coloredlogs:
# https://pypi.org/project/coloredlogs/
aKw = dict(level=log_level,
logger=LOG,
stream=sys.stdout if add else None,
fmt='%(levelname)-4s %(message)s'
)
coloredlogs.install(**aKw)
if logfile:
# coloredlogs handles the stream; the file needs its own handler
oHandler = logging.FileHandler(logfile)
LOG.addHandler(oHandler)
LOG.info(f"CSetting log_level to {log_level!s}")
else:
logging.basicConfig(**kwargs)
if add and logfile:
# basicConfig was given the file, so add stdout as a second handler
oHandler = logging.StreamHandler(sys.stdout)
LOG.addHandler(oHandler)
LOG.info(f"SSetting log_level to {log_level!s}")
def oMainArgparser(_=None):
# 'Mode: 0=chat 1=chat+audio 2=chat+audio+video default: 0'
try:
from OpenSSL import SSL
lCAfs = SSL._CERTIFICATE_FILE_LOCATIONS
except:
lCAfs = []
CAfs = []
for elt in lCAfs:
if os.path.exists(elt):
CAfs.append(elt)
if not CAfs:
CAfs = ['']
parser = argparse.ArgumentParser(add_help=True,
epilog=__doc__)
parser.add_argument('--https_cafile', type=str,
help="Certificate Authority file (in PEM)",
default=CAfs[0])
parser.add_argument('--proxy_host', '--proxy-host', type=str,
default='127.0.0.1',
help='proxy host')
@ -238,52 +415,87 @@ def oMainArgparser(_=None):
default='/run/tor/control',
type=str,
help='control socket - or port')
parser.add_argument('--timeout', default=20, type=int,
help='proxy download timeout')
parser.add_argument('--torrc',
default='',
type=str,
help='torrc to check for suggestions')
parser.add_argument('--timeout', default=30, type=int,
help='proxy download connect timeout')
parser.add_argument('--good_nodes', type=str,
default='/etc/tor/torrc-goodnodes.yaml',
default='/etc/tor/yaml/torrc-goodnodes.yaml',
help="Yaml file of good nodes that should not be excluded")
parser.add_argument('--bad_nodes', type=str,
default='/etc/tor/torrc-badnodes.yaml',
default='/etc/tor/yaml/torrc-badnodes.yaml',
help="Yaml file of bad nodes that should also be excluded")
parser.add_argument('--contact', type=str, default='Empty,NoEmail',
help="comma sep list of conditions - Empty,NoEmail")
parser.add_argument('--bad_contacts', type=str,
default='/tmp/badcontacts.yaml',
help="Yaml file of bad contacts that bad FPs are using")
parser.add_argument('--wait_boot', type=int, default=120,
help="Seconds to wait for Tor to booststrap")
parser.add_argument('--log_level', type=int, default=20,
help="10=debug 20=info 30=warn 40=error")
parser.add_argument('--bad_sections', type=str,
default='Hetzner,BadExit',
help="sections of the badnodes.yaml to use, comma separated, '' defaults to all")
help="sections of the badnodes.yaml to use, comma separated, '' BROKEN")
parser.add_argument('--white_onions', type=str,
default='',
help="comma sep. list of onions to whitelist their introduction points - BROKEN")
parser.add_argument('--bad_output', type=str, default='',
parser.add_argument('--torrc_output', type=str, default='',
help="Write the torrc configuration to a file")
parser.add_argument('--details_output', type=str, default='',
help="Write the lookup URLs of the excluded nodes to a file")
parser.add_argument('--proof_output', type=str, default='',
help="Write the proof data of the included nodes to a YAML file")
return parser
def vwrite_badnodes(oArgs):
    """Write the module-level oBAD_NODES dictionary to oArgs.bad_nodes as YAML.

    oArgs: parsed command-line arguments; nothing is done when
        oArgs.bad_nodes is empty.

    The YAML is written to '<bad_nodes>.tmp' first, any existing file is
    renamed to '<bad_nodes>.bak', and then the temporary file is renamed
    into place.
    """
    if not oArgs.bad_nodes:
        return
    if not yaml:
        # the yaml import is optional at module level
        LOG.warning(f"yaml is not available, not writing {oArgs.bad_nodes}")
        return
    tmp = oArgs.bad_nodes +'.tmp'
    bak = oArgs.bad_nodes +'.bak'
    with open(tmp, 'wt') as oFYaml:
        yaml.dump(oBAD_NODES, indent=2, stream=oFYaml)
    if os.path.exists(oArgs.bad_nodes):
        os.rename(oArgs.bad_nodes, bak)
    os.rename(tmp, oArgs.bad_nodes)
    # the old message used exit_excludelist, which is not defined in this scope
    LOG.info(f"Wrote bad nodes to {oArgs.bad_nodes}")
def vwrite_goodnodes(oArgs):
    """Write the module-level oGOOD_NODES dictionary to oArgs.good_nodes as YAML.

    oArgs: parsed command-line arguments; nothing is done when
        oArgs.good_nodes is empty.

    The YAML is written to '<good_nodes>.tmp' first, any existing file is
    renamed to '<good_nodes>.bak', and then the temporary file is renamed
    into place.
    """
    if not oArgs.good_nodes:
        return
    if not yaml:
        # the yaml import is optional at module level
        LOG.warning(f"yaml is not available, not writing {oArgs.good_nodes}")
        return
    tmp = oArgs.good_nodes +'.tmp'
    bak = oArgs.good_nodes +'.bak'
    with open(tmp, 'wt') as oFYaml:
        yaml.dump(oGOOD_NODES, indent=2, stream=oFYaml)
    if os.path.exists(oArgs.good_nodes):
        os.rename(oArgs.good_nodes, bak)
    os.rename(tmp, oArgs.good_nodes)
    # the old message used exit_excludelist, which is not defined in this scope
    LOG.info(f"Wrote good nodes to {oArgs.good_nodes}")
def iMain(lArgs):
global oTOX_OARGS
global aTRUST_DB
parser = oMainArgparser()
oArgs = parser.parse_args(lArgs)
aKw = dict(level=oArgs.log_level,
format='%(name)s %(levelname)-4s %(message)s',
stream=sys.stdout,
force=True)
logging.basicConfig(**aKw)
logging.getLogger('stem').setLevel(oArgs.log_level)
vsetup_logging(oArgs.log_level)
if bAreWeConnected() is False:
raise SystemExit("we are not connected")
sFile = oArgs.torrc
if sFile and os.path.exists(sFile):
icheck_torrc(sFile, oArgs)
global aTRUST_DB
sFile = oArgs.proof_output
if sFile and os.path.exists(sFile):
try:
with open(sFile, 'rt') as oFd:
aTRUST_DB = yaml.safe_load(oFd)
except:
aTRUST_DB = {}
if oArgs.proxy_ctl.startswith('/') or os.path.exists(oArgs.proxy_ctl):
controller = oMakeController(sSock=oArgs.proxy_ctl)
@ -291,111 +503,204 @@ def iMain(lArgs):
port =int(oArgs.proxy_ctl)
controller = oMakeController(port=port)
vwait_for_controller(controller, oArgs.wait_boot)
if oArgs.proof_output:
proof_output_tmp = oArgs.proof_output + '.tmp'
elt = controller.get_conf('UseMicrodescriptors')
if elt != '0' :
LOG.warn('"UseMicrodescriptors 0" is required in your /etc/tor/torrc. Exiting.')
LOG.error('"UseMicrodescriptors 0" is required in your /etc/tor/torrc. Exiting.')
controller.set_conf('UseMicrodescriptors', 0)
# does it work dynamically?
# return 2
return 2
percent = i = 0
# You can call this while boostrapping
while percent < 100 and i < oArgs.wait_boot:
bootstrap_status = controller.get_info("status/bootstrap-phase")
progress_percent = re.match('.* PROGRESS=([0-9]+).*', bootstrap_status)
percent = int(progress_percent.group(1))
LOG.info(f"Bootstrapping {percent}%")
time.sleep(5)
i += 5
elt = controller.get_conf('ExcludeExitNodes')
elt = controller.get_conf(sEXCLUDE_EXIT_KEY)
if elt and elt != '{??}':
LOG.warn(f'ExcludeExitNodes is in use already')
LOG.warn(f"{sEXCLUDE_EXIT_KEY} is in use already")
lGood = lYamlGoodNodes(oArgs.good_nodes)
LOG.info(f'lYamlGoodNodes {len(lGood)}')
lGoodOverrideSet = lYamlGoodNodes(oArgs.good_nodes)
LOG.info(f"lYamlGoodNodes {len(lGoodOverrideSet)} from {oArgs.good_nodes}")
if oArgs.white_onions:
l = lIntroductionPoints(oArgs.white_onions.split(','))
lGood += l
l = lIntroductionPoints(controller, oArgs.white_onions.split(','))
lGoodOverrideSet += l
exit_excludelist = []
if oArgs.bad_nodes and os.path.exists(oArgs.bad_nodes):
if False and oArgs.bad_sections:
# BROKEN
sections = oArgs.bad_sections.split(',')
exit_excludelist = lYamlBadNodes(oArgs.bad_nodes,
lWanted=sections,
section=sEXCLUDE_EXIT_KEY)
else:
exit_excludelist = lYamlBadNodes(oArgs.bad_nodes)
LOG.info(f"lYamlBadNodes {len(exit_excludelist)}")
relays = controller.get_server_descriptors()
if oArgs.bad_sections:
sections = oArgs.bad_sections.split(',')
exit_excludelist = lYamlBadNodes(lWanted=sections)
else:
exit_excludelist = lYamlBadNodes()
LOG.info(f'lYamlBadNodes {len(exit_excludelist)}')
if oArgs.details_output:
oFd = open(oArgs.details_output, 'wt')
else:
oFd = None
lProofUriFps = []
lProofGoodFps = []
iDnsContact = 0
iBadContact = 0
iFakeContact = 0
aBadContacts = {}
aProofUri = {}
lConds = oArgs.contact.split(',')
for relay in relays:
if not relay.exit_policy.is_exiting_allowed(): continue
if not is_valid_fingerprint(relay.fingerprint):
LOG.warn('Invalid Fingerprint: %s' % relay.fingerprint)
continue
relay.fingerprint = relay.fingerprint.upper()
if relay.fingerprint in aTRUST_DB:
if aTRUST_DB[relay.fingerprint]['fps']:
lProofUriFps += aTRUST_DB[relay.fingerprint]['fps']
sofar = f"G:{len(list(aProofUri.keys()))} U:{iDnsContact} F:{iFakeContact} BF:{len(exit_excludelist)} GF:{len(lProofGoodFps)}"
if not relay.exit_policy.is_exiting_allowed():
if sEXCLUDE_EXIT_KEY == 'ExcludeNodes':
LOG.debug(f"{relay.fingerprint} not an exit {sofar}")
else:
LOG.warn(f"{relay.fingerprint} not an exit {sofar}")
# continue
if relay.fingerprint in lProofUriFps:
if relay.fingerprint in lProofGoodFps:
# we already have it.
continue
if relay.fingerprint in aTRUST_DB:
if aTRUST_DB[relay.fingerprint]['fps'] and \
relay.fingerprint in aTRUST_DB[relay.fingerprint]['fps']:
lProofGoodFps += relay.fingerprint
continue
if relay.contact and b'dns-rsa' in relay.contact.lower():
LOG.info(f"{relay.fingerprint} skipping 'dns-rsa' {sofar}")
iDnsContact += 1
continue
if relay.contact and b'proof:uri-rsa' in relay.contact.lower():
a = aParseContact(relay.contact, relay.fingerprint)
if not a: continue
b = aVerifyContact(list(a.values())[0], relay.fingerprint,
if not a:
LOG.warn(f"{relay.fingerprint} did not parse {sofar}")
exit_excludelist.append(relay.fingerprint)
continue
if 'url' in a and a['url'] and a['url'] in lBAD_URLS:
# The fp is using a contact with a URL we know is bad
LOG.info(f"{relay.fingerprint} skipping in lBAD_URLS {a['url']} {sofar}")
exit_excludelist.append(relay.fingerprint)
continue
b = aVerifyContact(list(a.values())[0],
relay.fingerprint,
oArgs.https_cafile,
timeout=oArgs.timeout,
host=oArgs.proxy_host,
port=oArgs.proxy_port)
if not b:
if not b['fps'] or not b['url']:
LOG.warn(f"{relay.fingerprint} did not verify {sofar}")
# If it's giving contact info that doesnt check out
# it could be a bad exit with fake contact info
exit_excludelist.append(relay.fingerprint)
aBadContacts[relay.fingerprint] = b
continue
if 'fps' in b and b['fps'] and relay.fingerprint in b['fps']:
lProofUriFps += b['fps']
if relay.fingerprint not in b['fps']:
LOG.warn(f"{relay.fingerprint} the fp is not in the list of fps {sofar}")
# assume a fp is using a bogus contact
exit_excludelist.append(relay.fingerprint)
iFakeContact += 1
aBadContacts[relay.fingerprint] = b
continue
# great contact had good fps and we are in them
lProofGoodFps += b['fps']
LOG.info(f"{relay.fingerprint} verified {b['url']} {sofar}")
# add our contact info to the trustdb
aProofUri[relay.fingerprint] = b
if oArgs.proof_output and oArgs.log_level <= 20:
# as we go along then clobber
with open(proof_output_tmp, 'wt') as oFYaml:
yaml.dump(aProofUri, indent=2, stream=oFYaml)
oFYaml.close()
continue
if ('Empty' in lConds and not relay.contact) or \
('NoEmail' in lConds and relay.contact and not b'@' in relay.contact):
exit_excludelist.append(relay.fingerprint)
if oFd:
oFd.write(sDETAILS_URL +relay.fingerprint +"\n")
exit_excludelist = list(set(exit_excludelist).difference(set(lGood)))
LOG.info(f'ExcludeExitNodes {len(exit_excludelist)} net bad exit nodes')
controller.set_conf('ExcludeExitNodes', exit_excludelist)
elt = controller.get_conf('ExcludeExitNodes')
if oArgs.bad_output:
with open(oArgs.bad_output, 'wt') as oFdE:
oFdE.write(f"ExcludeExitNodes {','.join(exit_excludelist)}\n")
LOG.info(f"Wrote tor configuration to {oArgs.bad_output}")
if lProofUriFps:
LOG.info(f'ExitNodes {len(lProofUriFps)} good exit nodes')
controller.set_conf('ExitNodes', lProofUriFps)
exit_excludelist = list(set(exit_excludelist).difference(set(lGoodOverrideSet)))
if oFd:
LOG.info(f"Wrote details URLs to {oArgs.details_output}")
oFd.close()
if oArgs.proof_output and aProofUri:
with open(proof_output_tmp, 'wt') as oFYaml:
yaml.dump(aProofUri, indent=2, stream=oFYaml)
LOG.info(f"Wrote {len(list(aProofUri))} proof details to {oArgs.proof_output}")
oFYaml.close()
if os.path.exists(oArgs.proof_output):
bak = oArgs.proof_output +'.bak'
os.rename(oArgs.proof_output, bak)
os.rename(proof_output_tmp, oArgs.proof_output)
if oArgs.proof_output:
with open(oArgs.proof_output, 'wt') as oFdD:
s = yaml.dump_all(aProofUri, indent=2, stream=None)
oFdD.write(s +'\n')
LOG.info(f"Wrote proof details to {oArgs.proof_output}")
oFdD.close()
if oArgs.torrc_output and exit_excludelist:
with open(oArgs.torrc_output, 'wt') as oFTorrc:
oFTorrc.write(f"{sEXCLUDE_EXIT_KEY} {','.join(exit_excludelist)}\n")
oFTorrc.write(f"{sINCLUDE_EXIT_KEY} {','.join(lProofGoodFps)}\n")
oFTorrc.write(f"{sINCLUDE_GUARD_KEY} {','.join(o[oGOOD_ROOT]['GuardNodes'])}\n")
LOG.info(f"Wrote tor configuration to {oArgs.torrc_output}")
oFTorrc.close()
if oArgs.bad_contacts and aBadContacts:
# for later analysis
with open(oArgs.bad_contacts, 'wt') as oFYaml:
yaml.dump(aBadContacts, indent=2, stream=oFYaml)
oFYaml.close()
global oBAD_NODES
oBAD_NODES['BadNodes']['ExcludeNodes']['BadExit'] = exit_excludelist
vwrite_badnodes(oArgs)
# nothing changed vwrite_goodnodes(oArgs)
retval = 0
try:
logging.getLogger('stem').setLevel(30)
if exit_excludelist:
LOG.info(f"{sEXCLUDE_EXIT_KEY} {len(exit_excludelist)} net bad exit nodes")
controller.set_conf(sEXCLUDE_EXIT_KEY, exit_excludelist)
if lProofGoodFps:
LOG.info(f"{sINCLUDE_EXIT_KEY} {len(lProofGoodFps)} good nodes")
controller.set_conf(sINCLUDE_EXIT_KEY, lProofGoodFps)
o = oGOOD_NODES
if 'GuardNodes' in o[oGOOD_ROOT].keys():
LOG.info(f"{sINCLUDE_GUARD_KEY} {len(o[oGOOD_ROOT]['GuardNodes'])} guard nodes")
controller.set_conf(sINCLUDE_GUARD_KEY, o[oGOOD_ROOT]['GuardNodes'])
return retval
except InvalidRequest as e:
# Unacceptable option value: Invalid router list.
LOG.error(str(e))
LOG.warn(f"lProofGoodFps: {lProofGoodFps}")
LOG.warn(f"{sEXCLUDE_EXIT_KEY}: {exit_excludelist}")
retval = 1
return retval
except KeyboardInterrupt:
return 0
except Exception as e:
LOG.exception(str(e))
retval = 2
return retval
finally:
# wierd we are getting stem errors during the final return
# with a traceback that doesnt correspond to any real flow
# File "/usr/lib/python3.9/site-packages/stem/control.py", line 2474, in set_conf
# self.set_options({param: value}, False)
logging.getLogger('stem').setLevel(40)
try:
for elt in controller._event_listeners:
controller.remove_event_listener(elt)
controller.close()
return(0)
except Exception as e:
LOG.warn(str(e))
if __name__ == '__main__':
try:
@ -403,8 +708,9 @@ if __name__ == '__main__':
except IncorrectPassword as e:
LOG.error(e)
i = 1
except KeyboardInterrupt:
i = 0
except Exception as e:
LOG.exception(e)
i = 1
i = 2
sys.exit(i)

View file

@ -1,20 +1,24 @@
# -*- mode: python; indent-tabs-mode: nil; py-indent-offset: 4; coding: utf-8 -# -*- coding: utf-8 -*-
# -*- mode: python; indent-tabs-mode: nil; py-indent-offset: 4; coding: utf-8 -
import os
import sys
import datetime
import requests
from stem.control import Controller
from stem.util.tor_tools import *
from urllib.parse import urlparse
import requests
import datetime
try:
# unbound is not on pypi
from unbound import ub_ctx,RR_TYPE_TXT,RR_CLASS_IN
except:
ub_ctx = RR_TYPE_TXT = RR_CLASS_IN = None
global LOG
import logging
import warnings
warnings.filterwarnings('ignore')
LOG = logging.getLogger()
# download this python library from
@ -25,15 +29,6 @@ try:
except:
TorContactInfoParser = None
# tor ControlPort IP
controller_address = '127.0.0.1'
dnssec_DS_file = 'dnssec-root-trust'
# this is not the system wide /etc/resolv.conf
# use dnscrypt-proxy to encrypt your DNS and route it via tor's SOCKSPort
libunbound_resolv_file = 'resolv.conf'
# for now we support max_depth = 0 only
# this PoC version has no support for recursion
# https://github.com/nusenu/tor-relay-operator-ids-trust-information#trust-information-consumers
@ -42,9 +37,6 @@ supported_max_depths = ['0']
# https://github.com/nusenu/ContactInfo-Information-Sharing-Specification#ciissversion
accepted_ciissversions = ['2']
# https://github.com/nusenu/ContactInfo-Information-Sharing-Specification#proof
accepted_proof_types = ['uri-rsa','dns-rsa']
# https://stackoverflow.com/questions/2532053/validate-a-hostname-string
# FIXME this check allows non-fqdn names
def is_valid_hostname(hostname):
@ -56,7 +48,7 @@ def is_valid_hostname(hostname):
return all(allowed.match(x) for x in hostname.split("."))
def read_local_trust_config(trust_config='trust_config'):
def read_local_trust_config(trust_config):
'''
reads a local configuration file containing trusted domains
and returns them in an array
@ -103,8 +95,8 @@ def read_local_validation_cache(validation_cache_file, trusted_domains=[]):
result = []
if trusted_domains == []:
return result
if (os.path.isfile(validation_cache_file)):
f = open(validation_cache_file)
if os.path.isfile(validation_cache_file):
with open(validation_cache_file, 'rt') as f:
for line in f:
line = line.strip()
if line[0] == '#':
@ -118,10 +110,10 @@ def read_local_validation_cache(validation_cache_file, trusted_domains=[]):
if domain in trusted_domains:
result.append(fingerprint)
else:
print('ignoring cached entry for untrusted domain %s' % domain)
LOG.warn('ignoring cached entry for untrusted domain %s' % domain)
else:
print("Validation cache file not present. It will be created.")
LOG.info("Validation cache file not present. It will be created.")
return result
def get_controller(address='127.0.0.1', port=9151, password=''):
@ -136,7 +128,7 @@ def get_controller(address='127.0.0.1',port=9151,password=''):
controller = Controller.from_port(address=address, port=port)
controller.authenticate(password=password)
except Exception as e:
LOG.error(f'Failed to connect to the tor process, {e}')
LOG.error(f"Failed to connect to the tor process, {e}")
sys.exit(1)
if not controller.is_set('UseMicrodescriptors'):
@ -155,6 +147,9 @@ def find_validation_candidates(controller, trusted_domains=[],validation_cache=[
example content:
{ 'emeraldonion.org' : { 'uri-rsa': ['044600FD968728A6F220D5347AD897F421B757C0', '09DCA3360179C6C8A5A20DDDE1C54662965EF1BA']}}
'''
# https://github.com/nusenu/ContactInfo-Information-Sharing-Specification#proof
accepted_proof_types = ['uri-rsa','dns-rsa']
result = {}
@ -207,7 +202,7 @@ def find_validation_candidates(controller, trusted_domains=[],validation_cache=[
result[domain] = {prooftype : [fingerprint]}
return result
def lDownloadUrlFps(domain, timeout=20, host='127.0.0.1', port=9050):
def lDownloadUrlFps(domain, sCAfile, timeout=30, host='127.0.0.1', port=9050):
uri="https://"+domain+"/.well-known/tor-relay/rsa-fingerprint.txt"
# socks proxy used for outbound web requests (for validation of proofs)
proxy = {'https': 'socks5h://' +host +':' +str(port)}
@ -217,20 +212,39 @@ def lDownloadUrlFps(domain, timeout=20, host='127.0.0.1', port=9050):
LOG.debug("fetching %s...." % uri)
try:
# grr. fix urllib3
# urllib3.connection WARNING Certificate did not match expected hostname:
head = requests.head(uri, timeout=timeout, proxies=proxy, headers=headers)
except Exception as e:
print("HTTP HEAD request failed for %s" % uri)
print(e)
LOG.warn(f"HTTP HEAD request failed for {uri} {e}")
head = None
return []
if head.status_code != 200:
return []
if not head.headers['Content-Type'].startswith('text/plain'):
return []
assert os.path.exists(sCAfile), sCAfile
try:
fullfile = requests.get(uri, proxies=proxy, timeout=10, headers=headers)
from https_adapter import HTTPSAdapter
except Exception as e:
LOG.warn(f"Could not import HTTPSAdapter {e}")
HTTPSAdapter = None
HTTPSAdapter = None
try:
with requests.sessions.Session() as session:
if HTTPSAdapter:
# FixMe: upgrade to TLS1.3
session.mount("https://", HTTPSAdapter(pool_maxsize=1,
max_retries=3,))
fullfile = session.request(method="get", url=uri,
proxies=proxy, timeout=timeout,
headers=headers,
allow_redirects=False,
verify=True
)
except:
print("HTTP GET request failed for %s" % uri)
LOG.warn("HTTP GET request failed for %s" % uri)
return []
if fullfile.status_code != 200 or not fullfile.headers['Content-Type'].startswith('text/plain'):
return []
@ -240,10 +254,11 @@ def lDownloadUrlFps(domain, timeout=20, host='127.0.0.1', port=9050):
LOG.error('Redirect detected %s vs %s (final)' % (uri, fullfile.url))
return []
well_known_content = [i.strip() for i in fullfile.text.upper().split('\n')]
well_known_content = fullfile.text.upper().strip().split('\n')
well_known_content = [i for i in well_known_content if i and len(i) == 40]
return well_known_content
def validate_proofs(candidates, validation_cache_file):
def validate_proofs(candidates, validation_cache_file, timeout=20, host='127.0.0.1', port=9050):
'''
This function takes the return value of find_validation_candidates()
and validates them according to their proof type (uri-rsa, dns-rsa)
@ -257,7 +272,7 @@ def validate_proofs(candidates, validation_cache_file):
for domain in candidates.keys():
for prooftype in candidates[domain].keys():
if prooftype == 'uri-rsa':
well_known_content = lDownloadUrlFps(domain, timeout=20, host='127.0.0.1', port=9050)
well_known_content = lDownloadUrlFps(domain, timeout=timeout, host=host, port=port)
for fingerprint in candidates[domain][prooftype]:
if fingerprint in well_known_content:
# write cache entry
@ -268,7 +283,10 @@ def validate_proofs(candidates, validation_cache_file):
elif prooftype == 'dns-rsa' and ub_ctx:
for fingerprint in candidates[domain][prooftype]:
fp_domain = fingerprint+'.'+domain
if dns_validate(fp_domain):
if idns_validate(fp_domain,
libunbound_resolv_file='resolv.conf',
dnssec_DS_file='dnssec-root-trust',
) == 0:
count += 1
f.write('%s:%s:%s:%s\n' % (domain, fingerprint, prooftype, dt_utc))
else:
@ -276,7 +294,10 @@ def validate_proofs(candidates, validation_cache_file):
f.close()
LOG.info('successfully validated %s new (not yet validated before) relays' % count)
def idns_validate(domain,
                  libunbound_resolv_file='resolv.conf',
                  dnssec_DS_file='dnssec-root-trust',
                  ):
    '''
    performs DNS TXT lookups and verifies the reply
    - is DNSSEC valid and
    - contains only a single TXT record
    - the DNS record contains a hardcoded string as per specification
    https://nusenu.github.io/ContactInfo-Information-Sharing-Specification/#dns-rsa

    domain: the name to query (the caller passes '<fingerprint>.<domain>')
    libunbound_resolv_file: resolv.conf style file handed to libunbound
    dnssec_DS_file: DNSSEC root trust anchor file handed to libunbound

    returns an integer status instead of exiting the process:
       0  the TXT record is present, DNSSEC-secure and matches
       1  the lookup failed or the record did not validate
      -1  libunbound is not available (ub_ctx is unset)
       5  libunbound_resolv_file is missing
       6  dnssec_DS_file is missing
    '''
    if not ub_ctx:
        return -1

    # this is not the system wide /etc/resolv.conf
    # use dnscrypt-proxy to encrypt your DNS and route it via tor's SOCKSPort
    ctx = ub_ctx()

    if not os.path.isfile(libunbound_resolv_file):
        LOG.error('libunbound resolv config file: "%s" is missing, aborting!' % libunbound_resolv_file)
        return 5
    ctx.resolvconf(libunbound_resolv_file)

    if not os.path.isfile(dnssec_DS_file):
        LOG.error('DNSSEC trust anchor file "%s" is missing, aborting!' % dnssec_DS_file)
        return 6
    ctx.add_ta_file(dnssec_DS_file)

    status, result = ctx.resolve(domain, RR_TYPE_TXT, RR_CLASS_IN)
    if status == 0 and result.havedata:
        # exactly one TXT record, and the answer must be DNSSEC validated
        if len(result.rawdata) == 1 and result.secure:
            # ignore the first byte, it is the TXT length
            if result.data.as_raw_data()[0][1:] == b'we-run-this-tor-relay':
                return 0
    return 1
def configure_tor(controller, trusted_fingerprints, exitonly=True):
    '''
    sets the ExitNodes option of the tor process behind controller
    to the given list of trusted relay fingerprints.

    controller: object whose set_conf() is used to change the tor config
    trusted_fingerprints: list of relay fingerprints to allow as exits
    exitonly: accepted for interface compatibility; not used here

    exits the process with status 15 if fewer than 41 fingerprints are
    given, and with status 20 if the configuration could not be applied.
    '''
    relay_count = len(trusted_fingerprints)

    # refuse to narrow the exit selection down to a too small set
    if relay_count < 41:
        LOG.error('Too few trusted relays (%s), aborting!' % relay_count)
        sys.exit(15)

    try:
        controller.set_conf('ExitNodes', trusted_fingerprints)
    except Exception:
        # LOG.exception records the traceback of the failure
        LOG.exception('Failed to set ExitNodes tor config to trusted relays')
        sys.exit(20)
    else:
        # success is informational, not an error
        LOG.info('limited exits to %s relays' % relay_count)
if __name__ == '__main__':
@ -338,11 +362,21 @@ if __name__ == '__main__':
trusted_domains=trusted_domains)
# tor ControlPort password
controller_password=''
# tor ControlPort IP
controller_address = '127.0.0.1'
timeout = 20
port = 9050
controller = get_controller(address=controller_address,password=controller_password)
r = find_validation_candidates(controller,validation_cache=trusted_fingerprints,trusted_domains=trusted_domains)
validate_proofs(r, validation_cache_file)
r = find_validation_candidates(controller,
validation_cache=trusted_fingerprints,
trusted_domains=trusted_domains)
validate_proofs(r, validation_cache_file,
timeout=timeout,
host=controller_address,
port=port)
# refresh list with newly validated fingerprints
trusted_fingerprints = read_local_validation_cache(trusted_domains=trusted_domains)
trusted_fingerprints = read_local_validation_cache(validation_cache_file,
trusted_domains=trusted_domains)
configure_tor(controller, trusted_fingerprints)