Compare commits: f85f5c6bd8 ... 28d1d34dbd (3 commits)

Author | SHA1 | Date
---|---|---
 | 28d1d34dbd |
 | 94c0834092 |
 | 2ea65cc181 |

6 changed files with 436 additions and 351 deletions

@@ -160,7 +160,7 @@ def lYamlBadNodes(sFile,
     global oBAD_NODES
     global lKNOWN_NODNS
     global lMAYBE_NODNS

     l = []
     if not yaml: return l
     if os.path.exists(sFile):
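
Note: the `if not yaml: return l` guard suggests the module imports PyYAML defensively and degrades to an empty list when it is missing. A sketch of that import pattern (the try/except form is an assumption; only the guard itself appears in this diff):

    # Sketch: optional-dependency import; yaml stays None when PyYAML
    # is absent, so callers can test 'if not yaml'.
    try:
        import yaml
    except ImportError:
        yaml = None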
@@ -198,7 +198,7 @@ def lYamlGoodNodes(sFile='/etc/tor/torrc-goodnodes.yaml'):
     # yq '.Nodes.IntroductionPoints|.[]' < /etc/tor/torrc-goodnodes.yaml
     return l

-def bdomain_is_bad(domain):
+def bdomain_is_bad(domain, fp):
     global lKNOWN_NODNS
     if domain in lKNOWN_NODNS: return True
     if domain in lMAYBE_NODNS:
@@ -208,10 +208,11 @@ def bdomain_is_bad(domain):
         lKNOWN_NODNS.append(domain)
         lMAYBE_NODNS.remove(domain)
         return True

-    if '@' in domain:
-        LOG.warn(f"@ in domain {domain}")
-        return True
+    for elt in '@(){}$!':
+        if elt in domain:
+            LOG.warn(f"{elt} in domain {domain}")
+            return True
     return False

 tBAD_URLS = set()
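
Note: the rewritten check broadens the old `'@' in domain` test to reject a domain containing any of the characters `@(){}$!`. A minimal equivalent, as a sketch (the one-liner form with `any()` is an assumption, not what the commit does; the character set is taken from the hunk above):

    # Sketch: same membership test as the committed loop, collapsed
    # into a single any() expression over the blacklist '@(){}$!'.
    def has_bad_char(domain: str) -> bool:
        return any(elt in domain for elt in '@(){}$!')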
@@ -256,7 +257,7 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050)
         if aCachedContact['email'] == a['email']:
             LOG.info(f"{fp} in aTRUST_DB_INDEX")
             return aCachedContact

     if 'url' not in keys:
         if 'uri' not in keys:
             a['url'] = ''
@@ -270,20 +271,21 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050)

     c = a['url'].lstrip('https://').lstrip('http://').strip('/')
     a['url'] = 'https://' +c

     # domain should be a unique key for contacts
     domain = a['url'][8:]
-    if bdomain_is_bad(domain):
+    if bdomain_is_bad(domain, fp):
         LOG.warn(f"{domain} is bad from {a['url']}")
-        LOG.info(f"{domain} is bad from {a}")
+        LOG.debug(f"{fp} is bad from {a}")
         return a

     ip = zResolveDomain(domain)
     if ip == '':
         aFP_EMAIL[fp] = a['email']
         LOG.debug(f"{fp} {domain} does not resolve")
         lKNOWN_NODNS.append(domain)
         return {}

     if a['proof'] not in ['uri-rsa']:
         # only support uri for now
         if False and ub_ctx:
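
Note: one caveat worth flagging in the unchanged context above: `str.lstrip('https://')` strips a *set* of characters (`h t p s : /`), not a prefix, so a URL such as `https://stp.example` would be mangled to `.example`. A safer sketch using `removeprefix` (Python 3.9+; the helper name is hypothetical, this is not part of the commit):

    # Sketch: strip a URL scheme as a prefix rather than as a
    # character set; str.removeprefix() requires Python 3.9+.
    def strip_scheme(url: str) -> str:
        for scheme in ('https://', 'http://'):
            url = url.removeprefix(scheme)
        return url.strip('/')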
@@ -295,7 +297,7 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050)
             pass
         LOG.warn(f"{fp} proof={a['proof']} not supported yet")
         return a

     LOG.debug(f"{len(keys)} contact fields for {fp}")
     url="https://"+domain+"/.well-known/tor-relay/rsa-fingerprint.txt"
     try:
@@ -328,7 +330,7 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050)
         # any reason retry?
         tBAD_URLS.add(a['url'])
         return a

     if hasattr(o, 'text'):
         data = o.text
     else:
@@ -347,7 +349,7 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050)

 def aParseContactYaml(contact, fp):
     """
     See the Tor ContactInfo Information Sharing Specification v2
     https://nusenu.github.io/ContactInfo-Information-Sharing-Specification/
     """
     lelts = contact.split()
@@ -357,7 +359,7 @@ def aParseContactYaml(contact, fp):
         LOG.debug(f"{fp} {a}")
         return a
     key = ''
-    for elt in lets:
+    for elt in lelts:
         if key == '':
             key = elt
             continue
@@ -368,7 +370,7 @@ def aParseContactYaml(contact, fp):

 def aParseContact(contact, fp):
     """
     See the Tor ContactInfo Information Sharing Specification v2
     https://nusenu.github.io/ContactInfo-Information-Sharing-Specification/
     """
     l = [line for line in contact.strip().replace('"', '').split(' ')
@@ -424,22 +426,22 @@ def vsetup_logging(log_level, logfile=''):
     LOG.info(f"SSetting log_level to {log_level!s}")

     logging._levelToName = {
-        CRITICAL: 'CRITICAL',
-        ERROR: 'ERROR',
-        WARNING: 'WARN',
-        INFO: 'INFO',
-        DEBUG: 'DEBUG',
-        NOTSET: 'NOTSET',
+        logging.CRITICAL: 'CRITICAL',
+        logging.ERROR: 'ERROR',
+        logging.WARNING: 'WARN',
+        logging.INFO: 'INFO',
+        logging.DEBUG: 'DEBUG',
+        logging.NOTSET: 'NOTSET',
     }
     logging._nameToLevel = {
-        'CRITICAL': CRITICAL,
-        'FATAL': FATAL,
-        'ERROR': ERROR,
-        'WARN': WARNING,
-        'WARNING': WARNING,
-        'INFO': INFO,
-        'DEBUG': DEBUG,
-        'NOTSET': NOTSET,
+        'CRITICAL': logging.CRITICAL,
+        'FATAL': logging.FATAL,
+        'ERROR': logging.ERROR,
+        'WARN': logging.WARNING,
+        'WARNING': logging.WARNING,
+        'INFO': logging.INFO,
+        'DEBUG': logging.DEBUG,
+        'NOTSET': logging.NOTSET,
     }

 def oMainArgparser(_=None):
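
Note: this hunk qualifies the bare level constants with `logging.`, which presumably fixes NameErrors unless the constants were star-imported elsewhere. Both `_levelToName` and `_nameToLevel` are private stdlib dictionaries; the public way to register the short 'WARN' spelling is `logging.addLevelName`. A sketch of that alternative (not what the commit does):

    import logging

    # Sketch: register the short 'WARN' spelling via the public API
    # instead of mutating logging's private level tables.
    logging.addLevelName(logging.WARNING, 'WARN')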
@@ -490,7 +492,7 @@ def oMainArgparser(_=None):
     parser.add_argument('--bad_contacts', type=str,
                         default=os.path.join(ETC_DIR, 'badcontacts.yaml'),
                         help="Yaml file of bad contacts that bad FPs are using")

     parser.add_argument('--strict_nodes', type=int, default=0,
                         choices=[0,1],
                         help="Set StrictNodes: 1 is less anonymous but more secure, although some sites may be unreachable")
@@ -498,7 +500,7 @@ def oMainArgparser(_=None):
                         help="Seconds to wait for Tor to booststrap")
     parser.add_argument('--points_timeout', type=int, default=0,
                         help="Timeout for getting introduction points - must be long >120sec. 0 means disabled looking for IPs")
-    parser.add_argument('--log_level', type=int, default=10,
+    parser.add_argument('--log_level', type=int, default=20,
                         help="10=debug 20=info 30=warn 40=error")
     parser.add_argument('--bad_sections', type=str,
                         default='MyBadExit',
@@ -523,24 +525,24 @@ def vwrite_badnodes(oArgs, oBAD_NODES, slen):
         if os.path.exists(oArgs.bad_nodes):
             os.rename(oArgs.bad_nodes, bak)
         os.rename(tmp, oArgs.bad_nodes)

-def vwrite_goodnodes(oArgs, oGOOD_NODES, slen):
+def vwrite_goodnodes(oArgs, oGOOD_NODES, ilen):
     if oArgs.good_nodes:
         tmp = oArgs.good_nodes +'.tmp'
         bak = oArgs.good_nodes +'.bak'
         with open(tmp, 'wt') as oFYaml:
             yaml.dump(oGOOD_NODES, indent=2, stream=oFYaml)
-            LOG.info(f"Wrote {slen} good nodes to {oArgs.good_nodes}")
+            LOG.info(f"Wrote {ilen} good relays to {oArgs.good_nodes}")
             oFYaml.close()
         if os.path.exists(oArgs.good_nodes):
             os.rename(oArgs.good_nodes, bak)
         os.rename(tmp, oArgs.good_nodes)

 def iMain(lArgs):
     global aTRUST_DB
     global aTRUST_DB_INDEX
     global oBAD_NODES
     global oGOOD_NODES
     global lKNOWN_NODNS
     parser = oMainArgparser()
     oArgs = parser.parse_args(lArgs)
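
Note: `vwrite_badnodes`/`vwrite_goodnodes` follow a write-to-tmp, back-up, rename dance so a crash mid-write cannot corrupt the live file. A compact sketch of the same pattern (file name and payload are placeholders; `os.replace` is the atomic variant of `os.rename` on both POSIX and Windows):

    import os

    # Sketch: write new content to a temp file, keep a .bak of the
    # old file, then swap the temp file into place.
    def write_with_backup(path: str, text: str) -> None:
        tmp, bak = path + '.tmp', path + '.bak'
        with open(tmp, 'wt') as f:
            f.write(text)
        if os.path.exists(path):
            os.replace(path, bak)   # keep the previous version
        os.replace(tmp, path)       # atomic swap on the same filesystem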
@@ -573,7 +575,7 @@ def iMain(lArgs):
                     continue
                 aTRUST_DB_INDEX[fp] = v
             LOG.info(f"{len(aTRUST_DB_INDEX.keys())} good relays from {sFile}")

         except Exception as e:
             LOG.exception(f"Error reading YAML TrustDB {sFile} {e}")

@@ -625,17 +627,17 @@ def iMain(lArgs):
     texclude_set = set(lYamlBadNodes(oArgs.bad_nodes,
                                      lWanted=sections,
                                      section=sEXCLUDE_EXIT_KEY))
     LOG.info(f"Preloaded {len(texclude_set)} bad fps")

     ttrust_db_index = aTRUST_DB_INDEX.keys()
-    tdns_contacts = set()
+    tdns_urls = set()
     iFakeContact = 0
     iTotalContacts = 0
     aBadContacts = {}

     lConds = oArgs.contact.split(',')
     iR = 0

     relays = controller.get_server_descriptors()
     for relay in relays:
         iR += 1
@@ -643,15 +645,15 @@ def iMain(lArgs):
             LOG.warn('Invalid Fingerprint: %s' % relay.fingerprint)
             continue
         relay.fingerprint = relay.fingerprint.upper()

-        sofar = f"G:{len(aTRUST_DB.keys())} U:{len(tdns_contacts)} F:{iFakeContact} BF:{len(texclude_set)} GF:{len(ttrust_db_index)} TC:{iTotalContacts} #{iR}"
+        sofar = f"G:{len(aTRUST_DB.keys())} U:{len(tdns_urls)} F:{iFakeContact} BF:{len(texclude_set)} GF:{len(ttrust_db_index)} TC:{iTotalContacts} #{iR}"
         if not relay.exit_policy.is_exiting_allowed():
             if sEXCLUDE_EXIT_KEY == 'ExcludeNodes':
                 pass # LOG.debug(f"{relay.fingerprint} not an exit {sofar}")
             else:
                 pass # LOG.warn(f"{relay.fingerprint} not an exit {sofar}")
             # continue

         # great contact had good fps and we are in them
         if relay.fingerprint in aTRUST_DB_INDEX.keys():
             # a cached entry
@@ -660,54 +662,55 @@ def iMain(lArgs):
         if type(relay.contact) == bytes:
             # dunno
             relay.contact = str(relay.contact, 'UTF-8')

         if ('Empty' in lConds and not relay.contact) or \
            ('NoEmail' in lConds and relay.contact and not 'email:' in relay.contact):
             texclude_set.add(relay.fingerprint)
             continue

         if not relay.contact or not 'ciissversion:' in relay.contact:
             # should be unreached 'Empty' should always be in lConds
             continue
         iTotalContacts += 1

+        fp = relay.fingerprint
         if relay.contact and not 'url:' in relay.contact:
-            LOG.info(f"{relay.fingerprint} skipping bad contact - no url: {sofar}")
-            LOG.debug(f"{relay.fingerprint} {relay.contact} {sofar}")
-            texclude_set.add(relay.fingerprint)
+            LOG.info(f"{fp} skipping bad contact - no url: {sofar}")
+            LOG.debug(f"{fp} {relay.contact} {sofar}")
+            texclude_set.add(fp)
             continue

         c = relay.contact.lower()
         # first rough cut
         i = c.find('url:')
         if i >=0:
             c = c[i+4:]
         i = c.find(' ')
         if i >=0: c = c[:i]
         c = c.lstrip('https://').lstrip('http://').strip('/')
         i = c.find('/')
         if i >=0: c = c[:i]
         domain = c
-        if domain and bdomain_is_bad(domain):
-            LOG.info(f"{relay.fingerprint} skipping bad {domain} {sofar}")
-            LOG.debug(f"{relay.fingerprint} {relay.contact} {sofar}")
-            texclude_set.add(relay.fingerprint)
+        if domain and bdomain_is_bad(domain, fp):
+            LOG.info(f"{fp} skipping bad {domain} {sofar}")
+            LOG.debug(f"{fp} {relay.contact} {sofar}")
+            texclude_set.add(fp)
             continue

         if domain:
             ip = zResolveDomain(domain)
             if not ip:
-                LOG.warn(f"{relay.fingerprint} {domain} did not resolve {sofar}")
-                texclude_set.add(relay.fingerprint)
+                LOG.warn(f"{fp} {domain} did not resolve {sofar}")
+                texclude_set.add(fp)
                 lKNOWN_NODNS.append(domain)
                 iFakeContact += 1
                 continue

         if 'dns-rsa' in relay.contact.lower():
             target = f"{relay.fingerprint}.{domain}"
             LOG.info(f"skipping 'dns-rsa' {target} {sofar}")
-            tdns_contacts.add(target)
+            tdns_urls.add(target)

         elif 'proof:uri-rsa' in relay.contact.lower():
             a = aParseContact(relay.contact, relay.fingerprint)
             if not a:
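
Note: the "first rough cut" block above pulls the bare domain out of the ContactInfo `url:` field with successive `find`/slice steps, and inherits the `lstrip` prefix caveat flagged earlier. An equivalent, easier-to-audit sketch using a regular expression (the pattern is an assumption for illustration, not part of the commit):

    import re

    # Sketch: extract the domain that follows 'url:' in a ContactInfo
    # string, tolerating an optional scheme and a trailing path.
    def domain_from_contact(contact: str) -> str:
        m = re.search(r'url:(?:https?://)?([^\s/]+)', contact.lower())
        return m.group(1) if m else ''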
@@ -730,7 +733,7 @@ def iMain(lArgs):
                 iFakeContact += 1
                 texclude_set.add(relay.fingerprint)
                 continue


             b = aVerifyContact(list(a.values())[0],
                                relay.fingerprint,
@@ -738,7 +741,7 @@ def iMain(lArgs):
                                timeout=oArgs.timeout,
                                host=oArgs.proxy_host,
                                port=oArgs.proxy_port)

             if not b or not 'fps' in b or not b['fps'] or not b['url']:
                 LOG.warn(f"{relay.fingerprint} did NOT VERIFY {sofar}")
                 LOG.debug(f"{relay.fingerprint} {b} {sofar}")
@@ -747,7 +750,7 @@ def iMain(lArgs):
                 texclude_set.add(relay.fingerprint)
                 aBadContacts[relay.fingerprint] = b
                 continue

             if relay.fingerprint not in b['fps']:
                 LOG.warn(f"{relay.fingerprint} the FP IS NOT in the list of fps {sofar}")
                 # assume a fp is using a bogus contact
@@ -766,9 +769,11 @@ def iMain(lArgs):
         with open(proof_output_tmp, 'wt') as oFYaml:
             yaml.dump(aTRUST_DB, indent=2, stream=oFYaml)
             oFYaml.close()

     LOG.info(f"Filtered {len(twhitelist_set)} whitelisted relays")
     texclude_set = texclude_set.difference(twhitelist_set)
+    # accept the dns-rsa urls for now until we test them
+    texclude_set = texclude_set.difference(tdns_urls)
     LOG.info(f"{len(list(aTRUST_DB.keys()))} good contacts out of {iTotalContacts}")

     if oArgs.proof_output and aTRUST_DB:
@@ -785,7 +790,7 @@ def iMain(lArgs):
         with open(oArgs.torrc_output, 'wt') as oFTorrc:
             oFTorrc.write(f"{sEXCLUDE_EXIT_KEY} {','.join(texclude_set)}\n")
             oFTorrc.write(f"{sINCLUDE_EXIT_KEY} {','.join(aTRUST_DB_INDEX.keys())}\n")
-            oFTorrc.write(f"{sINCLUDE_GUARD_KEY} {','.join(o[oGOOD_ROOT]['GuardNodes'])}\n")
+            oFTorrc.write(f"{sINCLUDE_GUARD_KEY} {','.join(oGOOD_NODES[oGOOD_ROOT]['GuardNodes'])}\n")
             LOG.info(f"Wrote tor configuration to {oArgs.torrc_output}")
             oFTorrc.close()

@@ -798,10 +803,10 @@ def iMain(lArgs):
     oBAD_NODES[oBAD_ROOT]['ExcludeNodes']['BadExit'] = list(texclude_set)
     oBAD_NODES[oBAD_ROOT]['ExcludeDomains'] = lKNOWN_NODNS
     vwrite_badnodes(oArgs, oBAD_NODES, str(len(texclude_set)))

     oGOOD_NODES['GoodNodes']['Relays']['ExitNodes'] = list(aTRUST_DB_INDEX.keys())
     # GuardNodes are readonl
-    vwrite_goodnodes(oArgs, oGOOD_NODES, str(len(ttrust_db_index)))
+    vwrite_goodnodes(oArgs, oGOOD_NODES, len(aTRUST_DB_INDEX.keys()))
     retval = 0
     try:
         logging.getLogger('stem').setLevel(30)
@@ -838,7 +843,7 @@ def iMain(lArgs):
             LOG.errro(f"Failed setting {sINCLUDE_EXIT_KEY} good exit nodes in Tor")
             retval += 1

-        LOG.info("dns-rsa domains:\n{'\n'.join(tdns_contacts)}")
+        sys.stdout.write("dns-rsa domains:\n" +'\n'.join(tdns_urls) +'\n')
        return retval

     except InvalidRequest as e:
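
Note: the replaced `LOG.info` line was doubly broken: it lacked the `f` prefix (so the braces were logged literally), and a backslash inside an f-string expression is a syntax error before Python 3.12. The committed `sys.stdout.write` form sidesteps both; an f-string-friendly sketch of the same output (assumes a configured `LOG`, and `tdns_urls` from the surrounding code):

    # Sketch: join first, then format - avoids '\n' inside an
    # f-string expression, which older Pythons reject.
    joined = '\n'.join(tdns_urls)
    LOG.info(f"dns-rsa domains:\n{joined}")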

https_adapter.py (265 lines, deleted)

@@ -1,265 +0,0 @@
-# -*- mode: python; indent-tabs-mode: nil; py-indent-offset: 4; coding: utf-8 -*-
-
-from requests import adapters
-from requests.utils import (
-    DEFAULT_CA_BUNDLE_PATH,
-    get_auth_from_url,
-    get_encoding_from_headers,
-    prepend_scheme_if_needed,
-    select_proxy,
-    urldefragauth,
-)
-import urllib3
-from urllib3.util import parse_url
-from urllib3.util.retry import Retry
-from urllib3.util import Timeout as TimeoutSauce
-
-DEFAULT_POOLBLOCK = False
-DEFAULT_POOLSIZE = 10
-DEFAULT_RETRIES = 0
-DEFAULT_POOL_TIMEOUT = None
-
-class HTTPAdapter(adapters.HTTPAdapter):
-    def __init__(self,
-                 pool_connections=DEFAULT_POOLSIZE,
-                 pool_maxsize=DEFAULT_POOLSIZE,
-                 max_retries=DEFAULT_RETRIES,
-                 pool_block=DEFAULT_POOLBLOCK
-                 ):
-        self.config = {}
-        self.proxy_manager = {}
-
-        if isinstance(max_retries, Retry):
-            self.max_retries = max_retries
-        else:
-            max_retries = Retry.from_int(max_retries)
-            self.max_retries = max_retries
-
-        self._pool_connections = pool_connections
-        self._pool_maxsize = pool_maxsize
-        self._pool_block = pool_block
-
-        self.init_poolmanager(pool_connections, pool_maxsize, block=pool_block)
-
-
-class HTTPSAdapter(HTTPAdapter):
-    """The built-in HTTP Adapter for urllib3.
-
-    Provides a general-case interface for Requests sessions to contact HTTP and
-    HTTPS urls by implementing the Transport Adapter interface. This class will
-    usually be created by the :class:`Session <Session>` class under the
-    covers.
-
-    :param pool_connections: The number of urllib3 connection pools to cache.
-    :param pool_maxsize: The maximum number of connections to save in the pool.
-    :param max_retries: The maximum number of retries each connection
-        should attempt. Note, this applies only to failed DNS lookups, socket
-        connections and connection timeouts, never to requests where data has
-        made it to the server. By default, Requests does not retry failed
-        connections. If you need granular control over the conditions under
-        which we retry a request, import urllib3's ``Retry`` class and pass
-        that instead.
-    :param pool_block: Whether the connection pool should block for connections.
-
-    Usage::
-
-      >>> import requests
-      >>> s = requests.Session()
-      >>> a = requests.adapters.HTTPAdapter(max_retries=3)
-      >>> s.mount('http://', a)
-    """
-
-    def __init__(
-        self,
-        pool_connections=DEFAULT_POOLSIZE,
-        pool_maxsize=1,
-        max_retries=3,
-        pool_block=DEFAULT_POOLBLOCK,
-    ):
-        retries = Retry(connect=max_retries, read=2, redirect=0)
-        adapters.HTTPAdapter.__init__(self,
-                                      pool_connections=pool_connections,
-                                      pool_maxsize=pool_maxsize,
-                                      max_retries=retries,
-                                      pool_block=pool_block)
-
-    def get_connection(self, url, proxies=None, use_forwarding_for_https=True):
-        """Returns a urllib3 connection for the given URL. This should not be
-        called from user code, and is only exposed for use when subclassing the
-        :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.
-
-        :param url: The URL to connect to.
-        :param proxies: (optional) A Requests-style dictionary of proxies used on this request.
-        :rtype: urllib3.ConnectionPool
-        """
-        proxy = select_proxy(url, proxies)
-
-        if proxy:
-            proxy = prepend_scheme_if_needed(proxy, "http")
-            proxy_url = parse_url(proxy)
-            if not proxy_url.host:
-                raise InvalidProxyURL(
-                    "Please check proxy URL. It is malformed "
-                    "and could be missing the host."
-                )
-            proxy_manager = self.proxy_manager_for(proxy)
-            conn = proxy_manager.connection_from_url(url)
-        else:
-            # Only scheme should be lower case
-            parsed = urlparse(url)
-            url = parsed.geturl()
-            conn = self.poolmanager.connection_from_url(url, use_forwarding_for_https=True)
-
-        return conn
-
-    def send(
-        self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None
-    ):
-        """Sends PreparedRequest object. Returns Response object.
-
-        :param request: The :class:`PreparedRequest <PreparedRequest>` being sent.
-        :param stream: (optional) Whether to stream the request content.
-        :param timeout: (optional) How long to wait for the server to send
-            data before giving up, as a float, or a :ref:`(connect timeout,
-            read timeout) <timeouts>` tuple.
-        :type timeout: float or tuple or urllib3 Timeout object
-        :param verify: (optional) Either a boolean, in which case it controls whether
-            we verify the server's TLS certificate, or a string, in which case it
-            must be a path to a CA bundle to use
-        :param cert: (optional) Any user-provided SSL certificate to be trusted.
-        :param proxies: (optional) The proxies dictionary to apply to the request.
-        :rtype: requests.Response
-        """
-
-        try:
-            #? _socks_options
-            conn = self.get_connection(request.url, proxies, use_forwarding_for_https=True)
-        except LocationValueError as e:
-            raise InvalidURL(e, request=request)
-
-        self.cert_verify(conn, request.url, verify, cert)
-        url = self.request_url(request, proxies)
-        self.add_headers(
-            request,
-            stream=stream,
-            timeout=timeout,
-            verify=verify,
-            cert=cert,
-            proxies=proxies,
-        )
-
-        chunked = not (request.body is None or "Content-Length" in request.headers)
-
-        if isinstance(timeout, tuple):
-            try:
-                connect, read = timeout
-                timeout = TimeoutSauce(connect=connect, read=read)
-            except ValueError:
-                raise ValueError(
-                    f"Invalid timeout {timeout}. Pass a (connect, read) timeout tuple, "
-                    f"or a single float to set both timeouts to the same value."
-                )
-        elif isinstance(timeout, TimeoutSauce):
-            pass
-        else:
-            timeout = TimeoutSauce(connect=timeout, read=timeout)
-
-        try:
-            if not chunked:
-                resp = conn.urlopen(
-                    method=request.method,
-                    url=url,
-                    body=request.body,
-                    headers=request.headers,
-                    redirect=False,
-                    assert_same_host=False,
-                    preload_content=False,
-                    decode_content=False,
-                    retries=self.max_retries,
-                    timeout=timeout,
-                )
-
-            # Send the request.
-            else:
-                if hasattr(conn, "proxy_pool"):
-                    conn = conn.proxy_pool
-
-                low_conn = conn._get_conn(timeout=DEFAULT_POOL_TIMEOUT)
-
-                try:
-                    skip_host = "Host" in request.headers
-                    low_conn.putrequest(
-                        request.method,
-                        url,
-                        skip_accept_encoding=True,
-                        skip_host=skip_host,
-                    )
-
-                    for header, value in request.headers.items():
-                        low_conn.putheader(header, value)
-
-                    low_conn.endheaders()
-
-                    for i in request.body:
-                        low_conn.send(hex(len(i))[2:].encode("utf-8"))
-                        low_conn.send(b"\r\n")
-                        low_conn.send(i)
-                        low_conn.send(b"\r\n")
-                    low_conn.send(b"0\r\n\r\n")
-
-                    # Receive the response from the server
-                    r = low_conn.getresponse()
-
-                    resp = HTTPResponse.from_httplib(
-                        r,
-                        pool=conn,
-                        connection=low_conn,
-                        preload_content=False,
-                        decode_content=False,
-                    )
-                except Exception:
-                    # If we hit any problems here, clean up the connection.
-                    # Then, raise so that we can handle the actual exception.
-                    low_conn.close()
-                    raise
-
-        except (ProtocolError, OSError) as err:
-            raise ConnectionError(err, request=request)
-
-        except MaxRetryError as e:
-            if isinstance(e.reason, ConnectTimeoutError):
-                # TODO: Remove this in 3.0.0: see #2811
-                if not isinstance(e.reason, NewConnectionError):
-                    raise ConnectTimeout(e, request=request)
-
-            if isinstance(e.reason, ResponseError):
-                raise RetryError(e, request=request)
-
-            if isinstance(e.reason, _ProxyError):
-                raise ProxyError(e, request=request)
-
-            if isinstance(e.reason, _SSLError):
-                # This branch is for urllib3 v1.22 and later.
-                raise SSLError(e, request=request)
-
-            raise ConnectionError(e, request=request)
-
-        except ClosedPoolError as e:
-            raise ConnectionError(e, request=request)
-
-        except _ProxyError as e:
-            raise ProxyError(e)
-
-        except (_SSLError, _HTTPError) as e:
-            if isinstance(e, _SSLError):
-                # This branch is for urllib3 versions earlier than v1.22
-                raise SSLError(e, request=request)
-            elif isinstance(e, ReadTimeoutError):
-                raise ReadTimeout(e, request=request)
-            elif isinstance(e, _InvalidHeader):
-                raise InvalidHeader(e, request=request)
-            else:
-                raise
-
-        return self.build_response(request, resp)

lookupdns.py (new file, 79 lines)

@@ -0,0 +1,79 @@
+#!/usr/local/bin/python3.sh
+# -*-mode: python; indent-tabs-mode: nil; py-indent-offset: 4; coding: utf-8 -*
+
+import sys
+import os
+import traceback
+
+from phantompy import Render
+
+global LOG
+import logging
+import warnings
+warnings.filterwarnings('ignore')
+LOG = logging.getLogger()
+
+class LookFor(Render):
+
+    def __init__(self, url, outfile, jsfile=None):
+        self.uri = url
+        Render.__init__(self, url, outfile, jsfile)
+
+    def ilookfor(self, html):
+        import json
+        marker = '<pre style="word-wrap: break-word; white-space: pre-wrap;">'
+        if marker not in html: return ''
+        i = html.find(marker) + len(marker)
+        html = html[i:]
+        assert html[0] == '{', html
+        i = html.find('</pre')
+        html = html[:i]
+        assert html[-1] == '}', html
+        LOG.debug(f"Found {len(html)} json")
+        o = json.loads(html)
+        if "Answer" not in o.keys() or type(o["Answer"]) != list:
+            LOG.warn(f"FAIL {self.uri}")
+            return 1
+        for elt in o["Answer"]:
+            assert type(elt) == dict, elt
+            assert 'type' in elt, elt
+            if elt['type'] != 16: continue
+            assert 'data' in elt, elt
+            if elt['data'] == 'we-run-this-tor-relay':
+                LOG.info(f"OK {self.uri}")
+                return 0
+        LOG.warn(f"BAD {self.uri}")
+        return 2
+
+    def _html_callback(self, *args):
+        """print(self, QPrinter, Callable[[bool], None])"""
+        if type(args[0]) is str:
+            self._save(args[0])
+            i = self.ilookfor(args[0])
+            self._exit(i)
+
+    def _save(self, html):
+        sfile = self.outfile.replace('.pdf','.out')
+        # CompleteHtmlSaveFormat SingleHtmlSaveFormat MimeHtmlSaveFormat
+        with open(sfile, 'wt') as ofd:
+            ofd.write(html)
+        LOG.debug(f"Saved {sfile}")
+
+    def _loadFinished(self, result):
+        LOG.debug("phantom.py: Loading finished!")
+        self.toHtml(self._html_callback)
+
+def main():
+    if (len(sys.argv) < 3):
+        LOG.info("USAGE: lookupdns.py <url> <pdf-file> [<javascript-file>]")
+    else:
+        url = sys.argv[1]
+        outfile = sys.argv[2]
+        jsfile = sys.argv[3] if len(sys.argv) > 3 else None
+        r = LookFor(url, outfile, jsfile)
+
+    sys.exit(0)
+
+if __name__ == "__main__":
+    main()
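
Note: lookupdns.py scrapes a rendered DNS-over-HTTPS answer page and accepts a relay when a TXT record (`type` 16) carries `we-run-this-tor-relay`, matching the dns-rsa proof. The headless browser is only needed when the lookup has to ride through a page-rendering proxy; against a plain DoH JSON endpoint the same check is a few lines. A sketch (the dns.google resolver URL and the use of requests are assumptions, not part of the commit):

    import requests

    # Sketch: check a dns-rsa proof directly against a DoH JSON endpoint.
    def check_dns_rsa(fingerprint: str, domain: str) -> bool:
        url = f"https://dns.google/resolve?name={fingerprint}.{domain}&type=TXT"
        o = requests.get(url, timeout=30).json()
        return any(ans.get('type') == 16 and
                   'we-run-this-tor-relay' in ans.get('data', '')
                   for ans in o.get('Answer', []))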

phantompy.py (new file, 260 lines)

@@ -0,0 +1,260 @@
+#!/usr/local/bin/python3.sh
+# -*-mode: python; indent-tabs-mode: nil; py-indent-offset: 4; coding: utf-8 -*-
+# https://gist.github.com/michaelfranzl/91f0cc13c56120391b949f885643e974/raw/a0601515e7a575bc4c7d4d2a20973b29b6c6f2df/phantom.py
+"""
+# phantom.py
+
+Simple but fully scriptable headless QtWebKit browser using PyQt5 in Python3,
+specialized in executing external JavaScript and generating PDF files. A lean
+replacement for other bulky headless browser frameworks.
+
+
+## Usage
+
+If you have a display attached:
+
+    ./phantom.py <url> <pdf-file> [<javascript-file>]
+
+If you don't have a display attached (i.e. on a remote server):
+
+    xvfb-run ./phantom.py <url> <pdf-file> [<javascript-file>]
+
+Arguments:
+
+<url> Can be a http(s) URL or a path to a local file
+<pdf-file> Path and name of PDF file to generate
+[<javascript-file>] (optional) Path and name of a JavaScript file to execute
+
+
+## Features
+
+* Generate a PDF screenshot of the web page after it is completely loaded.
+* Optionally execute a local JavaScript file specified by the argument
+  <javascript-file> after the web page is completely loaded, and before
+  the PDF is generated.
+* console.log's will be printed to stdout.
+* Easily add new features by changing the source code of this script, without
+  compiling C++ code. For more advanced applications, consider attaching
+  PyQt objects/methods to WebKit's JavaScript space by using
+  `QWebFrame::addToJavaScriptWindowObject()`.
+
+If you execute an external <javascript-file>, phantom.py has no way of knowing
+when that script has finished doing its work. For this reason, the external
+script should execute `console.log("__PHANTOM_PY_DONE__");` when done. This will
+trigger the PDF generation, after which phantom.py will exit. If no
+`__PHANTOM_PY_DONE__` string is seen on the console for 10 seconds, phantom.py
+will exit without doing anything. This behavior could be implemented more
+elegantly without console.log's but it is the simplest solution.
+
+It is important to remember that since you're just running WebKit, you can use
+everything that WebKit supports, including the usual JS client libraries, CSS,
+CSS @media types, etc.
+
+
+## Dependencies
+
+* Python3
+* PyQt5
+* xvfb (optional for display-less machines)
+
+Installation of dependencies in Debian Stretch is easy:
+
+    apt-get install xvfb python3-pyqt5 python3-pyqt5.qtwebkit
+
+Finding the equivalent for other OSes is an exercise that I leave to you.
+
+
+## Examples
+
+Given the following file /tmp/test.html
+
+    <html>
+      <body>
+        <p>foo <span id="id1">foo</span> <span id="id2">foo</span></p>
+      </body>
+      <script>
+        document.getElementById('id1').innerHTML = "bar";
+      </script>
+    </html>
+
+... and the following file /tmp/test.js:
+
+    document.getElementById('id2').innerHTML = "baz";
+    console.log("__PHANTOM_PY_DONE__");
+
+... and running this script (without attached display) ...
+
+    xvfb-run python3 phantom.py /tmp/test.html /tmp/out.pdf /tmp/test.js
+
+... you will get a PDF file /tmp/out.pdf with the contents "foo bar baz".
+
+Note that the second occurrence of "foo" has been replaced by the web page's own
+script, and the third occurrence of "foo" by the external JS file.
+
+
+## License
+
+Copyright 2017 Michael Karl Franzl
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+import sys
+import os
+import traceback
+import atexit
+from PyQt5.QtCore import QUrl
+from PyQt5.QtCore import QTimer
+from PyQt5.QtWidgets import QApplication
+from PyQt5.QtPrintSupport import QPrinter
+from PyQt5.QtWebEngineWidgets import QWebEnginePage as QWebPage
+
+global LOG
+import logging
+import warnings
+warnings.filterwarnings('ignore')
+LOG = logging.getLogger()
+
+def prepare():
+    sfile = '/tmp/test.js'
+    if not os.path.exists(sfile):
+        with open(sfile, 'wt') as ofd:
+            ofd.write("""
+document.getElementById('id2').innerHTML = "baz";
+console.log("__PHANTOM_PY_DONE__");
+""")
+        sys.stderr.write(f"wrote {sfile} ")
+    sfile = '/tmp/test.html'
+    if not os.path.exists(sfile):
+        with open(sfile, 'wt') as ofd:
+            ofd.write("""
+<html>
+<body>
+<p>foo <span id="id1">foo</span> <span id="id2">foo</span></p>
+</body>
+<script>
+document.getElementById('id1').innerHTML = "bar";
+</script>
+</html>
+""")
+        sys.stderr.write(f"wrote {sfile} ")
+    sys.stderr.write("\n")
+
+class Render(QWebPage):
+    def __init__(self, url, outfile, jsfile=None):
+        self.app = QApplication(sys.argv)
+
+        QWebPage.__init__(self)
+
+        self.jsfile = jsfile
+        self.outfile = outfile
+
+        qurl = QUrl.fromUserInput(url)
+
+        LOG.debug(f"phantom.py: URL= {qurl} OUTFILE={outfile} JSFILE= {jsfile}")
+
+        # The PDF generation only happens when the special string __PHANTOM_PY_DONE__
+        # is sent to console.log(). The following JS string will be executed by
+        # default, when no external JavaScript file is specified.
+        self.js_contents = "setTimeout(function() { console.log('__PHANTOM_PY_DONE__') }, 5000);";
+
+        if jsfile:
+            try:
+                f = open(self.jsfile)
+                self.js_contents = f.read()
+                f.close()
+            except:
+                LOG.error(traceback.format_exc())
+                self._exit(10)
+
+        self.loadFinished.connect(self._loadFinished)
+        self.load(qurl)
+        self.javaScriptConsoleMessage = self._onConsoleMessage
+
+        if False:
+            # Run for a maximum of 10 seconds
+            watchdog = QTimer()
+            watchdog.setSingleShot(True)
+            watchdog.timeout.connect(lambda: self._exit(9))
+            watchdog.start(10000)
+
+        self.app.exec_()
+
+    def _onConsoleMessage(self, *args):
+        if len(args) > 3:
+            level, txt, lineno, filename = args
+        else:
+            level = 1
+            txt, lineno, filename = args
+        LOG.debug(f"CONSOLE {lineno} {txt} {filename}")
+        if "__PHANTOM_PY_DONE__" in txt:
+            # If we get this magic string, it means that the external JS is done
+            self._print()
+        if "__PHANTOM_PY_EXIT__" in txt:
+            self._exit(0)
+
+    def _loadFinished(self, result):
+        LOG.debug(f"phantom.py: Evaluating JS from {self.jsfile}")
+        self.runJavaScript("document.documentElement.contentEditable=true")
+        self.runJavaScript(self.js_contents)
+
+    def _printer_callback(self, *args):
+        """print(self, QPrinter, Callable[[bool], None])"""
+        # print(f"_printer_callback {self.outfile} {args}")
+        if args[0] is False:
+            i = 1
+        else:
+            i = 0
+        self._exit(i)
+
+    def _print(self):
+        printer = QPrinter()
+        printer.setPageMargins(10, 10, 10, 10, QPrinter.Millimeter)
+        printer.setPaperSize(QPrinter.A4)
+        printer.setCreator("phantom.py by Michael Karl Franzl")
+        printer.setOutputFormat(QPrinter.PdfFormat);
+        printer.setOutputFileName(self.outfile);
+        self.print(printer, self._printer_callback)
+        LOG.debug("phantom.py: Printed")
+
+    def _exit(self, val):
+        LOG.debug(f"phantom.py: Exiting with val {val}")
+
+        # Run for a maximum of 10 seconds
+        watchdog = QTimer()
+        watchdog.setSingleShot(True)
+        watchdog.timeout.connect(lambda: sys.exit(val))
+        watchdog.start(10000)
+        self.app.exit(val)
+        atexit._clear()
+        sys.exit(val)
+
+def main():
+    if (len(sys.argv) < 3):
+        LOG.info("USAGE: ./phantom.py <url> <pdf-file> [<javascript-file>]")
+    else:
+        url = sys.argv[1]
+        outfile = sys.argv[2]
+        jsfile = sys.argv[3] if len(sys.argv) > 3 else None
+        r = Render(url, outfile, jsfile)
+
+    sys.exit(0)
+
+if __name__ == "__main__":
+    main()
@@ -27,9 +27,8 @@ LOG = logging.getLogger()

 bHAVE_TORR = shutil.which('tor-resolve')

-# maybe we should check these each time but we
-# got them by sorting bad relays in the wild
-# we'll keep a copy here
+# we check these each time but we got them by sorting bad relays
+# in the wild we'll keep a copy here so we can avoid restesting
 yKNOWN_NODNS = """
 ---
 - 0x0.is
@@ -50,6 +49,7 @@ yKNOWN_NODNS = """
 - or.wowplanet.de
 - ormycloud.org
 - plied-privacy.net
+- rivacysvcs.net
 - redacted.org
 - rification-for-nusenu.net
 - rofl.cat
@@ -52,7 +52,7 @@ def read_local_trust_config(trust_config):
     '''

     result = []

     # for now we support max_depth = 0 only
     # this PoC version has no support for recursion
     # https://github.com/nusenu/tor-relay-operator-ids-trust-information#trust-information-consumers
@@ -140,7 +140,11 @@ def get_controller(address='127.0.0.1', port=9151, password=''):

     return controller

-def find_validation_candidates(controller, trusted_domains=[],validation_cache=[],accept_all=False):
+def find_validation_candidates(controller,
+                               trusted_domains=[],
+                               validation_cache=[],
+                               CAfile='/etc/ssl/certs/ca-certificates.crt',
+                               accept_all=False):
     '''
     connect to a tor client via controlport and return a dict of all
     not yet validated fingerprints per trusted operators
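
Note: the new `CAfile` parameter threads a CA bundle path into candidate discovery; the matching call-site change appears in the final hunk below. For illustration, a minimal sketch of how such a bundle path is typically handed to requests (the `verify=` usage is an assumption about the downstream code, which this diff does not show):

    import requests

    # Sketch: pin certificate verification to an explicit CA bundle.
    CAfile = '/etc/ssl/certs/ca-certificates.crt'
    resp = requests.get('https://example.org/.well-known/tor-relay/rsa-fingerprint.txt',
                        verify=CAfile, timeout=30)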
@@ -221,14 +225,14 @@ def oDownloadUrlRequests(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050):
         head = requests.head(uri, timeout=timeout, proxies=proxy, headers=headers)
     except Exception as e:
         raise TrustorError(f"HTTP HEAD request failed for {uri} {e}")

     if head.status_code >= 300:
         raise TrustorError(f"HTTP Errorcode {head.status_code}")
     if not head.headers['Content-Type'].startswith('text/plain'):
         raise TrustorError(f"HTTP Content-Type != text/plain")
     if not os.path.exists(sCAfile):
         raise TrustorError(f"File not found CAfile {sCAfile}")

     try:
         with requests.sessions.Session() as session:
             oReqResp = session.request(method="get", url=uri,
@@ -336,7 +340,7 @@ def my_match_hostname(cert, hostname):
     else:
         raise CertificateError(
             "no appropriate commonName or subjectAltName fields were found"
         )
 match_hostname = my_match_hostname
 from urllib3.util.ssl_ import (
     is_ipaddress,
@@ -393,15 +397,15 @@ def oDownloadUrlUrllib3(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050):
                              retries=False)
     except Exception as e:
         LOG.error(f"HTTP HEAD request failed for {uri} {e}")
         raise

     if head.status >= 300:
         raise TrustorError(f"HTTP Errorcode {head.status}")
     if not head.headers['Content-Type'].startswith('text/plain'):
         raise TrustorError(f"HTTP Content-Type != text/plain")
     if not os.path.exists(sCAfile):
         raise TrustorError(f"File not found CAfile {sCAfile}")

     try:
         oReqResp = proxy.request("GET", uri,
                                  headers=headers,
@@ -420,7 +424,7 @@ def oDownloadUrlUrllib3(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050):
         LOG.error(f'Redirect detected %s vs %s (final)' % (uri, oReqResp.geturl()))
         raise TrustorError(f'Redirect detected %s vs %s (final)' % (uri, oReqResp.geturl()))
     oReqResp.decode_content = True

     return oReqResp
 import urllib3.connectionpool
 urllib3.connectionpool.VerifiedHTTPSConnection = HTTPSConnection
@@ -483,7 +487,7 @@ def idns_validate(domain,

     # this is not the system wide /etc/resolv.conf
     # use dnscrypt-proxy to encrypt your DNS and route it via tor's SOCKSPort


     ctx = ub_ctx()
     if (os.path.isfile(libunbound_resolv_file)):
@@ -529,6 +533,7 @@ def configure_tor(controller, trusted_fingerprints, exitonly=True):


 if __name__ == '__main__':
+    CAfile = '/etc/ssl/certs/ca-certificates.crt'
     trust_config = 'trust_config'
     assert os.path.exists(trust_config)
     trusted_domains = read_local_trust_config(trust_config)
@@ -546,7 +551,8 @@ if __name__ == '__main__':

         r = find_validation_candidates(controller,
                                        validation_cache=trusted_fingerprints,
-                                       trusted_domains=trusted_domains)
+                                       trusted_domains=trusted_domains,
+                                       CAfile=CAfile)
         validate_proofs(r, validation_cache_file,
                         timeout=timeout,
                         host=controller_address,