Compare commits

...

3 Commits

Author SHA1 Message Date
emdee 28d1d34dbd Added lookupdns.py 2 years ago
emdee 94c0834092 Added lookupdns.py 2 years ago
emdee 2ea65cc181 rm https_adapter.py 2 years ago

@@ -160,7 +160,7 @@ def lYamlBadNodes(sFile,
global oBAD_NODES
global lKNOWN_NODNS
global lMAYBE_NODNS
l = []
if not yaml: return l
if os.path.exists(sFile):
@@ -198,7 +198,7 @@ def lYamlGoodNodes(sFile='/etc/tor/torrc-goodnodes.yaml'):
# yq '.Nodes.IntroductionPoints|.[]' < /etc/tor/torrc-goodnodes.yaml
return l
def bdomain_is_bad(domain):
def bdomain_is_bad(domain, fp):
global lKNOWN_NODNS
if domain in lKNOWN_NODNS: return True
if domain in lMAYBE_NODNS:
@@ -208,10 +208,11 @@ def bdomain_is_bad(domain):
lKNOWN_NODNS.append(domain)
lMAYBE_NODNS.remove(domain)
return True
if '@' in domain:
LOG.warn(f"@ in domain {domain}")
return True
for elt in '@(){}$!':
if elt in domain:
LOG.warn(f"{elt} in domain {domain}")
return True
return False
tBAD_URLS = set()
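
Review note: the hunk above widens the single '@' test to the full set of characters that never occur in a legitimate hostname. A stand-alone sketch of the same idea (names here are illustrative, not from the codebase) reports every offending character in one pass via set intersection:

import logging
LOG = logging.getLogger()

SUSPICIOUS_CHARS = set('@(){}$!')  # same marker set as the hunk above

def bdomain_has_bad_chars(domain):
    # sketch: collect all suspicious characters at once
    hits = SUSPICIOUS_CHARS & set(domain)
    if hits:
        LOG.warning(f"{''.join(sorted(hits))} in domain {domain}")
        return True
    return False
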
@@ -256,7 +257,7 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050)
if aCachedContact['email'] == a['email']:
LOG.info(f"{fp} in aTRUST_DB_INDEX")
return aCachedContact
if 'url' not in keys:
if 'uri' not in keys:
a['url'] = ''
@@ -270,20 +271,21 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050)
c = a['url'].lstrip('https://').lstrip('http://').strip('/')
a['url'] = 'https://' +c
# domain should be a unique key for contacts
domain = a['url'][8:]
if bdomain_is_bad(domain):
if bdomain_is_bad(domain, fp):
LOG.warn(f"{domain} is bad from {a['url']}")
LOG.info(f"{domain} is bad from {a}")
LOG.debug(f"{fp} is bad from {a}")
return a
ip = zResolveDomain(domain)
if ip == '':
aFP_EMAIL[fp] = a['email']
LOG.debug(f"{fp} {domain} does not resolve")
lKNOWN_NODNS.append(domain)
return {}
if a['proof'] not in ['uri-rsa']:
# only support uri for now
if False and ub_ctx:
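
Review note: one caveat with the context line c = a['url'].lstrip('https://').lstrip('http://').strip('/') above — str.lstrip strips a set of characters, not a literal prefix, so a domain beginning with any of h, t, p, s, :, / gets mangled. The diff leaves that untouched; a sketch of a safer normalizer, assuming Python 3.9+ for removeprefix:

def sNormalizeContactUrl(url):
    # removeprefix() drops a literal prefix; lstrip() would also eat
    # leading characters of the domain itself
    for prefix in ('https://', 'http://'):
        url = url.removeprefix(prefix)
    return 'https://' + url.strip('/')

assert 'https://torproject.org'.lstrip('https://') == 'orproject.org'  # the pitfall
assert sNormalizeContactUrl('https://torproject.org') == 'https://torproject.org'
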
@@ -295,7 +297,7 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050)
pass
LOG.warn(f"{fp} proof={a['proof']} not supported yet")
return a
LOG.debug(f"{len(keys)} contact fields for {fp}")
url="https://"+domain+"/.well-known/tor-relay/rsa-fingerprint.txt"
try:
@@ -328,7 +330,7 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050)
# any reason retry?
tBAD_URLS.add(a['url'])
return a
if hasattr(o, 'text'):
data = o.text
else:
@@ -347,7 +349,7 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050)
def aParseContactYaml(contact, fp):
"""
See the Tor ContactInfo Information Sharing Specification v2
https://nusenu.github.io/ContactInfo-Information-Sharing-Specification/
"""
lelts = contact.split()
@@ -357,7 +359,7 @@ def aParseContactYaml(contact, fp):
LOG.debug(f"{fp} {a}")
return a
key = ''
for elt in lets:
for elt in lelts:
if key == '':
key = elt
continue
@@ -368,7 +370,7 @@ def aParseContact(contact, fp):
def aParseContact(contact, fp):
"""
See the Tor ContactInfo Information Sharing Specification v2
https://nusenu.github.io/ContactInfo-Information-Sharing-Specification/
"""
l = [line for line in contact.strip().replace('"', '').split(' ')
@@ -424,22 +426,22 @@ def vsetup_logging(log_level, logfile=''):
LOG.info(f"Setting log_level to {log_level!s}")
logging._levelToName = {
CRITICAL: 'CRITICAL',
ERROR: 'ERROR',
WARNING: 'WARN',
INFO: 'INFO',
DEBUG: 'DEBUG',
NOTSET: 'NOTSET',
logging.CRITICAL: 'CRITICAL',
logging.ERROR: 'ERROR',
logging.WARNING: 'WARN',
logging.INFO: 'INFO',
logging.DEBUG: 'DEBUG',
logging.NOTSET: 'NOTSET',
}
logging._nameToLevel = {
'CRITICAL': CRITICAL,
'FATAL': FATAL,
'ERROR': ERROR,
'WARN': WARNING,
'WARNING': WARNING,
'INFO': INFO,
'DEBUG': DEBUG,
'NOTSET': NOTSET,
'CRITICAL': logging.CRITICAL,
'FATAL': logging.FATAL,
'ERROR': logging.ERROR,
'WARN': logging.WARNING,
'WARNING': logging.WARNING,
'INFO': logging.INFO,
'DEBUG': logging.DEBUG,
'NOTSET': logging.NOTSET,
}
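
Review note: the hunk above fixes NameErrors by qualifying the bare level constants with the logging. module prefix. For what it's worth, the same WARN relabeling is available without mutating the private _levelToName/_nameToLevel tables, via the public API:

import logging

# sketch: addLevelName() updates both lookup directions at once
logging.addLevelName(logging.WARNING, 'WARN')
logging.basicConfig(level=logging.INFO)
logging.warning("rendered with levelname WARN")
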
def oMainArgparser(_=None):
@@ -490,7 +492,7 @@ def oMainArgparser(_=None):
parser.add_argument('--bad_contacts', type=str,
default=os.path.join(ETC_DIR, 'badcontacts.yaml'),
help="Yaml file of bad contacts that bad FPs are using")
parser.add_argument('--strict_nodes', type=int, default=0,
choices=[0,1],
help="Set StrictNodes: 1 is less anonymous but more secure, although some sites may be unreachable")
@@ -498,7 +500,7 @@ def oMainArgparser(_=None):
help="Seconds to wait for Tor to bootstrap")
parser.add_argument('--points_timeout', type=int, default=0,
help="Timeout for getting introduction points - must be long >120sec. 0 means disabled looking for IPs")
parser.add_argument('--log_level', type=int, default=10,
parser.add_argument('--log_level', type=int, default=20,
help="10=debug 20=info 30=warn 40=error")
parser.add_argument('--bad_sections', type=str,
default='MyBadExit',
@@ -523,24 +525,24 @@ def vwrite_badnodes(oArgs, oBAD_NODES, slen):
if os.path.exists(oArgs.bad_nodes):
os.rename(oArgs.bad_nodes, bak)
os.rename(tmp, oArgs.bad_nodes)
def vwrite_goodnodes(oArgs, oGOOD_NODES, slen):
def vwrite_goodnodes(oArgs, oGOOD_NODES, ilen):
if oArgs.good_nodes:
tmp = oArgs.good_nodes +'.tmp'
bak = oArgs.good_nodes +'.bak'
with open(tmp, 'wt') as oFYaml:
yaml.dump(oGOOD_NODES, indent=2, stream=oFYaml)
LOG.info(f"Wrote {slen} good nodes to {oArgs.good_nodes}")
LOG.info(f"Wrote {ilen} good relays to {oArgs.good_nodes}")
oFYaml.close()
if os.path.exists(oArgs.good_nodes):
os.rename(oArgs.good_nodes, bak)
os.rename(tmp, oArgs.good_nodes)
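
Review note: vwrite_badnodes and vwrite_goodnodes follow the same tmp/.bak/rename dance. A condensed sketch of that pattern using os.replace, which is atomic when source and destination sit on the same POSIX filesystem:

import os
import yaml

def vwrite_yaml_atomic(sFile, oData):
    # write to a temp file first so a crash never truncates the target
    tmp = sFile + '.tmp'
    bak = sFile + '.bak'
    with open(tmp, 'wt') as oFYaml:
        yaml.dump(oData, indent=2, stream=oFYaml)
    if os.path.exists(sFile):
        os.replace(sFile, bak)  # keep one backup generation
    os.replace(tmp, sFile)      # atomic swap into place
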
def iMain(lArgs):
global aTRUST_DB
global aTRUST_DB_INDEX
global oBAD_NODES
global oGOOD_NODES
global lKNOWN_NODNS
parser = oMainArgparser()
oArgs = parser.parse_args(lArgs)
@@ -573,7 +575,7 @@ def iMain(lArgs):
continue
aTRUST_DB_INDEX[fp] = v
LOG.info(f"{len(aTRUST_DB_INDEX.keys())} good relays from {sFile}")
except Exception as e:
LOG.exception(f"Error reading YAML TrustDB {sFile} {e}")
@@ -625,17 +627,17 @@ def iMain(lArgs):
texclude_set = set(lYamlBadNodes(oArgs.bad_nodes,
lWanted=sections,
section=sEXCLUDE_EXIT_KEY))
LOG.info(f"Preloaded {len(texclude_set)} bad fps")
LOG.info(f"Preloaded {len(texclude_set)} bad fps")
ttrust_db_index = aTRUST_DB_INDEX.keys()
tdns_contacts = set()
tdns_urls = set()
iFakeContact = 0
iTotalContacts = 0
aBadContacts = {}
lConds = oArgs.contact.split(',')
iR = 0
relays = controller.get_server_descriptors()
for relay in relays:
iR += 1
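
Review note: the relay loop above rides on stem's controller API; a minimal sketch of the wiring it assumes (a tor ControlPort on 9051 with cookie or password auth already configured):

from stem.control import Controller

with Controller.from_port(port=9051) as controller:
    controller.authenticate()
    for relay in controller.get_server_descriptors():
        if relay.contact:
            print(relay.fingerprint, relay.contact[:60])
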
@@ -643,15 +645,15 @@ def iMain(lArgs):
LOG.warn('Invalid Fingerprint: %s' % relay.fingerprint)
continue
relay.fingerprint = relay.fingerprint.upper()
sofar = f"G:{len(aTRUST_DB.keys())} U:{len(tdns_contacts)} F:{iFakeContact} BF:{len(texclude_set)} GF:{len(ttrust_db_index)} TC:{iTotalContacts} #{iR}"
sofar = f"G:{len(aTRUST_DB.keys())} U:{len(tdns_urls)} F:{iFakeContact} BF:{len(texclude_set)} GF:{len(ttrust_db_index)} TC:{iTotalContacts} #{iR}"
if not relay.exit_policy.is_exiting_allowed():
if sEXCLUDE_EXIT_KEY == 'ExcludeNodes':
pass # LOG.debug(f"{relay.fingerprint} not an exit {sofar}")
else:
pass # LOG.warn(f"{relay.fingerprint} not an exit {sofar}")
# continue
# great contact had good fps and we are in them
if relay.fingerprint in aTRUST_DB_INDEX.keys():
# a cached entry
@@ -660,54 +662,55 @@ def iMain(lArgs):
if type(relay.contact) == bytes:
# dunno
relay.contact = str(relay.contact, 'UTF-8')
if ('Empty' in lConds and not relay.contact) or \
('NoEmail' in lConds and relay.contact and not 'email:' in relay.contact):
texclude_set.add(relay.fingerprint)
continue
if not relay.contact or not 'ciissversion:' in relay.contact:
# should be unreachable; 'Empty' should always be in lConds
continue
iTotalContacts += 1
fp = relay.fingerprint
if relay.contact and not 'url:' in relay.contact:
LOG.info(f"{relay.fingerprint} skipping bad contact - no url: {sofar}")
LOG.debug(f"{relay.fingerprint} {relay.contact} {sofar}")
texclude_set.add(relay.fingerprint)
LOG.info(f"{fp} skipping bad contact - no url: {sofar}")
LOG.debug(f"{fp} {relay.contact} {sofar}")
texclude_set.add(fp)
continue
c = relay.contact.lower()
# first rough cut
i = c.find('url:')
if i >=0:
c = c[i+4:]
i = c.find(' ')
if i >=0: c = c[:i]
c = c.lstrip('https://').lstrip('http://').strip('/')
i = c.find('/')
if i >=0: c = c[:i]
domain = c
if domain and bdomain_is_bad(domain):
LOG.info(f"{relay.fingerprint} skipping bad {domain} {sofar}")
LOG.debug(f"{relay.fingerprint} {relay.contact} {sofar}")
texclude_set.add(relay.fingerprint)
if domain and bdomain_is_bad(domain, fp):
LOG.info(f"{fp} skipping bad {domain} {sofar}")
LOG.debug(f"{fp} {relay.contact} {sofar}")
texclude_set.add(fp)
continue
if domain:
ip = zResolveDomain(domain)
if not ip:
LOG.warn(f"{relay.fingerprint} {domain} did not resolve {sofar}")
texclude_set.add(relay.fingerprint)
LOG.warn(f"{fp} {domain} did not resolve {sofar}")
texclude_set.add(fp)
lKNOWN_NODNS.append(domain)
iFakeContact += 1
continue
if 'dns-rsa' in relay.contact.lower():
target = f"{relay.fingerprint}.{domain}"
LOG.info(f"skipping 'dns-rsa' {target} {sofar}")
tdns_contacts.add(target)
tdns_urls.add(target)
elif 'proof:uri-rsa' in relay.contact.lower():
a = aParseContact(relay.contact, relay.fingerprint)
if not a:
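
Review note: the dns-rsa branch above only queues {fingerprint}.{domain} for later checking. Per the ContactInfo spec (and the lookupdns.py helper added further down in this diff), the proof is a TXT record at that name whose value is we-run-this-tor-relay. A sketch of a direct check, assuming the dnspython package and accepting that a plain resolver leaks the query outside Tor:

import dns.resolver  # dnspython, an assumption of this sketch

def bVerifyDnsRsa(fp, domain):
    # TXT record at <fingerprint>.<domain> must carry the magic value
    try:
        answers = dns.resolver.resolve(f"{fp}.{domain}", 'TXT')
    except Exception:
        return False
    return any(b'we-run-this-tor-relay' in b''.join(rr.strings)
               for rr in answers)
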
@@ -730,7 +733,7 @@ def iMain(lArgs):
iFakeContact += 1
texclude_set.add(relay.fingerprint)
continue
b = aVerifyContact(list(a.values())[0],
relay.fingerprint,
@@ -738,7 +741,7 @@ def iMain(lArgs):
timeout=oArgs.timeout,
host=oArgs.proxy_host,
port=oArgs.proxy_port)
if not b or not 'fps' in b or not b['fps'] or not b['url']:
LOG.warn(f"{relay.fingerprint} did NOT VERIFY {sofar}")
LOG.debug(f"{relay.fingerprint} {b} {sofar}")
@@ -747,7 +750,7 @@ def iMain(lArgs):
texclude_set.add(relay.fingerprint)
aBadContacts[relay.fingerprint] = b
continue
if relay.fingerprint not in b['fps']:
LOG.warn(f"{relay.fingerprint} the FP IS NOT in the list of fps {sofar}")
# assume a fp is using a bogus contact
@@ -766,9 +769,11 @@ def iMain(lArgs):
with open(proof_output_tmp, 'wt') as oFYaml:
yaml.dump(aTRUST_DB, indent=2, stream=oFYaml)
oFYaml.close()
LOG.info(f"Filtered {len(twhitelist_set)} whitelisted relays")
texclude_set = texclude_set.difference(twhitelist_set)
# accept the dns-rsa urls for now until we test them
texclude_set = texclude_set.difference(tdns_urls)
LOG.info(f"{len(list(aTRUST_DB.keys()))} good contacts out of {iTotalContacts}")
if oArgs.proof_output and aTRUST_DB:
@@ -785,7 +790,7 @@ def iMain(lArgs):
with open(oArgs.torrc_output, 'wt') as oFTorrc:
oFTorrc.write(f"{sEXCLUDE_EXIT_KEY} {','.join(texclude_set)}\n")
oFTorrc.write(f"{sINCLUDE_EXIT_KEY} {','.join(aTRUST_DB_INDEX.keys())}\n")
oFTorrc.write(f"{sINCLUDE_GUARD_KEY} {','.join(o[oGOOD_ROOT]['GuardNodes'])}\n")
oFTorrc.write(f"{sINCLUDE_GUARD_KEY} {','.join(oGOOD_NODES[oGOOD_ROOT]['GuardNodes'])}\n")
LOG.info(f"Wrote tor configuration to {oArgs.torrc_output}")
oFTorrc.close()
@@ -798,10 +803,10 @@ def iMain(lArgs):
oBAD_NODES[oBAD_ROOT]['ExcludeNodes']['BadExit'] = list(texclude_set)
oBAD_NODES[oBAD_ROOT]['ExcludeDomains'] = lKNOWN_NODNS
vwrite_badnodes(oArgs, oBAD_NODES, str(len(texclude_set)))
oGOOD_NODES['GoodNodes']['Relays']['ExitNodes'] = list(aTRUST_DB_INDEX.keys())
# GuardNodes are read-only
vwrite_goodnodes(oArgs, oGOOD_NODES, str(len(ttrust_db_index)))
vwrite_goodnodes(oArgs, oGOOD_NODES, len(aTRUST_DB_INDEX.keys()))
retval = 0
try:
logging.getLogger('stem').setLevel(30)
@@ -838,7 +843,7 @@ def iMain(lArgs):
LOG.error(f"Failed setting {sINCLUDE_EXIT_KEY} good exit nodes in Tor")
retval += 1
LOG.info("dns-rsa domains:\n{'\n'.join(tdns_contacts)}")
sys.stdout.write("dns-rsa domains:\n" +'\n'.join(tdns_urls) +'\n')
return retval
except InvalidRequest as e:

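Review note: the torrc writes and the except InvalidRequest above bracket the calls that push the computed lists into the running tor. The stem side of that, sketched with the names used in iMain:

# sketch: stem raises stem.InvalidRequest on a malformed value,
# which is the exception iMain catches above
controller.set_conf('ExcludeNodes', ','.join(texclude_set))
controller.set_conf('ExitNodes', ','.join(aTRUST_DB_INDEX.keys()))
controller.set_conf('StrictNodes', str(oArgs.strict_nodes))
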
@@ -1,265 +0,0 @@
# -*- mode: python; indent-tabs-mode: nil; py-indent-offset: 4; coding: utf-8 -*-
from requests import adapters
from requests.utils import (
DEFAULT_CA_BUNDLE_PATH,
get_auth_from_url,
get_encoding_from_headers,
prepend_scheme_if_needed,
select_proxy,
urldefragauth,
)
import urllib3
from urllib3.util import parse_url
from urllib3.util.retry import Retry
from urllib3.util import Timeout as TimeoutSauce
# imports needed by the code below, as in requests/adapters.py
from urllib.parse import urlparse
from requests.exceptions import (ConnectionError, ConnectTimeout,
                                 InvalidHeader, InvalidProxyURL, InvalidURL,
                                 ProxyError, ReadTimeout, RetryError, SSLError)
from urllib3.exceptions import (ClosedPoolError, ConnectTimeoutError,
                                LocationValueError, MaxRetryError,
                                NewConnectionError, ProtocolError,
                                ReadTimeoutError, ResponseError)
from urllib3.exceptions import HTTPError as _HTTPError
from urllib3.exceptions import InvalidHeader as _InvalidHeader
from urllib3.exceptions import ProxyError as _ProxyError
from urllib3.exceptions import SSLError as _SSLError
from urllib3.response import HTTPResponse
DEFAULT_POOLBLOCK = False
DEFAULT_POOLSIZE = 10
DEFAULT_RETRIES = 0
DEFAULT_POOL_TIMEOUT = None
class HTTPAdapter(adapters.HTTPAdapter):
def __init__(self,
pool_connections=DEFAULT_POOLSIZE,
pool_maxsize=DEFAULT_POOLSIZE,
max_retries=DEFAULT_RETRIES,
pool_block=DEFAULT_POOLBLOCK
):
self.config = {}
self.proxy_manager = {}
if isinstance(max_retries, Retry):
self.max_retries = max_retries
else:
max_retries = Retry.from_int(max_retries)
self.max_retries = max_retries
self._pool_connections = pool_connections
self._pool_maxsize = pool_maxsize
self._pool_block = pool_block
self.init_poolmanager(pool_connections, pool_maxsize, block=pool_block)
class HTTPSAdapter(HTTPAdapter):
"""The built-in HTTP Adapter for urllib3.
Provides a general-case interface for Requests sessions to contact HTTP and
HTTPS urls by implementing the Transport Adapter interface. This class will
usually be created by the :class:`Session <Session>` class under the
covers.
:param pool_connections: The number of urllib3 connection pools to cache.
:param pool_maxsize: The maximum number of connections to save in the pool.
:param max_retries: The maximum number of retries each connection
should attempt. Note, this applies only to failed DNS lookups, socket
connections and connection timeouts, never to requests where data has
made it to the server. By default, Requests does not retry failed
connections. If you need granular control over the conditions under
which we retry a request, import urllib3's ``Retry`` class and pass
that instead.
:param pool_block: Whether the connection pool should block for connections.
Usage::
>>> import requests
>>> s = requests.Session()
>>> a = requests.adapters.HTTPAdapter(max_retries=3)
>>> s.mount('http://', a)
"""
def __init__(
self,
pool_connections=DEFAULT_POOLSIZE,
pool_maxsize=1,
max_retries=3,
pool_block=DEFAULT_POOLBLOCK,
):
retries = Retry(connect=max_retries, read=2, redirect=0)
adapters.HTTPAdapter.__init__(self,
pool_connections=pool_connections,
pool_maxsize=pool_maxsize,
max_retries=retries,
pool_block=pool_block)
def get_connection(self, url, proxies=None, use_forwarding_for_https=True):
"""Returns a urllib3 connection for the given URL. This should not be
called from user code, and is only exposed for use when subclassing the
:class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.
:param url: The URL to connect to.
:param proxies: (optional) A Requests-style dictionary of proxies used on this request.
:rtype: urllib3.ConnectionPool
"""
proxy = select_proxy(url, proxies)
if proxy:
proxy = prepend_scheme_if_needed(proxy, "http")
proxy_url = parse_url(proxy)
if not proxy_url.host:
raise InvalidProxyURL(
"Please check proxy URL. It is malformed "
"and could be missing the host."
)
proxy_manager = self.proxy_manager_for(proxy)
conn = proxy_manager.connection_from_url(url)
else:
# Only scheme should be lower case
parsed = urlparse(url)
url = parsed.geturl()
conn = self.poolmanager.connection_from_url(url, use_forwarding_for_https=True)
return conn
def send(
self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None
):
"""Sends PreparedRequest object. Returns Response object.
:param request: The :class:`PreparedRequest <PreparedRequest>` being sent.
:param stream: (optional) Whether to stream the request content.
:param timeout: (optional) How long to wait for the server to send
data before giving up, as a float, or a :ref:`(connect timeout,
read timeout) <timeouts>` tuple.
:type timeout: float or tuple or urllib3 Timeout object
:param verify: (optional) Either a boolean, in which case it controls whether
we verify the server's TLS certificate, or a string, in which case it
must be a path to a CA bundle to use
:param cert: (optional) Any user-provided SSL certificate to be trusted.
:param proxies: (optional) The proxies dictionary to apply to the request.
:rtype: requests.Response
"""
try:
#? _socks_options
conn = self.get_connection(request.url, proxies, use_forwarding_for_https=True)
except LocationValueError as e:
raise InvalidURL(e, request=request)
self.cert_verify(conn, request.url, verify, cert)
url = self.request_url(request, proxies)
self.add_headers(
request,
stream=stream,
timeout=timeout,
verify=verify,
cert=cert,
proxies=proxies,
)
chunked = not (request.body is None or "Content-Length" in request.headers)
if isinstance(timeout, tuple):
try:
connect, read = timeout
timeout = TimeoutSauce(connect=connect, read=read)
except ValueError:
raise ValueError(
f"Invalid timeout {timeout}. Pass a (connect, read) timeout tuple, "
f"or a single float to set both timeouts to the same value."
)
elif isinstance(timeout, TimeoutSauce):
pass
else:
timeout = TimeoutSauce(connect=timeout, read=timeout)
try:
if not chunked:
resp = conn.urlopen(
method=request.method,
url=url,
body=request.body,
headers=request.headers,
redirect=False,
assert_same_host=False,
preload_content=False,
decode_content=False,
retries=self.max_retries,
timeout=timeout,
)
# Send the request.
else:
if hasattr(conn, "proxy_pool"):
conn = conn.proxy_pool
low_conn = conn._get_conn(timeout=DEFAULT_POOL_TIMEOUT)
try:
skip_host = "Host" in request.headers
low_conn.putrequest(
request.method,
url,
skip_accept_encoding=True,
skip_host=skip_host,
)
for header, value in request.headers.items():
low_conn.putheader(header, value)
low_conn.endheaders()
for i in request.body:
low_conn.send(hex(len(i))[2:].encode("utf-8"))
low_conn.send(b"\r\n")
low_conn.send(i)
low_conn.send(b"\r\n")
low_conn.send(b"0\r\n\r\n")
# Receive the response from the server
r = low_conn.getresponse()
resp = HTTPResponse.from_httplib(
r,
pool=conn,
connection=low_conn,
preload_content=False,
decode_content=False,
)
except Exception:
# If we hit any problems here, clean up the connection.
# Then, raise so that we can handle the actual exception.
low_conn.close()
raise
except (ProtocolError, OSError) as err:
raise ConnectionError(err, request=request)
except MaxRetryError as e:
if isinstance(e.reason, ConnectTimeoutError):
# TODO: Remove this in 3.0.0: see #2811
if not isinstance(e.reason, NewConnectionError):
raise ConnectTimeout(e, request=request)
if isinstance(e.reason, ResponseError):
raise RetryError(e, request=request)
if isinstance(e.reason, _ProxyError):
raise ProxyError(e, request=request)
if isinstance(e.reason, _SSLError):
# This branch is for urllib3 v1.22 and later.
raise SSLError(e, request=request)
raise ConnectionError(e, request=request)
except ClosedPoolError as e:
raise ConnectionError(e, request=request)
except _ProxyError as e:
raise ProxyError(e)
except (_SSLError, _HTTPError) as e:
if isinstance(e, _SSLError):
# This branch is for urllib3 versions earlier than v1.22
raise SSLError(e, request=request)
elif isinstance(e, ReadTimeoutError):
raise ReadTimeout(e, request=request)
elif isinstance(e, _InvalidHeader):
raise InvalidHeader(e, request=request)
else:
raise
return self.build_response(request, resp)

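Review note: this commit removes https_adapter.py outright. Most of its retry behavior is available from stock requests, as the removed docstring's own Usage block hinted; a sketch:

import requests
from urllib3.util.retry import Retry

s = requests.Session()
# same knobs the removed HTTPSAdapter hard-coded in its __init__
retries = Retry(connect=3, read=2, redirect=0)
s.mount('https://', requests.adapters.HTTPAdapter(max_retries=retries))
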
@@ -0,0 +1,79 @@
#!/usr/local/bin/python3.sh
# -*-mode: python; indent-tabs-mode: nil; py-indent-offset: 4; coding: utf-8 -*
import sys
import os
import traceback
from phantompy import Render
global LOG
import logging
import warnings
warnings.filterwarnings('ignore')
LOG = logging.getLogger()
class LookFor(Render):
def __init__(self, url, outfile, jsfile=None):
self.uri = url
Render.__init__(self, url, outfile, jsfile)
def ilookfor(self, html):
import json
marker = '<pre style="word-wrap: break-word; white-space: pre-wrap;">'
if marker not in html: return ''
i = html.find(marker) + len(marker)
html = html[i:]
assert html[0] == '{', html
i = html.find('</pre')
html = html[:i]
assert html[-1] == '}', html
LOG.debug(f"Found {len(html)} json")
o = json.loads(html)
if "Answer" not in o.keys() or type(o["Answer"]) != list:
LOG.warn(f"FAIL {self.uri}")
return 1
for elt in o["Answer"]:
assert type(elt) == dict, elt
assert 'type' in elt, elt
if elt['type'] != 16: continue
assert 'data' in elt, elt
if elt['data'] == 'we-run-this-tor-relay':
LOG.info(f"OK {self.uri}")
return 0
LOG.warn(f"BAD {self.uri}")
return 2
def _html_callback(self, *args):
"""print(self, QPrinter, Callable[[bool], None])"""
if type(args[0]) is str:
self._save(args[0])
i = self.ilookfor(args[0])
self._exit(i)
def _save(self, html):
sfile = self.outfile.replace('.pdf','.out')
# CompleteHtmlSaveFormat SingleHtmlSaveFormat MimeHtmlSaveFormat
with open(sfile, 'wt') as ofd:
ofd.write(html)
LOG.debug(f"Saved {sfile}")
def _loadFinished(self, result):
LOG.debug("phantom.py: Loading finished!")
self.toHtml(self._html_callback)
def main():
if (len(sys.argv) < 3):
LOG.info("USAGE: lookupdns.py <url> <pdf-file> [<javascript-file>]")
else:
url = sys.argv[1]
outfile = sys.argv[2]
jsfile = sys.argv[3] if len(sys.argv) > 3 else None
r = LookFor(url, outfile, jsfile)
sys.exit(0)
if __name__ == "__main__":
main()
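
Review note: lookupdns.py drives a headless browser at a DoH resolver page and greps the JSON out of the rendered <pre> block. The same check without Qt, sketched against a DoH JSON endpoint (dns.google shown only as an illustrative resolver; return codes mirror ilookfor):

import requests

def iCheckDnsRsa(fp, domain, timeout=30):
    # sketch: TXT answers arrive as {"Answer": [{"type": 16, "data": ...}]}
    o = requests.get('https://dns.google/resolve',
                     params={'name': f'{fp}.{domain}', 'type': 'TXT'},
                     timeout=timeout).json()
    if not isinstance(o.get('Answer'), list):
        return 1                     # FAIL: no answer section
    for elt in o['Answer']:
        if elt.get('type') == 16 and 'we-run-this-tor-relay' in elt.get('data', ''):
            return 0                 # OK
    return 2                         # BAD: TXT present, wrong value
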

@@ -0,0 +1,260 @@
#!/usr/local/bin/python3.sh
# -*-mode: python; indent-tabs-mode: nil; py-indent-offset: 4; coding: utf-8 -*-
# https://gist.github.com/michaelfranzl/91f0cc13c56120391b949f885643e974/raw/a0601515e7a575bc4c7d4d2a20973b29b6c6f2df/phantom.py
"""
# phantom.py
Simple but fully scriptable headless QtWebKit browser using PyQt5 in Python3,
specialized in executing external JavaScript and generating PDF files. A lean
replacement for other bulky headless browser frameworks.
## Usage
If you have a display attached:
./phantom.py <url> <pdf-file> [<javascript-file>]
If you don't have a display attached (i.e. on a remote server):
xvfb-run ./phantom.py <url> <pdf-file> [<javascript-file>]
Arguments:
<url> Can be a http(s) URL or a path to a local file
<pdf-file> Path and name of PDF file to generate
[<javascript-file>] (optional) Path and name of a JavaScript file to execute
## Features
* Generate a PDF screenshot of the web page after it is completely loaded.
* Optionally execute a local JavaScript file specified by the argument
<javascript-file> after the web page is completely loaded, and before
the PDF is generated.
* console.log's will be printed to stdout.
* Easily add new features by changing the source code of this script, without
compiling C++ code. For more advanced applications, consider attaching
PyQt objects/methods to WebKit's JavaScript space by using
`QWebFrame::addToJavaScriptWindowObject()`.
If you execute an external <javascript-file>, phantom.py has no way of knowing
when that script has finished doing its work. For this reason, the external
script should execute `console.log("__PHANTOM_PY_DONE__");` when done. This will
trigger the PDF generation, after which phantom.py will exit. If no
`__PHANTOM_PY_DONE__` string is seen on the console for 10 seconds, phantom.py
will exit without doing anything. This behavior could be implemented more
elegantly without console.log's but it is the simplest solution.
It is important to remember that since you're just running WebKit, you can use
everything that WebKit supports, including the usual JS client libraries, CSS,
CSS @media types, etc.
## Dependencies
* Python3
* PyQt5
* xvfb (optional for display-less machines)
Installation of dependencies in Debian Stretch is easy:
apt-get install xvfb python3-pyqt5 python3-pyqt5.qtwebkit
Finding the equivalent for other OSes is an exercise that I leave to you.
## Examples
Given the following file /tmp/test.html
<html>
<body>
<p>foo <span id="id1">foo</span> <span id="id2">foo</span></p>
</body>
<script>
document.getElementById('id1').innerHTML = "bar";
</script>
</html>
... and the following file /tmp/test.js:
document.getElementById('id2').innerHTML = "baz";
console.log("__PHANTOM_PY_DONE__");
... and running this script (without attached display) ...
xvfb-run python3 phantom.py /tmp/test.html /tmp/out.pdf /tmp/test.js
... you will get a PDF file /tmp/out.pdf with the contents "foo bar baz".
Note that the second occurrence of "foo" has been replaced by the web page's own
script, and the third occurrence of "foo" by the external JS file.
## License
Copyright 2017 Michael Karl Franzl
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""
import sys
import os
import traceback
import atexit
from PyQt5.QtCore import QUrl
from PyQt5.QtCore import QTimer
from PyQt5.QtWidgets import QApplication
from PyQt5.QtPrintSupport import QPrinter
from PyQt5.QtWebEngineWidgets import QWebEnginePage as QWebPage
global LOG
import logging
import warnings
warnings.filterwarnings('ignore')
LOG = logging.getLogger()
def prepare():
sfile = '/tmp/test.js'
if not os.path.exists(sfile):
with open(sfile, 'wt') as ofd:
ofd.write("""
document.getElementById('id2').innerHTML = "baz";
console.log("__PHANTOM_PY_DONE__");
""")
sys.stderr.write(f"wrote {sfile} ")
sfile = '/tmp/test.html'
if not os.path.exists(sfile):
with open(sfile, 'wt') as ofd:
ofd.write("""
<html>
<body>
<p>foo <span id="id1">foo</span> <span id="id2">foo</span></p>
</body>
<script>
document.getElementById('id1').innerHTML = "bar";
</script>
</html>
""")
sys.stderr.write(f"wrote {sfile} ")
sys.stderr.write("\n")
class Render(QWebPage):
def __init__(self, url, outfile, jsfile=None):
self.app = QApplication(sys.argv)
QWebPage.__init__(self)
self.jsfile = jsfile
self.outfile = outfile
qurl = QUrl.fromUserInput(url)
LOG.debug(f"phantom.py: URL= {qurl} OUTFILE={outfile} JSFILE= {jsfile)")
# The PDF generation only happens when the special string __PHANTOM_PY_DONE__
# is sent to console.log(). The following JS string will be executed by
# default, when no external JavaScript file is specified.
self.js_contents = "setTimeout(function() { console.log('__PHANTOM_PY_DONE__') }, 5000);";
if jsfile:
try:
f = open(self.jsfile)
self.js_contents = f.read()
f.close()
except:
LOG.error(traceback.format_exc())
self._exit(10)
self.loadFinished.connect(self._loadFinished)
self.load(qurl)
self.javaScriptConsoleMessage = self._onConsoleMessage
if False:
# Run for a maximum of 10 seconds
watchdog = QTimer()
watchdog.setSingleShot(True)
watchdog.timeout.connect(lambda: self._exit(9))
watchdog.start(10000)
self.app.exec_()
def _onConsoleMessage(self, *args):
if len(args) > 3:
level, txt, lineno, filename = args
else:
level = 1
txt, lineno, filename = args
LOG.debug(f"CONSOLE {lineno} {txt} {filename}")
if "__PHANTOM_PY_DONE__" in txt:
# If we get this magic string, it means that the external JS is done
self._print()
if "__PHANTOM_PY_EXIT__" in txt:
self._exit(0)
def _loadFinished(self, result):
LOG.debug(f"phantom.py: Evaluating JS from {self.jsfile}")
self.runJavaScript("document.documentElement.contentEditable=true")
self.runJavaScript(self.js_contents)
def _printer_callback(self, *args):
"""print(self, QPrinter, Callable[[bool], None])"""
# print(f"_printer_callback {self.outfile} {args}")
if args[0] is False:
i = 1
else:
i = 0
self._exit(i)
def _print(self):
printer = QPrinter()
printer.setPageMargins(10, 10, 10, 10, QPrinter.Millimeter)
printer.setPaperSize(QPrinter.A4)
printer.setCreator("phantom.py by Michael Karl Franzl")
printer.setOutputFormat(QPrinter.PdfFormat);
printer.setOutputFileName(self.outfile);
self.print(printer, self._printer_callback)
LOG.debug("phantom.py: Printed")
def _exit(self, val):
LOG.debug(f"phantom.py: Exiting with val {val}")
# Run for a maximum of 10 seconds
watchdog = QTimer()
watchdog.setSingleShot(True)
watchdog.timeout.connect(lambda: sys.exit(val))
watchdog.start(10000)
self.app.exit(val)
atexit._clear()
sys.exit(val)
def main():
if (len(sys.argv) < 3):
LOG.info("USAGE: ./phantom.py <url> <pdf-file> [<javascript-file>]")
else:
url = sys.argv[1]
outfile = sys.argv[2]
jsfile = sys.argv[3] if len(sys.argv) > 3 else None
r = Render(url, outfile, jsfile)
sys.exit(0)
if __name__ == "__main__":
main()
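
Review note: a driver sketch for the Render class above (a display or xvfb-run is still required; the module name phantompy matches the import in lookupdns.py):

import logging
from phantompy import Render

logging.basicConfig(level=logging.DEBUG)
# blocks inside app.exec_() and leaves via Render's own watchdog/_exit
Render('https://example.com', '/tmp/out.pdf')
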

@@ -27,9 +27,8 @@ LOG = logging.getLogger()
bHAVE_TORR = shutil.which('tor-resolve')
# maybe we should check these each time but we
# got them by sorting bad relays in the wild
# we'll keep a copy here
# we check these each time but we got them by sorting bad relays
# in the wild we'll keep a copy here so we can avoid retesting
yKNOWN_NODNS = """
---
- 0x0.is
@@ -50,6 +49,7 @@ yKNOWN_NODNS = """
- or.wowplanet.de
- ormycloud.org
- plied-privacy.net
- rivacysvcs.net
- redacted.org
- rification-for-nusenu.net
- rofl.cat
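
Review note: the heredoc above is parsed straight into the runtime blocklist; yaml.safe_load of a '---' document made of '- item' lines yields a plain Python list:

import yaml

lKNOWN_NODNS = yaml.safe_load(yKNOWN_NODNS)
assert '0x0.is' in lKNOWN_NODNS
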

@@ -52,7 +52,7 @@ def read_local_trust_config(trust_config):
'''
result = []
# for now we support max_depth = 0 only
# this PoC version has no support for recursion
# https://github.com/nusenu/tor-relay-operator-ids-trust-information#trust-information-consumers
@@ -140,7 +140,11 @@ def get_controller(address='127.0.0.1', port=9151, password=''):
return controller
def find_validation_candidates(controller, trusted_domains=[],validation_cache=[],accept_all=False):
def find_validation_candidates(controller,
trusted_domains=[],
validation_cache=[],
CAfile='/etc/ssl/certs/ca-certificates.crt',
accept_all=False):
'''
connect to a tor client via controlport and return a dict of all
not yet validated fingerprints per trusted operators
@@ -221,14 +225,14 @@ def oDownloadUrlRequests(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050):
head = requests.head(uri, timeout=timeout, proxies=proxy, headers=headers)
except Exception as e:
raise TrustorError(f"HTTP HEAD request failed for {uri} {e}")
if head.status_code >= 300:
raise TrustorError(f"HTTP Errorcode {head.status_code}")
if not head.headers['Content-Type'].startswith('text/plain'):
raise TrustorError(f"HTTP Content-Type != text/plain")
if not os.path.exists(sCAfile):
raise TrustorError(f"File not found CAfile {sCAfile}")
try:
with requests.sessions.Session() as session:
oReqResp = session.request(method="get", url=uri,
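
Review note: a condensed sketch of the proxied fetch oDownloadUrlRequests performs above, assuming requests with SOCKS support installed (requests[socks]/PySocks); the socks5h scheme resolves the hostname inside Tor instead of leaking DNS (URL here is illustrative):

import requests

proxy = {'http': 'socks5h://127.0.0.1:9050',
         'https': 'socks5h://127.0.0.1:9050'}
resp = requests.get('https://example.net/.well-known/tor-relay/rsa-fingerprint.txt',
                    proxies=proxy, timeout=30,
                    verify='/etc/ssl/certs/ca-certificates.crt')
resp.raise_for_status()
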
@@ -336,7 +340,7 @@ def my_match_hostname(cert, hostname):
else:
raise CertificateError(
"no appropriate commonName or subjectAltName fields were found"
)
match_hostname = my_match_hostname
from urllib3.util.ssl_ import (
is_ipaddress,
@@ -393,15 +397,15 @@ def oDownloadUrlUrllib3(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050):
retries=False)
except Exception as e:
LOG.error(f"HTTP HEAD request failed for {uri} {e}")
raise
if head.status >= 300:
raise TrustorError(f"HTTP Errorcode {head.status}")
if not head.headers['Content-Type'].startswith('text/plain'):
raise TrustorError(f"HTTP Content-Type != text/plain")
if not os.path.exists(sCAfile):
raise TrustorError(f"File not found CAfile {sCAfile}")
try:
oReqResp = proxy.request("GET", uri,
headers=headers,
@@ -420,7 +424,7 @@ def oDownloadUrlUrllib3(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050):
LOG.error(f'Redirect detected %s vs %s (final)' % (uri, oReqResp.geturl()))
raise TrustorError(f'Redirect detected %s vs %s (final)' % (uri, oReqResp.geturl()))
oReqResp.decode_content = True
return oReqResp
import urllib3.connectionpool
urllib3.connectionpool.VerifiedHTTPSConnection = HTTPSConnection
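
Review note: the urllib3 path above patches VerifiedHTTPSConnection to get hostname checks right over the proxy; the stock SOCKS plumbing it builds on looks like this (PySocks required, socks5h again keeping DNS inside Tor):

from urllib3.contrib.socks import SOCKSProxyManager

proxy = SOCKSProxyManager('socks5h://127.0.0.1:9050/',
                          ca_certs='/etc/ssl/certs/ca-certificates.crt')
head = proxy.request('HEAD', 'https://example.net/', timeout=30)
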
@@ -483,7 +487,7 @@ def idns_validate(domain,
# this is not the system wide /etc/resolv.conf
# use dnscrypt-proxy to encrypt your DNS and route it via tor's SOCKSPort
ctx = ub_ctx()
if (os.path.isfile(libunbound_resolv_file)):
@@ -529,6 +533,7 @@ def configure_tor(controller, trusted_fingerprints, exitonly=True):
if __name__ == '__main__':
CAfile = '/etc/ssl/certs/ca-certificates.crt'
trust_config = 'trust_config'
assert os.path.exists(trust_config)
trusted_domains = read_local_trust_config(trust_config)
@@ -546,7 +551,8 @@ if __name__ == '__main__':
r = find_validation_candidates(controller,
validation_cache=trusted_fingerprints,
trusted_domains=trusted_domains)
trusted_domains=trusted_domains,
CAfile=CAfile)
validate_proofs(r, validation_cache_file,
timeout=timeout,
host=controller_address,
