diff --git a/exclude_badExits.py b/exclude_badExits.py
index 74ec82b..34590f3 100644
--- a/exclude_badExits.py
+++ b/exclude_badExits.py
@@ -160,7 +160,7 @@ def lYamlBadNodes(sFile,
     global oBAD_NODES
     global lKNOWN_NODNS
     global lMAYBE_NODNS
-    
+
     l = []
     if not yaml: return l
     if os.path.exists(sFile):
@@ -198,7 +198,7 @@ def lYamlGoodNodes(sFile='/etc/tor/torrc-goodnodes.yaml'):
     # yq '.Nodes.IntroductionPoints|.[]' < /etc/tor/torrc-goodnodes.yaml
     return l
 
-def bdomain_is_bad(domain):
+def bdomain_is_bad(domain, fp):
     global lKNOWN_NODNS
     if domain in lKNOWN_NODNS: return True
     if domain in lMAYBE_NODNS:
@@ -208,10 +208,11 @@ def bdomain_is_bad(domain):
             lKNOWN_NODNS.append(domain)
             lMAYBE_NODNS.remove(domain)
             return True
-    
-    if '@' in domain:
-        LOG.warn(f"@ in domain {domain}")
-        return True
+
+    for elt in '@(){}$!':
+        if elt in domain:
+            LOG.warn(f"{elt} in domain {domain}")
+            return True
     return False
 
 tBAD_URLS = set()
@@ -256,7 +257,7 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050)
         if aCachedContact['email'] == a['email']:
             LOG.info(f"{fp} in aTRUST_DB_INDEX")
             return aCachedContact
-    
+
     if 'url' not in keys:
         if 'uri' not in keys:
             a['url'] = ''
@@ -270,20 +271,21 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050)
     c = a['url'].lstrip('https://').lstrip('http://').strip('/')
     a['url'] = 'https://' +c
-    
+
     # domain should be a unique key for contacts
     domain = a['url'][8:]
-    if bdomain_is_bad(domain):
+    if bdomain_is_bad(domain, fp):
         LOG.warn(f"{domain} is bad from {a['url']}")
-        LOG.info(f"{domain} is bad from {a}")
+        LOG.debug(f"{fp} is bad from {a}")
         return a
+
     ip = zResolveDomain(domain)
     if ip == '':
         aFP_EMAIL[fp] = a['email']
         LOG.debug(f"{fp} {domain} does not resolve")
         lKNOWN_NODNS.append(domain)
         return {}
-    
+
     if a['proof'] not in ['uri-rsa']:
         # only support uri for now
         if False and ub_ctx:
@@ -295,7 +297,7 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050)
             pass
         LOG.warn(f"{fp} proof={a['proof']} not supported yet")
         return a
-    
+
     LOG.debug(f"{len(keys)} contact fields for {fp}")
     url="https://"+domain+"/.well-known/tor-relay/rsa-fingerprint.txt"
     try:
@@ -328,7 +330,7 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050)
         # any reason retry?
         tBAD_URLS.add(a['url'])
         return a
-    
+
     if hasattr(o, 'text'):
         data = o.text
     else:
@@ -347,7 +349,7 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050)
 
 def aParseContactYaml(contact, fp):
     """
-    See the Tor ContactInfo Information Sharing Specification v2 
+    See the Tor ContactInfo Information Sharing Specification v2
       https://nusenu.github.io/ContactInfo-Information-Sharing-Specification/
     """
     lelts = contact.split()
@@ -357,7 +359,7 @@ def aParseContactYaml(contact, fp):
         LOG.debug(f"{fp} {a}")
         return a
     key = ''
-    for elt in lets:
+    for elt in lelts:
         if key == '':
             key = elt
             continue
@@ -368,7 +370,7 @@
 def aParseContact(contact, fp):
     """
-    See the Tor ContactInfo Information Sharing Specification v2 
+    See the Tor ContactInfo Information Sharing Specification v2
       https://nusenu.github.io/ContactInfo-Information-Sharing-Specification/
     """
     l = [line for line in contact.strip().replace('"', '').split(' ')
@@ -424,22 +426,22 @@ def vsetup_logging(log_level, logfile=''):
     LOG.info(f"SSetting log_level to {log_level!s}")
 
     logging._levelToName = {
-        CRITICAL: 'CRITICAL',
-        ERROR: 'ERROR',
-        WARNING: 'WARN',
-        INFO: 'INFO',
-        DEBUG: 'DEBUG',
-        NOTSET: 'NOTSET',
+        logging.CRITICAL: 'CRITICAL',
+        logging.ERROR: 'ERROR',
+        logging.WARNING: 'WARN',
+        logging.INFO: 'INFO',
+        logging.DEBUG: 'DEBUG',
+        logging.NOTSET: 'NOTSET',
     }
     logging._nameToLevel = {
-        'CRITICAL': CRITICAL,
-        'FATAL': FATAL,
-        'ERROR': ERROR,
-        'WARN': WARNING,
-        'WARNING': WARNING,
-        'INFO': INFO,
-        'DEBUG': DEBUG,
-        'NOTSET': NOTSET,
+        'CRITICAL': logging.CRITICAL,
+        'FATAL': logging.FATAL,
+        'ERROR': logging.ERROR,
+        'WARN': logging.WARNING,
+        'WARNING': logging.WARNING,
+        'INFO': logging.INFO,
+        'DEBUG': logging.DEBUG,
+        'NOTSET': logging.NOTSET,
     }
 
 def oMainArgparser(_=None):
@@ -490,7 +492,7 @@ def oMainArgparser(_=None):
     parser.add_argument('--bad_contacts', type=str,
                         default=os.path.join(ETC_DIR, 'badcontacts.yaml'),
                         help="Yaml file of bad contacts that bad FPs are using")
-    
+
     parser.add_argument('--strict_nodes', type=int, default=0,
                         choices=[0,1],
                         help="Set StrictNodes: 1 is less anonymous but more secure, although some sites may be unreachable")
@@ -498,7 +500,7 @@ def oMainArgparser(_=None):
                         help="Seconds to wait for Tor to booststrap")
     parser.add_argument('--points_timeout', type=int, default=0,
                         help="Timeout for getting introduction points - must be long >120sec. 0 means disabled looking for IPs")
-    parser.add_argument('--log_level', type=int, default=10,
+    parser.add_argument('--log_level', type=int, default=20,
                         help="10=debug 20=info 30=warn 40=error")
     parser.add_argument('--bad_sections', type=str,
                         default='MyBadExit',
@@ -523,24 +525,24 @@ def vwrite_badnodes(oArgs, oBAD_NODES, slen):
         if os.path.exists(oArgs.bad_nodes):
             os.rename(oArgs.bad_nodes, bak)
         os.rename(tmp, oArgs.bad_nodes)
-    
-def vwrite_goodnodes(oArgs, oGOOD_NODES, slen):
+
+def vwrite_goodnodes(oArgs, oGOOD_NODES, ilen):
     if oArgs.good_nodes:
         tmp = oArgs.good_nodes +'.tmp'
         bak = oArgs.good_nodes +'.bak'
         with open(tmp, 'wt') as oFYaml:
             yaml.dump(oGOOD_NODES, indent=2, stream=oFYaml)
-            LOG.info(f"Wrote {slen} good nodes to {oArgs.good_nodes}")
+            LOG.info(f"Wrote {ilen} good relays to {oArgs.good_nodes}")
             oFYaml.close()
         if os.path.exists(oArgs.good_nodes):
             os.rename(oArgs.good_nodes, bak)
         os.rename(tmp, oArgs.good_nodes)
-    
+
 def iMain(lArgs):
     global aTRUST_DB
     global aTRUST_DB_INDEX
     global oBAD_NODES
-    global oGOOD_NODES 
+    global oGOOD_NODES
     global lKNOWN_NODNS
     parser = oMainArgparser()
     oArgs = parser.parse_args(lArgs)
@@ -573,7 +575,7 @@ def iMain(lArgs):
                     continue
                 aTRUST_DB_INDEX[fp] = v
             LOG.info(f"{len(aTRUST_DB_INDEX.keys())} good relays from {sFile}")
-            
+
         except Exception as e:
             LOG.exception(f"Error reading YAML TrustDB {sFile} {e}")
 
@@ -625,17 +627,17 @@ def iMain(lArgs):
     texclude_set = set(lYamlBadNodes(oArgs.bad_nodes,
                                      lWanted=sections,
                                      section=sEXCLUDE_EXIT_KEY))
-    LOG.info(f"Preloaded {len(texclude_set)} bad fps") 
+    LOG.info(f"Preloaded {len(texclude_set)} bad fps")
 
     ttrust_db_index = aTRUST_DB_INDEX.keys()
-    tdns_contacts = set()
+    tdns_urls = set()
     iFakeContact = 0
     iTotalContacts = 0
     aBadContacts = {}
-    
+
     lConds = oArgs.contact.split(',')
     iR = 0
-    
+
     relays = controller.get_server_descriptors()
     for relay in relays:
         iR += 1
@@ -643,15 +645,15 @@ def iMain(lArgs):
             LOG.warn('Invalid Fingerprint: %s' % relay.fingerprint)
             continue
         relay.fingerprint = relay.fingerprint.upper()
-        
-        sofar = f"G:{len(aTRUST_DB.keys())} U:{len(tdns_contacts)} F:{iFakeContact} BF:{len(texclude_set)} GF:{len(ttrust_db_index)} TC:{iTotalContacts} #{iR}"
+
+        sofar = f"G:{len(aTRUST_DB.keys())} U:{len(tdns_urls)} F:{iFakeContact} BF:{len(texclude_set)} GF:{len(ttrust_db_index)} TC:{iTotalContacts} #{iR}"
         if not relay.exit_policy.is_exiting_allowed():
             if sEXCLUDE_EXIT_KEY == 'ExcludeNodes':
                 pass # LOG.debug(f"{relay.fingerprint} not an exit {sofar}")
             else:
                 pass # LOG.warn(f"{relay.fingerprint} not an exit {sofar}")
             # continue
-        
+
         # great contact had good fps and we are in them
         if relay.fingerprint in aTRUST_DB_INDEX.keys():
             # a cached entry
@@ -660,54 +662,55 @@ def iMain(lArgs):
         if type(relay.contact) == bytes:
             # dunno
             relay.contact = str(relay.contact, 'UTF-8')
-        
+
         if ('Empty' in lConds and not relay.contact) or \
            ('NoEmail' in lConds and relay.contact and not 'email:' in relay.contact):
             texclude_set.add(relay.fingerprint)
             continue
-        
+
         if not relay.contact or not 'ciissversion:' in relay.contact:
             # should be unreached 'Empty' should always be in lConds
             continue
         iTotalContacts += 1
-        
+
+        fp = relay.fingerprint
         if relay.contact and not 'url:' in relay.contact:
-            LOG.info(f"{relay.fingerprint} skipping bad contact - no url: {sofar}")
-            LOG.debug(f"{relay.fingerprint} {relay.contact} {sofar}")
-            texclude_set.add(relay.fingerprint)
+            LOG.info(f"{fp} skipping bad contact - no url: {sofar}")
+            LOG.debug(f"{fp} {relay.contact} {sofar}")
+            texclude_set.add(fp)
             continue
-        
+
         c = relay.contact.lower() # first rough cut
         i = c.find('url:')
         if i >=0:
-            c = c[i+4:] 
+            c = c[i+4:]
             i = c.find(' ')
             if i >=0: c = c[:i]
         c = c.lstrip('https://').lstrip('http://').strip('/')
         i = c.find('/')
         if i >=0: c = c[:i]
         domain = c
-        if domain and bdomain_is_bad(domain):
-            LOG.info(f"{relay.fingerprint} skipping bad {domain} {sofar}")
-            LOG.debug(f"{relay.fingerprint} {relay.contact} {sofar}")
-            texclude_set.add(relay.fingerprint)
+        if domain and bdomain_is_bad(domain, fp):
+            LOG.info(f"{fp} skipping bad {domain} {sofar}")
+            LOG.debug(f"{fp} {relay.contact} {sofar}")
+            texclude_set.add(fp)
             continue
         if domain:
             ip = zResolveDomain(domain)
             if not ip:
-                LOG.warn(f"{relay.fingerprint} {domain} did not resolve {sofar}")
-                texclude_set.add(relay.fingerprint)
+                LOG.warn(f"{fp} {domain} did not resolve {sofar}")
+                texclude_set.add(fp)
                 lKNOWN_NODNS.append(domain)
                 iFakeContact += 1
                 continue
-        
-        if 'dns-rsa' in relay.contact.lower(): 
-            target = f"{relay.fingerprint}.{domain}" 
+
+        if 'dns-rsa' in relay.contact.lower():
+            target = f"{relay.fingerprint}.{domain}"
             LOG.info(f"skipping 'dns-rsa' {target} {sofar}")
-            tdns_contacts.add(target)
-        
+            tdns_urls.add(target)
+
         elif 'proof:uri-rsa' in relay.contact.lower():
             a = aParseContact(relay.contact, relay.fingerprint)
             if not a:
@@ -730,7 +733,7 @@ def iMain(lArgs):
                 iFakeContact += 1
                 texclude_set.add(relay.fingerprint)
                 continue
-            
+
             b = aVerifyContact(list(a.values())[0],
                                relay.fingerprint,
@@ -738,7 +741,7 @@ def iMain(lArgs):
                                timeout=oArgs.timeout,
                                host=oArgs.proxy_host,
                                port=oArgs.proxy_port)
-            
+
             if not b or not 'fps' in b or not b['fps'] or not b['url']:
                 LOG.warn(f"{relay.fingerprint} did NOT VERIFY {sofar}")
                 LOG.debug(f"{relay.fingerprint} {b} {sofar}")
@@ -747,7 +750,7 @@ def iMain(lArgs):
                 texclude_set.add(relay.fingerprint)
                 aBadContacts[relay.fingerprint] = b
                 continue
-            
+
             if relay.fingerprint not in b['fps']:
                 LOG.warn(f"{relay.fingerprint} the FP IS NOT in the list of fps {sofar}")
                 # assume a fp is using a bogus contact
@@ -766,9 +769,11 @@ def iMain(lArgs):
         with open(proof_output_tmp, 'wt') as oFYaml:
             yaml.dump(aTRUST_DB, indent=2, stream=oFYaml)
             oFYaml.close()
-    
+
     LOG.info(f"Filtered {len(twhitelist_set)} whitelisted relays")
     texclude_set = texclude_set.difference(twhitelist_set)
+    # accept the dns-rsa urls for now until we test them
+    texclude_set = texclude_set.difference(tdns_urls)
     LOG.info(f"{len(list(aTRUST_DB.keys()))} good contacts out of {iTotalContacts}")
 
     if oArgs.proof_output and aTRUST_DB:
@@ -785,7 +790,7 @@ def iMain(lArgs):
         with open(oArgs.torrc_output, 'wt') as oFTorrc:
             oFTorrc.write(f"{sEXCLUDE_EXIT_KEY} {','.join(texclude_set)}\n")
             oFTorrc.write(f"{sINCLUDE_EXIT_KEY} {','.join(aTRUST_DB_INDEX.keys())}\n")
-            oFTorrc.write(f"{sINCLUDE_GUARD_KEY} {','.join(o[oGOOD_ROOT]['GuardNodes'])}\n")
+            oFTorrc.write(f"{sINCLUDE_GUARD_KEY} {','.join(oGOOD_NODES[oGOOD_ROOT]['GuardNodes'])}\n")
         LOG.info(f"Wrote tor configuration to {oArgs.torrc_output}")
         oFTorrc.close()
 
@@ -798,10 +803,10 @@ def iMain(lArgs):
     oBAD_NODES[oBAD_ROOT]['ExcludeNodes']['BadExit'] = list(texclude_set)
     oBAD_NODES[oBAD_ROOT]['ExcludeDomains'] = lKNOWN_NODNS
     vwrite_badnodes(oArgs, oBAD_NODES, str(len(texclude_set)))
-    
+
     oGOOD_NODES['GoodNodes']['Relays']['ExitNodes'] = list(aTRUST_DB_INDEX.keys())
     # GuardNodes are readonl
-    vwrite_goodnodes(oArgs, oGOOD_NODES, str(len(ttrust_db_index)))
+    vwrite_goodnodes(oArgs, oGOOD_NODES, len(aTRUST_DB_INDEX.keys()))
     retval = 0
     try:
         logging.getLogger('stem').setLevel(30)
@@ -838,7 +843,7 @@ def iMain(lArgs):
            LOG.errro(f"Failed setting {sINCLUDE_EXIT_KEY} good exit nodes in Tor")
            retval += 1
 
-        LOG.info("dns-rsa domains:\n{'\n'.join(tdns_contacts)}")
+        sys.stdout.write("dns-rsa domains:\n" +'\n'.join(tdns_urls) +'\n')
 
         return retval
 
     except InvalidRequest as e:
diff --git a/https_adapter.py b/https_adapter.py
deleted file mode 100644
index 3cee1e8..0000000
--- a/https_adapter.py
+++ /dev/null
@@ -1,265 +0,0 @@
-# -*- mode: python; indent-tabs-mode: nil; py-indent-offset: 4; coding: utf-8 -*-
-
-from requests import adapters
-from requests.utils import (
-    DEFAULT_CA_BUNDLE_PATH,
-    get_auth_from_url,
-    get_encoding_from_headers,
-    prepend_scheme_if_needed,
-    select_proxy,
-    urldefragauth,
-)
-import urllib3
-from urllib3.util import parse_url
-from urllib3.util.retry import Retry
-from urllib3.util import Timeout as TimeoutSauce
-
-DEFAULT_POOLBLOCK = False
-DEFAULT_POOLSIZE = 10
-DEFAULT_RETRIES = 0
-DEFAULT_POOL_TIMEOUT = None
-
-class HTTPAdapter(adapters.HTTPAdapter):
-    def __init__(self,
-                 pool_connections=DEFAULT_POOLSIZE,
-                 pool_maxsize=DEFAULT_POOLSIZE,
-                 max_retries=DEFAULT_RETRIES,
-                 pool_block=DEFAULT_POOLBLOCK
-                 ):
-        self.config = {}
-        self.proxy_manager = {}
-
-        if isinstance(max_retries, Retry):
-            self.max_retries = max_retries
-        else:
-            max_retries = Retry.from_int(max_retries)
-            self.max_retries = max_retries
-
-        self._pool_connections = pool_connections
-        self._pool_maxsize = pool_maxsize
-        self._pool_block = pool_block
-
-        self.init_poolmanager(pool_connections, pool_maxsize, block=pool_block)
-
-
-class HTTPSAdapter(HTTPAdapter):
-    """The built-in HTTP Adapter for urllib3.
-
-    Provides a general-case interface for Requests sessions to contact HTTP and
-    HTTPS urls by implementing the Transport Adapter interface. This class will
-    usually be created by the :class:`Session <requests.Session>` class under the
-    covers.
-
-    :param pool_connections: The number of urllib3 connection pools to cache.
-    :param pool_maxsize: The maximum number of connections to save in the pool.
-    :param max_retries: The maximum number of retries each connection
-        should attempt. Note, this applies only to failed DNS lookups, socket
-        connections and connection timeouts, never to requests where data has
-        made it to the server. By default, Requests does not retry failed
-        connections. If you need granular control over the conditions under
-        which we retry a request, import urllib3's ``Retry`` class and pass
-        that instead.
-    :param pool_block: Whether the connection pool should block for connections.
-
-    Usage::
-
-      >>> import requests
-      >>> s = requests.Session()
-      >>> a = requests.adapters.HTTPAdapter(max_retries=3)
-      >>> s.mount('http://', a)
-    """
-
-    def __init__(
-        self,
-        pool_connections=DEFAULT_POOLSIZE,
-        pool_maxsize=1,
-        max_retries=3,
-        pool_block=DEFAULT_POOLBLOCK,
-    ):
-        retries = Retry(connect=max_retries, read=2, redirect=0)
-        adapters.HTTPAdapter.__init__(self,
-                                      pool_connections=pool_connections,
-                                      pool_maxsize=pool_maxsize,
-                                      max_retries=retries,
-                                      pool_block=pool_block)
-
-    def get_connection(self, url, proxies=None, use_forwarding_for_https=True):
-        """Returns a urllib3 connection for the given URL. This should not be
-        called from user code, and is only exposed for use when subclassing the
-        :class:`HTTPAdapter <requests.adapters.HTTPAdapter>`.
-
-        :param url: The URL to connect to.
-        :param proxies: (optional) A Requests-style dictionary of proxies used on this request.
-        :rtype: urllib3.ConnectionPool
-        """
-        proxy = select_proxy(url, proxies)
-
-        if proxy:
-            proxy = prepend_scheme_if_needed(proxy, "http")
-            proxy_url = parse_url(proxy)
-            if not proxy_url.host:
-                raise InvalidProxyURL(
-                    "Please check proxy URL. It is malformed "
-                    "and could be missing the host."
-                )
-            proxy_manager = self.proxy_manager_for(proxy)
-            conn = proxy_manager.connection_from_url(url)
-        else:
-            # Only scheme should be lower case
-            parsed = urlparse(url)
-            url = parsed.geturl()
-            conn = self.poolmanager.connection_from_url(url, use_forwarding_for_https=True)
-
-        return conn
-
-    def send(
-        self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None
-    ):
-        """Sends PreparedRequest object. Returns Response object.
-
-        :param request: The :class:`PreparedRequest <PreparedRequest>` being sent.
-        :param stream: (optional) Whether to stream the request content.
-        :param timeout: (optional) How long to wait for the server to send
-            data before giving up, as a float, or a :ref:`(connect timeout,
-            read timeout) <timeouts>` tuple.
-        :type timeout: float or tuple or urllib3 Timeout object
-        :param verify: (optional) Either a boolean, in which case it controls whether
-            we verify the server's TLS certificate, or a string, in which case it
-            must be a path to a CA bundle to use
-        :param cert: (optional) Any user-provided SSL certificate to be trusted.
-        :param proxies: (optional) The proxies dictionary to apply to the request.
-        :rtype: requests.Response
-        """
-
-        try:
-            #? _socks_options
-            conn = self.get_connection(request.url, proxies, use_forwarding_for_https=True)
-        except LocationValueError as e:
-            raise InvalidURL(e, request=request)
-
-        self.cert_verify(conn, request.url, verify, cert)
-        url = self.request_url(request, proxies)
-        self.add_headers(
-            request,
-            stream=stream,
-            timeout=timeout,
-            verify=verify,
-            cert=cert,
-            proxies=proxies,
-        )
-
-        chunked = not (request.body is None or "Content-Length" in request.headers)
-
-        if isinstance(timeout, tuple):
-            try:
-                connect, read = timeout
-                timeout = TimeoutSauce(connect=connect, read=read)
-            except ValueError:
-                raise ValueError(
-                    f"Invalid timeout {timeout}. Pass a (connect, read) timeout tuple, "
-                    f"or a single float to set both timeouts to the same value."
-                )
-        elif isinstance(timeout, TimeoutSauce):
-            pass
-        else:
-            timeout = TimeoutSauce(connect=timeout, read=timeout)
-
-        try:
-            if not chunked:
-                resp = conn.urlopen(
-                    method=request.method,
-                    url=url,
-                    body=request.body,
-                    headers=request.headers,
-                    redirect=False,
-                    assert_same_host=False,
-                    preload_content=False,
-                    decode_content=False,
-                    retries=self.max_retries,
-                    timeout=timeout,
-                )
-
-            # Send the request.
-            else:
-                if hasattr(conn, "proxy_pool"):
-                    conn = conn.proxy_pool
-
-                low_conn = conn._get_conn(timeout=DEFAULT_POOL_TIMEOUT)
-
-                try:
-                    skip_host = "Host" in request.headers
-                    low_conn.putrequest(
-                        request.method,
-                        url,
-                        skip_accept_encoding=True,
-                        skip_host=skip_host,
-                    )
-
-                    for header, value in request.headers.items():
-                        low_conn.putheader(header, value)
-
-                    low_conn.endheaders()
-
-                    for i in request.body:
-                        low_conn.send(hex(len(i))[2:].encode("utf-8"))
-                        low_conn.send(b"\r\n")
-                        low_conn.send(i)
-                        low_conn.send(b"\r\n")
-                    low_conn.send(b"0\r\n\r\n")
-
-                    # Receive the response from the server
-                    r = low_conn.getresponse()
-
-                    resp = HTTPResponse.from_httplib(
-                        r,
-                        pool=conn,
-                        connection=low_conn,
-                        preload_content=False,
-                        decode_content=False,
-                    )
-                except Exception:
-                    # If we hit any problems here, clean up the connection.
-                    # Then, raise so that we can handle the actual exception.
-                    low_conn.close()
-                    raise
-
-        except (ProtocolError, OSError) as err:
-            raise ConnectionError(err, request=request)
-
-        except MaxRetryError as e:
-            if isinstance(e.reason, ConnectTimeoutError):
-                # TODO: Remove this in 3.0.0: see #2811
-                if not isinstance(e.reason, NewConnectionError):
-                    raise ConnectTimeout(e, request=request)
-
-            if isinstance(e.reason, ResponseError):
-                raise RetryError(e, request=request)
-
-            if isinstance(e.reason, _ProxyError):
-                raise ProxyError(e, request=request)
-
-            if isinstance(e.reason, _SSLError):
-                # This branch is for urllib3 v1.22 and later.
-                raise SSLError(e, request=request)
-
-            raise ConnectionError(e, request=request)
-
-        except ClosedPoolError as e:
-            raise ConnectionError(e, request=request)
-
-        except _ProxyError as e:
-            raise ProxyError(e)
-
-        except (_SSLError, _HTTPError) as e:
-            if isinstance(e, _SSLError):
-                # This branch is for urllib3 versions earlier than v1.22
-                raise SSLError(e, request=request)
-            elif isinstance(e, ReadTimeoutError):
-                raise ReadTimeout(e, request=request)
-            elif isinstance(e, _InvalidHeader):
-                raise InvalidHeader(e, request=request)
-            else:
-                raise
-
-        return self.build_response(request, resp)
-
diff --git a/lookupdns.py b/lookupdns.py
new file mode 100644
index 0000000..d44bdda
--- /dev/null
+++ b/lookupdns.py
@@ -0,0 +1,79 @@
+#!/usr/local/bin/python3.sh
+# -*-mode: python; indent-tabs-mode: nil; py-indent-offset: 4; coding: utf-8 -*-
+
+import sys
+import os
+import traceback
+
+from phantompy import Render
+
+global LOG
+import logging
+import warnings
+warnings.filterwarnings('ignore')
+LOG = logging.getLogger()
+
+class LookFor(Render):
+
+    def __init__(self, url, outfile, jsfile=None):
+      self.uri = url
+      Render.__init__(self, url, outfile, jsfile)
+
+    def ilookfor(self, html):
+      import json
+      marker = '<pre>'
+      if marker not in html: return ''
+      i = html.find(marker) + len(marker)
+      html = html[i:]
+      assert html[0] == '{', html
+      i = html.find('</pre>')
+      # [...] remainder of ilookfor elided in this copy of the patch
+
+def main():
+  if (len(sys.argv) < 3):
+    LOG.info("USAGE: lookupdns.py <url> <outfile> [<jsfile>]")
+  else:
+    url = sys.argv[1]
+    outfile = sys.argv[2]
+    jsfile = sys.argv[3] if len(sys.argv) > 3 else None
+    r = LookFor(url, outfile, jsfile)
+
+  sys.exit(0)
+
+if __name__ == "__main__":
+  main()
+
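+# Usage sketch (illustrative only -- the URL and output path below are
+# hypothetical, not part of the upstream file):
+#
+#   python3 lookupdns.py https://example.org/relay-details /tmp/out.pdf
+#
+# Render drives QtWebEngine to load the page and print it to the outfile;
+# ilookfor() is there to slice a JSON payload out of the rendered HTML
+# between the <pre> markers.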
diff --git a/phantompy.py b/phantompy.py
new file mode 100644
index 0000000..d813a1a
--- /dev/null
+++ b/phantompy.py
@@ -0,0 +1,260 @@
+#!/usr/local/bin/python3.sh
+# -*-mode: python; indent-tabs-mode: nil; py-indent-offset: 4; coding: utf-8 -*-
+# https://gist.github.com/michaelfranzl/91f0cc13c56120391b949f885643e974/raw/a0601515e7a575bc4c7d4d2a20973b29b6c6f2df/phantom.py
+"""
+# phantom.py
+
+Simple but fully scriptable headless QtWebKit browser using PyQt5 in Python3,
+specialized in executing external JavaScript and generating PDF files. A lean
+replacement for other bulky headless browser frameworks.
+
+
+## Usage
+
+If you have a display attached:
+
+    ./phantom.py <url> <pdf-file> [<jsfile>]
+    
+If you don't have a display attached (i.e. on a remote server):
+
+    xvfb-run ./phantom.py <url> <pdf-file> [<jsfile>]
+
+Arguments:
+
+<url>       Can be a http(s) URL or a path to a local file
+<pdf-file>  Path and name of PDF file to generate
+[<jsfile>]  (optional) Path and name of a JavaScript file to execute
+
+
+## Features
+
+* Generate a PDF screenshot of the web page after it is completely loaded.
+* Optionally execute a local JavaScript file specified by the argument
+   <jsfile> after the web page is completely loaded, and before
+   the PDF is generated.
+* console.log's will be printed to stdout.
+* Easily add new features by changing the source code of this script, without
+   compiling C++ code. For more advanced applications, consider attaching
+   PyQt objects/methods to WebKit's JavaScript space by using
+   `QWebFrame::addToJavaScriptWindowObject()`.
+
+If you execute an external <jsfile>, phantom.py has no way of knowing
+when that script has finished doing its work. For this reason, the external
+script should execute `console.log("__PHANTOM_PY_DONE__");` when done. This will
+trigger the PDF generation, after which phantom.py will exit. If no
+`__PHANTOM_PY_DONE__` string is seen on the console for 10 seconds, phantom.py
+will exit without doing anything. This behavior could be implemented more
+elegantly without console.log's but it is the simplest solution.
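+
+For instance, a <jsfile> that waits for a particular element to render
+before signalling completion could look like this (an illustrative sketch,
+not part of the original phantom.py):
+
+    var poll = setInterval(function() {
+      if (document.querySelector("pre")) {
+        clearInterval(poll);
+        console.log("__PHANTOM_PY_DONE__");
+      }
+    }, 250);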
+
+It is important to remember that since you're just running WebKit, you can use
+everything that WebKit supports, including the usual JS client libraries, CSS,
+CSS @media types, etc.
+
+
+## Dependencies
+
+* Python3
+* PyQt5
+* xvfb (optional for display-less machines)
+
+Installation of dependencies in Debian Stretch is easy:
+
+    apt-get install xvfb python3-pyqt5 python3-pyqt5.qtwebkit
+    
+Finding the equivalent for other OSes is an exercise that I leave to you.
+
+
+## Examples
+
+Given the following file /tmp/test.html
+
+    <html>
+      <body>
+        <p>foo <b id="id1">foo</b> <b id="id2">foo</b></p>
+        <script>
+          document.getElementById('id1').innerHTML = "bar";
+        </script>
+      </body>
+    </html>
+
+... and the following file /tmp/test.js:
+
+    document.getElementById('id2').innerHTML = "baz";
+    console.log("__PHANTOM_PY_DONE__");
+
+... and running this script (without attached display) ...
+
+    xvfb-run python3 phantom.py /tmp/test.html /tmp/out.pdf /tmp/test.js
+
+... you will get a PDF file /tmp/out.pdf with the contents "foo bar baz".
+
+Note that the second occurrence of "foo" has been replaced by the web page's own
+script, and the third occurrence of "foo" by the external JS file.
+
+
+## License
+
+Copyright 2017 Michael Karl Franzl
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+"""
+
+import sys
+import os
+import traceback
+import atexit
+from PyQt5.QtCore import QUrl
+from PyQt5.QtCore import QTimer
+from PyQt5.QtWidgets import QApplication
+from PyQt5.QtPrintSupport import QPrinter
+from PyQt5.QtWebEngineWidgets import QWebEnginePage as QWebPage
+
+global LOG
+import logging
+import warnings
+warnings.filterwarnings('ignore')
+LOG = logging.getLogger()
+
+def prepare():
+    sfile = '/tmp/test.js'
+    if not os.path.exists(sfile):
+        with open(sfile, 'wt') as ofd:
+            ofd.write("""
+document.getElementById('id2').innerHTML = "baz";
+console.log("__PHANTOM_PY_DONE__");
+""")
+        sys.stderr.write(f"wrote {sfile} ")
+    sfile = '/tmp/test.html'
+    if not os.path.exists(sfile):
+        with open(sfile, 'wt') as ofd:
+            ofd.write("""
+<html>
+  <body>
+    <p>foo <b id="id1">foo</b> <b id="id2">foo</b></p>
+    <script>
+      document.getElementById('id1').innerHTML = "bar";
+    </script>
+  </body>
+</html>
+""")
+        sys.stderr.write(f"wrote {sfile} ")
+    sys.stderr.write("\n")
+
+class Render(QWebPage):
+    def __init__(self, url, outfile, jsfile=None):
+        self.app = QApplication(sys.argv)
+
+        QWebPage.__init__(self)
+
+        self.jsfile = jsfile
+        self.outfile = outfile
+
+        qurl = QUrl.fromUserInput(url)
+
+        LOG.debug(f"phantom.py: URL= {qurl} OUTFILE={outfile} JSFILE= {jsfile}")
+
+        # The PDF generation only happens when the special string __PHANTOM_PY_DONE__
+        # is sent to console.log(). The following JS string will be executed by
+        # default, when no external JavaScript file is specified.
+        self.js_contents = "setTimeout(function() { console.log('__PHANTOM_PY_DONE__') }, 5000);"
+
+        if jsfile:
+            try:
+                f = open(self.jsfile)
+                self.js_contents = f.read()
+                f.close()
+            except:
+                LOG.error(traceback.format_exc())
+                self._exit(10)
+
+        self.loadFinished.connect(self._loadFinished)
+        self.load(qurl)
+        self.javaScriptConsoleMessage = self._onConsoleMessage
+
+        if False:
+            # Run for a maximum of 10 seconds
+            watchdog = QTimer()
+            watchdog.setSingleShot(True)
+            watchdog.timeout.connect(lambda: self._exit(9))
+            watchdog.start(10000)
+
+        self.app.exec_()
+
+    def _onConsoleMessage(self, *args):
+        if len(args) > 3:
+            level, txt, lineno, filename = args
+        else:
+            level = 1
+            txt, lineno, filename = args
+        LOG.debug(f"CONSOLE {lineno} {txt} {filename}")
+        if "__PHANTOM_PY_DONE__" in txt:
+            # If we get this magic string, it means that the external JS is done
+            self._print()
+        if "__PHANTOM_PY_EXIT__" in txt:
+            self._exit(0)
+
+    def _loadFinished(self, result):
+        LOG.debug(f"phantom.py: Evaluating JS from {self.jsfile}")
+        self.runJavaScript("document.documentElement.contentEditable=true")
+        self.runJavaScript(self.js_contents)
+
+    def _printer_callback(self, *args):
+        """print(self, QPrinter, Callable[[bool], None])"""
+        # print(f"_printer_callback {self.outfile} {args}")
+        if args[0] is False:
+            i = 1
+        else:
+            i = 0
+        self._exit(i)
+
+    def _print(self):
+        printer = QPrinter()
+        printer.setPageMargins(10, 10, 10, 10, QPrinter.Millimeter)
+        printer.setPaperSize(QPrinter.A4)
+        printer.setCreator("phantom.py by Michael Karl Franzl")
+        printer.setOutputFormat(QPrinter.PdfFormat)
+        printer.setOutputFileName(self.outfile)
+        self.print(printer, self._printer_callback)
+        LOG.debug("phantom.py: Printed")
+
+    def _exit(self, val):
+        LOG.debug(f"phantom.py: Exiting with val {val}")
+
+        # Run for a maximum of 10 seconds
+        watchdog = QTimer()
+        watchdog.setSingleShot(True)
+        watchdog.timeout.connect(lambda: sys.exit(val))
+        watchdog.start(10000)
+        self.app.exit(val)
+        atexit._clear()
+        sys.exit(val)
+
+def main():
+    if (len(sys.argv) < 3):
+        LOG.info("USAGE: ./phantom.py <url> <pdf-file> [<jsfile>]")
+    else:
+        url = sys.argv[1]
+        outfile = sys.argv[2]
+        jsfile = sys.argv[3] if len(sys.argv) > 3 else None
+        r = Render(url, outfile, jsfile)
+
+    sys.exit(0)
+
+if __name__ == "__main__":
+    main()
+
diff --git a/support_onions.py b/support_onions.py
index 93ae123..68d3ee0 100644
--- a/support_onions.py
+++ b/support_onions.py
@@ -27,9 +27,8 @@ LOG = logging.getLogger()
 
 bHAVE_TORR = shutil.which('tor-resolve')
 
-# maybe we should check these each time but we
-# got them by sorting bad relays in the wild
-# we'll keep a copy here
+# we check these each time, but we got them by sorting bad relays
+# in the wild; we keep a copy here so we can avoid retesting
 yKNOWN_NODNS = """
 ---
  - 0x0.is
@@ -50,6 +49,7 @@ yKNOWN_NODNS = """
  - or.wowplanet.de
  - ormycloud.org
  - plied-privacy.net
+ - rivacysvcs.net
 - redacted.org
 - rification-for-nusenu.net
 - rofl.cat
diff --git a/trustor_poc.py b/trustor_poc.py
index 965e0ad..0f9406a 100644
--- a/trustor_poc.py
+++ b/trustor_poc.py
@@ -52,7 +52,7 @@ def read_local_trust_config(trust_config):
     '''
 
     result = []
-    
+
     # for now we support max_depth = 0 only
     # this PoC version has no support for recursion
     # https://github.com/nusenu/tor-relay-operator-ids-trust-information#trust-information-consumers
@@ -140,7 +140,11 @@ def get_controller(address='127.0.0.1', port=9151, password=''):
 
     return controller
 
-def find_validation_candidates(controller, trusted_domains=[],validation_cache=[],accept_all=False):
+def find_validation_candidates(controller,
+                               trusted_domains=[],
+                               validation_cache=[],
+                               CAfile='/etc/ssl/certs/ca-certificates.crt',
+                               accept_all=False):
     '''
     connect to a tor client via controlport and return a dict of all
     not yet validated fingerprints per trusted operators
@@ -221,14 +225,14 @@ def oDownloadUrlRequests(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050):
         head = requests.head(uri, timeout=timeout, proxies=proxy, headers=headers)
     except Exception as e:
         raise TrustorError(f"HTTP HEAD request failed for {uri} {e}")
-    
+
     if head.status_code >= 300:
         raise TrustorError(f"HTTP Errorcode {head.status_code}")
     if not head.headers['Content-Type'].startswith('text/plain'):
         raise TrustorError(f"HTTP Content-Type != text/plain")
     if not os.path.exists(sCAfile):
         raise TrustorError(f"File not found CAfile {sCAfile}")
-    
+
     try:
         with requests.sessions.Session() as session:
             oReqResp = session.request(method="get", url=uri,
@@ -336,7 +340,7 @@ def my_match_hostname(cert, hostname):
     else:
         raise CertificateError(
             "no appropriate commonName or subjectAltName fields were found"
-        )   
+        )
 match_hostname = my_match_hostname
 from urllib3.util.ssl_ import (
     is_ipaddress,
@@ -393,15 +397,15 @@ def oDownloadUrlUrllib3(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050):
                            retries=False)
     except Exception as e:
         LOG.error(f"HTTP HEAD request failed for {uri} {e}")
-        raise 
-    
+        raise
+
     if head.status >= 300:
         raise TrustorError(f"HTTP Errorcode {head.status}")
     if not head.headers['Content-Type'].startswith('text/plain'):
         raise TrustorError(f"HTTP Content-Type != text/plain")
     if not os.path.exists(sCAfile):
         raise TrustorError(f"File not found CAfile {sCAfile}")
-    
+
     try:
         oReqResp = proxy.request("GET", uri,
                                  headers=headers,
@@ -420,7 +424,7 @@ def oDownloadUrlUrllib3(uri, sCAfile, timeout=30, host='127.0.0.1', port=9050):
         LOG.error(f'Redirect detected %s vs %s (final)' % (uri, oReqResp.geturl()))
         raise TrustorError(f'Redirect detected %s vs %s (final)' % (uri, oReqResp.geturl()))
     oReqResp.decode_content = True
-    
+
     return oReqResp
 
 import urllib3.connectionpool
 urllib3.connectionpool.VerifiedHTTPSConnection = HTTPSConnection
@@ -483,7 +487,7 @@ def idns_validate(domain,
 
     # this is not the system wide /etc/resolv.conf
     # use dnscrypt-proxy to encrypt your DNS and route it via tor's SOCKSPort
-    
+
     ctx = ub_ctx()
 
     if (os.path.isfile(libunbound_resolv_file)):
@@ -529,6 +533,7 @@ def configure_tor(controller, trusted_fingerprints, exitonly=True):
 
 if __name__ == '__main__':
+    CAfile = '/etc/ssl/certs/ca-certificates.crt'
     trust_config = 'trust_config'
     assert os.path.exists(trust_config)
     trusted_domains = read_local_trust_config(trust_config)
@@ -546,7 +551,8 @@ if __name__ == '__main__':
 
     r = find_validation_candidates(controller,
                                    validation_cache=trusted_fingerprints,
-                                   trusted_domains=trusted_domains)
+                                   trusted_domains=trusted_domains,
+                                   CAfile=CAfile)
     validate_proofs(r, validation_cache_file,
                     timeout=timeout,
host=controller_address,