Compare commits
3 commits
f85f5c6bd8
...
28d1d34dbd
Author | SHA1 | Date | |
---|---|---|---|
|
28d1d34dbd | ||
|
94c0834092 | ||
|
2ea65cc181 |
6 changed files with 436 additions and 351 deletions
|
@ -198,7 +198,7 @@ def lYamlGoodNodes(sFile='/etc/tor/torrc-goodnodes.yaml'):
|
|||
# yq '.Nodes.IntroductionPoints|.[]' < /etc/tor/torrc-goodnodes.yaml
|
||||
return l
|
||||
|
||||
def bdomain_is_bad(domain):
|
||||
def bdomain_is_bad(domain, fp):
|
||||
global lKNOWN_NODNS
|
||||
if domain in lKNOWN_NODNS: return True
|
||||
if domain in lMAYBE_NODNS:
|
||||
|
@ -209,9 +209,10 @@ def bdomain_is_bad(domain):
|
|||
lMAYBE_NODNS.remove(domain)
|
||||
return True
|
||||
|
||||
if '@' in domain:
|
||||
LOG.warn(f"@ in domain {domain}")
|
||||
return True
|
||||
for elt in '@(){}$!':
|
||||
if elt in domain:
|
||||
LOG.warn(f"{elt} in domain {domain}")
|
||||
return True
|
||||
return False
|
||||
|
||||
tBAD_URLS = set()
|
||||
|
@ -273,10 +274,11 @@ def aVerifyContact(a, fp, https_cafile, timeout=20, host='127.0.0.1', port=9050)
|
|||
|
||||
# domain should be a unique key for contacts
|
||||
domain = a['url'][8:]
|
||||
if bdomain_is_bad(domain):
|
||||
if bdomain_is_bad(domain, fp):
|
||||
LOG.warn(f"{domain} is bad from {a['url']}")
|
||||
LOG.info(f"{domain} is bad from {a}")
|
||||
LOG.debug(f"{fp} is bad from {a}")
|
||||
return a
|
||||
|
||||
ip = zResolveDomain(domain)
|
||||
if ip == '':
|
||||
aFP_EMAIL[fp] = a['email']
|
||||
|
@ -357,7 +359,7 @@ def aParseContactYaml(contact, fp):
|
|||
LOG.debug(f"{fp} {a}")
|
||||
return a
|
||||
key = ''
|
||||
for elt in lets:
|
||||
for elt in lelts:
|
||||
if key == '':
|
||||
key = elt
|
||||
continue
|
||||
|
@ -424,22 +426,22 @@ def vsetup_logging(log_level, logfile=''):
|
|||
LOG.info(f"SSetting log_level to {log_level!s}")
|
||||
|
||||
logging._levelToName = {
|
||||
CRITICAL: 'CRITICAL',
|
||||
ERROR: 'ERROR',
|
||||
WARNING: 'WARN',
|
||||
INFO: 'INFO',
|
||||
DEBUG: 'DEBUG',
|
||||
NOTSET: 'NOTSET',
|
||||
logging.CRITICAL: 'CRITICAL',
|
||||
logging.ERROR: 'ERROR',
|
||||
logging.WARNING: 'WARN',
|
||||
logging.INFO: 'INFO',
|
||||
logging.DEBUG: 'DEBUG',
|
||||
logging.NOTSET: 'NOTSET',
|
||||
}
|
||||
logging._nameToLevel = {
|
||||
'CRITICAL': CRITICAL,
|
||||
'FATAL': FATAL,
|
||||
'ERROR': ERROR,
|
||||
'WARN': WARNING,
|
||||
'WARNING': WARNING,
|
||||
'INFO': INFO,
|
||||
'DEBUG': DEBUG,
|
||||
'NOTSET': NOTSET,
|
||||
'CRITICAL': logging.CRITICAL,
|
||||
'FATAL': logging.FATAL,
|
||||
'ERROR': logging.ERROR,
|
||||
'WARN': logging.WARNING,
|
||||
'WARNING': logging.WARNING,
|
||||
'INFO': logging.INFO,
|
||||
'DEBUG': logging.DEBUG,
|
||||
'NOTSET': logging.NOTSET,
|
||||
}
|
||||
|
||||
def oMainArgparser(_=None):
|
||||
|
@ -498,7 +500,7 @@ def oMainArgparser(_=None):
|
|||
help="Seconds to wait for Tor to booststrap")
|
||||
parser.add_argument('--points_timeout', type=int, default=0,
|
||||
help="Timeout for getting introduction points - must be long >120sec. 0 means disabled looking for IPs")
|
||||
parser.add_argument('--log_level', type=int, default=10,
|
||||
parser.add_argument('--log_level', type=int, default=20,
|
||||
help="10=debug 20=info 30=warn 40=error")
|
||||
parser.add_argument('--bad_sections', type=str,
|
||||
default='MyBadExit',
|
||||
|
@ -524,13 +526,13 @@ def vwrite_badnodes(oArgs, oBAD_NODES, slen):
|
|||
os.rename(oArgs.bad_nodes, bak)
|
||||
os.rename(tmp, oArgs.bad_nodes)
|
||||
|
||||
def vwrite_goodnodes(oArgs, oGOOD_NODES, slen):
|
||||
def vwrite_goodnodes(oArgs, oGOOD_NODES, ilen):
|
||||
if oArgs.good_nodes:
|
||||
tmp = oArgs.good_nodes +'.tmp'
|
||||
bak = oArgs.good_nodes +'.bak'
|
||||
with open(tmp, 'wt') as oFYaml:
|
||||
yaml.dump(oGOOD_NODES, indent=2, stream=oFYaml)
|
||||
LOG.info(f"Wrote {slen} good nodes to {oArgs.good_nodes}")
|
||||
LOG.info(f"Wrote {ilen} good relays to {oArgs.good_nodes}")
|
||||
oFYaml.close()
|
||||
if os.path.exists(oArgs.good_nodes):
|
||||
os.rename(oArgs.good_nodes, bak)
|
||||
|
@ -628,7 +630,7 @@ def iMain(lArgs):
|
|||
LOG.info(f"Preloaded {len(texclude_set)} bad fps")
|
||||
|
||||
ttrust_db_index = aTRUST_DB_INDEX.keys()
|
||||
tdns_contacts = set()
|
||||
tdns_urls = set()
|
||||
iFakeContact = 0
|
||||
iTotalContacts = 0
|
||||
aBadContacts = {}
|
||||
|
@ -644,7 +646,7 @@ def iMain(lArgs):
|
|||
continue
|
||||
relay.fingerprint = relay.fingerprint.upper()
|
||||
|
||||
sofar = f"G:{len(aTRUST_DB.keys())} U:{len(tdns_contacts)} F:{iFakeContact} BF:{len(texclude_set)} GF:{len(ttrust_db_index)} TC:{iTotalContacts} #{iR}"
|
||||
sofar = f"G:{len(aTRUST_DB.keys())} U:{len(tdns_urls)} F:{iFakeContact} BF:{len(texclude_set)} GF:{len(ttrust_db_index)} TC:{iTotalContacts} #{iR}"
|
||||
if not relay.exit_policy.is_exiting_allowed():
|
||||
if sEXCLUDE_EXIT_KEY == 'ExcludeNodes':
|
||||
pass # LOG.debug(f"{relay.fingerprint} not an exit {sofar}")
|
||||
|
@ -671,10 +673,11 @@ def iMain(lArgs):
|
|||
continue
|
||||
iTotalContacts += 1
|
||||
|
||||
fp = relay.fingerprint
|
||||
if relay.contact and not 'url:' in relay.contact:
|
||||
LOG.info(f"{relay.fingerprint} skipping bad contact - no url: {sofar}")
|
||||
LOG.debug(f"{relay.fingerprint} {relay.contact} {sofar}")
|
||||
texclude_set.add(relay.fingerprint)
|
||||
LOG.info(f"{fp} skipping bad contact - no url: {sofar}")
|
||||
LOG.debug(f"{fp} {relay.contact} {sofar}")
|
||||
texclude_set.add(fp)
|
||||
continue
|
||||
|
||||
c = relay.contact.lower()
|
||||
|
@ -688,17 +691,17 @@ def iMain(lArgs):
|
|||
i = c.find('/')
|
||||
if i >=0: c = c[:i]
|
||||
domain = c
|
||||
if domain and bdomain_is_bad(domain):
|
||||
LOG.info(f"{relay.fingerprint} skipping bad {domain} {sofar}")
|
||||
LOG.debug(f"{relay.fingerprint} {relay.contact} {sofar}")
|
||||
texclude_set.add(relay.fingerprint)
|
||||
if domain and bdomain_is_bad(domain, fp):
|
||||
LOG.info(f"{fp} skipping bad {domain} {sofar}")
|
||||
LOG.debug(f"{fp} {relay.contact} {sofar}")
|
||||
texclude_set.add(fp)
|
||||
continue
|
||||
|
||||
if domain:
|
||||
ip = zResolveDomain(domain)
|
||||
if not ip:
|
||||
LOG.warn(f"{relay.fingerprint} {domain} did not resolve {sofar}")
|
||||
texclude_set.add(relay.fingerprint)
|
||||
LOG.warn(f"{fp} {domain} did not resolve {sofar}")
|
||||
texclude_set.add(fp)
|
||||
lKNOWN_NODNS.append(domain)
|
||||
iFakeContact += 1
|
||||
continue
|
||||
|
@ -706,7 +709,7 @@ def iMain(lArgs):
|
|||
if 'dns-rsa' in relay.contact.lower():
|
||||
target = f"{relay.fingerprint}.{domain}"
|
||||
LOG.info(f"skipping 'dns-rsa' {target} {sofar}")
|
||||
tdns_contacts.add(target)
|
||||
tdns_urls.add(target)
|
||||
|
||||
elif 'proof:uri-rsa' in relay.contact.lower():
|
||||
a = aParseContact(relay.contact, relay.fingerprint)
|
||||
|
@ -769,6 +772,8 @@ def iMain(lArgs):
|
|||
|
||||
LOG.info(f"Filtered {len(twhitelist_set)} whitelisted relays")
|
||||
texclude_set = texclude_set.difference(twhitelist_set)
|
||||
# accept the dns-rsa urls for now until we test them
|
||||
texclude_set = texclude_set.difference(tdns_urls)
|
||||
LOG.info(f"{len(list(aTRUST_DB.keys()))} good contacts out of {iTotalContacts}")
|
||||
|
||||
if oArgs.proof_output and aTRUST_DB:
|
||||
|
@ -785,7 +790,7 @@ def iMain(lArgs):
|
|||
with open(oArgs.torrc_output, 'wt') as oFTorrc:
|
||||
oFTorrc.write(f"{sEXCLUDE_EXIT_KEY} {','.join(texclude_set)}\n")
|
||||
oFTorrc.write(f"{sINCLUDE_EXIT_KEY} {','.join(aTRUST_DB_INDEX.keys())}\n")
|
||||
oFTorrc.write(f"{sINCLUDE_GUARD_KEY} {','.join(o[oGOOD_ROOT]['GuardNodes'])}\n")
|
||||
oFTorrc.write(f"{sINCLUDE_GUARD_KEY} {','.join(oGOOD_NODES[oGOOD_ROOT]['GuardNodes'])}\n")
|
||||
LOG.info(f"Wrote tor configuration to {oArgs.torrc_output}")
|
||||
oFTorrc.close()
|
||||
|
||||
|
@ -801,7 +806,7 @@ def iMain(lArgs):
|
|||
|
||||
oGOOD_NODES['GoodNodes']['Relays']['ExitNodes'] = list(aTRUST_DB_INDEX.keys())
|
||||
# GuardNodes are read-only
|
||||
vwrite_goodnodes(oArgs, oGOOD_NODES, str(len(ttrust_db_index)))
|
||||
vwrite_goodnodes(oArgs, oGOOD_NODES, len(aTRUST_DB_INDEX.keys()))
|
||||
retval = 0
|
||||
try:
|
||||
logging.getLogger('stem').setLevel(30)
|
||||
|
@ -838,7 +843,7 @@ def iMain(lArgs):
|
|||
LOG.errro(f"Failed setting {sINCLUDE_EXIT_KEY} good exit nodes in Tor")
|
||||
retval += 1
|
||||
|
||||
LOG.info("dns-rsa domains:\n{'\n'.join(tdns_contacts)}")
|
||||
sys.stdout.write("dns-rsa domains:\n" +'\n'.join(tdns_urls) +'\n')
|
||||
return retval
|
||||
|
||||
except InvalidRequest as e:
|
||||
|
|
265
https_adapter.py
265
https_adapter.py
|
@ -1,265 +0,0 @@
|
|||
# -*- mode: python; indent-tabs-mode: nil; py-indent-offset: 4; coding: utf-8 -*-
|
||||
|
||||
from urllib.parse import urlparse

import urllib3
from requests import adapters
from requests.exceptions import (
    ConnectionError,
    ConnectTimeout,
    InvalidHeader,
    InvalidProxyURL,
    InvalidURL,
    ProxyError,
    ReadTimeout,
    RetryError,
    SSLError,
)
from requests.utils import (
    DEFAULT_CA_BUNDLE_PATH,
    get_auth_from_url,
    get_encoding_from_headers,
    prepend_scheme_if_needed,
    select_proxy,
    urldefragauth,
)
from urllib3.exceptions import ClosedPoolError
from urllib3.exceptions import ConnectTimeoutError
from urllib3.exceptions import HTTPError as _HTTPError
from urllib3.exceptions import InvalidHeader as _InvalidHeader
from urllib3.exceptions import LocationValueError
from urllib3.exceptions import MaxRetryError
from urllib3.exceptions import NewConnectionError
from urllib3.exceptions import ProtocolError
from urllib3.exceptions import ProxyError as _ProxyError
from urllib3.exceptions import ReadTimeoutError
from urllib3.exceptions import ResponseError
from urllib3.exceptions import SSLError as _SSLError
from urllib3.response import HTTPResponse
from urllib3.util import parse_url
from urllib3.util.retry import Retry
from urllib3.util import Timeout as TimeoutSauce
|
||||
|
||||
DEFAULT_POOLBLOCK = False
|
||||
DEFAULT_POOLSIZE = 10
|
||||
DEFAULT_RETRIES = 0
|
||||
DEFAULT_POOL_TIMEOUT = None
|
||||
|
||||
class HTTPAdapter(adapters.HTTPAdapter):
    """Transport adapter whose ``max_retries`` may be an int or a ``Retry``.

    The parent ``__init__`` is not called; this initializer assigns the
    instance attributes itself and then builds the pool manager.

    :param pool_connections: forwarded to ``init_poolmanager``.
    :param pool_maxsize: forwarded to ``init_poolmanager``.
    :param max_retries: a ``Retry`` instance, used as is, or a value that
        ``Retry.from_int`` can convert.
    :param pool_block: forwarded to ``init_poolmanager`` as ``block``.
    """

    def __init__(self,
                 pool_connections=DEFAULT_POOLSIZE,
                 pool_maxsize=DEFAULT_POOLSIZE,
                 max_retries=DEFAULT_RETRIES,
                 pool_block=DEFAULT_POOLBLOCK):
        # Remember the pool settings on the instance.
        self._pool_connections = pool_connections
        self._pool_maxsize = pool_maxsize
        self._pool_block = pool_block

        self.config = {}
        self.proxy_manager = {}

        # Normalise the retry policy to a Retry object.
        retry_policy = max_retries
        if not isinstance(retry_policy, Retry):
            retry_policy = Retry.from_int(retry_policy)
        self.max_retries = retry_policy

        self.init_poolmanager(pool_connections, pool_maxsize, block=pool_block)
|
||||
|
||||
|
||||
class HTTPSAdapter(HTTPAdapter):
    """Transport adapter with a small pool and an explicit retry policy.

    ``get_connection`` and ``send`` are overridden here; their bodies closely
    mirror the ``requests.adapters.HTTPAdapter`` implementation.

    :param pool_connections: The number of urllib3 connection pools to cache.
    :param pool_maxsize: The maximum number of connections to save in the pool.
    :param max_retries: Number of connect retries (combined with ``read=2``
        and ``redirect=0``), or a ready-made urllib3 ``Retry`` object which
        is then used unchanged.
    :param pool_block: Whether the connection pool should block for connections.

    Usage::

      >>> import requests
      >>> s = requests.Session()
      >>> a = HTTPSAdapter(max_retries=3)
      >>> s.mount('https://', a)
    """

    def __init__(
        self,
        pool_connections=DEFAULT_POOLSIZE,
        pool_maxsize=1,
        max_retries=3,
        pool_block=DEFAULT_POOLBLOCK,
    ):
        # A caller-supplied Retry is honoured as is; wrapping it in another
        # Retry(connect=<Retry>) would produce an unusable policy.
        if isinstance(max_retries, Retry):
            retries = max_retries
        else:
            retries = Retry(connect=max_retries, read=2, redirect=0)
        adapters.HTTPAdapter.__init__(self,
                                      pool_connections=pool_connections,
                                      pool_maxsize=pool_maxsize,
                                      max_retries=retries,
                                      pool_block=pool_block)

    def get_connection(self, url, proxies=None, use_forwarding_for_https=True):
        """Return a urllib3 connection pool for the given URL.

        This should not be called from user code, and is only exposed for
        use when subclassing.

        :param url: The URL to connect to.
        :param proxies: (optional) A Requests-style dictionary of proxies
            used on this request.
        :param use_forwarding_for_https: accepted for backward compatibility
            and ignored; ``PoolManager.connection_from_url`` has no such
            keyword, so forwarding it made every proxy-less request fail
            with ``TypeError``.
        :raises InvalidProxyURL: if the selected proxy URL has no host.
        :rtype: urllib3.ConnectionPool
        """
        proxy = select_proxy(url, proxies)

        if proxy:
            proxy = prepend_scheme_if_needed(proxy, "http")
            proxy_url = parse_url(proxy)
            if not proxy_url.host:
                raise InvalidProxyURL(
                    "Please check proxy URL. It is malformed "
                    "and could be missing the host."
                )
            proxy_manager = self.proxy_manager_for(proxy)
            conn = proxy_manager.connection_from_url(url)
        else:
            # Only scheme should be lower case
            parsed = urlparse(url)
            url = parsed.geturl()
            conn = self.poolmanager.connection_from_url(url)

        return conn

    def send(
        self, request, stream=False, timeout=None, verify=True, cert=None, proxies=None
    ):
        """Send a PreparedRequest object and return a Response object.

        :param request: The :class:`PreparedRequest <PreparedRequest>` being sent.
        :param stream: (optional) Whether to stream the request content.
        :param timeout: (optional) How long to wait for the server to send
            data before giving up, as a float, or a :ref:`(connect timeout,
            read timeout) <timeouts>` tuple.
        :type timeout: float or tuple or urllib3 Timeout object
        :param verify: (optional) Either a boolean, in which case it controls
            whether we verify the server's TLS certificate, or a string, in
            which case it must be a path to a CA bundle to use
        :param cert: (optional) Any user-provided SSL certificate to be trusted.
        :param proxies: (optional) The proxies dictionary to apply to the request.
        :raises InvalidURL: if no connection pool can be derived from the URL.
        :raises ValueError: if ``timeout`` is a tuple that is not a
            ``(connect, read)`` pair.
        :raises requests.exceptions.RequestException: urllib3 failures are
            translated to the matching requests exception.
        :rtype: requests.Response
        """
        try:
            conn = self.get_connection(request.url, proxies, use_forwarding_for_https=True)
        except LocationValueError as e:
            raise InvalidURL(e, request=request)

        self.cert_verify(conn, request.url, verify, cert)
        url = self.request_url(request, proxies)
        self.add_headers(
            request,
            stream=stream,
            timeout=timeout,
            verify=verify,
            cert=cert,
            proxies=proxies,
        )

        # A body without a declared Content-Length is sent chunked.
        chunked = not (request.body is None or "Content-Length" in request.headers)

        # Normalise every accepted timeout form to a urllib3 Timeout object.
        if isinstance(timeout, tuple):
            try:
                connect, read = timeout
                timeout = TimeoutSauce(connect=connect, read=read)
            except ValueError:
                raise ValueError(
                    f"Invalid timeout {timeout}. Pass a (connect, read) timeout tuple, "
                    "or a single float to set both timeouts to the same value."
                )
        elif isinstance(timeout, TimeoutSauce):
            pass
        else:
            timeout = TimeoutSauce(connect=timeout, read=timeout)

        try:
            if not chunked:
                resp = conn.urlopen(
                    method=request.method,
                    url=url,
                    body=request.body,
                    headers=request.headers,
                    redirect=False,
                    assert_same_host=False,
                    preload_content=False,
                    decode_content=False,
                    retries=self.max_retries,
                    timeout=timeout,
                )

            # Send the request.
            else:
                if hasattr(conn, "proxy_pool"):
                    conn = conn.proxy_pool

                # NOTE(review): _get_conn and HTTPResponse.from_httplib are
                # urllib3 internals - confirm they exist in the installed
                # urllib3 version.
                low_conn = conn._get_conn(timeout=DEFAULT_POOL_TIMEOUT)

                try:
                    skip_host = "Host" in request.headers
                    low_conn.putrequest(
                        request.method,
                        url,
                        skip_accept_encoding=True,
                        skip_host=skip_host,
                    )

                    for header, value in request.headers.items():
                        low_conn.putheader(header, value)

                    low_conn.endheaders()

                    # Hand-rolled chunked transfer encoding: hex length,
                    # CRLF, data, CRLF; terminated by a zero-length chunk.
                    for i in request.body:
                        low_conn.send(hex(len(i))[2:].encode("utf-8"))
                        low_conn.send(b"\r\n")
                        low_conn.send(i)
                        low_conn.send(b"\r\n")
                    low_conn.send(b"0\r\n\r\n")

                    # Receive the response from the server
                    r = low_conn.getresponse()

                    resp = HTTPResponse.from_httplib(
                        r,
                        pool=conn,
                        connection=low_conn,
                        preload_content=False,
                        decode_content=False,
                    )
                except Exception:
                    # If we hit any problems here, clean up the connection.
                    # Then, raise so that we can handle the actual exception.
                    low_conn.close()
                    raise

        except (ProtocolError, OSError) as err:
            raise ConnectionError(err, request=request)

        except MaxRetryError as e:
            if isinstance(e.reason, ConnectTimeoutError):
                # TODO: Remove this in 3.0.0: see #2811
                if not isinstance(e.reason, NewConnectionError):
                    raise ConnectTimeout(e, request=request)

            if isinstance(e.reason, ResponseError):
                raise RetryError(e, request=request)

            if isinstance(e.reason, _ProxyError):
                raise ProxyError(e, request=request)

            if isinstance(e.reason, _SSLError):
                # This branch is for urllib3 v1.22 and later.
                raise SSLError(e, request=request)

            raise ConnectionError(e, request=request)

        except ClosedPoolError as e:
            raise ConnectionError(e, request=request)

        except _ProxyError as e:
            raise ProxyError(e)

        except (_SSLError, _HTTPError) as e:
            if isinstance(e, _SSLError):
                # This branch is for urllib3 versions earlier than v1.22
                raise SSLError(e, request=request)
            elif isinstance(e, ReadTimeoutError):
                raise ReadTimeout(e, request=request)
            elif isinstance(e, _InvalidHeader):
                raise InvalidHeader(e, request=request)
            else:
                raise

        return self.build_response(request, resp)
|
||||
|
79
lookupdns.py
Normal file
79
lookupdns.py
Normal file
|
@ -0,0 +1,79 @@
|
|||
#!/usr/local/bin/python3.sh
|
||||
# -*-mode: python; indent-tabs-mode: nil; py-indent-offset: 4; coding: utf-8 -*-
|
||||
|
||||
import sys
|
||||
import os
|
||||
import traceback
|
||||
|
||||
from phantompy import Render
|
||||
|
||||
global LOG
|
||||
import logging
|
||||
import warnings
|
||||
warnings.filterwarnings('ignore')
|
||||
LOG = logging.getLogger()
|
||||
|
||||
class LookFor(Render):
    """Render a URL and look for a specific TXT answer in the JSON it shows.

    The page is expected to display a JSON document inside a ``<pre>``
    element. Once loading finishes the HTML is saved next to the output
    file, scanned by :meth:`ilookfor`, and the process exits with the
    resulting code.
    """

    def __init__(self, url, outfile, jsfile=None):
        """Remember ``url`` as ``self.uri`` and start the base renderer."""
        self.uri = url
        Render.__init__(self, url, outfile, jsfile)

    def ilookfor(self, html):
        """Scan rendered HTML for the ``we-run-this-tor-relay`` TXT answer.

        :param html: the page source as a string.
        :returns: ``0`` if an answer of type 16 carries the expected data,
            ``1`` if the page or its JSON is missing or malformed,
            ``2`` if the JSON is well formed but no answer matches.

        Always returns an int, because the value is used as an exit code.
        Malformed input is reported as ``1`` instead of raising, so the
        caller can still exit with a failure status.
        """
        import json

        marker = '<pre style="word-wrap: break-word; white-space: pre-wrap;">'
        start = html.find(marker)
        if start < 0:
            LOG.warning(f"FAIL {self.uri}")
            return 1
        html = html[start + len(marker):]
        end = html.find('</pre')
        if end >= 0:
            html = html[:end]
        if not (html.startswith('{') and html.endswith('}')):
            LOG.warning(f"FAIL {self.uri}")
            return 1
        LOG.debug(f"Found {len(html)} json")
        try:
            o = json.loads(html)
        except ValueError:
            LOG.warning(f"FAIL {self.uri}")
            return 1
        if not isinstance(o, dict) or not isinstance(o.get("Answer"), list):
            LOG.warning(f"FAIL {self.uri}")
            return 1
        for elt in o["Answer"]:
            if not isinstance(elt, dict) or 'type' not in elt:
                LOG.warning(f"FAIL {self.uri}")
                return 1
            # 16 is the DNS TXT record type (RFC 1035).
            if elt['type'] != 16:
                continue
            if 'data' not in elt:
                LOG.warning(f"FAIL {self.uri}")
                return 1
            if elt['data'] == 'we-run-this-tor-relay':
                LOG.info(f"OK {self.uri}")
                return 0
        LOG.warning(f"BAD {self.uri}")
        return 2

    def _html_callback(self, *args):
        """Receive the page HTML from ``toHtml``; save it, scan it, exit."""
        # Default to failure so the exit code is defined even when the
        # callback is not handed a string.
        i = 1
        if isinstance(args[0], str):
            self._save(args[0])
            i = self.ilookfor(args[0])
        self._exit(i)

    def _save(self, html):
        """Write ``html`` to the output path with ``.pdf`` replaced by ``.out``."""
        sfile = self.outfile.replace('.pdf', '.out')
        # CompleteHtmlSaveFormat SingleHtmlSaveFormat MimeHtmlSaveFormat
        with open(sfile, 'wt') as ofd:
            ofd.write(html)
        LOG.debug(f"Saved {sfile}")

    def _loadFinished(self, result):
        """On load completion, request the page HTML for ``_html_callback``."""
        LOG.debug("phantom.py: Loading finished!")
        self.toHtml(self._html_callback)
|
||||
|
||||
def main():
    """Command-line entry point: ``lookupdns.py <url> <pdf-file> [<javascript-file>]``.

    With fewer than two arguments the usage text is written to stderr and
    the process exits with status 2. It used to be logged at INFO level and
    exit 0, so it was never shown unless logging had been configured.
    Otherwise a :class:`LookFor` is constructed from the arguments, and the
    process exits with status 0 if the constructor returns.
    """
    if len(sys.argv) < 3:
        sys.stderr.write("USAGE: lookupdns.py <url> <pdf-file> [<javascript-file>]\n")
        sys.exit(2)

    url = sys.argv[1]
    outfile = sys.argv[2]
    jsfile = sys.argv[3] if len(sys.argv) > 3 else None
    LookFor(url, outfile, jsfile)

    sys.exit(0)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
260
phantompy.py
Normal file
260
phantompy.py
Normal file
|
@ -0,0 +1,260 @@
|
|||
#!/usr/local/bin/python3.sh
|
||||
# -*-mode: python; indent-tabs-mode: nil; py-indent-offset: 4; coding: utf-8 -*-
|
||||
# https://gist.github.com/michaelfranzl/91f0cc13c56120391b949f885643e974/raw/a0601515e7a575bc4c7d4d2a20973b29b6c6f2df/phantom.py
|
||||
"""
|
||||
# phantom.py
|
||||
|
||||
Simple but fully scriptable headless QtWebKit browser using PyQt5 in Python3,
|
||||
specialized in executing external JavaScript and generating PDF files. A lean
|
||||
replacement for other bulky headless browser frameworks.
|
||||
|
||||
|
||||
## Usage
|
||||
|
||||
If you have a display attached:
|
||||
|
||||
./phantom.py <url> <pdf-file> [<javascript-file>]
|
||||
|
||||
If you don't have a display attached (i.e. on a remote server):
|
||||
|
||||
xvfb-run ./phantom.py <url> <pdf-file> [<javascript-file>]
|
||||
|
||||
Arguments:
|
||||
|
||||
<url> Can be a http(s) URL or a path to a local file
|
||||
<pdf-file> Path and name of PDF file to generate
|
||||
[<javascript-file>] (optional) Path and name of a JavaScript file to execute
|
||||
|
||||
|
||||
## Features
|
||||
|
||||
* Generate a PDF screenshot of the web page after it is completely loaded.
|
||||
* Optionally execute a local JavaScript file specified by the argument
|
||||
<javascript-file> after the web page is completely loaded, and before
|
||||
the PDF is generated.
|
||||
* console.log's will be printed to stdout.
|
||||
* Easily add new features by changing the source code of this script, without
|
||||
compiling C++ code. For more advanced applications, consider attaching
|
||||
PyQt objects/methods to WebKit's JavaScript space by using
|
||||
`QWebFrame::addToJavaScriptWindowObject()`.
|
||||
|
||||
If you execute an external <javascript-file>, phantom.py has no way of knowing
|
||||
when that script has finished doing its work. For this reason, the external
|
||||
script should execute `console.log("__PHANTOM_PY_DONE__");` when done. This will
|
||||
trigger the PDF generation, after which phantom.py will exit. If no
|
||||
`__PHANTOM_PY_DONE__` string is seen on the console for 10 seconds, phantom.py
|
||||
will exit without doing anything. This behavior could be implemented more
|
||||
elegantly without console.log's but it is the simplest solution.
|
||||
|
||||
It is important to remember that since you're just running WebKit, you can use
|
||||
everything that WebKit supports, including the usual JS client libraries, CSS,
|
||||
CSS @media types, etc.
|
||||
|
||||
|
||||
## Dependencies
|
||||
|
||||
* Python3
|
||||
* PyQt5
|
||||
* xvfb (optional for display-less machines)
|
||||
|
||||
Installation of dependencies in Debian Stretch is easy:
|
||||
|
||||
apt-get install xvfb python3-pyqt5 python3-pyqt5.qtwebkit
|
||||
|
||||
Finding the equivalent for other OSes is an exercise that I leave to you.
|
||||
|
||||
|
||||
## Examples
|
||||
|
||||
Given the following file /tmp/test.html
|
||||
|
||||
<html>
|
||||
<body>
|
||||
<p>foo <span id="id1">foo</span> <span id="id2">foo</span></p>
|
||||
</body>
|
||||
<script>
|
||||
document.getElementById('id1').innerHTML = "bar";
|
||||
</script>
|
||||
</html>
|
||||
|
||||
... and the following file /tmp/test.js:
|
||||
|
||||
document.getElementById('id2').innerHTML = "baz";
|
||||
console.log("__PHANTOM_PY_DONE__");
|
||||
|
||||
... and running this script (without attached display) ...
|
||||
|
||||
xvfb-run python3 phantom.py /tmp/test.html /tmp/out.pdf /tmp/test.js
|
||||
|
||||
... you will get a PDF file /tmp/out.pdf with the contents "foo bar baz".
|
||||
|
||||
Note that the second occurrence of "foo" has been replaced by the web page's own
|
||||
script, and the third occurrence of "foo" by the external JS file.
|
||||
|
||||
|
||||
## License
|
||||
|
||||
Copyright 2017 Michael Karl Franzl
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
of the Software, and to permit persons to whom the Software is furnished to do
|
||||
so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import traceback
|
||||
import atexit
|
||||
from PyQt5.QtCore import QUrl
|
||||
from PyQt5.QtCore import QTimer
|
||||
from PyQt5.QtWidgets import QApplication
|
||||
from PyQt5.QtPrintSupport import QPrinter
|
||||
from PyQt5.QtWebEngineWidgets import QWebEnginePage as QWebPage
|
||||
|
||||
global LOG
|
||||
import logging
|
||||
import warnings
|
||||
warnings.filterwarnings('ignore')
|
||||
LOG = logging.getLogger()
|
||||
|
||||
def prepare(sdir='/tmp'):
    """Create the sample ``test.js`` and ``test.html`` files if missing.

    :param sdir: directory to create the files in; defaults to ``/tmp``,
        the location that used to be hard-coded.

    Existing files are left untouched. Each file written is reported on
    stderr, followed by a final newline.
    """
    samples = (
        ('test.js', """
document.getElementById('id2').innerHTML = "baz";
console.log("__PHANTOM_PY_DONE__");
"""),
        ('test.html', """
<html>
<body>
<p>foo <span id="id1">foo</span> <span id="id2">foo</span></p>
</body>
<script>
document.getElementById('id1').innerHTML = "bar";
</script>
</html>
"""),
    )
    for name, text in samples:
        sfile = os.path.join(sdir, name)
        if os.path.exists(sfile):
            continue
        with open(sfile, 'wt') as ofd:
            ofd.write(text)
        sys.stderr.write(f"wrote {sfile} ")
    sys.stderr.write("\n")
|
||||
|
||||
class Render(QWebPage):
    """Headless page that loads a URL, runs JavaScript and prints a PDF.

    Constructing an instance creates the ``QApplication``, starts loading
    the URL and enters the Qt event loop. The loop is left through
    :meth:`_exit`, which ends the process.

    :param url: anything ``QUrl.fromUserInput`` accepts.
    :param outfile: path of the PDF file to generate.
    :param jsfile: optional path of a JavaScript file to run after load.
    """

    def __init__(self, url, outfile, jsfile=None):
        # The application object is created before the page is initialised.
        self.app = QApplication(sys.argv)

        QWebPage.__init__(self)

        self.jsfile = jsfile
        self.outfile = outfile

        qurl = QUrl.fromUserInput(url)

        LOG.debug(f"phantom.py: URL= {qurl} OUTFILE={outfile} JSFILE= {jsfile}")

        # The PDF generation only happens when the special string __PHANTOM_PY_DONE__
        # is sent to console.log(). The following JS string will be executed by
        # default, when no external JavaScript file is specified.
        self.js_contents = "setTimeout(function() { console.log('__PHANTOM_PY_DONE__') }, 5000);"

        if jsfile:
            try:
                with open(self.jsfile) as f:
                    self.js_contents = f.read()
            except Exception:
                LOG.error(traceback.format_exc())
                self._exit(10)

        self.loadFinished.connect(self._loadFinished)
        self.load(qurl)
        self.javaScriptConsoleMessage = self._onConsoleMessage

        # A 10 second start-up watchdog used to live here behind ``if False:``;
        # it was never armed and has been dropped.

        self.app.exec_()

    def _onConsoleMessage(self, *args):
        """Handle a JavaScript console message.

        Called with either ``(level, text, lineno, filename)`` or
        ``(text, lineno, filename)``; only the last three values are used.
        ``__PHANTOM_PY_DONE__`` in the text triggers printing and
        ``__PHANTOM_PY_EXIT__`` exits with status 0.
        """
        txt, lineno, filename = args[-3:]
        LOG.debug(f"CONSOLE {lineno} {txt} {filename}")
        if "__PHANTOM_PY_DONE__" in txt:
            # If we get this magic string, it means that the external JS is done
            self._print()
        if "__PHANTOM_PY_EXIT__" in txt:
            self._exit(0)

    def _loadFinished(self, result):
        """Run the configured JavaScript once the page has loaded."""
        LOG.debug(f"phantom.py: Evaluating JS from {self.jsfile}")
        self.runJavaScript("document.documentElement.contentEditable=true")
        self.runJavaScript(self.js_contents)

    def _printer_callback(self, *args):
        """Result callback for ``print``: exit 1 if it reported ``False``, else 0."""
        self._exit(1 if args[0] is False else 0)

    def _print(self):
        """Print the page as an A4 PDF with 10 mm margins to ``self.outfile``."""
        printer = QPrinter()
        printer.setPageMargins(10, 10, 10, 10, QPrinter.Millimeter)
        printer.setPaperSize(QPrinter.A4)
        printer.setCreator("phantom.py by Michael Karl Franzl")
        printer.setOutputFormat(QPrinter.PdfFormat)
        printer.setOutputFileName(self.outfile)
        # print() is asynchronous and the printer must stay valid until the
        # result callback has run, so keep a reference on the instance.
        self._printer = printer
        self.print(printer, self._printer_callback)
        LOG.debug("phantom.py: Printed")

    def _exit(self, val):
        """Stop the Qt application and terminate the process with ``val``."""
        LOG.debug(f"phantom.py: Exiting with val {val}")

        # Single-shot timer that would call sys.exit(val) after 10 seconds.
        # NOTE(review): sys.exit() below raises straight away, so this timer
        # probably never fires - confirm whether it is still needed.
        watchdog = QTimer()
        watchdog.setSingleShot(True)
        watchdog.timeout.connect(lambda: sys.exit(val))
        watchdog.start(10000)
        self.app.exit(val)
        # NOTE(review): atexit._clear() is a private function that drops the
        # registered exit handlers - confirm this is intended.
        atexit._clear()
        sys.exit(val)
|
||||
|
||||
def main():
    """Command-line entry point: ``./phantom.py <url> <pdf-file> [<javascript-file>]``.

    With fewer than two arguments the usage text is written to stderr and
    the process exits with status 2. It used to be logged at INFO level and
    exit 0, so it was never shown unless logging had been configured.
    Otherwise a :class:`Render` is constructed from the arguments, and the
    process exits with status 0 if the constructor returns.
    """
    if len(sys.argv) < 3:
        sys.stderr.write("USAGE: ./phantom.py <url> <pdf-file> [<javascript-file>]\n")
        sys.exit(2)

    url = sys.argv[1]
    outfile = sys.argv[2]
    jsfile = sys.argv[3] if len(sys.argv) > 3 else None
    Render(url, outfile, jsfile)

    sys.exit(0)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -27,9 +27,8 @@ LOG = logging.getLogger()
|
|||
|
||||
bHAVE_TORR = shutil.which('tor-resolve')
|
||||
|
||||
# maybe we should check these each time but we
|
||||
# got them by sorting bad relays in the wild
|
||||
# we'll keep a copy here
|
||||
# we check these each time but we got them by sorting bad relays
|
||||
# in the wild; we'll keep a copy here so we can avoid retesting
|
||||
yKNOWN_NODNS = """
|
||||
---
|
||||
- 0x0.is
|
||||
|
@ -50,6 +49,7 @@ yKNOWN_NODNS = """
|
|||
- or.wowplanet.de
|
||||
- ormycloud.org
|
||||
- plied-privacy.net
|
||||
- rivacysvcs.net
|
||||
- redacted.org
|
||||
- rification-for-nusenu.net
|
||||
- rofl.cat
|
||||
|
|
|
@ -140,7 +140,11 @@ def get_controller(address='127.0.0.1', port=9151, password=''):
|
|||
|
||||
return controller
|
||||
|
||||
def find_validation_candidates(controller, trusted_domains=[],validation_cache=[],accept_all=False):
|
||||
def find_validation_candidates(controller,
|
||||
trusted_domains=[],
|
||||
validation_cache=[],
|
||||
CAfile='/etc/ssl/certs/ca-certificates.crt',
|
||||
accept_all=False):
|
||||
'''
|
||||
connect to a tor client via controlport and return a dict of all
|
||||
not yet validated fingerprints per trusted operators
|
||||
|
@ -529,6 +533,7 @@ def configure_tor(controller, trusted_fingerprints, exitonly=True):
|
|||
|
||||
|
||||
if __name__ == '__main__':
|
||||
CAfile = '/etc/ssl/certs/ca-certificates.crt'
|
||||
trust_config = 'trust_config'
|
||||
assert os.path.exists(trust_config)
|
||||
trusted_domains = read_local_trust_config(trust_config)
|
||||
|
@ -546,7 +551,8 @@ if __name__ == '__main__':
|
|||
|
||||
r = find_validation_candidates(controller,
|
||||
validation_cache=trusted_fingerprints,
|
||||
trusted_domains=trusted_domains)
|
||||
trusted_domains=trusted_domains,
|
||||
CAfile=CAfile)
|
||||
validate_proofs(r, validation_cache_file,
|
||||
timeout=timeout,
|
||||
host=controller_address,
|
||||
|
|
Loading…
Reference in a new issue