diff --git a/__main__.py b/__main__.py index 440bca5..6b975db 100644 --- a/__main__.py +++ b/__main__.py @@ -13,4 +13,5 @@ try: vsetup_logging(log_level, logfile='', stream=sys.stderr) except: pass -iMain(sys.argv[1:], bgui=False) +if __name__ == '__main__': + iMain(sys.argv[1:], bgui=False) diff --git a/lookupdns.py b/lookupdns.py index e792aec..962d928 100644 --- a/lookupdns.py +++ b/lookupdns.py @@ -1,9 +1,14 @@ #!/usr/local/bin/python3.sh # -*-mode: python; indent-tabs-mode: nil; py-indent-offset: 4; coding: utf-8 -* -# Looks for urls https://dns.google/resolve? -# and parses them to extract a magic field. -# https://dns.google/resolve?name=domain.name&type=TXT&cd=true&do=true +""" +Looks for urls https://dns.google/resolve? +https://dns.google/resolve?name=domain.name&type=TXT&cd=true&do=true +and parses them to extract a magic field. + +A good example of how you can parse json embedded in HTML with phantomjs. + +""" import sys import os @@ -17,7 +22,7 @@ warnings.filterwarnings('ignore') LOG = logging.getLogger() class LookFor(Render): - + def __init__(self, app, do_print=True, do_save=False): app.lfps = [] self._app = app @@ -37,7 +42,7 @@ class LookFor(Render): fp = fp[:i] # threadsafe? self._app.lfps.append(fp) - + def _html_callback(self, *args): """print(self, QPrinter, Callable[[bool], None])""" if type(args[0]) is str: @@ -72,8 +77,8 @@ class LookFor(Render): self.we_run_this_tor_relay = False LOG.warn(f"BAD {self.uri}") return 2 - + def _loadFinished(self, result): LOG.debug(f"phantom.py: Loading finished {self.uri}") self.toHtml(self._html_callback) - + diff --git a/phantompy.py b/phantompy.py index a0931f1..a2bff2d 100644 --- a/phantompy.py +++ b/phantompy.py @@ -13,8 +13,8 @@ replacement for other bulky headless browser frameworks. If you have a display attached: - ./phantom.py [--pdf_output ] [--js_input ] - + ./phantom.py [--pdf_output ] [--js_input ] + If you don't have a display attached (i.e. on a remote server), you can use xvfb-run, or don't add --show_gui - it should work without a display. @@ -64,7 +64,7 @@ CSS @media types, etc. Installation of dependencies in Debian Stretch is easy: apt-get install xvfb python3-pyqt5 python3-pyqt5.qtwebkit - + Finding the equivalent for other OSes is an exercise that I leave to you. @@ -80,16 +80,16 @@ Given the following file /tmp/test.html document.getElementById('id1').innerHTML = "bar"; - + ... and the following file /tmp/test.js: document.getElementById('id2').innerHTML = "baz"; console.log("__PHANTOM_PY_DONE__"); - + ... and running this script (without attached display) ... xvfb-run python3 phantom.py /tmp/test.html /tmp/out.pdf /tmp/test.js - + ... you will get a PDF file /tmp/out.pdf with the contents "foo bar baz". Note that the second occurrence of "foo" has been replaced by the web page's own @@ -130,8 +130,6 @@ from PyQt5.QtWidgets import QApplication from PyQt5.QtPrintSupport import QPrinter from PyQt5.QtWebEngineWidgets import QWebEnginePage -from support_phantompy import vsetup_logging - global LOG import logging import warnings @@ -161,19 +159,19 @@ def prepare(sdir='/tmp'): """) LOG.debug(f"wrote {sfile} ") - + class Render(QWebEnginePage): def __init__(self, app, do_print=False, do_save=True): - app.ldone = [] - self._app = app - self.do_print = do_print - self.do_save = do_save - self.percent = 0 - self.uri = None - self.jsfile = None - self.htmlfile = None - self.pdffile = None - QWebEnginePage.__init__(self) + app.ldone = [] + self._app = app + self.do_print = do_print + self.do_save = do_save + self.percent = 0 + self.uri = None + self.jsfile = None + self.htmlfile = None + self.pdffile = None + QWebEnginePage.__init__(self) def run(self, url, pdffile, htmlfile, jsfile): self._app.lstart.append(id(self)) @@ -184,64 +182,65 @@ class Render(QWebEnginePage): self.pdffile = pdffile self.outfile = pdffile or htmlfile LOG.debug(f"phantom.py: URL={url} OUTFILE={outfile} JSFILE={jsfile}") - qurl = QUrl.fromUserInput(url) - + qurl = QUrl.fromUserInput(url) + # The PDF generation only happens when the special string __PHANTOM_PY_DONE__ # is sent to console.log(). The following JS string will be executed by # default, when no external JavaScript file is specified. self.js_contents = "setTimeout(function() { console.log('__PHANTOM_PY_DONE__') }, 5000);"; - + if jsfile: try: with open(self.jsfile, 'rt') as f: self.js_contents = f.read() except Exception as e: LOG.exception(f"error reading jsfile {self.jsfile}") - + self.loadFinished.connect(self._loadFinished) self.percent = 20 self.load(qurl) self.javaScriptConsoleMessage = self._onConsoleMessage LOG.debug(f"phantom.py: loading 10") - + def _onConsoleMessage(self, *args): - if len(args) > 3: - level, txt, lineno, filename = args - else: - level = 1 - txt, lineno, filename = args - LOG.debug(f"CONSOLE {lineno} {txt} {filename}") - if "__PHANTOM_PY_DONE__" in txt: - self.percent = 40 - # If we get this magic string, it means that the external JS is done - if self.do_save: - self.toHtml(self._html_callback) - return - # drop through - txt = "__PHANTOM_PY_SAVED__" - if "__PHANTOM_PY_SAVED__" in txt: - self.percent = 50 - if self.do_print: - self._print() - return - txt = "__PHANTOM_PY_PRINTED__" - if "__PHANTOM_PY_PRINTED__" in txt: - self.percent = 60 - self._exit(level) - + if len(args) > 3: + level, txt, lineno, filename = args + else: + level = 1 + txt, lineno, filename = args + LOG.debug(f"CONSOLE {lineno} {txt} {filename}") + if "__PHANTOM_PY_DONE__" in txt: + self.percent = 40 + # If we get this magic string, it means that the external JS is done + if self.do_save: + self.toHtml(self._html_callback) + return + # drop through + txt = "__PHANTOM_PY_SAVED__" + if "__PHANTOM_PY_SAVED__" in txt: + self.percent = 50 + if self.do_print: + self._print() + return + txt = "__PHANTOM_PY_PRINTED__" + if "__PHANTOM_PY_PRINTED__" in txt: + self.percent = 60 + self._exit(level) + def _loadFinished(self, result): - self.percent = 30 - LOG.info(f"phantom.py: _loadFinished {result} {self.percent}") - LOG.debug(f"phantom.py: Evaluating JS from {self.jsfile}") - self.runJavaScript("document.documentElement.contentEditable=true") - self.runJavaScript(self.js_contents) + # RenderProcessTerminationStatus ? + self.percent = 30 + LOG.info(f"phantom.py: _loadFinished {result} {self.percent}") + LOG.debug(f"phantom.py: Evaluating JS from {self.jsfile}") + self.runJavaScript("document.documentElement.contentEditable=true") + self.runJavaScript(self.js_contents) def _html_callback(self, *args): """print(self, QPrinter, Callable[[bool], None])""" if type(args[0]) is str: self._save(args[0]) self._onConsoleMessage(0, "__PHANTOM_PY_SAVED__", 0 , '') - + def _save(self, html): sfile = self.htmlfile # CompleteHtmlSaveFormat SingleHtmlSaveFormat MimeHtmlSaveFormat @@ -267,7 +266,7 @@ class Render(QWebEnginePage): printer.setOutputFileName(sfile) self.print(printer, self._printer_callback) LOG.debug("phantom.py: Printed") - + def _exit(self, val): self.percent = 100 LOG.debug(f"phantom.py: Exiting with val {val}") diff --git a/qasync_phantompy.py b/qasync_phantompy.py index f510f8a..fdd850e 100644 --- a/qasync_phantompy.py +++ b/qasync_phantompy.py @@ -13,6 +13,7 @@ from PyQt5.QtWidgets import (QProgressBar, QWidget, QVBoxLayout) from phantompy import Render # from lookupdns import LookFor as Render +from support_phantompy import vsetup_logging, omain_argparser global LOG import logging @@ -35,7 +36,7 @@ class Widget(QtWidgets.QWidget): i = len(asyncio.all_tasks()) self._label.setText(str(i)) self.progress.setValue(int(text)) - + class ContextManager: def __init__(self) -> None: self._seconds = 0 @@ -63,25 +64,22 @@ async def main(widget, app, ilen): app.exit() # raise asyncio.CancelledError return - LOG.debug(f"{app.ldone} {perc} {seconds}") + LOG.debug(f"{app.ldone} {seconds}") except asyncio.CancelledError as ex: LOG.debug("Task cancelled") def iMain(largs): - parser = oMainArgparser() - oargs = parser.parse_args(lArgs) + parser = omain_argparser() + oargs = parser.parse_args(largs) bgui=oargs.show_gui try: - from support_phantompy import vsetup_logging d = int(os.environ.get('DEBUG', 0)) if d > 0: - vsetup_logging(10, stream=sys.stderr) - else: - vsetup_logging(oargs.log_level, stream=sys.stderr) - vsetup_logging(log_level, logfile='', stream=sys.stderr) + oargs.log_level = 10 + vsetup_logging(oargs.log_level, logfile='', stream=sys.stderr) except: pass - + app = QtWidgets.QApplication([]) app.lstart = [] if bgui: @@ -90,7 +88,7 @@ def iMain(largs): widget.show() else: widget = None - + loop = qasync.QEventLoop(app) asyncio.set_event_loop(loop) @@ -105,9 +103,9 @@ def iMain(largs): uri = url.strip() r.run(uri, pdffile, htmlfile, jsfile) LOG.debug(f"{r.percent} {app.lstart}") - + LOG.info(f"queued {len(app.lstart)} urls") - + task = loop.create_task(main(widget, app, 1)) loop.run_forever() @@ -117,6 +115,6 @@ def iMain(largs): loop.run_until_complete(asyncio.gather(*tasks)) if __name__ == '__main__': - + iMain(sys.argv[1:]) - + diff --git a/support_phantompy.py b/support_phantompy.py index 7286c36..c19968f 100644 --- a/support_phantompy.py +++ b/support_phantompy.py @@ -80,7 +80,7 @@ def vsetup_logging(log_level, logfile='', stream=sys.stdout): 'NOTSET': logging.NOTSET, } -def omain__argparser(_=None): +def omain_argparser(_=None): try: from OpenSSL import SSL @@ -106,9 +106,9 @@ def omain__argparser(_=None): help="Operate on the HTML file with javascript") parser.add_argument('--html_output', type=str, default='', help="Write loaded and javascripted result to a HTML file") - parser.add_argument('--pdf_output', type=str, default=''), + parser.add_argument('--pdf_output', type=str, default='', help="Write loaded and javascripted result to a PDF file") - parser.add_argument('--show_gui', type=bool, store_action=True), + parser.add_argument('--show_gui', type=bool, default=False, store_action=True), help="show a progress meter that doesn't work") parser.add_argument('html_url', type=str, nargs='?', required=True,