From 28d1d34dbdbbfc785ca8c6ecad5f2b3cc12c5c8d Mon Sep 17 00:00:00 2001 From: emdee Date: Mon, 14 Nov 2022 12:00:23 +0000 Subject: [PATCH] Added lookupdns.py --- lookupdns.py | 79 ++++++++++++++++ phantompy.py | 260 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 339 insertions(+) create mode 100644 lookupdns.py create mode 100644 phantompy.py diff --git a/lookupdns.py b/lookupdns.py new file mode 100644 index 0000000..d44bdda --- /dev/null +++ b/lookupdns.py @@ -0,0 +1,79 @@ +#!/usr/local/bin/python3.sh +# -*-mode: python; indent-tabs-mode: nil; py-indent-offset: 4; coding: utf-8 -* + +import sys +import os +import traceback + +from phantompy import Render + +global LOG +import logging +import warnings +warnings.filterwarnings('ignore') +LOG = logging.getLogger() + +class LookFor(Render): + + def __init__(self, url, outfile, jsfile=None): + self.uri = url + Render.__init__(self, url, outfile, jsfile) + + def ilookfor(self, html): + import json + marker = '
'
+      if marker not in html: return ''
+      i = html.find(marker) + len(marker)
+      html = html[i:]
+      assert html[0] == '{', html
+      i = html.find('  []")
+  else:
+    url = sys.argv[1]
+    outfile = sys.argv[2]
+    jsfile = sys.argv[3] if len(sys.argv) > 3 else None
+    r = LookFor(url, outfile, jsfile)
+
+  sys.exit(0)
+
+if __name__ == "__main__":
+  main()
+
diff --git a/phantompy.py b/phantompy.py
new file mode 100644
index 0000000..d813a1a
--- /dev/null
+++ b/phantompy.py
@@ -0,0 +1,260 @@
+#!/usr/local/bin/python3.sh
+# -*-mode: python; indent-tabs-mode: nil; py-indent-offset: 4; coding: utf-8 -*-
+# https://gist.github.com/michaelfranzl/91f0cc13c56120391b949f885643e974/raw/a0601515e7a575bc4c7d4d2a20973b29b6c6f2df/phantom.py
+"""
+# phantom.py
+
+Simple but fully scriptable headless QtWebKit browser using PyQt5 in Python3,
+specialized in executing external JavaScript and generating PDF files. A lean
+replacement for other bulky headless browser frameworks.
+
+
+## Usage
+
+If you have a display attached:
+
+    ./phantom.py <url> <pdf-file> [<javascript-file>]
+    
+If you don't have a display attached (i.e. on a remote server):
+
+    xvfb-run ./phantom.py <url> <pdf-file> [<javascript-file>]
+
+Arguments:
+
+<url> Can be a http(s) URL or a path to a local file
+<pdf-file> Path and name of PDF file to generate
+[<javascript-file>] (optional) Path and name of a JavaScript file to execute
+
+
+## Features
+
+* Generate a PDF screenshot of the web page after it is completely loaded.
+* Optionally execute a local JavaScript file specified by the argument
+   <javascript-file> after the web page is completely loaded, and before
+   the PDF is generated.
+* console.log's will be printed to stdout.
+* Easily add new features by changing the source code of this script, without
+   compiling C++ code. For more advanced applications, consider attaching
+   PyQt objects/methods to WebKit's JavaScript space by using
+   `QWebFrame::addToJavaScriptWindowObject()`.
+
+If you execute an external <javascript-file>, phantom.py has no way of knowing
+when that script has finished doing its work. For this reason, the external
+script should execute `console.log("__PHANTOM_PY_DONE__");` when done. This will
+trigger the PDF generation, after which phantom.py will exit. The 10-second
+watchdog that would make phantom.py exit without doing anything when no
+`__PHANTOM_PY_DONE__` string is seen on the console is currently disabled
+(`if False:` in `Render.__init__`), so phantom.py keeps waiting until the
+marker appears. This behavior could be implemented more
+elegantly without console.log's but it is the simplest solution.
+
+It is important to remember that since you're just running WebKit, you can use
+everything that WebKit supports, including the usual JS client libraries, CSS,
+CSS @media types, etc.
+
+
+## Dependencies
+
+* Python3
+* PyQt5
+* xvfb (optional for display-less machines)
+
+Installation of dependencies in Debian Stretch is easy:
+
+    apt-get install xvfb python3-pyqt5 python3-pyqt5.qtwebkit
+    
+Finding the equivalent for other OSes is an exercise that I leave to you.
+
+
+## Examples
+
+Given the following file /tmp/test.html
+
+    <html>
+      <body>
+        <p>foo <span id="id1">foo</span> <span id="id2">foo</span></p>
+      </body>
+      <script>
+        document.getElementById('id1').innerHTML = "bar";
+      </script>
+    </html>
+
+... and the following file /tmp/test.js:
+
+    document.getElementById('id2').innerHTML = "baz";
+    console.log("__PHANTOM_PY_DONE__");
+
+... and running this script (without attached display) ...
+
+    xvfb-run python3 phantom.py /tmp/test.html /tmp/out.pdf /tmp/test.js
+
+... you will get a PDF file /tmp/out.pdf with the contents "foo bar baz".
+
+Note that the second occurrence of "foo" has been replaced by the web page's own
+script, and the third occurrence of "foo" by the external JS file.
+
+
+## License
+
+Copyright 2017 Michael Karl Franzl
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+""" + +import sys +import os +import traceback +import atexit +from PyQt5.QtCore import QUrl +from PyQt5.QtCore import QTimer +from PyQt5.QtWidgets import QApplication +from PyQt5.QtPrintSupport import QPrinter +from PyQt5.QtWebEngineWidgets import QWebEnginePage as QWebPage + +global LOG +import logging +import warnings +warnings.filterwarnings('ignore') +LOG = logging.getLogger() + +def prepare(): + sfile = '/tmp/test.js' + if not os.path.exists(sfile): + with open(sfile, 'wt') as ofd: + ofd.write(""" + document.getElementById('id2').innerHTML = "baz"; + console.log("__PHANTOM_PY_DONE__"); +""") + sys.stderr.write(f"wrote {sfile} ") + sfile = '/tmp/test.html' + if not os.path.exists(sfile): + with open(sfile, 'wt') as ofd: + ofd.write(""" + + +

foo foo foo

+ + + +""") + sys.stderr.write(f"wrote {sfile} ") + sys.stderr.write("\n") + +class Render(QWebPage): + def __init__(self, url, outfile, jsfile=None): + self.app = QApplication(sys.argv) + + QWebPage.__init__(self) + + self.jsfile = jsfile + self.outfile = outfile + + qurl = QUrl.fromUserInput(url) + + LOG.debug(f"phantom.py: URL= {qurl} OUTFILE={outfile} JSFILE= {jsfile)") + + # The PDF generation only happens when the special string __PHANTOM_PY_DONE__ + # is sent to console.log(). The following JS string will be executed by + # default, when no external JavaScript file is specified. + self.js_contents = "setTimeout(function() { console.log('__PHANTOM_PY_DONE__') }, 5000);"; + + if jsfile: + try: + f = open(self.jsfile) + self.js_contents = f.read() + f.close() + except: + LOG.error(traceback.format_exc()) + self._exit(10) + + self.loadFinished.connect(self._loadFinished) + self.load(qurl) + self.javaScriptConsoleMessage = self._onConsoleMessage + + if False: + # Run for a maximum of 10 seconds + watchdog = QTimer() + watchdog.setSingleShot(True) + watchdog.timeout.connect(lambda: self._exit(9)) + watchdog.start(10000) + + self.app.exec_() + + def _onConsoleMessage(self, *args): + if len(args) > 3: + level, txt, lineno, filename = args + else: + level = 1 + txt, lineno, filename = args + LOG.debug(f"CONSOLE {lineno} {txt} {filename}") + if "__PHANTOM_PY_DONE__" in txt: + # If we get this magic string, it means that the external JS is done + self._print() + if "__PHANTOM_PY_EXIT__" in txt: + self._exit(0) + + def _loadFinished(self, result): + LOG.debug(f"phantom.py: Evaluating JS from {self.jsfile}") + self.runJavaScript("document.documentElement.contentEditable=true") + self.runJavaScript(self.js_contents) + + def _printer_callback(self, *args): + """print(self, QPrinter, Callable[[bool], None])""" + # print(f"_printer_callback {self.outfile} {args}") + if args[0] is False: + i = 1 + else: + i = 0 + self._exit(i) + + def _print(self): + printer = QPrinter() + 
printer.setPageMargins(10, 10, 10, 10, QPrinter.Millimeter) + printer.setPaperSize(QPrinter.A4) + printer.setCreator("phantom.py by Michael Karl Franzl") + printer.setOutputFormat(QPrinter.PdfFormat); + printer.setOutputFileName(self.outfile); + self.print(printer, self._printer_callback) + LOG.debug("phantom.py: Printed") + + def _exit(self, val): + LOG.debug(f"phantom.py: Exiting with val {val}") + + # Run for a maximum of 10 seconds + watchdog = QTimer() + watchdog.setSingleShot(True) + watchdog.timeout.connect(lambda: sys.exit(val)) + watchdog.start(10000) + self.app.exit(val) + atexit._clear() + sys.exit(val) + +def main(): + if (len(sys.argv) < 3): + LOG.info("USAGE: ./phantom.py []") + else: + url = sys.argv[1] + outfile = sys.argv[2] + jsfile = sys.argv[3] if len(sys.argv) > 3 else None + r = Render(url, outfile, jsfile) + + sys.exit(0) + +if __name__ == "__main__": + main() +