added argparse
This commit is contained in:
		
							parent
							
								
									71672da7af
								
							
						
					
					
						commit
						1cb4e53cce
					
				
					 4 changed files with 78 additions and 52 deletions
				
			
		| 
						 | 
				
			
			@ -41,7 +41,7 @@ Qt picks up proxies from the environment, so this will respect
 | 
			
		|||
* Python3
 | 
			
		||||
* PyQt5 (this should work with PySide2 and PyQt6 - let us know.)
 | 
			
		||||
* [qasync](https://github.com/CabbageDevelopment/qasync) for the
 | 
			
		||||
  standalone program ```qasync_lookup.py```
 | 
			
		||||
  standalone program ```qasync_phantompy.py```
 | 
			
		||||
 | 
			
		||||
## Standalone
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										52
									
								
								phantompy.py
									
										
									
									
									
								
							
							
						
						
									
										52
									
								
								phantompy.py
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -13,17 +13,18 @@ replacement for other bulky headless browser frameworks.
 | 
			
		|||
 | 
			
		||||
If you have a display attached:
 | 
			
		||||
 | 
			
		||||
    ./phantom.py <url> <pdf-file> [<javascript-file>]
 | 
			
		||||
 ./phantom.py [--pdf_output <pdf-file>] [--js_input <javascript-file>] <url-or-html-file> 
 | 
			
		||||
    
 | 
			
		||||
If you don't have a display attached (i.e. on a remote server):
 | 
			
		||||
 | 
			
		||||
    xvfb-run ./phantom.py <url> <pdf-file> [<javascript-file>]
 | 
			
		||||
If you don't have a display attached (i.e. on a remote server), you can use
 | 
			
		||||
xvfb-run, or simply omit --show_gui - it should then work without a display.
 | 
			
		||||
 | 
			
		||||
Arguments:
 | 
			
		||||
 | 
			
		||||
[--pdf_output <pdf-file>] (optional) Path and name of PDF file to generate
 | 
			
		||||
[--html_output <html-file>] (optional) Path and name of HTML file to generate
 | 
			
		||||
[--js_input <javascript-file>] (optional) Path and name of a JavaScript file to execute
 | 
			
		||||
--log_level 10=debug 20=info 30=warn 40=error
 | 
			
		||||
<url> Can be a http(s) URL or a path to a local file
 | 
			
		||||
<pdf-file> Path and name of PDF file to generate
 | 
			
		||||
[<javascript-file>] (optional) Path and name of a JavaScript file to execute
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
## Features
 | 
			
		||||
| 
						 | 
				
			
			@ -55,6 +56,9 @@ CSS @media types, etc.
 | 
			
		|||
 | 
			
		||||
* Python3
 | 
			
		||||
* PyQt5
 | 
			
		||||
* [qasync](https://github.com/CabbageDevelopment/qasync) for the
 | 
			
		||||
  standalone program ```qasync_phantompy.py```
 | 
			
		||||
 | 
			
		||||
* xvfb (optional for display-less machines)
 | 
			
		||||
 | 
			
		||||
Installation of dependencies in Debian Stretch is easy:
 | 
			
		||||
| 
						 | 
				
			
			@ -167,15 +171,18 @@ class Render(QWebEnginePage):
 | 
			
		|||
    self.percent = 0
 | 
			
		||||
    self.uri = None
 | 
			
		||||
    self.jsfile = None
 | 
			
		||||
    self.outfile = None
 | 
			
		||||
    self.htmlfile = None
 | 
			
		||||
    self.pdffile = None
 | 
			
		||||
    QWebEnginePage.__init__(self)
 | 
			
		||||
 | 
			
		||||
  def run(self, url, outfile, jsfile):
 | 
			
		||||
  def run(self, url, pdffile, htmlfile, jsfile):
 | 
			
		||||
    self._app.lstart.append(id(self))
 | 
			
		||||
    self.percent = 10
 | 
			
		||||
    self.uri = url
 | 
			
		||||
    self.jsfile = jsfile
 | 
			
		||||
    self.outfile = outfile
 | 
			
		||||
    self.htmlfile = htmlfile
 | 
			
		||||
    self.pdffile = pdffile
 | 
			
		||||
    self.outfile = pdffile or htmlfile
 | 
			
		||||
    LOG.debug(f"phantom.py: URL={url} OUTFILE={outfile} JSFILE={jsfile}")
 | 
			
		||||
    qurl = QUrl.fromUserInput(url)    
 | 
			
		||||
    
 | 
			
		||||
| 
						 | 
				
			
			@ -236,7 +243,7 @@ class Render(QWebEnginePage):
 | 
			
		|||
        self._onConsoleMessage(0, "__PHANTOM_PY_SAVED__", 0 , '')
 | 
			
		||||
        
 | 
			
		||||
  def _save(self, html):
 | 
			
		||||
    sfile = self.outfile.replace('.pdf','.html')
 | 
			
		||||
    sfile = self.htmlfile
 | 
			
		||||
    # CompleteHtmlSaveFormat SingleHtmlSaveFormat MimeHtmlSaveFormat
 | 
			
		||||
    with open(sfile, 'wt') as ofd:
 | 
			
		||||
        ofd.write(html)
 | 
			
		||||
| 
						 | 
				
			
			@ -244,7 +251,6 @@ class Render(QWebEnginePage):
 | 
			
		|||
 | 
			
		||||
  def _printer_callback(self, *args):
 | 
			
		||||
    """print(self, QPrinter, Callable[[bool], None])"""
 | 
			
		||||
    # print(f"_printer_callback {self.outfile} {args}")
 | 
			
		||||
    if args[0] is False:
 | 
			
		||||
        i = 1
 | 
			
		||||
    else:
 | 
			
		||||
| 
						 | 
				
			
			@ -252,7 +258,7 @@ class Render(QWebEnginePage):
 | 
			
		|||
    self._onConsoleMessage(i, "__PHANTOM_PY_PRINTED__", 0 , '')
 | 
			
		||||
 | 
			
		||||
  def _print(self):
 | 
			
		||||
    sfile = self.outfile.replace('.html', '.pdf')
 | 
			
		||||
    sfile = self.pdffile
 | 
			
		||||
    printer = QPrinter()
 | 
			
		||||
    printer.setPageMargins(10, 10, 10, 10, QPrinter.Millimeter)
 | 
			
		||||
    printer.setPaperSize(QPrinter.A4)
 | 
			
		||||
| 
						 | 
				
			
			@ -268,25 +274,3 @@ class Render(QWebEnginePage):
 | 
			
		|||
      # threadsafe?
 | 
			
		||||
      self._app.ldone.append(self.uri)
 | 
			
		||||
 | 
			
		||||
def omain(app, largs):
 | 
			
		||||
    if (len(largs) < 2):
 | 
			
		||||
        LOG.info("USAGE: ./phantom.py <url> <pdf-file> [<javascript-file>]")
 | 
			
		||||
        return -1
 | 
			
		||||
    
 | 
			
		||||
    url = largs[0]
 | 
			
		||||
    outfile = largs[1]
 | 
			
		||||
    jsfile = largs[2] if len(largs) > 2 else None
 | 
			
		||||
    ilen = 1
 | 
			
		||||
 | 
			
		||||
    r = Render(app, do_print=False, do_save=True)
 | 
			
		||||
    r.run(url, outfile, jsfile)
 | 
			
		||||
    for i in range(1, 120):
 | 
			
		||||
        app.processEvents()
 | 
			
		||||
        print(f"{app.ldone} {i}")
 | 
			
		||||
        if len(app.ldone) == ilen:
 | 
			
		||||
            print(f"{app.ldone} found {ilen}")
 | 
			
		||||
            app.exit()
 | 
			
		||||
            return r
 | 
			
		||||
        time.sleep(1)
 | 
			
		||||
    return r
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -67,7 +67,21 @@ async def main(widget, app, ilen):
 | 
			
		|||
    except asyncio.CancelledError as ex:
 | 
			
		||||
        LOG.debug("Task cancelled")
 | 
			
		||||
 | 
			
		||||
def iMain(largs, bgui=True):
 | 
			
		||||
def iMain(largs):
 | 
			
		||||
    parser = oMainArgparser()
 | 
			
		||||
    oargs = parser.parse_args(lArgs)
 | 
			
		||||
    bgui=oargs.show_gui
 | 
			
		||||
 | 
			
		||||
    try:
 | 
			
		||||
        from support_phantompy import vsetup_logging
 | 
			
		||||
        d = int(os.environ.get('DEBUG', 0))
 | 
			
		||||
        if d > 0:
 | 
			
		||||
            vsetup_logging(10, stream=sys.stderr)
 | 
			
		||||
        else:
 | 
			
		||||
            vsetup_logging(oargs.log_level, stream=sys.stderr)
 | 
			
		||||
        vsetup_logging(log_level, logfile='', stream=sys.stderr)
 | 
			
		||||
    except: pass
 | 
			
		||||
    
 | 
			
		||||
    app = QtWidgets.QApplication([])
 | 
			
		||||
    app.lstart = []
 | 
			
		||||
    if bgui:
 | 
			
		||||
| 
						 | 
				
			
			@ -80,14 +94,16 @@ def iMain(largs, bgui=True):
 | 
			
		|||
    loop = qasync.QEventLoop(app)
 | 
			
		||||
    asyncio.set_event_loop(loop)
 | 
			
		||||
 | 
			
		||||
    largs = sys.argv[1:]
 | 
			
		||||
    url = largs[0]
 | 
			
		||||
    outfile = largs[1]
 | 
			
		||||
    jsfile = largs[2] if len(largs) > 2 else None
 | 
			
		||||
    url = oargs.html_url
 | 
			
		||||
    htmlfile = oargs.html_output
 | 
			
		||||
    pdffile = oargs.html_output
 | 
			
		||||
    jsfile = oargs.js_input
 | 
			
		||||
    # run only starts the url loading
 | 
			
		||||
    r = Render(app, do_print=False, do_save=True)
 | 
			
		||||
    r = Render(app,
 | 
			
		||||
               do_print=True if pdffile else False,
 | 
			
		||||
               do_save=True if htmlfile else False)
 | 
			
		||||
    uri = url.strip()
 | 
			
		||||
    r.run(uri, outfile, jsfile)
 | 
			
		||||
    r.run(uri, pdffile, htmlfile, jsfile)
 | 
			
		||||
    LOG.debug(f"{r.percent} {app.lstart}")
 | 
			
		||||
    
 | 
			
		||||
    LOG.info(f"queued {len(app.lstart)} urls")
 | 
			
		||||
| 
						 | 
				
			
			@ -101,15 +117,6 @@ def iMain(largs, bgui=True):
 | 
			
		|||
    loop.run_until_complete(asyncio.gather(*tasks))
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
    try:
 | 
			
		||||
        from exclude_badExits import vsetup_logging
 | 
			
		||||
        d = int(os.environ.get('DEBUG', 0))
 | 
			
		||||
        if d > 0:
 | 
			
		||||
            vsetup_logging(10, stream=sys.stderr)
 | 
			
		||||
        else:
 | 
			
		||||
            vsetup_logging(20, stream=sys.stderr)
 | 
			
		||||
        vsetup_logging(log_level, logfile='', stream=sys.stderr)
 | 
			
		||||
    except: pass
 | 
			
		||||
    
 | 
			
		||||
    iMain(sys.argv[1:], bgui=False)
 | 
			
		||||
    iMain(sys.argv[1:])
 | 
			
		||||
    
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -3,6 +3,7 @@
 | 
			
		|||
 | 
			
		||||
import sys
 | 
			
		||||
import os
 | 
			
		||||
import argparse
 | 
			
		||||
 | 
			
		||||
try:
 | 
			
		||||
    if 'COLOREDLOGS_LEVEL_STYLES' not in os.environ:
 | 
			
		||||
| 
						 | 
				
			
			@ -79,3 +80,37 @@ def vsetup_logging(log_level, logfile='', stream=sys.stdout):
 | 
			
		|||
            'NOTSET': logging.NOTSET,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
def omain__argparser(_=None):
    """Build the command-line argument parser.

    The single parameter is accepted and ignored.

    Options defined:
      --https_cafile  Certificate Authority file in PEM format (unused);
                      defaults to the first existing path reported by
                      pyOpenSSL, or '' when none is found.
      --log_level     integer, 10=debug 20=info 30=warn 40=error (default 20)
      --js_input      JavaScript file to run against the loaded page
      --html_output   HTML file to write the result to
      --pdf_output    PDF file to write the result to
      --show_gui      flag, off by default
      html_url        required positional: HTML file or URL

    Returns:
        argparse.ArgumentParser: the configured parser; the caller is
        expected to invoke ``parse_args`` on it.
    """
    # Best-effort probe for a default CA bundle: pyOpenSSL may be missing,
    # and the attribute read here is private, so any failure just means
    # "no known locations" rather than an error.
    try:
        from OpenSSL import SSL
        lCAfs = SSL._CERTIFICATE_FILE_LOCATIONS
    except Exception:
        lCAfs = []

    # Keep only the candidate bundles that actually exist on this machine.
    CAfs = [elt for elt in lCAfs if os.path.exists(elt)]
    if not CAfs:
        CAfs = ['']

    parser = argparse.ArgumentParser(add_help=True,
                                     epilog=__doc__)
    parser.add_argument('--https_cafile', type=str,
                        help="Certificate Authority file (in PEM) (unused)",
                        default=CAfs[0])
    parser.add_argument('--log_level', type=int, default=20,
                        help="10=debug 20=info 30=warn 40=error")
    parser.add_argument('--js_input', type=str, default='',
                        help="Operate on the HTML file with javascript")
    parser.add_argument('--html_output', type=str, default='',
                        help="Write loaded and javascripted result to a HTML file")
    parser.add_argument('--pdf_output', type=str, default='',
                        help="Write loaded and javascripted result to a PDF file")
    # A boolean switch must use action='store_true'; that action takes no
    # 'type', and 'store_action' is not an argparse keyword.
    parser.add_argument('--show_gui', action='store_true', default=False,
                        help="show a progress meter that doesn't work")
    # Positionals are mandatory by default. argparse rejects 'required' for
    # them, and nargs='?' would have made the URL optional.
    parser.add_argument('html_url', type=str,
                        help='html file or url')
    return parser
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue