From c883788ccac23e77a90ea5f1980da97331581eeb Mon Sep 17 00:00:00 2001 From: Artem Sheremet Date: Thu, 31 Mar 2016 12:38:52 +0200 Subject: [PATCH 1/7] Replace "if self.logger: ..." with default logger --- webkit2png/webkit2png.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/webkit2png/webkit2png.py b/webkit2png/webkit2png.py index e6a2b21..4c8d0b7 100755 --- a/webkit2png/webkit2png.py +++ b/webkit2png/webkit2png.py @@ -23,6 +23,7 @@ # - Add QTcpSocket support to create a "screenshot daemon" that # can handle multiple requests at the same time. +import logging import time import os @@ -31,6 +32,8 @@ from PyQt4.QtWebKit import * from PyQt4.QtNetwork import * +defaultLogger = logging.getLogger("webkit2png") + # Class for Website-Rendering. Uses QWebPage, which # requires a running QtGui to work. class WebkitRenderer(QObject): @@ -60,8 +63,8 @@ def __init__(self,**kwargs): self.scaleToHeight = kwargs.get('scaleToHeight', 0) self.scaleRatio = kwargs.get('scaleRatio', 'keep') self.format = kwargs.get('format', 'png') - self.logger = kwargs.get('logger', None) - + self.logger = kwargs.get('logger', defaultLogger) + # Set this to true if you want to capture flash. # Not that your desktop must be large enough for # fitting the whole window. @@ -220,7 +223,7 @@ def render(self, res): # Wait for end of timer. In this time, process # other outstanding Qt events. 
if self.wait > 0: - if self.logger: self.logger.debug("Waiting %d seconds " % self.wait) + self.logger.debug("Waiting %d seconds " % self.wait) waitToTime = time.time() + self.wait while time.time() < waitToTime: if QApplication.hasPendingEvents(): @@ -296,10 +299,9 @@ def _load_page(self, res, width, height, timeout): while QApplication.hasPendingEvents() and self.__loading: QCoreApplication.processEvents() - if self.logger: self.logger.debug("Processing result") + self.logger.debug("Processing result") if self.__loading_result == False: - if self.logger: self.logger.warning("Failed to load %s" % res) # Set initial viewport (the size of the "window") size = self._page.mainFrame().contentsSize() @@ -310,6 +312,7 @@ def _load_page(self, res, width, height, timeout): size.setHeight(height) self._window.resize(size) + self.logger.warning("Failed to load %s" % res) def _post_process_image(self, qImage): """ @@ -341,7 +344,7 @@ def _on_load_started(self): """ Slot that sets the '__loading' property to true """ - if self.logger: self.logger.debug("loading started") + self.logger.debug("loading started") self.__loading = True # Eventhandler for "loadFinished(bool)" signal @@ -349,7 +352,7 @@ def _on_load_finished(self, result): """Slot that sets the '__loading' property to false and stores the result code in '__loading_result'. """ - if self.logger: self.logger.debug("loading finished with result %s", result) + self.logger.debug("loading finished with result %s", result) self.__loading = False self.__loading_result = result @@ -359,7 +362,7 @@ def _on_ssl_errors(self, reply, errors): Slot that writes SSL warnings into the log but ignores them. 
""" for e in errors: - if self.logger: self.logger.warn("SSL: " + e.errorString()) + self.logger.warning("SSL: " + e.errorString()) reply.ignoreSslErrors() @@ -369,19 +372,19 @@ def __init__(self, **kwargs): Class Initializer """ super(CustomWebPage, self).__init__() - self.logger = kwargs.get('logger', None) + self.logger = kwargs.get('logger', defaultLogger) self.ignore_alert = kwargs.get('ignore_alert', True) self.ignore_confirm = kwargs.get('ignore_confirm', True) self.ignore_prompt = kwargs.get('ignore_prompt', True) self.interrupt_js = kwargs.get('interrupt_js', True) def javaScriptAlert(self, frame, message): - if self.logger: self.logger.debug('Alert: %s', message) + self.logger.debug('Alert: %s', message) if not self.ignore_alert: return super(CustomWebPage, self).javaScriptAlert(frame, message) def javaScriptConfirm(self, frame, message): - if self.logger: self.logger.debug('Confirm: %s', message) + self.logger.debug('Confirm: %s', message) if not self.ignore_confirm: return super(CustomWebPage, self).javaScriptConfirm(frame, message) else: @@ -398,7 +401,7 @@ def javaScriptPrompt(self, frame, message, default, result): If the prompt was not cancelled by the user, the implementation should return true and the result string must not be null. """ - if self.logger: self.logger.debug('Prompt: %s (%s)' % (message, default)) + self.logger.debug('Prompt: %s (%s)' % (message, default)) if not self.ignore_prompt: return super(CustomWebPage, self).javaScriptPrompt(frame, message, default, result) else: @@ -409,5 +412,5 @@ def shouldInterruptJavaScript(self): This function is called when a JavaScript program is running for a long period of time. If the user wanted to stop the JavaScript the implementation should return true; otherwise false. 
""" - if self.logger: self.logger.debug("WebKit ask to interrupt JavaScript") + self.logger.debug("WebKit ask to interrupt JavaScript") return self.interrupt_js From 81460b13e5a386cbffca52fea1ec293b7c0f06aa Mon Sep 17 00:00:00 2001 From: Artem Sheremet Date: Thu, 31 Mar 2016 12:41:43 +0200 Subject: [PATCH 2/7] Add time.sleep() in busy-wait loops to relieve CPU --- webkit2png/webkit2png.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/webkit2png/webkit2png.py b/webkit2png/webkit2png.py index 4c8d0b7..e3dcbb9 100755 --- a/webkit2png/webkit2png.py +++ b/webkit2png/webkit2png.py @@ -226,6 +226,7 @@ def render(self, res): self.logger.debug("Waiting %d seconds " % self.wait) waitToTime = time.time() + self.wait while time.time() < waitToTime: + time.sleep(0.1) if QApplication.hasPendingEvents(): QApplication.processEvents() @@ -294,6 +295,7 @@ def _load_page(self, res, width, height, timeout): self._page.mainFrame().load(qtUrl) while self.__loading: + time.sleep(0.1) if timeout > 0 and time.time() >= cancelAt: raise RuntimeError("Request timed out on %s" % res) while QApplication.hasPendingEvents() and self.__loading: From 791a576bfcf14fbce4dbd362284b5a901ddaed9a Mon Sep 17 00:00:00 2001 From: Artem Sheremet Date: Thu, 31 Mar 2016 12:43:32 +0200 Subject: [PATCH 3/7] Propagate cookies to all requests (e.g. images) --- webkit2png/webkit2png.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/webkit2png/webkit2png.py b/webkit2png/webkit2png.py index e3dcbb9..3c8ac1c 100755 --- a/webkit2png/webkit2png.py +++ b/webkit2png/webkit2png.py @@ -285,7 +285,11 @@ def _load_page(self, res, width, height, timeout): qtUrl = QUrl(url) # Set the required cookies, if any - self.cookieJar = CookieJar(self.cookies, qtUrl) + urlWithoutPath = QUrl(qtUrl) + # Reset the path to root: QT will only serve the cookie + # to the child components of the initial path. 
+ urlWithoutPath.setPath("/") + self.cookieJar = CookieJar(self.cookies, urlWithoutPath) self._page.networkAccessManager().setCookieJar(self.cookieJar) # Load the page From 9fbc3605da36e20d4df49e4e97bf39b208ff258f Mon Sep 17 00:00:00 2001 From: Artem Sheremet Date: Thu, 31 Mar 2016 12:44:38 +0200 Subject: [PATCH 4/7] Do not abort on rendering timeout The page is often useful even if some background tracking script failed to load on time. --- webkit2png/webkit2png.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/webkit2png/webkit2png.py b/webkit2png/webkit2png.py index 3c8ac1c..75eee70 100755 --- a/webkit2png/webkit2png.py +++ b/webkit2png/webkit2png.py @@ -152,6 +152,8 @@ def __init__(self, parent): """ QObject.__init__(self) + self.__loading_result = None + # Copy properties from parent for key,value in parent.__dict__.items(): setattr(self,key,value) @@ -301,7 +303,8 @@ def _load_page(self, res, width, height, timeout): while self.__loading: time.sleep(0.1) if timeout > 0 and time.time() >= cancelAt: - raise RuntimeError("Request timed out on %s" % res) + self.logger.warning("Request timed out on %s" % res) + break while QApplication.hasPendingEvents() and self.__loading: QCoreApplication.processEvents() From 96f0a9cfc6056674d39a70bf96a50c18d51691f2 Mon Sep 17 00:00:00 2001 From: Artem Sheremet Date: Thu, 31 Mar 2016 12:47:23 +0200 Subject: [PATCH 5/7] Allow overriding User-Agent header --- webkit2png/webkit2png.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/webkit2png/webkit2png.py b/webkit2png/webkit2png.py index 75eee70..968ce70 100755 --- a/webkit2png/webkit2png.py +++ b/webkit2png/webkit2png.py @@ -76,6 +76,7 @@ def __init__(self,**kwargs): self.interruptJavaScript = kwargs.get('interruptJavaScript', True) self.encodedUrl = kwargs.get('encodedUrl', False) self.cookies = kwargs.get('cookies', []) + self.userAgent = kwargs.get('userAgent', 'WebKit2HTML/1.0') # Set some default options for QWebPage 
self.qWebSettings = { @@ -178,7 +179,7 @@ def __init__(self, parent): # Create and connect required PyQt4 objects self._page = CustomWebPage(logger=self.logger, ignore_alert=self.ignoreAlert, ignore_confirm=self.ignoreConfirm, ignore_prompt=self.ignorePrompt, - interrupt_js=self.interruptJavaScript) + interrupt_js=self.interruptJavaScript, userAgent=self.userAgent) self._page.networkAccessManager().setProxy(proxy) self._view = QWebView() self._view.setPage(self._page) @@ -386,6 +387,10 @@ def __init__(self, **kwargs): self.ignore_confirm = kwargs.get('ignore_confirm', True) self.ignore_prompt = kwargs.get('ignore_prompt', True) self.interrupt_js = kwargs.get('interrupt_js', True) + self.userAgent = kwargs['userAgent'] + + def userAgentForUrl(self, url): + return self.userAgent def javaScriptAlert(self, frame, message): self.logger.debug('Alert: %s', message) From dc22a129f71392606373a5c4a7f15b2d6ee21832 Mon Sep 17 00:00:00 2001 From: Artem Sheremet Date: Thu, 31 Mar 2016 12:47:59 +0200 Subject: [PATCH 6/7] Resize the window only after "delay" is reached There could be some javascript that resizes components while we're waiting for "delay" - need to let it do the job. --- webkit2png/webkit2png.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/webkit2png/webkit2png.py b/webkit2png/webkit2png.py index 968ce70..8f9c4fc 100755 --- a/webkit2png/webkit2png.py +++ b/webkit2png/webkit2png.py @@ -222,7 +222,7 @@ def render(self, res): on the value of 'grabWholeWindow' is drawn into a QPixmap and postprocessed (_post_process_image). """ - self._load_page(res, self.width, self.height, self.timeout) + self._load_page(res, self.timeout) # Wait for end of timer. In this time, process # other outstanding Qt events. 
if self.wait > 0: @@ -233,6 +233,15 @@ def render(self, res): if QApplication.hasPendingEvents(): QApplication.processEvents() + # Set initial viewport (the size of the "window") + size = self._page.mainFrame().contentsSize() + self.logger.debug("contentsSize: %s x %s", size.width(), size.height()) + if self.width > 0: + size.setWidth(self.width) + if self.height > 0: + size.setHeight(self.height) + self._window.resize(size) + if self.renderTransparentBackground: # Another possible drawing solution image = QImage(self._page.viewportSize(), QImage.Format_ARGB32) @@ -260,7 +269,7 @@ def render(self, res): return self._post_process_image(image) - def _load_page(self, res, width, height, timeout): + def _load_page(self, res, timeout): """ This method implements the logic for retrieving and displaying the requested page. @@ -312,16 +321,6 @@ def _load_page(self, res, width, height, timeout): self.logger.debug("Processing result") if self.__loading_result == False: - - # Set initial viewport (the size of the "window") - size = self._page.mainFrame().contentsSize() - if self.logger: self.logger.debug("contentsSize: %s", size) - if width > 0: - size.setWidth(width) - if height > 0: - size.setHeight(height) - - self._window.resize(size) self.logger.warning("Failed to load %s" % res) def _post_process_image(self, qImage): From 98c891664a3ff2742c48df77e758ea6d877d6546 Mon Sep 17 00:00:00 2001 From: Artem Sheremet Date: Thu, 31 Mar 2016 12:49:16 +0200 Subject: [PATCH 7/7] Allow rendering only specific element Setting elementSelector allows selectively rendering a single page element (and its child elements), because the whole page is not always useful. 
--- webkit2png/webkit2png.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/webkit2png/webkit2png.py b/webkit2png/webkit2png.py index 8f9c4fc..2d89684 100755 --- a/webkit2png/webkit2png.py +++ b/webkit2png/webkit2png.py @@ -69,6 +69,7 @@ def __init__(self,**kwargs): # Not that your desktop must be large enough for # fitting the whole window. self.grabWholeWindow = kwargs.get('grabWholeWindow', False) + self.elementSelector = kwargs.get('elementSelector', None) self.renderTransparentBackground = kwargs.get('renderTransparentBackground', False) self.ignoreAlert = kwargs.get('ignoreAlert', True) self.ignoreConfirm = kwargs.get('ignoreConfirm', True) @@ -257,6 +258,26 @@ def render(self, res): painter.setBackgroundMode(Qt.TransparentMode) self._page.mainFrame().render(painter) painter.end() + elif self.elementSelector: + elementAndSize = self.elementSelector( + self._page.mainFrame().documentElement()) + if isinstance(elementAndSize, tuple): + (element, size) = elementAndSize + else: + (element, size) = (elementAndSize, + elementAndSize.geometry().size()) + if size.isEmpty(): + raise RuntimeError("Selected element is empty") + else: + self.logger.debug("Selected element size: %d x %d", + size.width(), + size.height()) + self._window.resize(size) + image = QImage(size, QImage.Format_ARGB32) + image.fill(QColor(255,0,0,0).rgba()) + painter = QPainter(image) + element.render(painter) + painter.end() else: if self.grabWholeWindow: # Note that this does not fully ensure that the