From 48bed127e171e82f40045b36405cdc6939bb277a Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Fri, 10 Jan 2020 13:11:35 +0000 Subject: [PATCH 01/28] [thumbnails] New, thumbnail computation for charts and dashboards --- docs/installation.rst | 14 ++ superset/__init__.py | 1 + superset/cli.py | 70 ++++++++ superset/tasks/thumbnails.py | 43 +++++ superset/utils/cache_manager.py | 7 + superset/utils/selenium.py | 278 ++++++++++++++++++++++++++++++++ 6 files changed, 413 insertions(+) create mode 100644 superset/tasks/thumbnails.py create mode 100644 superset/utils/selenium.py diff --git a/docs/installation.rst b/docs/installation.rst index e9b8cbd94830..4a3059fa3a70 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -635,6 +635,20 @@ section in `config.py`: This will cache all the charts in the top 5 most popular dashboards every hour. For other strategies, check the `superset/tasks/cache.py` file. +Caching Thumbnails +------------------ + +Thumbnails are store on cache, an example config could be: + +.. code-block:: python + + def init_thumbnail_cache(app): + return RedisCache(host='localhost', port=6379, key_prefix='superset_thumbnails_') + + + THUMBNAIL_CACHE_CONFIG = init_thumbnail_cache + +Using the above example cache keys for dashboards will be `superset_thumbnails_thumb__dashboard__{ID}` Deeper SQLAlchemy integration ----------------------------- diff --git a/superset/__init__.py b/superset/__init__.py index feffead963e4..3d5b31152ec8 100644 --- a/superset/__init__.py +++ b/superset/__init__.py @@ -51,3 +51,4 @@ lambda: results_backend_manager.should_use_msgpack ) tables_cache = LocalProxy(lambda: cache_manager.tables_cache) +thumbnail_cache = LocalProxy(lambda: cache_manager.thumbnail_cache) diff --git a/superset/cli.py b/superset/cli.py index 13aa713d59ac..64b61251ed32 100755 --- a/superset/cli.py +++ b/superset/cli.py @@ -435,6 +435,76 @@ def flower(port, address): Popen(cmd, shell=True).wait() +@superset.command() +@with_appcontext +@click.option( + "--asynchronous", + "-a", + is_flag=True, + default=False, + help="Trigger commands to run remotely on a worker", +) +@click.option( + "--dashboards_only", + "-d", + is_flag=True, + default=False, + help="Only process dashboards", +) +@click.option( + "--charts_only", "-c", is_flag=True, default=False, help="Only process charts" +) +@click.option( + "--force", + "-f", + is_flag=True, + default=False, + help="Force refresh, even if previously cached", +) +@click.option("--id", "-i", multiple=True) +def compute_thumbnails(asynchronous, dashboards_only, charts_only, force, id): + """Compute thumbnails""" + from superset.models.dashboard import Dashboard + from superset.models.slice import Slice + from superset.tasks.thumbnails import ( + cache_chart_thumbnail, + cache_dashboard_thumbnail, + ) + if not charts_only: + query = db.session.query(Dashboard) + if id: + query = query.filter(Dashboard.id.in_(id)) + dashboards = query.all() + count = len(dashboards) + for i, dash in enumerate(dashboards): + if asynchronous: + func = cache_dashboard_thumbnail.delay + action = "Triggering" + else: + func = cache_dashboard_thumbnail + action = "Processing" + msg = f'{action} dashboard "{dash.dashboard_title}" ({i+1}/{count})' + click.secho(msg, fg="green") + func(dash.id, force=force) + + if not dashboards_only: + query = db.session.query(Slice) + if id: + query = query.filter(Slice.id.in_(id)) + slices = query.all() + count = len(slices) + for i, slc in enumerate(slices): + if asynchronous: + func = cache_chart_thumbnail.delay + action = "Triggering" + else: + func = cache_chart_thumbnail + action = "Processing" + msg = f'{action} chart "{slc.slice_name}" ({i+1}/{count})' + click.secho(msg, fg="green") + func(slc.id, force=force) + + @superset.command() @with_appcontext def load_test_users(): diff --git a/superset/tasks/thumbnails.py b/superset/tasks/thumbnails.py new file mode 100644 index 000000000000..48ac2b59df8c --- /dev/null +++ b/superset/tasks/thumbnails.py @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=C,R,W + +"""Utility functions used across Superset""" + +import logging + +from superset import app, security_manager, thumbnail_cache +from superset.tasks.celery_app import app as celery_app +from superset.utils.selenium import DashboardScreenshot, SliceScreenshot + + +@celery_app.task(name="cache_chart_thumbnail", soft_time_limit=300) +def cache_chart_thumbnail(chart_id, force=False): + with app.app_context(): + logging.info(f"Caching chart {chart_id}") + screenshot = SliceScreenshot(id=chart_id) + user = security_manager.find_user("Admin") + screenshot.compute_and_cache(user=user, cache=thumbnail_cache, force=force) + + +@celery_app.task(name="cache_dashboard_thumbnail", soft_time_limit=300) +def cache_dashboard_thumbnail(dashboard_id, force=False): + with app.app_context(): + logging.info(f"Caching dashboard {dashboard_id}") + screenshot = DashboardScreenshot(id=dashboard_id) + user = security_manager.find_user("Admin") + screenshot.compute_and_cache(user=user, cache=thumbnail_cache, force=force) diff --git a/superset/utils/cache_manager.py b/superset/utils/cache_manager.py index cfbeb349978f..2ada869e01fe 100644 --- a/superset/utils/cache_manager.py +++ b/superset/utils/cache_manager.py @@ -26,12 +26,15 @@ def __init__(self) -> None: self._tables_cache = None self._cache = None + self._thumbnail_cache = None def init_app(self, app): self._cache = self._setup_cache(app, app.config.get("CACHE_CONFIG")) self._tables_cache = self._setup_cache( app, app.config.get("TABLE_NAMES_CACHE_CONFIG") ) + self._thumbnail_cache = self._setup_cache( + app, app.config.get("THUMBNAIL_CACHE_CONFIG")) @staticmethod def _setup_cache(app: Flask, cache_config) -> Optional[Cache]: @@ -54,3 +57,7 @@ def tables_cache(self): @property def cache(self): return self._cache + + @property + def thumbnail_cache(self): + return self._thumbnail_cache diff --git a/superset/utils/selenium.py b/superset/utils/selenium.py new file mode 100644 index 000000000000..0278ce6b6f0b --- /dev/null +++ b/superset/utils/selenium.py @@ -0,0 +1,278 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=C,R,W +from io import BytesIO +import logging +import time +import urllib + +from flask import current_app, request, Response, session, url_for +from flask_login import login_user +from PIL import Image +from retry.api import retry_call +from selenium.common.exceptions import TimeoutException, WebDriverException +from selenium.webdriver import chrome, firefox +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import WebDriverWait +from werkzeug.http import parse_cookie + +# Time in seconds, we will wait for the page to load and render +SELENIUM_CHECK_INTERVAL = 2 +SELENIUM_RETRIES = 5 +SELENIUM_HEADSTART = 3 + + +def headless_url(path): + return urllib.parse.urljoin(current_app.config.get("WEBDRIVER_BASEURL"), path) + + +def get_url_path(view, **kwargs): + with current_app.test_request_context(): + return headless_url(url_for(view, **kwargs)) + + +class BaseScreenshot: + thumbnail_type = None + orm_class = None + element = None + window_size = (800, 600) + thumb_size = (400, 300) + + def __init__(self, id): + self.id = id + self.screenshot = None + + @property + def cache_key(self): + return f"thumb__{self.thumbnail_type}__{self.id}" + + @property + def url(self): + raise NotImplementedError() + + def fetch_screenshot(self, user, window_size=None): + window_size = window_size or self.window_size + self.screenshot = get_png_from_url( + self.url, window_size, self.element, user=user + ) + return self.screenshot + + def get_thumb_as_bytes(self, *args, **kwargs): + payload = self.get_thumb(*args, **kwargs) + return BytesIO(payload) + + def get_from_cache(self, cache): + payload = cache.get(self.cache_key) + if payload: + return BytesIO(payload) + + def compute_and_cache( + self, user, cache, window_size=None, thumb_size=None, force=True + ): + cache_key = self.cache_key + if not cache: + logging.error("No cache set, refusing to compute") + return + if not force and cache.get(cache_key): + logging.info("Thumb already cached, skipping...") + return + window_size = window_size or self.window_size + thumb_size = thumb_size or self.thumb_size + logging.info(f"Processing url for thumbnail: {cache_key}") + + payload = None + + # Assuming all sorts of things can go wrong with Selenium + try: + payload = self.fetch_screenshot(window_size=window_size, user=user) + except Exception as e: + logging.error("Failed at generating thumbnail") + logging.exception(e) + + if payload and window_size != thumb_size: + try: + payload = self.resize_image(payload, size=thumb_size) + except Exception as e: + logging.error("Failed at resizing thumbnail") + logging.exception(e) + payload = None + + if payload and cache: + logging.info(f"Caching thumbnail: {cache_key}") + cache.set(cache_key, payload) + + return payload + + def get_thumb(self, user, window_size=None, thumb_size=None, cache=None): + payload = None + cache_key = self.cache_key + window_size = window_size or self.window_size + thumb_size = thumb_size or self.thumb_size + if cache: + payload = cache.get(cache_key) + if not payload: + payload = self.compute_and_cache(user, cache, window_size, thumb_size) + else: + logging.info(f"Loaded thumbnail from cache: {cache_key}") + return payload + + @classmethod + def resize_image(cls, img_bytes, output="png", size=None, crop=True): + size = size or cls.thumb_size + img = Image.open(BytesIO(img_bytes)) + logging.debug(f"Selenium image size: {img.size}") + if crop and img.size[1] != cls.window_size[1]: + desired_ratio = float(cls.window_size[1]) / cls.window_size[0] + desired_width = int(img.size[0] * desired_ratio) + logging.debug(f"Cropping to: {img.size[0]}*{desired_width}") + img = img.crop((0, 0, img.size[0], desired_width)) + logging.debug(f"Resizing to {size}") + img = img.resize(size, Image.ANTIALIAS) + new_img = BytesIO() + if output != "png": + img = img.convert("RGB") + img.save(new_img, output) + new_img.seek(0) + return new_img.read() + + +class SliceScreenshot(BaseScreenshot): + thumbnail_type = "slice" + element = "chart-container" + window_size = (600, int(600 * 0.75)) + thumb_size = (300, int(300 * 0.75)) + + @property + def url(self): + return get_url_path("Superset.slice", slice_id=self.id, standalone="true") + + +class DashboardScreenshot(BaseScreenshot): + thumbnail_type = "dashboard" + element = "grid-container" + window_size = (1600, int(1600 * 0.75)) + thumb_size = (400, int(400 * 0.75)) + + @property + def url(self): + return get_url_path("Superset.dashboard", dashboard_id=self.id) + + +def _destroy_webdriver(driver): + """Destroy a driver""" + # This is some very flaky code in selenium. Hence the retries + # and catch-all exceptions + try: + retry_call(driver.close, tries=2) + except Exception: + pass + try: + driver.quit() + except Exception: + pass + + +def get_auth_cookies(user): + # Login with the user specified to get the reports + with current_app.test_request_context(): + login_user(user) + + # A mock response object to get the cookie information from + response = Response() + current_app.session_interface.save_session(current_app, session, response) + + cookies = [] + + # Set the cookies in the driver + for name, value in response.headers: + if name.lower() == "set-cookie": + cookie = parse_cookie(value) + cookies.append(cookie["session"]) + return cookies + + +def create_webdriver(user=None, webdriver="chrome", window=None): + """Creates a selenium webdriver + + If no user is specified, we use the current request's context""" + # Create a webdriver for use in fetching reports + if webdriver == "firefox": + driver_class = firefox.webdriver.WebDriver + options = firefox.options.Options() + else: + # webdriver == 'chrome': + driver_class = chrome.webdriver.WebDriver + options = chrome.options.Options() + arg = f"--window-size={window[0]},{window[1]}" + options.add_argument(arg) + + options.add_argument("--headless") + + # Prepare args for the webdriver init + kwargs = dict(options=options) + kwargs.update(current_app.config.get("WEBDRIVER_CONFIGURATION")) + + logging.info("Init selenium driver") + driver = driver_class(**kwargs) + + # Setting cookies requires doing a request first + driver.get(headless_url("/login/")) + + if user: + # Set the cookies in the driver + for cookie in get_auth_cookies(user): + info = dict(name="session", value=cookie) + driver.add_cookie(info) + elif request.cookies: + cookies = request.cookies + for k, v in cookies.items(): + cookie = dict(name=k, value=v) + driver.add_cookie(cookie) + return driver + + +def get_png_from_url( + url, window, element, user, webdriver="chrome", retries=SELENIUM_RETRIES +): + driver = create_webdriver(user, webdriver, window) + driver.set_window_size(*window) + driver.get(url) + img = None + logging.debug(f"Sleeping for {SELENIUM_HEADSTART} seconds") + time.sleep(SELENIUM_HEADSTART) + try: + logging.debug(f"Wait for the presence of {element}") + element = WebDriverWait(driver, 10).until( + EC.presence_of_element_located((By.CLASS_NAME, element)) + ) + logging.debug(f"Wait for .loading to be done") + WebDriverWait(driver, 60).until_not( + EC.presence_of_all_elements_located((By.CLASS_NAME, "loading")) + ) + logging.info("Taking a PNG screenshot") + img = element.screenshot_as_png + except TimeoutException: + logging.error("Selenium timed out") + except WebDriverException as e: + logging.exception(e) + # Some webdrivers do not support screenshots for elements. + # In such cases, take a screenshot of the entire page. + img = driver.screenshot() # pylint: disable=no-member + finally: + _destroy_webdriver(driver) + return img From 420470e4f6a6355837f1494f1328b46ecf1a231f Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Fri, 10 Jan 2020 16:40:52 +0000 Subject: [PATCH 02/28] [thumbnails] Initial working API with cache --- superset/cli.py | 1 + superset/models/dashboard.py | 20 ++++++++++++++ superset/utils/cache_manager.py | 3 +- superset/utils/selenium.py | 10 +++---- superset/views/dashboard/api.py | 49 ++++++++++++++++++++++++++++++++- 5 files changed, 76 insertions(+), 7 deletions(-) diff --git a/superset/cli.py b/superset/cli.py index 64b61251ed32..d91f86db6f2e 100755 --- a/superset/cli.py +++ b/superset/cli.py @@ -470,6 +470,7 @@ def compute_thumbnails(asynchronous, dashboards_only, charts_only, force, id): cache_chart_thumbnail, cache_dashboard_thumbnail, ) + if not charts_only: query = db.session.query(Dashboard) if id: diff --git a/superset/models/dashboard.py b/superset/models/dashboard.py index 74094d6ee3e0..9e7e592dbced 100644 --- a/superset/models/dashboard.py +++ b/superset/models/dashboard.py @@ -179,6 +179,26 @@ def dashboard_link(self) -> Markup: title = escape(self.dashboard_title or "") return Markup(f'{title}') + @property + def thumbnail_url(self): + # SHA here is to force bypassing the browser cache when chart has changed + sha = utils.md5_hex(self.params, 6) + return f"/api/v1/dashboard/thumbnail/{self.id}/{sha}/" + + @property + def thumbnail_img(self): + return Markup(f'') + + @property + def thumbnail_link(self): + return Markup( + f""" + + {self.thumbnail_img} + + """ + ) + @property def changed_by_name(self): if not self.changed_by: diff --git a/superset/utils/cache_manager.py b/superset/utils/cache_manager.py index 2ada869e01fe..e32c98104470 100644 --- a/superset/utils/cache_manager.py +++ b/superset/utils/cache_manager.py @@ -34,7 +34,8 @@ def init_app(self, app): app, app.config.get("TABLE_NAMES_CACHE_CONFIG") ) self._thumbnail_cache = self._setup_cache( - app, app.config.get("THUMBNAIL_CACHE_CONFIG")) + app, app.config.get("THUMBNAIL_CACHE_CONFIG") + ) @staticmethod def _setup_cache(app: Flask, cache_config) -> Optional[Cache]: diff --git a/superset/utils/selenium.py b/superset/utils/selenium.py index 0278ce6b6f0b..913f0459b7c9 100644 --- a/superset/utils/selenium.py +++ b/superset/utils/selenium.py @@ -18,6 +18,7 @@ from io import BytesIO import logging import time +from typing import Optional import urllib from flask import current_app, request, Response, session, url_for @@ -47,9 +48,8 @@ def get_url_path(view, **kwargs): class BaseScreenshot: - thumbnail_type = None - orm_class = None - element = None + thumbnail_type: Optional[str] = None + element: Optional[str] = None window_size = (800, 600) thumb_size = (400, 300) @@ -82,7 +82,7 @@ def get_from_cache(self, cache): return BytesIO(payload) def compute_and_cache( - self, user, cache, window_size=None, thumb_size=None, force=True + self, user, cache, window_size=None, thumb_size=None, force=True ): cache_key = self.cache_key if not cache: @@ -247,7 +247,7 @@ def create_webdriver(user=None, webdriver="chrome", window=None): def get_png_from_url( - url, window, element, user, webdriver="chrome", retries=SELENIUM_RETRIES + url, window, element, user, webdriver="chrome", retries=SELENIUM_RETRIES ): driver = create_webdriver(user, webdriver, window) driver.set_window_size(*window) diff --git a/superset/views/dashboard/api.py b/superset/views/dashboard/api.py index 134506b2acd4..5d48e7977f31 100644 --- a/superset/views/dashboard/api.py +++ b/superset/views/dashboard/api.py @@ -17,16 +17,19 @@ import json import re -from flask import current_app, g, request +from flask import current_app, g, request, Response from flask_appbuilder.api import expose, protect, safe from flask_appbuilder.models.sqla.interface import SQLAInterface from marshmallow import fields, post_load, pre_load, Schema, ValidationError from marshmallow.validate import Length from sqlalchemy.exc import SQLAlchemyError +from werkzeug.wsgi import FileWrapper +from superset import thumbnail_cache from superset.exceptions import SupersetException from superset.models.dashboard import Dashboard from superset.utils import core as utils +from superset.utils.selenium import DashboardScreenshot from superset.views.base import ( BaseSupersetModelRestApi, BaseSupersetSchema, @@ -169,6 +172,7 @@ class DashboardRestApi(DashboardMixin, BaseSupersetModelRestApi): method_permission_name = { "get_list": "list", "get": "show", + "thumbnail": "list", "post": "add", "put": "edit", "delete": "delete", @@ -356,3 +360,46 @@ def delete(self, item): # pylint: disable=arguments-differ return self.response(200, message="OK") except SQLAlchemyError as e: return self.response_422(message=str(e)) + + @expose("//thumbnail//", methods=["GET"]) + @protect() + @safe + def thumbnail(self, pk, sha): # pylint: disable=invalid-name + """Delete Dashboard + --- + get: + parameters: + - in: path + schema: + type: integer + name: pk + - in: path + schema: + type: string + name: sha + responses: + 200: + description: Dashboard thumbnail image + content: + image/*: + schema: + type: string + format: binary + 401: + $ref: '#/components/responses/401' + 404: + $ref: '#/components/responses/404' + 422: + $ref: '#/components/responses/422' + 500: + $ref: '#/components/responses/500' + """ + sha = 1 + query = self.datamodel.session.query(Dashboard) + dashboard = self._base_filters.apply_all(query).get(pk) + if not dashboard: + return self.response_404() + screenshot = DashboardScreenshot(pk).get_from_cache(thumbnail_cache) + return Response( + FileWrapper(screenshot), mimetype="image/png", direct_passthrough=True + ) From 4f153fe806275643aeff39c37e2d900af5ed956a Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Mon, 13 Jan 2020 13:11:45 +0000 Subject: [PATCH 03/28] [thumbnails] type annotation and API --- superset/cli.py | 50 ++++++++++----------- superset/models/dashboard.py | 2 +- superset/utils/selenium.py | 77 +++++++++++++++++++++++++-------- superset/views/dashboard/api.py | 5 ++- 4 files changed, 86 insertions(+), 48 deletions(-) diff --git a/superset/cli.py b/superset/cli.py index d91f86db6f2e..48d1739ede2a 100755 --- a/superset/cli.py +++ b/superset/cli.py @@ -19,6 +19,7 @@ from datetime import datetime from subprocess import Popen from sys import stdout +from typing import Union import click import yaml @@ -461,8 +462,14 @@ def flower(port, address): default=False, help="Force refresh, even if previously cached", ) -@click.option("--id", "-i", multiple=True) -def compute_thumbnails(asynchronous, dashboards_only, charts_only, force, id): +@click.option("--model_id", "-i", multiple=True) +def compute_thumbnails( + asynchronous: bool, + dashboards_only: bool, + charts_only: bool, + force: bool, + model_id: int, +): """Compute thumbnails""" from superset.models.dashboard import Dashboard from superset.models.slice import Slice @@ -471,39 +478,30 @@ def compute_thumbnails(asynchronous, dashboards_only, charts_only, force, id): cache_dashboard_thumbnail, ) - if not charts_only: - query = db.session.query(Dashboard) - if id: - query = query.filter(Dashboard.id.in_(id)) + def compute_generic_thumbnail( + friendly_type: str, model_cls: Union[Dashboard, Slice], model_id: int, func + ): + query = db.session.query(model_cls) + if model_id: + query = query.filter(model_cls.id.in_(model_id)) dashboards = query.all() count = len(dashboards) - for i, dash in enumerate(dashboards): + for i, model in enumerate(dashboards): if asynchronous: - func = cache_dashboard_thumbnail.delay + func = func.delay action = "Triggering" else: - func = cache_dashboard_thumbnail action = "Processing" - msg = f'{action} dashboard "{dash.dashboard_title}" ({i+1}/{count})' + msg = f'{action} {friendly_type} "{model}" ({i+1}/{count})' click.secho(msg, fg="green") - func(dash.id, force=force) + func(model.id, force=force) + if not charts_only: + compute_generic_thumbnail( + "dashboard", Dashboard, model_id, cache_dashboard_thumbnail + ) if not dashboards_only: - query = db.session.query(Slice) - if id: - query = query.filter(Slice.id.in_(id)) - slices = query.all() - count = len(slices) - for i, slc in enumerate(slices): - if asynchronous: - func = cache_chart_thumbnail.delay - action = "Triggering" - else: - func = cache_chart_thumbnail - action = "Processing" - msg = f'{action} chart "{slc.slice_name}" ({i+1}/{count})' - click.secho(msg, fg="green") - func(slc.id, force=force) + compute_generic_thumbnail("chart", Slice, model_id, cache_chart_thumbnail) @superset.command() diff --git a/superset/models/dashboard.py b/superset/models/dashboard.py index 9e7e592dbced..a9b836f7462f 100644 --- a/superset/models/dashboard.py +++ b/superset/models/dashboard.py @@ -182,7 +182,7 @@ def dashboard_link(self) -> Markup: @property def thumbnail_url(self): # SHA here is to force bypassing the browser cache when chart has changed - sha = utils.md5_hex(self.params, 6) + # sha = utils.md5_hex(self.params, 6) return f"/api/v1/dashboard/thumbnail/{self.id}/{sha}/" @property diff --git a/superset/utils/selenium.py b/superset/utils/selenium.py index 913f0459b7c9..72374a607afc 100644 --- a/superset/utils/selenium.py +++ b/superset/utils/selenium.py @@ -15,11 +15,11 @@ # specific language governing permissions and limitations # under the License. # pylint: disable=C,R,W -from io import BytesIO import logging import time -from typing import Optional import urllib +from io import BytesIO +from typing import Optional, Tuple, TYPE_CHECKING from flask import current_app, request, Response, session, url_for from flask_login import login_user @@ -32,6 +32,11 @@ from selenium.webdriver.support.ui import WebDriverWait from werkzeug.http import parse_cookie +if TYPE_CHECKING: + from flask_appbuilder.security.sqla.models import User + from flask_caching import Cache + + # Time in seconds, we will wait for the page to load and render SELENIUM_CHECK_INTERVAL = 2 SELENIUM_RETRIES = 5 @@ -65,7 +70,7 @@ def cache_key(self): def url(self): raise NotImplementedError() - def fetch_screenshot(self, user, window_size=None): + def fetch_screenshot(self, user, window_size: Tuple[int, int] = None): window_size = window_size or self.window_size self.screenshot = get_png_from_url( self.url, window_size, self.element, user=user @@ -76,21 +81,35 @@ def get_thumb_as_bytes(self, *args, **kwargs): payload = self.get_thumb(*args, **kwargs) return BytesIO(payload) - def get_from_cache(self, cache): + def get_from_cache(self, cache: "Cache"): payload = cache.get(self.cache_key) if payload: return BytesIO(payload) def compute_and_cache( - self, user, cache, window_size=None, thumb_size=None, force=True - ): + self, + user: "User" = None, + cache: "Cache" = None, + window_size: Tuple[int, int] = None, + thumb_size: Tuple[int, int] = None, + force: bool = True, + ) -> Optional[bytes]: + """ + Fetches the screenshot, computes the thumbnail and caches the result + :param user: If no user is given will use the current context + :param cache: The cache to keep the thumbnail payload + :param window_size: The window size from which will process the thumb + :param thumb_size: The final thumbnail size + :param force: Will for the computation even if it's already cached + :return: Image payload + """ cache_key = self.cache_key if not cache: logging.error("No cache set, refusing to compute") - return + return None if not force and cache.get(cache_key): logging.info("Thumb already cached, skipping...") - return + return None window_size = window_size or self.window_size thumb_size = thumb_size or self.thumb_size logging.info(f"Processing url for thumbnail: {cache_key}") @@ -115,10 +134,16 @@ def compute_and_cache( if payload and cache: logging.info(f"Caching thumbnail: {cache_key}") cache.set(cache_key, payload) - + logging.error(type(payload)) return payload - def get_thumb(self, user, window_size=None, thumb_size=None, cache=None): + def get_thumb( + self, + user: "User" = None, + window_size: Tuple[int, int] = None, + thumb_size: Tuple[int, int] = None, + cache: "Cache" = None, + ): payload = None cache_key = self.cache_key window_size = window_size or self.window_size @@ -132,7 +157,13 @@ def get_thumb(self, user, window_size=None, thumb_size=None, cache=None): return payload @classmethod - def resize_image(cls, img_bytes, output="png", size=None, crop=True): + def resize_image( + cls, + img_bytes: bytes, + output: str = "png", + size: Tuple[int, int] = None, + crop: bool = True, + ): size = size or cls.thumb_size img = Image.open(BytesIO(img_bytes)) logging.debug(f"Selenium image size: {img.size}") @@ -158,7 +189,7 @@ class SliceScreenshot(BaseScreenshot): thumb_size = (300, int(300 * 0.75)) @property - def url(self): + def url(self) -> str: return get_url_path("Superset.slice", slice_id=self.id, standalone="true") @@ -169,7 +200,7 @@ class DashboardScreenshot(BaseScreenshot): thumb_size = (400, int(400 * 0.75)) @property - def url(self): + def url(self) -> str: return get_url_path("Superset.dashboard", dashboard_id=self.id) @@ -187,7 +218,7 @@ def _destroy_webdriver(driver): pass -def get_auth_cookies(user): +def get_auth_cookies(user: "User"): # Login with the user specified to get the reports with current_app.test_request_context(): login_user(user) @@ -206,11 +237,14 @@ def get_auth_cookies(user): return cookies -def create_webdriver(user=None, webdriver="chrome", window=None): +def create_webdriver( + user: "User" = None, webdriver: str = "chrome", window: Tuple[int, int] = None +): """Creates a selenium webdriver If no user is specified, we use the current request's context""" # Create a webdriver for use in fetching reports + window = window or (800, 600) if webdriver == "firefox": driver_class = firefox.webdriver.WebDriver options = firefox.options.Options() @@ -225,7 +259,7 @@ def create_webdriver(user=None, webdriver="chrome", window=None): # Prepare args for the webdriver init kwargs = dict(options=options) - kwargs.update(current_app.config.get("WEBDRIVER_CONFIGURATION")) + kwargs.update(current_app.config["WEBDRIVER_CONFIGURATION"]) logging.info("Init selenium driver") driver = driver_class(**kwargs) @@ -247,7 +281,12 @@ def create_webdriver(user=None, webdriver="chrome", window=None): def get_png_from_url( - url, window, element, user, webdriver="chrome", retries=SELENIUM_RETRIES + url: str, + window: Tuple[int, int], + element_name: Optional[str], + user: "User", + webdriver: str = "chrome", + retries: int = SELENIUM_RETRIES, ): driver = create_webdriver(user, webdriver, window) driver.set_window_size(*window) @@ -256,9 +295,9 @@ def get_png_from_url( logging.debug(f"Sleeping for {SELENIUM_HEADSTART} seconds") time.sleep(SELENIUM_HEADSTART) try: - logging.debug(f"Wait for the presence of {element}") + logging.debug(f"Wait for the presence of {element_name}") element = WebDriverWait(driver, 10).until( - EC.presence_of_element_located((By.CLASS_NAME, element)) + EC.presence_of_element_located((By.CLASS_NAME, element_name)) ) logging.debug(f"Wait for .loading to be done") WebDriverWait(driver, 60).until_not( diff --git a/superset/views/dashboard/api.py b/superset/views/dashboard/api.py index 5d48e7977f31..8528db698afb 100644 --- a/superset/views/dashboard/api.py +++ b/superset/views/dashboard/api.py @@ -16,6 +16,7 @@ # under the License. import json import re +from io import BytesIO from flask import current_app, g, request, Response from flask_appbuilder.api import expose, protect, safe @@ -365,7 +366,7 @@ def delete(self, item): # pylint: disable=arguments-differ @protect() @safe def thumbnail(self, pk, sha): # pylint: disable=invalid-name - """Delete Dashboard + """Get Dashboard thumbnail --- get: parameters: @@ -399,7 +400,7 @@ def thumbnail(self, pk, sha): # pylint: disable=invalid-name dashboard = self._base_filters.apply_all(query).get(pk) if not dashboard: return self.response_404() - screenshot = DashboardScreenshot(pk).get_from_cache(thumbnail_cache) + screenshot = BytesIO(DashboardScreenshot(pk).get_thumb(cache=thumbnail_cache)) return Response( FileWrapper(screenshot), mimetype="image/png", direct_passthrough=True ) From b1c886351f4485ea582d8f2d757bda64a0e3b8c7 Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Mon, 13 Jan 2020 14:40:27 +0000 Subject: [PATCH 04/28] [thumbnails] Add Pillow dependency --- requirements.txt | 1 + setup.py | 1 + superset/models/dashboard.py | 2 +- superset/utils/selenium.py | 1 - 4 files changed, 3 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index ba259416143d..fc4682d6e7d7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -55,6 +55,7 @@ numpy==1.17.0 # via pandas, pyarrow pandas==0.24.2 parsedatetime==2.4 pathlib2==2.3.4 +pillow==7.0.0 polyline==1.4.0 prison==0.1.2 # via flask-appbuilder py==1.8.0 # via retry diff --git a/setup.py b/setup.py index aed85b45c6ab..62d451a00e01 100644 --- a/setup.py +++ b/setup.py @@ -93,6 +93,7 @@ def get_git_sha(): "pandas>=0.24.2, <0.25.0", "parsedatetime", "pathlib2", + "Pillow>=7.0.0, <8.0.0", "polyline", "python-dateutil", "python-dotenv", diff --git a/superset/models/dashboard.py b/superset/models/dashboard.py index a9b836f7462f..640f6cfe1dbd 100644 --- a/superset/models/dashboard.py +++ b/superset/models/dashboard.py @@ -183,7 +183,7 @@ def dashboard_link(self) -> Markup: def thumbnail_url(self): # SHA here is to force bypassing the browser cache when chart has changed # sha = utils.md5_hex(self.params, 6) - return f"/api/v1/dashboard/thumbnail/{self.id}/{sha}/" + return f"/api/v1/dashboard/{self.id}/thumbnail/{sha}/" @property def thumbnail_img(self): diff --git a/superset/utils/selenium.py b/superset/utils/selenium.py index 72374a607afc..04c13c45f459 100644 --- a/superset/utils/selenium.py +++ b/superset/utils/selenium.py @@ -14,7 +14,6 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -# pylint: disable=C,R,W import logging import time import urllib From 5952e2c87ac38e9fde88fe2805505cd1c9c8d5cc Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Mon, 13 Jan 2020 16:14:29 +0000 Subject: [PATCH 05/28] [thumbnails] More type annotations and lint --- docs/installation.rst | 10 ++++++++++ superset/cli.py | 9 ++++++--- superset/tasks/thumbnails.py | 4 ++-- superset/utils/selenium.py | 19 ++++++++++--------- superset/views/dashboard/api.py | 3 +-- 5 files changed, 29 insertions(+), 16 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index 4a3059fa3a70..8267adf5a1bc 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -642,6 +642,16 @@ Thumbnails are store on cache, an example config could be: .. code-block:: python + class CeleryConfig(object): + BROKER_URL = "redis://localhost:6379/0" + CELERY_IMPORTS = ("superset.sql_lab", "superset.tasks", "superset.tasks.thumbnails") + CELERY_RESULT_BACKEND = "redis://localhost:6379/0" + CELERYD_PREFETCH_MULTIPLIER = 10 + CELERY_ACKS_LATE = True + + + CELERY_CONFIG = CeleryConfig + def init_thumbnail_cache(app): return RedisCache(host='localhost', port=6379, key_prefix='superset_thumbnails_') diff --git a/superset/cli.py b/superset/cli.py index 48d1739ede2a..093f8ae73814 100755 --- a/superset/cli.py +++ b/superset/cli.py @@ -19,7 +19,7 @@ from datetime import datetime from subprocess import Popen from sys import stdout -from typing import Union +from typing import Union, Type import click import yaml @@ -479,7 +479,10 @@ def compute_thumbnails( ) def compute_generic_thumbnail( - friendly_type: str, model_cls: Union[Dashboard, Slice], model_id: int, func + friendly_type: str, + model_cls: Union[Type[Dashboard], Type[Slice]], + model_id: int, + compute_func, ): query = db.session.query(model_cls) if model_id: @@ -488,7 +491,7 @@ def compute_generic_thumbnail( count = len(dashboards) for i, model in enumerate(dashboards): if asynchronous: - func = func.delay + func = compute_func.delay action = "Triggering" else: action = "Processing" diff --git a/superset/tasks/thumbnails.py b/superset/tasks/thumbnails.py index 48ac2b59df8c..c64f71fe85ba 100644 --- a/superset/tasks/thumbnails.py +++ b/superset/tasks/thumbnails.py @@ -29,7 +29,7 @@ def cache_chart_thumbnail(chart_id, force=False): with app.app_context(): logging.info(f"Caching chart {chart_id}") - screenshot = SliceScreenshot(id=chart_id) + screenshot = SliceScreenshot(model_id=chart_id) user = security_manager.find_user("Admin") screenshot.compute_and_cache(user=user, cache=thumbnail_cache, force=force) @@ -38,6 +38,6 @@ def cache_chart_thumbnail(chart_id, force=False): def cache_dashboard_thumbnail(dashboard_id, force=False): with app.app_context(): logging.info(f"Caching dashboard {dashboard_id}") - screenshot = DashboardScreenshot(id=dashboard_id) + screenshot = DashboardScreenshot(model_id=dashboard_id) user = security_manager.find_user("Admin") screenshot.compute_and_cache(user=user, cache=thumbnail_cache, force=force) diff --git a/superset/utils/selenium.py b/superset/utils/selenium.py index 04c13c45f459..8da281193ade 100644 --- a/superset/utils/selenium.py +++ b/superset/utils/selenium.py @@ -57,13 +57,13 @@ class BaseScreenshot: window_size = (800, 600) thumb_size = (400, 300) - def __init__(self, id): - self.id = id - self.screenshot = None + def __init__(self, model_id: int): + self.model_id: int = model_id + self.screenshot: Optional[bytes] = None @property def cache_key(self): - return f"thumb__{self.thumbnail_type}__{self.id}" + return f"thumb__{self.thumbnail_type}__{self.model_id}" @property def url(self): @@ -94,7 +94,8 @@ def compute_and_cache( force: bool = True, ) -> Optional[bytes]: """ - Fetches the screenshot, computes the thumbnail and caches the result + Fetches the screenshot, computes the thumbnail and caches the result + :param user: If no user is given will use the current context :param cache: The cache to keep the thumbnail payload :param window_size: The window size from which will process the thumb @@ -142,7 +143,7 @@ def get_thumb( window_size: Tuple[int, int] = None, thumb_size: Tuple[int, int] = None, cache: "Cache" = None, - ): + ) -> Optional[bytes]: payload = None cache_key = self.cache_key window_size = window_size or self.window_size @@ -162,7 +163,7 @@ def resize_image( output: str = "png", size: Tuple[int, int] = None, crop: bool = True, - ): + ) -> bytes: size = size or cls.thumb_size img = Image.open(BytesIO(img_bytes)) logging.debug(f"Selenium image size: {img.size}") @@ -189,7 +190,7 @@ class SliceScreenshot(BaseScreenshot): @property def url(self) -> str: - return get_url_path("Superset.slice", slice_id=self.id, standalone="true") + return get_url_path("Superset.slice", slice_id=self.model_id, standalone="true") class DashboardScreenshot(BaseScreenshot): @@ -200,7 +201,7 @@ class DashboardScreenshot(BaseScreenshot): @property def url(self) -> str: - return get_url_path("Superset.dashboard", dashboard_id=self.id) + return get_url_path("Superset.dashboard", dashboard_id=self.model_id) def _destroy_webdriver(driver): diff --git a/superset/views/dashboard/api.py b/superset/views/dashboard/api.py index 8528db698afb..10d84c43532e 100644 --- a/superset/views/dashboard/api.py +++ b/superset/views/dashboard/api.py @@ -16,7 +16,6 @@ # under the License. import json import re -from io import BytesIO from flask import current_app, g, request, Response from flask_appbuilder.api import expose, protect, safe @@ -400,7 +399,7 @@ def thumbnail(self, pk, sha): # pylint: disable=invalid-name dashboard = self._base_filters.apply_all(query).get(pk) if not dashboard: return self.response_404() - screenshot = BytesIO(DashboardScreenshot(pk).get_thumb(cache=thumbnail_cache)) + screenshot = DashboardScreenshot(pk).get_thumb_as_bytes(cache=thumbnail_cache) return Response( FileWrapper(screenshot), mimetype="image/png", direct_passthrough=True ) From b19f17bfd06a69314e6b61ed172451b1f5966bd8 Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Tue, 14 Jan 2020 07:59:46 +0000 Subject: [PATCH 06/28] [thumbnails] More type annotations and lint --- docs/installation.rst | 10 ++++++++++ superset/cli.py | 3 ++- superset/utils/selenium.py | 35 ++++++++++++++++++----------------- 3 files changed, 30 insertions(+), 18 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index 8267adf5a1bc..cdfee9defd5b 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -660,6 +660,16 @@ Thumbnails are store on cache, an example config could be: Using the above example cache keys for dashboards will be `superset_thumbnails_thumb__dashboard__{ID}` +You can override the base URL for selenium using: + +.. code-block:: python + + WEBDRIVER_BASEURL = "https://superset.company.com" + + +Additional selenium web drive config can be set using `WEBDRIVER_CONFIGURATION` + + Deeper SQLAlchemy integration ----------------------------- diff --git a/superset/cli.py b/superset/cli.py index 093f8ae73814..9c06e86c109b 100755 --- a/superset/cli.py +++ b/superset/cli.py @@ -19,7 +19,7 @@ from datetime import datetime from subprocess import Popen from sys import stdout -from typing import Union, Type +from typing import Type, Union import click import yaml @@ -494,6 +494,7 @@ def compute_generic_thumbnail( func = compute_func.delay action = "Triggering" else: + func = compute_func action = "Processing" msg = f'{action} {friendly_type} "{model}" ({i+1}/{count})' click.secho(msg, fg="green") diff --git a/superset/utils/selenium.py b/superset/utils/selenium.py index 8da281193ade..ff41de8e2a5a 100644 --- a/superset/utils/selenium.py +++ b/superset/utils/selenium.py @@ -32,8 +32,10 @@ from werkzeug.http import parse_cookie if TYPE_CHECKING: - from flask_appbuilder.security.sqla.models import User - from flask_caching import Cache + from flask_appbuilder.security.sqla.models import ( + User, + ) # pylint: disable=unused-import + from flask_caching import Cache # pylint: disable=unused-import # Time in seconds, we will wait for the page to load and render @@ -42,11 +44,11 @@ SELENIUM_HEADSTART = 3 -def headless_url(path): +def headless_url(path: str): return urllib.parse.urljoin(current_app.config.get("WEBDRIVER_BASEURL"), path) -def get_url_path(view, **kwargs): +def get_url_path(view: str, **kwargs): with current_app.test_request_context(): return headless_url(url_for(view, **kwargs)) @@ -54,8 +56,8 @@ def get_url_path(view, **kwargs): class BaseScreenshot: thumbnail_type: Optional[str] = None element: Optional[str] = None - window_size = (800, 600) - thumb_size = (400, 300) + window_size: Tuple[int, int] = (800, 600) + thumb_size: Tuple[int, int] = (400, 300) def __init__(self, model_id: int): self.model_id: int = model_id @@ -69,7 +71,7 @@ def cache_key(self): def url(self): raise NotImplementedError() - def fetch_screenshot(self, user, window_size: Tuple[int, int] = None): + def fetch_screenshot(self, user: "User", window_size: Tuple[int, int] = None): window_size = window_size or self.window_size self.screenshot = get_png_from_url( self.url, window_size, self.element, user=user @@ -80,7 +82,7 @@ def get_thumb_as_bytes(self, *args, **kwargs): payload = self.get_thumb(*args, **kwargs) return BytesIO(payload) - def get_from_cache(self, cache: "Cache"): + def get_from_cache(self, cache: "Cache") -> Optional[BytesIO]: payload = cache.get(self.cache_key) if payload: return BytesIO(payload) @@ -134,7 +136,6 @@ def compute_and_cache( if payload and cache: logging.info(f"Caching thumbnail: {cache_key}") cache.set(cache_key, payload) - logging.error(type(payload)) return payload def get_thumb( @@ -183,10 +184,10 @@ def resize_image( class SliceScreenshot(BaseScreenshot): - thumbnail_type = "slice" - element = "chart-container" - window_size = (600, int(600 * 0.75)) - thumb_size = (300, int(300 * 0.75)) + thumbnail_type: Optional[str] = "slice" + element: Optional[str] = "chart-container" + window_size: Tuple[int, int] = (600, int(600 * 0.75)) + thumb_size: Tuple[int, int] = (300, int(300 * 0.75)) @property def url(self) -> str: @@ -194,10 +195,10 @@ def url(self) -> str: class DashboardScreenshot(BaseScreenshot): - thumbnail_type = "dashboard" - element = "grid-container" - window_size = (1600, int(1600 * 0.75)) - thumb_size = (400, int(400 * 0.75)) + thumbnail_type: Optional[str] = "dashboard" + element: Optional[str] = "grid-container" + window_size: Tuple[int, int] = (1600, int(1600 * 0.75)) + thumb_size: Tuple[int, int] = (400, int(400 * 0.75)) @property def url(self) -> str: From c311cd18b321e3afa5cfec835d78b97be47947f7 Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Mon, 20 Jan 2020 16:13:17 +0000 Subject: [PATCH 07/28] [thumbnails] Lint and tweaks --- docs/installation.rst | 4 ++++ superset/tasks/thumbnails.py | 10 ++++++++-- superset/utils/selenium.py | 16 ++++++++-------- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index 185f4866b1b6..452e6613156e 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -645,6 +645,10 @@ Thumbnails are store on cache, an example config could be: .. code-block:: python + from werkzeug.contrib.cache import RedisCache + + ... + class CeleryConfig(object): BROKER_URL = "redis://localhost:6379/0" CELERY_IMPORTS = ("superset.sql_lab", "superset.tasks", "superset.tasks.thumbnails") diff --git a/superset/tasks/thumbnails.py b/superset/tasks/thumbnails.py index c64f71fe85ba..75d5da233ac2 100644 --- a/superset/tasks/thumbnails.py +++ b/superset/tasks/thumbnails.py @@ -26,8 +26,11 @@ @celery_app.task(name="cache_chart_thumbnail", soft_time_limit=300) -def cache_chart_thumbnail(chart_id, force=False): +def cache_chart_thumbnail(chart_id: int, force: bool = False): with app.app_context(): + if not thumbnail_cache: + logging.warning("No cache set, refusing to compute") + return None logging.info(f"Caching chart {chart_id}") screenshot = SliceScreenshot(model_id=chart_id) user = security_manager.find_user("Admin") @@ -35,8 +38,11 @@ def cache_chart_thumbnail(chart_id, force=False): @celery_app.task(name="cache_dashboard_thumbnail", soft_time_limit=300) -def cache_dashboard_thumbnail(dashboard_id, force=False): +def cache_dashboard_thumbnail(dashboard_id: int, force: bool = False): with app.app_context(): + if not thumbnail_cache: + logging.warning("No cache set, refusing to compute") + return None logging.info(f"Caching dashboard {dashboard_id}") screenshot = DashboardScreenshot(model_id=dashboard_id) user = security_manager.find_user("Admin") diff --git a/superset/utils/selenium.py b/superset/utils/selenium.py index ff41de8e2a5a..11fe8fac134a 100644 --- a/superset/utils/selenium.py +++ b/superset/utils/selenium.py @@ -16,7 +16,7 @@ # under the License. import logging import time -import urllib +import urllib.parse from io import BytesIO from typing import Optional, Tuple, TYPE_CHECKING @@ -45,7 +45,7 @@ def headless_url(path: str): - return urllib.parse.urljoin(current_app.config.get("WEBDRIVER_BASEURL"), path) + return urllib.parse.urljoin(current_app.config.get("WEBDRIVER_BASEURL", ""), path) def get_url_path(view: str, **kwargs): @@ -78,14 +78,17 @@ def fetch_screenshot(self, user: "User", window_size: Tuple[int, int] = None): ) return self.screenshot - def get_thumb_as_bytes(self, *args, **kwargs): + def get_thumb_as_bytes(self, *args, **kwargs) -> Optional[BytesIO]: payload = self.get_thumb(*args, **kwargs) - return BytesIO(payload) + if payload: + return BytesIO(payload) + return None def get_from_cache(self, cache: "Cache") -> Optional[BytesIO]: payload = cache.get(self.cache_key) if payload: return BytesIO(payload) + return None def compute_and_cache( self, @@ -106,10 +109,7 @@ def compute_and_cache( :return: Image payload """ cache_key = self.cache_key - if not cache: - logging.error("No cache set, refusing to compute") - return None - if not force and cache.get(cache_key): + if not force and cache and cache.get(cache_key): logging.info("Thumb already cached, skipping...") return None window_size = window_size or self.window_size From 520cff92479e10540ce5c55703aa30611f024542 Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Mon, 20 Jan 2020 17:07:34 +0000 Subject: [PATCH 08/28] [thumbnails] more Lint --- superset/models/dashboard.py | 1 + superset/utils/selenium.py | 61 ++++++++++++++++++--------------- superset/views/dashboard/api.py | 2 +- 3 files changed, 35 insertions(+), 29 deletions(-) diff --git a/superset/models/dashboard.py b/superset/models/dashboard.py index ad0efbb5d139..6bbe5c6f3f06 100644 --- a/superset/models/dashboard.py +++ b/superset/models/dashboard.py @@ -183,6 +183,7 @@ def dashboard_link(self) -> Markup: def thumbnail_url(self): # SHA here is to force bypassing the browser cache when chart has changed # sha = utils.md5_hex(self.params, 6) + sha = 1 return f"/api/v1/dashboard/{self.id}/thumbnail/{sha}/" @property diff --git a/superset/utils/selenium.py b/superset/utils/selenium.py index 11fe8fac134a..924af9a07ce4 100644 --- a/superset/utils/selenium.py +++ b/superset/utils/selenium.py @@ -32,9 +32,9 @@ from werkzeug.http import parse_cookie if TYPE_CHECKING: - from flask_appbuilder.security.sqla.models import ( + from flask_appbuilder.security.sqla.models import ( # pylint: disable=unused-import User, - ) # pylint: disable=unused-import + ) from flask_caching import Cache # pylint: disable=unused-import @@ -90,7 +90,7 @@ def get_from_cache(self, cache: "Cache") -> Optional[BytesIO]: return BytesIO(payload) return None - def compute_and_cache( + def compute_and_cache( # pylint: disable=too-many-arguments self, user: "User" = None, cache: "Cache" = None, @@ -121,14 +121,14 @@ def compute_and_cache( # Assuming all sorts of things can go wrong with Selenium try: payload = self.fetch_screenshot(window_size=window_size, user=user) - except Exception as e: + except Exception as e: # pylint: disable=broad-except logging.error("Failed at generating thumbnail") logging.exception(e) if payload and window_size != thumb_size: try: payload = self.resize_image(payload, size=thumb_size) - except Exception as e: + except Exception as e: # pylint: disable=broad-except logging.error("Failed at resizing thumbnail") logging.exception(e) payload = None @@ -211,11 +211,11 @@ def _destroy_webdriver(driver): # and catch-all exceptions try: retry_call(driver.close, tries=2) - except Exception: + except Exception: # pylint: disable=broad-except pass try: driver.quit() - except Exception: + except Exception: # pylint: disable=broad-except pass @@ -281,7 +281,7 @@ def create_webdriver( return driver -def get_png_from_url( +def get_png_from_url( # pylint: disable=too-many-arguments url: str, window: Tuple[int, int], element_name: Optional[str], @@ -295,24 +295,29 @@ def get_png_from_url( img = None logging.debug(f"Sleeping for {SELENIUM_HEADSTART} seconds") time.sleep(SELENIUM_HEADSTART) - try: - logging.debug(f"Wait for the presence of {element_name}") - element = WebDriverWait(driver, 10).until( - EC.presence_of_element_located((By.CLASS_NAME, element_name)) - ) - logging.debug(f"Wait for .loading to be done") - WebDriverWait(driver, 60).until_not( - EC.presence_of_all_elements_located((By.CLASS_NAME, "loading")) - ) - logging.info("Taking a PNG screenshot") - img = element.screenshot_as_png - except TimeoutException: - logging.error("Selenium timed out") - except WebDriverException as e: - logging.exception(e) - # Some webdrivers do not support screenshots for elements. - # In such cases, take a screenshot of the entire page. - img = driver.screenshot() # pylint: disable=no-member - finally: - _destroy_webdriver(driver) + retry_count = 0 + while retry_count <= retries: + try: + logging.debug(f"Wait for the presence of {element_name}") + element = WebDriverWait(driver, 10).until( + EC.presence_of_element_located((By.CLASS_NAME, element_name)) + ) + logging.debug(f"Wait for .loading to be done") + WebDriverWait(driver, 60).until_not( + EC.presence_of_all_elements_located((By.CLASS_NAME, "loading")) + ) + logging.info("Taking a PNG screenshot") + img = element.screenshot_as_png + break + except TimeoutException: + logging.error("Selenium timed out") + retry_count += 1 + except WebDriverException as e: + logging.exception(e) + # Some webdrivers do not support screenshots for elements. + # In such cases, take a screenshot of the entire page. + img = driver.screenshot() # pylint: disable=no-member + break + finally: + _destroy_webdriver(driver) return img diff --git a/superset/views/dashboard/api.py b/superset/views/dashboard/api.py index ca3428b4029f..38f4d6ae7b30 100644 --- a/superset/views/dashboard/api.py +++ b/superset/views/dashboard/api.py @@ -397,7 +397,7 @@ def thumbnail(self, pk, sha): # pylint: disable=invalid-name 500: $ref: '#/components/responses/500' """ - sha = 1 + print(sha) query = self.datamodel.session.query(Dashboard) dashboard = self._base_filters.apply_all(query).get(pk) if not dashboard: From 71d7a94342d7b04acb0e7d6fbd0b13f8fa6cc3fe Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Mon, 20 Jan 2020 18:01:49 +0000 Subject: [PATCH 09/28] [thumbnails] isort --- superset/utils/selenium.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/superset/utils/selenium.py b/superset/utils/selenium.py index 924af9a07ce4..7911e6e43042 100644 --- a/superset/utils/selenium.py +++ b/superset/utils/selenium.py @@ -32,10 +32,9 @@ from werkzeug.http import parse_cookie if TYPE_CHECKING: - from flask_appbuilder.security.sqla.models import ( # pylint: disable=unused-import - User, - ) - from flask_caching import Cache # pylint: disable=unused-import + # pylint: disable=unused-import + from flask_appbuilder.security.sqla.models import User + from flask_caching import Cache # Time in seconds, we will wait for the page to load and render From 46f99a7f591373ea6a883c56589891724b68c52b Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Mon, 20 Jan 2020 18:10:50 +0000 Subject: [PATCH 10/28] [thumbnails] isort --- superset/utils/selenium.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/superset/utils/selenium.py b/superset/utils/selenium.py index 7911e6e43042..60945a202939 100644 --- a/superset/utils/selenium.py +++ b/superset/utils/selenium.py @@ -22,7 +22,6 @@ from flask import current_app, request, Response, session, url_for from flask_login import login_user -from PIL import Image from retry.api import retry_call from selenium.common.exceptions import TimeoutException, WebDriverException from selenium.webdriver import chrome, firefox @@ -30,6 +29,7 @@ from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import WebDriverWait from werkzeug.http import parse_cookie +from PIL import Image if TYPE_CHECKING: # pylint: disable=unused-import From 0bf90617bb7afdd3d0a4258bbe7c2e0df39a70ab Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Tue, 21 Jan 2020 00:19:22 +0000 Subject: [PATCH 11/28] [thumbnails] selenium with configurable auth function --- superset/utils/selenium.py | 219 ++++++++++++++++++++----------------- 1 file changed, 120 insertions(+), 99 deletions(-) diff --git a/superset/utils/selenium.py b/superset/utils/selenium.py index 60945a202939..2d2c112d7838 100644 --- a/superset/utils/selenium.py +++ b/superset/utils/selenium.py @@ -18,30 +18,32 @@ import time import urllib.parse from io import BytesIO -from typing import Optional, Tuple, TYPE_CHECKING +from typing import Any, Callable, Dict, Optional, Tuple, TYPE_CHECKING from flask import current_app, request, Response, session, url_for from flask_login import login_user +from PIL import Image from retry.api import retry_call from selenium.common.exceptions import TimeoutException, WebDriverException from selenium.webdriver import chrome, firefox from selenium.webdriver.common.by import By +from selenium.webdriver.remote.webdriver import WebDriver from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import WebDriverWait from werkzeug.http import parse_cookie -from PIL import Image if TYPE_CHECKING: # pylint: disable=unused-import from flask_appbuilder.security.sqla.models import User from flask_caching import Cache - # Time in seconds, we will wait for the page to load and render SELENIUM_CHECK_INTERVAL = 2 SELENIUM_RETRIES = 5 SELENIUM_HEADSTART = 3 +WindowSize = Tuple[int, int] + def headless_url(path: str): return urllib.parse.urljoin(current_app.config.get("WEBDRIVER_BASEURL", ""), path) @@ -52,14 +54,79 @@ def get_url_path(view: str, **kwargs): return headless_url(url_for(view, **kwargs)) +def get_png_from_url( # pylint: disable=too-many-arguments + url: str, + window: WindowSize, + element_name: str, + user: "User", + auth_driver_func: Callable, + webdriver_name: str = "chrome", + retries: int = SELENIUM_RETRIES, +) -> Optional[bytes]: + driver = create_webdriver( + user, webdriver_name, window, auth_driver_func=auth_driver_func + ) + driver.set_window_size(*window) + driver.get(url) + img: Optional[bytes] = None + logging.debug(f"Sleeping for {SELENIUM_HEADSTART} seconds") + time.sleep(SELENIUM_HEADSTART) + try: + logging.debug(f"Wait for the presence of {element_name}") + element = WebDriverWait(driver, 10).until( + EC.presence_of_element_located((By.CLASS_NAME, element_name)) + ) + logging.debug(f"Wait for .loading to be done") + WebDriverWait(driver, 60).until_not( + EC.presence_of_all_elements_located((By.CLASS_NAME, "loading")) + ) + logging.info("Taking a PNG screenshot") + img = element.screenshot_as_png + except TimeoutException: + logging.error("Selenium timed out") + except WebDriverException as e: + logging.exception(e) + # Some webdrivers do not support screenshots for elements. + # In such cases, take a screenshot of the entire page. + img = driver.screenshot() # pylint: disable=no-member + finally: + _destroy_webdriver(driver, retries) + return img + + +def auth_driver(driver: WebDriver, user: "User") -> WebDriver: + """ + Default AuthDriverFuncType type that sets a session cookie flask-login style + :return: WebDriver + """ + if user: + # Set the cookies in the driver + for cookie in get_auth_cookies(user): + info = dict(name="session", value=cookie) + driver.add_cookie(info) + elif request.cookies: + cookies = request.cookies + for k, v in cookies.items(): + cookie = dict(name=k, value=v) + driver.add_cookie(cookie) + return driver + + class BaseScreenshot: - thumbnail_type: Optional[str] = None - element: Optional[str] = None - window_size: Tuple[int, int] = (800, 600) - thumb_size: Tuple[int, int] = (400, 300) + thumbnail_type: str = "" + element: str = "" + window_size: WindowSize = (800, 600) + thumb_size: WindowSize = (400, 300) - def __init__(self, model_id: int): + def __init__( + self, + model_id: int, + fetch_png_from_url_func: Callable = get_png_from_url, + auth_driver_func: Callable = auth_driver, + ): self.model_id: int = model_id + self._fetch_png_from_url_func = fetch_png_from_url_func + self._auth_driver_func = auth_driver_func self.screenshot: Optional[bytes] = None @property @@ -70,10 +137,14 @@ def cache_key(self): def url(self): raise NotImplementedError() - def fetch_screenshot(self, user: "User", window_size: Tuple[int, int] = None): + def fetch(self, user: "User", window_size: WindowSize = None): window_size = window_size or self.window_size - self.screenshot = get_png_from_url( - self.url, window_size, self.element, user=user + self.screenshot = self._fetch_png_from_url_func( + self.url, + window_size, + self.element, + user=user, + auth_driver_func=self._auth_driver_func, ) return self.screenshot @@ -83,6 +154,25 @@ def get_thumb_as_bytes(self, *args, **kwargs) -> Optional[BytesIO]: return BytesIO(payload) return None + def get_thumb( + self, + user: "User" = None, + window_size: WindowSize = None, + thumb_size: WindowSize = None, + cache: "Cache" = None, + ) -> Optional[bytes]: + payload = None + cache_key = self.cache_key + window_size = window_size or self.window_size + thumb_size = thumb_size or self.thumb_size + if cache: + payload = cache.get(cache_key) + if not payload: + payload = self.compute_and_cache(user, cache, window_size, thumb_size) + else: + logging.info(f"Loaded thumbnail from cache: {cache_key}") + return payload + def get_from_cache(self, cache: "Cache") -> Optional[BytesIO]: payload = cache.get(self.cache_key) if payload: @@ -93,8 +183,8 @@ def compute_and_cache( # pylint: disable=too-many-arguments self, user: "User" = None, cache: "Cache" = None, - window_size: Tuple[int, int] = None, - thumb_size: Tuple[int, int] = None, + window_size: WindowSize = None, + thumb_size: WindowSize = None, force: bool = True, ) -> Optional[bytes]: """ @@ -104,7 +194,7 @@ def compute_and_cache( # pylint: disable=too-many-arguments :param cache: The cache to keep the thumbnail payload :param window_size: The window size from which will process the thumb :param thumb_size: The final thumbnail size - :param force: Will for the computation even if it's already cached + :param force: Will force the computation even if it's already cached :return: Image payload """ cache_key = self.cache_key @@ -119,7 +209,7 @@ def compute_and_cache( # pylint: disable=too-many-arguments # Assuming all sorts of things can go wrong with Selenium try: - payload = self.fetch_screenshot(window_size=window_size, user=user) + payload = self.fetch(window_size=window_size, user=user) except Exception as e: # pylint: disable=broad-except logging.error("Failed at generating thumbnail") logging.exception(e) @@ -137,25 +227,6 @@ def compute_and_cache( # pylint: disable=too-many-arguments cache.set(cache_key, payload) return payload - def get_thumb( - self, - user: "User" = None, - window_size: Tuple[int, int] = None, - thumb_size: Tuple[int, int] = None, - cache: "Cache" = None, - ) -> Optional[bytes]: - payload = None - cache_key = self.cache_key - window_size = window_size or self.window_size - thumb_size = thumb_size or self.thumb_size - if cache: - payload = cache.get(cache_key) - if not payload: - payload = self.compute_and_cache(user, cache, window_size, thumb_size) - else: - logging.info(f"Loaded thumbnail from cache: {cache_key}") - return payload - @classmethod def resize_image( cls, @@ -183,8 +254,8 @@ def resize_image( class SliceScreenshot(BaseScreenshot): - thumbnail_type: Optional[str] = "slice" - element: Optional[str] = "chart-container" + thumbnail_type: str = "slice" + element: str = "chart-container" window_size: Tuple[int, int] = (600, int(600 * 0.75)) thumb_size: Tuple[int, int] = (300, int(300 * 0.75)) @@ -194,8 +265,8 @@ def url(self) -> str: class DashboardScreenshot(BaseScreenshot): - thumbnail_type: Optional[str] = "dashboard" - element: Optional[str] = "grid-container" + thumbnail_type: str = "dashboard" + element: str = "grid-container" window_size: Tuple[int, int] = (1600, int(1600 * 0.75)) thumb_size: Tuple[int, int] = (400, int(400 * 0.75)) @@ -204,12 +275,12 @@ def url(self) -> str: return get_url_path("Superset.dashboard", dashboard_id=self.model_id) -def _destroy_webdriver(driver): +def _destroy_webdriver(driver, tries=2): """Destroy a driver""" # This is some very flaky code in selenium. Hence the retries # and catch-all exceptions try: - retry_call(driver.close, tries=2) + retry_call(driver.close, tries=tries) except Exception: # pylint: disable=broad-except pass try: @@ -238,27 +309,30 @@ def get_auth_cookies(user: "User"): def create_webdriver( - user: "User" = None, webdriver: str = "chrome", window: Tuple[int, int] = None -): + user: "User" = None, + webdriver: str = "chrome", + window: WindowSize = None, + auth_driver_func: Callable = auth_driver, +) -> Any: """Creates a selenium webdriver If no user is specified, we use the current request's context""" # Create a webdriver for use in fetching reports window = window or (800, 600) if webdriver == "firefox": - driver_class = firefox.webdriver.WebDriver + driver_class: WebDriver = firefox.webdriver.WebDriver options = firefox.options.Options() else: # webdriver == 'chrome': driver_class = chrome.webdriver.WebDriver options = chrome.options.Options() - arg = f"--window-size={window[0]},{window[1]}" + arg: str = f"--window-size={window[0]},{window[1]}" options.add_argument(arg) options.add_argument("--headless") # Prepare args for the webdriver init - kwargs = dict(options=options) + kwargs: Dict = dict(options=options) kwargs.update(current_app.config["WEBDRIVER_CONFIGURATION"]) logging.info("Init selenium driver") @@ -266,57 +340,4 @@ def create_webdriver( # Setting cookies requires doing a request first driver.get(headless_url("/login/")) - - if user: - # Set the cookies in the driver - for cookie in get_auth_cookies(user): - info = dict(name="session", value=cookie) - driver.add_cookie(info) - elif request.cookies: - cookies = request.cookies - for k, v in cookies.items(): - cookie = dict(name=k, value=v) - driver.add_cookie(cookie) - return driver - - -def get_png_from_url( # pylint: disable=too-many-arguments - url: str, - window: Tuple[int, int], - element_name: Optional[str], - user: "User", - webdriver: str = "chrome", - retries: int = SELENIUM_RETRIES, -): - driver = create_webdriver(user, webdriver, window) - driver.set_window_size(*window) - driver.get(url) - img = None - logging.debug(f"Sleeping for {SELENIUM_HEADSTART} seconds") - time.sleep(SELENIUM_HEADSTART) - retry_count = 0 - while retry_count <= retries: - try: - logging.debug(f"Wait for the presence of {element_name}") - element = WebDriverWait(driver, 10).until( - EC.presence_of_element_located((By.CLASS_NAME, element_name)) - ) - logging.debug(f"Wait for .loading to be done") - WebDriverWait(driver, 60).until_not( - EC.presence_of_all_elements_located((By.CLASS_NAME, "loading")) - ) - logging.info("Taking a PNG screenshot") - img = element.screenshot_as_png - break - except TimeoutException: - logging.error("Selenium timed out") - retry_count += 1 - except WebDriverException as e: - logging.exception(e) - # Some webdrivers do not support screenshots for elements. - # In such cases, take a screenshot of the entire page. - img = driver.screenshot() # pylint: disable=no-member - break - finally: - _destroy_webdriver(driver) - return img + return auth_driver_func(driver, user) From b470e80349f0e4b3b46228c60dcb7e3e19d0f271 Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Tue, 21 Jan 2020 10:55:24 +0000 Subject: [PATCH 12/28] [thumbnails] refactor --- superset/utils/selenium.py | 282 +++++++++++++++----------------- superset/views/dashboard/api.py | 3 +- 2 files changed, 131 insertions(+), 154 deletions(-) diff --git a/superset/utils/selenium.py b/superset/utils/selenium.py index 2d2c112d7838..54de7753a1b8 100644 --- a/superset/utils/selenium.py +++ b/superset/utils/selenium.py @@ -18,11 +18,10 @@ import time import urllib.parse from io import BytesIO -from typing import Any, Callable, Dict, Optional, Tuple, TYPE_CHECKING +from typing import Callable, Dict, List, Optional, Tuple, TYPE_CHECKING from flask import current_app, request, Response, session, url_for from flask_login import login_user -from PIL import Image from retry.api import retry_call from selenium.common.exceptions import TimeoutException, WebDriverException from selenium.webdriver import chrome, firefox @@ -32,6 +31,8 @@ from selenium.webdriver.support.ui import WebDriverWait from werkzeug.http import parse_cookie +from PIL import Image + if TYPE_CHECKING: # pylint: disable=unused-import from flask_appbuilder.security.sqla.models import User @@ -45,53 +46,23 @@ WindowSize = Tuple[int, int] -def headless_url(path: str): - return urllib.parse.urljoin(current_app.config.get("WEBDRIVER_BASEURL", ""), path) - - -def get_url_path(view: str, **kwargs): +def get_auth_cookies(user: "User") -> List[Dict]: + # Login with the user specified to get the reports with current_app.test_request_context(): - return headless_url(url_for(view, **kwargs)) + login_user(user) + # A mock response object to get the cookie information from + response = Response() + current_app.session_interface.save_session(current_app, session, response) -def get_png_from_url( # pylint: disable=too-many-arguments - url: str, - window: WindowSize, - element_name: str, - user: "User", - auth_driver_func: Callable, - webdriver_name: str = "chrome", - retries: int = SELENIUM_RETRIES, -) -> Optional[bytes]: - driver = create_webdriver( - user, webdriver_name, window, auth_driver_func=auth_driver_func - ) - driver.set_window_size(*window) - driver.get(url) - img: Optional[bytes] = None - logging.debug(f"Sleeping for {SELENIUM_HEADSTART} seconds") - time.sleep(SELENIUM_HEADSTART) - try: - logging.debug(f"Wait for the presence of {element_name}") - element = WebDriverWait(driver, 10).until( - EC.presence_of_element_located((By.CLASS_NAME, element_name)) - ) - logging.debug(f"Wait for .loading to be done") - WebDriverWait(driver, 60).until_not( - EC.presence_of_all_elements_located((By.CLASS_NAME, "loading")) - ) - logging.info("Taking a PNG screenshot") - img = element.screenshot_as_png - except TimeoutException: - logging.error("Selenium timed out") - except WebDriverException as e: - logging.exception(e) - # Some webdrivers do not support screenshots for elements. - # In such cases, take a screenshot of the entire page. - img = driver.screenshot() # pylint: disable=no-member - finally: - _destroy_webdriver(driver, retries) - return img + cookies = [] + + # Set the cookies in the driver + for name, value in response.headers: + if name.lower() == "set-cookie": + cookie = parse_cookie(value) + cookies.append(cookie["session"]) + return cookies def auth_driver(driver: WebDriver, user: "User") -> WebDriver: @@ -112,63 +83,138 @@ def auth_driver(driver: WebDriver, user: "User") -> WebDriver: return driver +def headless_url(path: str) -> str: + return urllib.parse.urljoin(current_app.config.get("WEBDRIVER_BASEURL", ""), path) + + +def get_url_path(view: str, **kwargs) -> str: + with current_app.test_request_context(): + return headless_url(url_for(view, **kwargs)) + + +class AuthWebDriverProxy: + def __init__( + self, driver_type: str, window: WindowSize = None, auth_func: Callable = None + ): + self._driver_type = driver_type + self._window: WindowSize = window or (800, 600) + config_auth_func: Callable = current_app.config.get( + "WEBDRIVER_AUTH_FUNC", auth_driver + ) + self._auth_func: Callable = auth_func or config_auth_func + + def create(self) -> WebDriver: + if self._driver_type == "firefox": + driver_class = firefox.webdriver.WebDriver + options = firefox.options.Options() + elif self._driver_type == "chrome": + driver_class = chrome.webdriver.WebDriver + options = chrome.options.Options() + arg: str = f"--window-size={self._window[0]},{self._window[1]}" + options.add_argument(arg) + else: + raise Exception(f"Webdriver name ({self._driver_type}) not supported") + # Prepare args for the webdriver init + options.add_argument("--headless") + kwargs: Dict = dict(options=options) + kwargs.update(current_app.config["WEBDRIVER_CONFIGURATION"]) + logging.info("Init selenium driver") + return driver_class(**kwargs) + + def auth(self, user: "User") -> WebDriver: + # Setting cookies requires doing a request first + driver = self.create() + driver.get(headless_url("/login/")) + return self._auth_func(driver, user) + + @staticmethod + def destroy(driver: WebDriver, tries=2): + """Destroy a driver""" + # This is some very flaky code in selenium. Hence the retries + # and catch-all exceptions + try: + retry_call(driver.close, tries=tries) + except Exception: # pylint: disable=broad-except + pass + try: + driver.quit() + except Exception: # pylint: disable=broad-except + pass + + def get_png_from_url( + self, url: str, element_name: str, user: "User", retries: int = SELENIUM_RETRIES + ) -> Optional[bytes]: + driver = self.auth(user) + driver.set_window_size(*self._window) + driver.get(url) + img: Optional[bytes] = None + logging.debug(f"Sleeping for {SELENIUM_HEADSTART} seconds") + time.sleep(SELENIUM_HEADSTART) + try: + logging.debug(f"Wait for the presence of {element_name}") + element = WebDriverWait(driver, 10).until( + EC.presence_of_element_located((By.CLASS_NAME, element_name)) + ) + logging.debug(f"Wait for .loading to be done") + WebDriverWait(driver, 60).until_not( + EC.presence_of_all_elements_located((By.CLASS_NAME, "loading")) + ) + logging.info("Taking a PNG screenshot") + img = element.screenshot_as_png + except TimeoutException: + logging.error("Selenium timed out") + except WebDriverException as e: + logging.exception(e) + # Some webdrivers do not support screenshots for elements. + # In such cases, take a screenshot of the entire page. + img = driver.screenshot() # pylint: disable=no-member + finally: + self.destroy(driver, retries) + return img + + class BaseScreenshot: + driver_type = "chrome" thumbnail_type: str = "" element: str = "" window_size: WindowSize = (800, 600) thumb_size: WindowSize = (400, 300) - def __init__( - self, - model_id: int, - fetch_png_from_url_func: Callable = get_png_from_url, - auth_driver_func: Callable = auth_driver, - ): + def __init__(self, model_id: int): self.model_id: int = model_id - self._fetch_png_from_url_func = fetch_png_from_url_func - self._auth_driver_func = auth_driver_func self.screenshot: Optional[bytes] = None + self._driver = AuthWebDriverProxy(self.driver_type, self.window_size) @property - def cache_key(self): + def cache_key(self) -> str: return f"thumb__{self.thumbnail_type}__{self.model_id}" @property - def url(self): + def url(self) -> str: raise NotImplementedError() - def fetch(self, user: "User", window_size: WindowSize = None): - window_size = window_size or self.window_size - self.screenshot = self._fetch_png_from_url_func( - self.url, - window_size, - self.element, - user=user, - auth_driver_func=self._auth_driver_func, - ) + def fetch(self, user: "User") -> Optional[bytes]: + self.screenshot = self._driver.get_png_from_url(self.url, self.element, user) return self.screenshot - def get_thumb_as_bytes(self, *args, **kwargs) -> Optional[BytesIO]: - payload = self.get_thumb(*args, **kwargs) + def get( + self, user: "User" = None, cache: "Cache" = None, thumb_size: WindowSize = None + ) -> Optional[BytesIO]: + payload = self._get_thumb_bytes(user=user, thumb_size=thumb_size, cache=cache) if payload: return BytesIO(payload) return None - def get_thumb( - self, - user: "User" = None, - window_size: WindowSize = None, - thumb_size: WindowSize = None, - cache: "Cache" = None, + def _get_thumb_bytes( + self, user: "User" = None, cache: "Cache" = None, thumb_size: WindowSize = None ) -> Optional[bytes]: payload = None cache_key = self.cache_key - window_size = window_size or self.window_size thumb_size = thumb_size or self.thumb_size if cache: payload = cache.get(cache_key) if not payload: - payload = self.compute_and_cache(user, cache, window_size, thumb_size) + payload = self.compute_and_cache(user, cache, thumb_size) else: logging.info(f"Loaded thumbnail from cache: {cache_key}") return payload @@ -182,9 +228,8 @@ def get_from_cache(self, cache: "Cache") -> Optional[BytesIO]: def compute_and_cache( # pylint: disable=too-many-arguments self, user: "User" = None, - cache: "Cache" = None, - window_size: WindowSize = None, thumb_size: WindowSize = None, + cache: "Cache" = None, force: bool = True, ) -> Optional[bytes]: """ @@ -201,7 +246,6 @@ def compute_and_cache( # pylint: disable=too-many-arguments if not force and cache and cache.get(cache_key): logging.info("Thumb already cached, skipping...") return None - window_size = window_size or self.window_size thumb_size = thumb_size or self.thumb_size logging.info(f"Processing url for thumbnail: {cache_key}") @@ -209,12 +253,12 @@ def compute_and_cache( # pylint: disable=too-many-arguments # Assuming all sorts of things can go wrong with Selenium try: - payload = self.fetch(window_size=window_size, user=user) + payload = self.fetch(user=user) except Exception as e: # pylint: disable=broad-except logging.error("Failed at generating thumbnail") logging.exception(e) - if payload and window_size != thumb_size: + if payload and self.window_size != thumb_size: try: payload = self.resize_image(payload, size=thumb_size) except Exception as e: # pylint: disable=broad-except @@ -232,7 +276,7 @@ def resize_image( cls, img_bytes: bytes, output: str = "png", - size: Tuple[int, int] = None, + size: WindowSize = None, crop: bool = True, ) -> bytes: size = size or cls.thumb_size @@ -256,8 +300,8 @@ def resize_image( class SliceScreenshot(BaseScreenshot): thumbnail_type: str = "slice" element: str = "chart-container" - window_size: Tuple[int, int] = (600, int(600 * 0.75)) - thumb_size: Tuple[int, int] = (300, int(300 * 0.75)) + window_size: WindowSize = (600, int(600 * 0.75)) + thumb_size: WindowSize = (300, int(300 * 0.75)) @property def url(self) -> str: @@ -267,77 +311,9 @@ def url(self) -> str: class DashboardScreenshot(BaseScreenshot): thumbnail_type: str = "dashboard" element: str = "grid-container" - window_size: Tuple[int, int] = (1600, int(1600 * 0.75)) - thumb_size: Tuple[int, int] = (400, int(400 * 0.75)) + window_size: WindowSize = (1600, int(1600 * 0.75)) + thumb_size: WindowSize = (400, int(400 * 0.75)) @property def url(self) -> str: return get_url_path("Superset.dashboard", dashboard_id=self.model_id) - - -def _destroy_webdriver(driver, tries=2): - """Destroy a driver""" - # This is some very flaky code in selenium. Hence the retries - # and catch-all exceptions - try: - retry_call(driver.close, tries=tries) - except Exception: # pylint: disable=broad-except - pass - try: - driver.quit() - except Exception: # pylint: disable=broad-except - pass - - -def get_auth_cookies(user: "User"): - # Login with the user specified to get the reports - with current_app.test_request_context(): - login_user(user) - - # A mock response object to get the cookie information from - response = Response() - current_app.session_interface.save_session(current_app, session, response) - - cookies = [] - - # Set the cookies in the driver - for name, value in response.headers: - if name.lower() == "set-cookie": - cookie = parse_cookie(value) - cookies.append(cookie["session"]) - return cookies - - -def create_webdriver( - user: "User" = None, - webdriver: str = "chrome", - window: WindowSize = None, - auth_driver_func: Callable = auth_driver, -) -> Any: - """Creates a selenium webdriver - - If no user is specified, we use the current request's context""" - # Create a webdriver for use in fetching reports - window = window or (800, 600) - if webdriver == "firefox": - driver_class: WebDriver = firefox.webdriver.WebDriver - options = firefox.options.Options() - else: - # webdriver == 'chrome': - driver_class = chrome.webdriver.WebDriver - options = chrome.options.Options() - arg: str = f"--window-size={window[0]},{window[1]}" - options.add_argument(arg) - - options.add_argument("--headless") - - # Prepare args for the webdriver init - kwargs: Dict = dict(options=options) - kwargs.update(current_app.config["WEBDRIVER_CONFIGURATION"]) - - logging.info("Init selenium driver") - driver = driver_class(**kwargs) - - # Setting cookies requires doing a request first - driver.get(headless_url("/login/")) - return auth_driver_func(driver, user) diff --git a/superset/views/dashboard/api.py b/superset/views/dashboard/api.py index 38f4d6ae7b30..1ebadbb1d750 100644 --- a/superset/views/dashboard/api.py +++ b/superset/views/dashboard/api.py @@ -402,7 +402,8 @@ def thumbnail(self, pk, sha): # pylint: disable=invalid-name dashboard = self._base_filters.apply_all(query).get(pk) if not dashboard: return self.response_404() - screenshot = DashboardScreenshot(pk).get_thumb_as_bytes(cache=thumbnail_cache) + # fetch the dashboard screenshot using the current user and cache if set + screenshot = DashboardScreenshot(pk).get(cache=thumbnail_cache) return Response( FileWrapper(screenshot), mimetype="image/png", direct_passthrough=True ) From 406d9787cac50074cd09a6ff5a988cee3092f707 Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Tue, 21 Jan 2020 17:05:28 +0000 Subject: [PATCH 13/28] [thumbnails] Refactor and started tests --- docs/installation.rst | 14 +++++++++++++ superset/utils/selenium.py | 42 ++++++++++++++++++++------------------ tests/thumbnails_tests.py | 31 ++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 20 deletions(-) create mode 100644 tests/thumbnails_tests.py diff --git a/docs/installation.rst b/docs/installation.rst index 452e6613156e..89837d083bdb 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -676,6 +676,20 @@ You can override the base URL for selenium using: Additional selenium web drive config can be set using `WEBDRIVER_CONFIGURATION` +You can implement a custom function to authenticate selenium by default will use flask-login session cookie. +Your custom function signature: + +.. code-block:: python + + def auth_driver(driver: WebDriver, user: "User") -> WebDriver: + pass + + +Then on config: + +.. code-block:: python + + WEBDRIVER_AUTH_FUNC = auth_driver Deeper SQLAlchemy integration ----------------------------- diff --git a/superset/utils/selenium.py b/superset/utils/selenium.py index 54de7753a1b8..70666c465cab 100644 --- a/superset/utils/selenium.py +++ b/superset/utils/selenium.py @@ -141,7 +141,7 @@ def destroy(driver: WebDriver, tries=2): except Exception: # pylint: disable=broad-except pass - def get_png_from_url( + def get_screenshot( self, url: str, element_name: str, user: "User", retries: int = SELENIUM_RETRIES ) -> Optional[bytes]: driver = self.auth(user) @@ -193,36 +193,38 @@ def cache_key(self) -> str: def url(self) -> str: raise NotImplementedError() - def fetch(self, user: "User") -> Optional[bytes]: - self.screenshot = self._driver.get_png_from_url(self.url, self.element, user) + def get_screenshot(self, user: "User") -> Optional[bytes]: + self.screenshot = self._driver.get_screenshot(self.url, self.element, user) return self.screenshot def get( self, user: "User" = None, cache: "Cache" = None, thumb_size: WindowSize = None ) -> Optional[BytesIO]: - payload = self._get_thumb_bytes(user=user, thumb_size=thumb_size, cache=cache) - if payload: - return BytesIO(payload) - return None + """ + Get thumbnail screenshot has BytesIO from cache or fetch - def _get_thumb_bytes( - self, user: "User" = None, cache: "Cache" = None, thumb_size: WindowSize = None - ) -> Optional[bytes]: - payload = None - cache_key = self.cache_key + :param user: None to use current user or User Model to login and fetch + :param cache: The cache to use + :param thumb_size: Override thumbnail site + """ + payload: Optional[bytes] = None thumb_size = thumb_size or self.thumb_size if cache: - payload = cache.get(cache_key) + payload = cache.get(self.cache_key) if not payload: - payload = self.compute_and_cache(user, cache, thumb_size) + payload = self.compute_and_cache( + user=user, thumb_size=thumb_size, cache=cache + ) else: - logging.info(f"Loaded thumbnail from cache: {cache_key}") - return payload + logging.info(f"Loaded thumbnail from cache: {self.cache_key}") + if payload: + return BytesIO(payload) + return None - def get_from_cache(self, cache: "Cache") -> Optional[BytesIO]: + def get_from_cache(self, cache: "Cache") -> Optional[bytes]: payload = cache.get(self.cache_key) if payload: - return BytesIO(payload) + return payload return None def compute_and_cache( # pylint: disable=too-many-arguments @@ -253,7 +255,7 @@ def compute_and_cache( # pylint: disable=too-many-arguments # Assuming all sorts of things can go wrong with Selenium try: - payload = self.fetch(user=user) + payload = self.get_screenshot(user=user) except Exception as e: # pylint: disable=broad-except logging.error("Failed at generating thumbnail") logging.exception(e) @@ -267,7 +269,7 @@ def compute_and_cache( # pylint: disable=too-many-arguments payload = None if payload and cache: - logging.info(f"Caching thumbnail: {cache_key}") + logging.info(f"Caching thumbnail: {cache_key} {cache}") cache.set(cache_key, payload) return payload diff --git a/tests/thumbnails_tests.py b/tests/thumbnails_tests.py new file mode 100644 index 000000000000..6eb3d49caee0 --- /dev/null +++ b/tests/thumbnails_tests.py @@ -0,0 +1,31 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from superset import db +from superset.models.dashboard import Dashboard + +from .base_tests import SupersetTestCase + + +class ThumbnailsTests(SupersetTestCase): + def test_simple_get_screenshot(self): + """ + Thumbnails: Simple get screen shot + """ + self.login(username="admin") + uri = f"api/v1/dashboard/1/thumbnail/sha/" + rv = self.client.get(uri) + self.assertEqual(rv.status_code, 200) From 70d28d39a47ca5e797444d2e719ae9d3157201d7 Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Mon, 27 Jan 2020 10:54:31 +0000 Subject: [PATCH 14/28] Thumbnails behind a feature flag --- docs/installation.rst | 5 ++- superset/config.py | 6 ++++ superset/tasks/thumbnails.py | 9 ++--- superset/utils/cache_manager.py | 4 +-- superset/utils/selenium.py | 4 +-- superset/views/base_api.py | 1 + superset/views/chart/api.py | 59 +++++++++++++++++++++++++++++++-- superset/views/dashboard/api.py | 8 ++++- tests/thumbnails_tests.py | 2 +- 9 files changed, 85 insertions(+), 13 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index 89837d083bdb..3cae55dd0e66 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -645,6 +645,7 @@ Thumbnails are store on cache, an example config could be: .. code-block:: python + from flask import Flask from werkzeug.contrib.cache import RedisCache ... @@ -659,11 +660,13 @@ Thumbnails are store on cache, an example config could be: CELERY_CONFIG = CeleryConfig - def init_thumbnail_cache(app): + def init_thumbnail_cache(app: Flask) -> RedisCache: return RedisCache(host='localhost', port=6379, key_prefix='superset_thumbnails_') THUMBNAIL_CACHE_CONFIG = init_thumbnail_cache + # Async selenium thumbnail task will use the following user + THUMBNAIL_SELENIUM_USER = "Admin" Using the above example cache keys for dashboards will be `superset_thumbnails_thumb__dashboard__{ID}` diff --git a/superset/config.py b/superset/config.py index 6dd70987d520..330bbaafd796 100644 --- a/superset/config.py +++ b/superset/config.py @@ -276,6 +276,8 @@ def _try_json_readsha(filepath, length): # pylint: disable=unused-argument "CLIENT_CACHE": False, "ENABLE_EXPLORE_JSON_CSRF_PROTECTION": False, "PRESTO_EXPAND_DATA": False, + # Exposes API endpoint to compute thumbnails + "THUMBNAILS": False, } # This is merely a default. @@ -296,6 +298,10 @@ def _try_json_readsha(filepath, length): # pylint: disable=unused-argument # return feature_flags_dict GET_FEATURE_FLAGS_FUNC = None +# --------------------------------------------------- +# Thumbnail config +# --------------------------------------------------- +THUMBNAIL_SELENIUM_USER = "Admin" # --------------------------------------------------- # Image and file configuration diff --git a/superset/tasks/thumbnails.py b/superset/tasks/thumbnails.py index 75d5da233ac2..68172034f5c9 100644 --- a/superset/tasks/thumbnails.py +++ b/superset/tasks/thumbnails.py @@ -20,9 +20,10 @@ import logging +from flask import current_app from superset import app, security_manager, thumbnail_cache from superset.tasks.celery_app import app as celery_app -from superset.utils.selenium import DashboardScreenshot, SliceScreenshot +from superset.utils.selenium import DashboardScreenshot, ChartScreenshot @celery_app.task(name="cache_chart_thumbnail", soft_time_limit=300) @@ -32,8 +33,8 @@ def cache_chart_thumbnail(chart_id: int, force: bool = False): logging.warning("No cache set, refusing to compute") return None logging.info(f"Caching chart {chart_id}") - screenshot = SliceScreenshot(model_id=chart_id) - user = security_manager.find_user("Admin") + screenshot = ChartScreenshot(model_id=chart_id) + user = security_manager.find_user(current_app.config["THUMBNAIL_SELENIUM_USER"]) screenshot.compute_and_cache(user=user, cache=thumbnail_cache, force=force) @@ -45,5 +46,5 @@ def cache_dashboard_thumbnail(dashboard_id: int, force: bool = False): return None logging.info(f"Caching dashboard {dashboard_id}") screenshot = DashboardScreenshot(model_id=dashboard_id) - user = security_manager.find_user("Admin") + user = security_manager.find_user(current_app.config["THUMBNAIL_SELENIUM_USER"]) screenshot.compute_and_cache(user=user, cache=thumbnail_cache, force=force) diff --git a/superset/utils/cache_manager.py b/superset/utils/cache_manager.py index ee03e0abd3c6..4a625ea6d00b 100644 --- a/superset/utils/cache_manager.py +++ b/superset/utils/cache_manager.py @@ -34,7 +34,7 @@ def init_app(self, app: Flask) -> None: app, app.config["TABLE_NAMES_CACHE_CONFIG"] ) self._thumbnail_cache = self._setup_cache( - app, app.config.get("THUMBNAIL_CACHE_CONFIG") + app, app.config["THUMBNAIL_CACHE_CONFIG"] ) @staticmethod @@ -56,5 +56,5 @@ def cache(self) -> Cache: return self._cache @property - def thumbnail_cache(self): + def thumbnail_cache(self) -> Cache: return self._thumbnail_cache diff --git a/superset/utils/selenium.py b/superset/utils/selenium.py index 70666c465cab..bb2c0f2a5878 100644 --- a/superset/utils/selenium.py +++ b/superset/utils/selenium.py @@ -299,8 +299,8 @@ def resize_image( return new_img.read() -class SliceScreenshot(BaseScreenshot): - thumbnail_type: str = "slice" +class ChartScreenshot(BaseScreenshot): + thumbnail_type: str = "chart" element: str = "chart-container" window_size: WindowSize = (600, int(600 * 0.75)) thumb_size: WindowSize = (300, int(300 * 0.75)) diff --git a/superset/views/base_api.py b/superset/views/base_api.py index 976f5b499e80..9bfb004d1c8a 100644 --- a/superset/views/base_api.py +++ b/superset/views/base_api.py @@ -73,6 +73,7 @@ class BaseSupersetModelRestApi(ModelRestApi): "bulk_delete": "delete", "info": "list", "related": "list", + "thumbnail": "list", } order_rel_fields: Dict[str, Tuple[str, str]] = {} diff --git a/superset/views/chart/api.py b/superset/views/chart/api.py index 47c7d7fb3f06..daac47023a7f 100644 --- a/superset/views/chart/api.py +++ b/superset/views/chart/api.py @@ -16,13 +16,17 @@ # under the License. from typing import Dict, List -from flask import current_app +from flask import current_app, Response +from flask_appbuilder.api import expose, safe, protect from flask_appbuilder.models.sqla.interface import SQLAInterface from marshmallow import fields, post_load, validates_schema, ValidationError from marshmallow.validate import Length from sqlalchemy.orm.exc import NoResultFound +from werkzeug.wsgi import FileWrapper -from superset import appbuilder +from superset import appbuilder, is_feature_enabled, thumbnail_cache +from superset.utils.selenium import ChartScreenshot +from superset.constants import RouteMethod from superset.connectors.connector_registry import ConnectorRegistry from superset.exceptions import SupersetException from superset.models.dashboard import Dashboard @@ -130,6 +134,7 @@ def make_object(self, data: Dict, discard: List[str] = None) -> Slice: class ChartRestApi(SliceMixin, BaseOwnedModelRestApi): datamodel = SQLAInterface(Slice) + include_route_methods = RouteMethod.REST_MODEL_VIEW_CRUD_SET | {RouteMethod.RELATED} resource_name = "chart" allow_browser_login = True @@ -170,5 +175,55 @@ class ChartRestApi(SliceMixin, BaseOwnedModelRestApi): } filter_rel_fields_field = {"owners": "first_name", "dashboards": "dashboard_title"} + def __init__(self, *args, **kwargs): + if is_feature_enabled("THUMBNAILS"): + self.include_route_methods = self.include_route_methods | {"thumbnail"} + super().__init__(*args, **kwargs) + + @expose("//thumbnail//", methods=["GET"]) + @protect() + @safe + def thumbnail(self, pk, sha): # pylint: disable=invalid-name + """Get Chart thumbnail + --- + get: + description: Compute or get already computed chart thumbnail from cache + parameters: + - in: path + schema: + type: integer + name: pk + - in: path + schema: + type: string + name: sha + responses: + 200: + description: Chart thumbnail image + content: + image/*: + schema: + type: string + format: binary + 401: + $ref: '#/components/responses/401' + 404: + $ref: '#/components/responses/404' + 422: + $ref: '#/components/responses/422' + 500: + $ref: '#/components/responses/500' + """ + print(sha) + query = self.datamodel.session.query(Slice) + slice = self._base_filters.apply_all(query).get(pk) + if not slice: + return self.response_404() + # fetch the chart screenshot using the current user and cache if set + screenshot = ChartScreenshot(pk).get(cache=thumbnail_cache) + return Response( + FileWrapper(screenshot), mimetype="image/png", direct_passthrough=True + ) + appbuilder.add_api(ChartRestApi) diff --git a/superset/views/dashboard/api.py b/superset/views/dashboard/api.py index 4aa145885f48..98130f6f86e5 100644 --- a/superset/views/dashboard/api.py +++ b/superset/views/dashboard/api.py @@ -28,7 +28,7 @@ from sqlalchemy.exc import SQLAlchemyError from werkzeug.wsgi import FileWrapper -from superset import thumbnail_cache +from superset import is_feature_enabled, thumbnail_cache from superset.utils.selenium import DashboardScreenshot from superset.constants import RouteMethod from superset.exceptions import SupersetException, SupersetSecurityException @@ -176,6 +176,11 @@ class DashboardRestApi(DashboardMixin, BaseOwnedModelRestApi): } filter_rel_fields_field = {"owners": "first_name", "slices": "slice_name"} + def __init__(self, *args, **kwargs): + if is_feature_enabled("THUMBNAILS"): + self.include_route_methods = self.include_route_methods | {"thumbnail"} + super().__init__(*args, **kwargs) + @expose("/", methods=["DELETE"]) @protect() @safe @@ -270,6 +275,7 @@ def thumbnail(self, pk, sha): # pylint: disable=invalid-name """Get Dashboard thumbnail --- get: + description: Compute or get already computed dashboard thumbnail from cache parameters: - in: path schema: diff --git a/tests/thumbnails_tests.py b/tests/thumbnails_tests.py index 6eb3d49caee0..07ec164e50f3 100644 --- a/tests/thumbnails_tests.py +++ b/tests/thumbnails_tests.py @@ -26,6 +26,6 @@ def test_simple_get_screenshot(self): Thumbnails: Simple get screen shot """ self.login(username="admin") - uri = f"api/v1/dashboard/1/thumbnail/sha/" + uri = "api/v1/dashboard/1/thumbnail/sha/" rv = self.client.get(uri) self.assertEqual(rv.status_code, 200) From b2adbcf5d49b1e991cedec23c1e61855de26a244 Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Mon, 27 Jan 2020 12:56:57 +0000 Subject: [PATCH 15/28] Make REST compute async when no cache key found --- superset/config.py | 3 ++- superset/models/core.py | 9 +++++++++ superset/tasks/thumbnails.py | 2 +- superset/utils/selenium.py | 4 ++-- superset/views/chart/api.py | 16 ++++++++++------ superset/views/dashboard/api.py | 20 +++++++++++++++++--- tests/thumbnails_tests.py | 21 +++++++++++++++------ 7 files changed, 56 insertions(+), 19 deletions(-) diff --git a/superset/config.py b/superset/config.py index 330bbaafd796..f4272de3258b 100644 --- a/superset/config.py +++ b/superset/config.py @@ -299,9 +299,10 @@ def _try_json_readsha(filepath, length): # pylint: disable=unused-argument GET_FEATURE_FLAGS_FUNC = None # --------------------------------------------------- -# Thumbnail config +# Thumbnail config (behind feature flag) # --------------------------------------------------- THUMBNAIL_SELENIUM_USER = "Admin" +THUMBNAIL_CACHE_CONFIG: CacheConfig = {"CACHE_TYPE": "null"} # --------------------------------------------------- # Image and file configuration diff --git a/superset/models/core.py b/superset/models/core.py index 1235855343a2..41a4fa32e183 100755 --- a/superset/models/core.py +++ b/superset/models/core.py @@ -56,6 +56,7 @@ from superset.models.dashboard import Dashboard from superset.models.helpers import AuditMixinNullable, ImportMixin from superset.models.tags import DashboardUpdater, FavStarUpdater +from superset.tasks.thumbnails import cache_dashboard_thumbnail from superset.utils import cache as cache_util, core as utils config = app.config @@ -673,3 +674,11 @@ class FavStar(Model): # pylint: disable=too-few-public-methods sqla.event.listen(Dashboard, "after_delete", DashboardUpdater.after_delete) sqla.event.listen(FavStar, "after_insert", FavStarUpdater.after_insert) sqla.event.listen(FavStar, "after_delete", FavStarUpdater.after_delete) + + +def event_after_dashboard_changed(mapper, connection, target): + cache_dashboard_thumbnail.delay(target.id, force=True) + + +sqla.event.listen(Dashboard, "after_insert", event_after_dashboard_changed) +sqla.event.listen(Dashboard, "after_update", event_after_dashboard_changed) diff --git a/superset/tasks/thumbnails.py b/superset/tasks/thumbnails.py index 68172034f5c9..2dc2223858f3 100644 --- a/superset/tasks/thumbnails.py +++ b/superset/tasks/thumbnails.py @@ -22,7 +22,7 @@ from flask import current_app from superset import app, security_manager, thumbnail_cache -from superset.tasks.celery_app import app as celery_app +from superset.extensions import celery_app from superset.utils.selenium import DashboardScreenshot, ChartScreenshot diff --git a/superset/utils/selenium.py b/superset/utils/selenium.py index bb2c0f2a5878..9fbb431b47eb 100644 --- a/superset/utils/selenium.py +++ b/superset/utils/selenium.py @@ -221,10 +221,10 @@ def get( return BytesIO(payload) return None - def get_from_cache(self, cache: "Cache") -> Optional[bytes]: + def get_from_cache(self, cache: "Cache") -> Optional[BytesIO]: payload = cache.get(self.cache_key) if payload: - return payload + return BytesIO(payload) return None def compute_and_cache( # pylint: disable=too-many-arguments diff --git a/superset/views/chart/api.py b/superset/views/chart/api.py index daac47023a7f..639964752c80 100644 --- a/superset/views/chart/api.py +++ b/superset/views/chart/api.py @@ -17,7 +17,7 @@ from typing import Dict, List from flask import current_app, Response -from flask_appbuilder.api import expose, safe, protect +from flask_appbuilder.api import expose, protect, safe from flask_appbuilder.models.sqla.interface import SQLAInterface from marshmallow import fields, post_load, validates_schema, ValidationError from marshmallow.validate import Length @@ -25,13 +25,14 @@ from werkzeug.wsgi import FileWrapper from superset import appbuilder, is_feature_enabled, thumbnail_cache -from superset.utils.selenium import ChartScreenshot -from superset.constants import RouteMethod from superset.connectors.connector_registry import ConnectorRegistry +from superset.constants import RouteMethod from superset.exceptions import SupersetException from superset.models.dashboard import Dashboard from superset.models.slice import Slice +from superset.tasks.thumbnails import cache_chart_thumbnail from superset.utils import core as utils +from superset.utils.selenium import ChartScreenshot from superset.views.base_api import BaseOwnedModelRestApi from superset.views.base_schemas import BaseOwnedSchema, validate_owner from superset.views.chart.mixin import SliceMixin @@ -216,11 +217,14 @@ def thumbnail(self, pk, sha): # pylint: disable=invalid-name """ print(sha) query = self.datamodel.session.query(Slice) - slice = self._base_filters.apply_all(query).get(pk) - if not slice: + chart = self._base_filters.apply_all(query).get(pk) + if not chart: return self.response_404() # fetch the chart screenshot using the current user and cache if set - screenshot = ChartScreenshot(pk).get(cache=thumbnail_cache) + screenshot = ChartScreenshot(pk).get_from_cache(cache=thumbnail_cache) + if not screenshot: + cache_chart_thumbnail.delay(chart.id, force=True) + return self.response(202, message="OK Async") return Response( FileWrapper(screenshot), mimetype="image/png", direct_passthrough=True ) diff --git a/superset/views/dashboard/api.py b/superset/views/dashboard/api.py index 98130f6f86e5..71ecb21979b3 100644 --- a/superset/views/dashboard/api.py +++ b/superset/views/dashboard/api.py @@ -29,11 +29,12 @@ from werkzeug.wsgi import FileWrapper from superset import is_feature_enabled, thumbnail_cache -from superset.utils.selenium import DashboardScreenshot from superset.constants import RouteMethod from superset.exceptions import SupersetException, SupersetSecurityException from superset.models.dashboard import Dashboard +from superset.tasks.thumbnails import cache_dashboard_thumbnail from superset.utils import core as utils +from superset.utils.selenium import DashboardScreenshot from superset.views.base import check_ownership, generate_download_headers from superset.views.base_api import BaseOwnedModelRestApi from superset.views.base_schemas import BaseOwnedSchema, validate_owner @@ -275,7 +276,8 @@ def thumbnail(self, pk, sha): # pylint: disable=invalid-name """Get Dashboard thumbnail --- get: - description: Compute or get already computed dashboard thumbnail from cache + description: >- + Compute async or get already computed dashboard thumbnail from cache parameters: - in: path schema: @@ -293,6 +295,15 @@ def thumbnail(self, pk, sha): # pylint: disable=invalid-name schema: type: string format: binary + 202: + description: Thumbnail does not exist on cache, fired async to compute + content: + application/json: + schema: + type: object + properties: + message: + type: string 401: $ref: '#/components/responses/401' 404: @@ -308,7 +319,10 @@ def thumbnail(self, pk, sha): # pylint: disable=invalid-name if not dashboard: return self.response_404() # fetch the dashboard screenshot using the current user and cache if set - screenshot = DashboardScreenshot(pk).get(cache=thumbnail_cache) + screenshot = DashboardScreenshot(pk).get_from_cache(cache=thumbnail_cache) + if not screenshot: + cache_dashboard_thumbnail.delay(dashboard.id, force=True) + return self.response(202, message="OK Async") return Response( FileWrapper(screenshot), mimetype="image/png", direct_passthrough=True ) diff --git a/tests/thumbnails_tests.py b/tests/thumbnails_tests.py index 07ec164e50f3..8affc919e39e 100644 --- a/tests/thumbnails_tests.py +++ b/tests/thumbnails_tests.py @@ -14,18 +14,27 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -from superset import db -from superset.models.dashboard import Dashboard +# from superset import db +# from superset.models.dashboard import Dashboard +from unittest.mock import patch +from superset import app, is_feature_enabled from .base_tests import SupersetTestCase class ThumbnailsTests(SupersetTestCase): + + def setUp(self) -> None: + app.config["THUMBNAILS"] = True + super().__init__() + + @patch.dict( + "superset.extensions.feature_flag_manager._feature_flags", + {"THUMBNAILS": True}, + clear=True, + ) def test_simple_get_screenshot(self): """ Thumbnails: Simple get screen shot """ - self.login(username="admin") - uri = "api/v1/dashboard/1/thumbnail/sha/" - rv = self.client.get(uri) - self.assertEqual(rv.status_code, 200) + self.assertTrue(is_feature_enabled("THUMBNAILS")) From 9f27fda1cc9f6db1357bde326b4c6fc0e23adc7f Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Mon, 27 Jan 2020 13:05:45 +0000 Subject: [PATCH 16/28] lint --- superset/tasks/thumbnails.py | 3 ++- tests/thumbnails_tests.py | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/superset/tasks/thumbnails.py b/superset/tasks/thumbnails.py index 2dc2223858f3..b02cbc946fb5 100644 --- a/superset/tasks/thumbnails.py +++ b/superset/tasks/thumbnails.py @@ -21,9 +21,10 @@ import logging from flask import current_app + from superset import app, security_manager, thumbnail_cache from superset.extensions import celery_app -from superset.utils.selenium import DashboardScreenshot, ChartScreenshot +from superset.utils.selenium import ChartScreenshot, DashboardScreenshot @celery_app.task(name="cache_chart_thumbnail", soft_time_limit=300) diff --git a/tests/thumbnails_tests.py b/tests/thumbnails_tests.py index 8affc919e39e..64af1bddbb51 100644 --- a/tests/thumbnails_tests.py +++ b/tests/thumbnails_tests.py @@ -23,7 +23,6 @@ class ThumbnailsTests(SupersetTestCase): - def setUp(self) -> None: app.config["THUMBNAILS"] = True super().__init__() From 8c80aeefbd7f0812ce8f8bad3ac15f66a0377637 Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Mon, 27 Jan 2020 15:12:15 +0000 Subject: [PATCH 17/28] Tests still not working --- superset/utils/selenium.py | 3 +- tests/superset_test_config_thumbnails.py | 71 ++++++++++++++++++++++++ tests/thumbnails_tests.py | 7 +-- 3 files changed, 74 insertions(+), 7 deletions(-) create mode 100644 tests/superset_test_config_thumbnails.py diff --git a/superset/utils/selenium.py b/superset/utils/selenium.py index 9fbb431b47eb..d6c169d94b96 100644 --- a/superset/utils/selenium.py +++ b/superset/utils/selenium.py @@ -48,9 +48,8 @@ def get_auth_cookies(user: "User") -> List[Dict]: # Login with the user specified to get the reports - with current_app.test_request_context(): + with current_app.test_request_context("/login"): login_user(user) - # A mock response object to get the cookie information from response = Response() current_app.session_interface.save_session(current_app, session, response) diff --git a/tests/superset_test_config_thumbnails.py b/tests/superset_test_config_thumbnails.py new file mode 100644 index 000000000000..88f309568318 --- /dev/null +++ b/tests/superset_test_config_thumbnails.py @@ -0,0 +1,71 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from copy import copy + +from flask import Flask +from werkzeug.contrib.cache import RedisCache + +from superset.config import * # type: ignore + +AUTH_USER_REGISTRATION_ROLE = "alpha" +SQLALCHEMY_DATABASE_URI = "sqlite:///" + os.path.join(DATA_DIR, "unittests.db") +DEBUG = True +SUPERSET_WEBSERVER_PORT = 8081 + +# Allowing SQLALCHEMY_DATABASE_URI to be defined as an env var for +# continuous integration +if "SUPERSET__SQLALCHEMY_DATABASE_URI" in os.environ: + SQLALCHEMY_DATABASE_URI = os.environ["SUPERSET__SQLALCHEMY_DATABASE_URI"] + +SQL_SELECT_AS_CTA = True +SQL_MAX_ROW = 666 + + +def GET_FEATURE_FLAGS_FUNC(ff): + ff_copy = copy(ff) + ff_copy["super"] = "set" + return ff_copy + + +TESTING = True +WTF_CSRF_ENABLED = False +PUBLIC_ROLE_LIKE_GAMMA = True +AUTH_ROLE_PUBLIC = "Public" +EMAIL_NOTIFICATIONS = False + +CACHE_CONFIG = {"CACHE_TYPE": "simple"} + + +class CeleryConfig(object): + BROKER_URL = "redis://localhost" + CELERY_IMPORTS = ("superset.sql_lab", "superset.tasks.thumbnails") + CELERY_ANNOTATIONS = {"sql_lab.add": {"rate_limit": "10/s"}} + CONCURRENCY = 1 + + +CELERY_CONFIG = CeleryConfig + +FEATURE_FLAGS = {"THUMBNAILS": True} + + +def init_thumbnail_cache(app: Flask) -> RedisCache: + return RedisCache( + host="localhost", key_prefix="superset_thumbnails_", default_timeout=10000 + ) + + +THUMBNAIL_CACHE_CONFIG = init_thumbnail_cache diff --git a/tests/thumbnails_tests.py b/tests/thumbnails_tests.py index 64af1bddbb51..d6b2224143e4 100644 --- a/tests/thumbnails_tests.py +++ b/tests/thumbnails_tests.py @@ -18,15 +18,12 @@ # from superset.models.dashboard import Dashboard from unittest.mock import patch -from superset import app, is_feature_enabled +from superset import is_feature_enabled + from .base_tests import SupersetTestCase class ThumbnailsTests(SupersetTestCase): - def setUp(self) -> None: - app.config["THUMBNAILS"] = True - super().__init__() - @patch.dict( "superset.extensions.feature_flag_manager._feature_flags", {"THUMBNAILS": True}, From eb70a8c98c5bd1f2c56d316a0a7e35d596043a22 Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Tue, 25 Feb 2020 14:56:09 +0000 Subject: [PATCH 18/28] [thumbnails] Make pillow optional, lint and tests --- requirements-dev.txt | 1 + requirements.txt | 1 - setup.py | 2 +- superset/utils/selenium.py | 61 +++++++++++++++++++++---------------- superset/views/chart/api.py | 3 +- tests/thumbnails_tests.py | 9 ++---- tox.ini | 8 +++++ 7 files changed, 50 insertions(+), 35 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 6478ae8936cb..9d34520bcefa 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -33,3 +33,4 @@ redis==3.2.1 requests==2.22.0 statsd==3.3.0 tox==3.11.1 +pillow==7.0.0 diff --git a/requirements.txt b/requirements.txt index eeb462b6caf6..5695d0c8839b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -55,7 +55,6 @@ numpy==1.18.1 # via pandas, pyarrow pandas==0.25.3 parsedatetime==2.5 pathlib2==2.3.5 -pillow==7.0.0 polyline==1.4.0 prison==0.1.2 # via flask-appbuilder py==1.8.1 # via retry diff --git a/setup.py b/setup.py index 003776f4ebae..5103144ca49e 100644 --- a/setup.py +++ b/setup.py @@ -91,7 +91,6 @@ def get_git_sha(): "pandas>=0.25.3, <1.0", "parsedatetime", "pathlib2", - "Pillow>=7.0.0, <8.0.0", "polyline", "python-dateutil", "python-dotenv", @@ -119,6 +118,7 @@ def get_git_sha(): "hana": ["hdbcli==2.4.162", "sqlalchemy_hana==0.4.0"], "dremio": ["sqlalchemy_dremio>=0.5.0dev0"], "cockroachdb": ["cockroachdb==0.3.3"], + "thumbnails": ["Pillow>=7.0.0, <8.0.0"], }, python_requires="~=3.6", author="Apache Software Foundation", diff --git a/superset/utils/selenium.py b/superset/utils/selenium.py index d6c169d94b96..86fff3eb5d4b 100644 --- a/superset/utils/selenium.py +++ b/superset/utils/selenium.py @@ -31,7 +31,12 @@ from selenium.webdriver.support.ui import WebDriverWait from werkzeug.http import parse_cookie -from PIL import Image +logger = logging.getLogger(__name__) + +try: + from PIL import Image +except ModuleNotFoundError: + logger.info("No PIL instalation found") if TYPE_CHECKING: # pylint: disable=unused-import @@ -93,7 +98,10 @@ def get_url_path(view: str, **kwargs) -> str: class AuthWebDriverProxy: def __init__( - self, driver_type: str, window: WindowSize = None, auth_func: Callable = None + self, + driver_type: str, + window: Optional[WindowSize] = None, + auth_func: Optional[Callable] = None, ): self._driver_type = driver_type self._window: WindowSize = window or (800, 600) @@ -117,7 +125,7 @@ def create(self) -> WebDriver: options.add_argument("--headless") kwargs: Dict = dict(options=options) kwargs.update(current_app.config["WEBDRIVER_CONFIGURATION"]) - logging.info("Init selenium driver") + logger.info("Init selenium driver") return driver_class(**kwargs) def auth(self, user: "User") -> WebDriver: @@ -147,23 +155,23 @@ def get_screenshot( driver.set_window_size(*self._window) driver.get(url) img: Optional[bytes] = None - logging.debug(f"Sleeping for {SELENIUM_HEADSTART} seconds") + logger.debug(f"Sleeping for {SELENIUM_HEADSTART} seconds") time.sleep(SELENIUM_HEADSTART) try: - logging.debug(f"Wait for the presence of {element_name}") + logger.debug(f"Wait for the presence of {element_name}") element = WebDriverWait(driver, 10).until( EC.presence_of_element_located((By.CLASS_NAME, element_name)) ) - logging.debug(f"Wait for .loading to be done") + logger.debug(f"Wait for .loading to be done") WebDriverWait(driver, 60).until_not( EC.presence_of_all_elements_located((By.CLASS_NAME, "loading")) ) - logging.info("Taking a PNG screenshot") + logger.info("Taking a PNG screenshot") img = element.screenshot_as_png except TimeoutException: - logging.error("Selenium timed out") + logger.error("Selenium timed out") except WebDriverException as e: - logging.exception(e) + logger.error(e) # Some webdrivers do not support screenshots for elements. # In such cases, take a screenshot of the entire page. img = driver.screenshot() # pylint: disable=no-member @@ -197,7 +205,10 @@ def get_screenshot(self, user: "User") -> Optional[bytes]: return self.screenshot def get( - self, user: "User" = None, cache: "Cache" = None, thumb_size: WindowSize = None + self, + user: "User" = None, + cache: "Cache" = None, + thumb_size: Optional[WindowSize] = None, ) -> Optional[BytesIO]: """ Get thumbnail screenshot has BytesIO from cache or fetch @@ -215,7 +226,7 @@ def get( user=user, thumb_size=thumb_size, cache=cache ) else: - logging.info(f"Loaded thumbnail from cache: {self.cache_key}") + logger.info(f"Loaded thumbnail from cache: {self.cache_key}") if payload: return BytesIO(payload) return None @@ -229,7 +240,7 @@ def get_from_cache(self, cache: "Cache") -> Optional[BytesIO]: def compute_and_cache( # pylint: disable=too-many-arguments self, user: "User" = None, - thumb_size: WindowSize = None, + thumb_size: Optional[WindowSize] = None, cache: "Cache" = None, force: bool = True, ) -> Optional[bytes]: @@ -245,10 +256,10 @@ def compute_and_cache( # pylint: disable=too-many-arguments """ cache_key = self.cache_key if not force and cache and cache.get(cache_key): - logging.info("Thumb already cached, skipping...") + logger.info("Thumb already cached, skipping...") return None thumb_size = thumb_size or self.thumb_size - logging.info(f"Processing url for thumbnail: {cache_key}") + logger.info(f"Processing url for thumbnail: {cache_key}") payload = None @@ -256,19 +267,17 @@ def compute_and_cache( # pylint: disable=too-many-arguments try: payload = self.get_screenshot(user=user) except Exception as e: # pylint: disable=broad-except - logging.error("Failed at generating thumbnail") - logging.exception(e) + logger.error("Failed at generating thumbnail %s", e) if payload and self.window_size != thumb_size: try: - payload = self.resize_image(payload, size=thumb_size) + payload = self.resize_image(payload, thumb_size=thumb_size) except Exception as e: # pylint: disable=broad-except - logging.error("Failed at resizing thumbnail") - logging.exception(e) + logger.error("Failed at resizing thumbnail %s", e) payload = None if payload and cache: - logging.info(f"Caching thumbnail: {cache_key} {cache}") + logger.info(f"Caching thumbnail: {cache_key} {cache}") cache.set(cache_key, payload) return payload @@ -277,19 +286,19 @@ def resize_image( cls, img_bytes: bytes, output: str = "png", - size: WindowSize = None, + thumb_size: Optional[WindowSize] = None, crop: bool = True, ) -> bytes: - size = size or cls.thumb_size + thumb_size = thumb_size or cls.thumb_size img = Image.open(BytesIO(img_bytes)) - logging.debug(f"Selenium image size: {img.size}") + logger.debug(f"Selenium image size: {img.size}") if crop and img.size[1] != cls.window_size[1]: desired_ratio = float(cls.window_size[1]) / cls.window_size[0] desired_width = int(img.size[0] * desired_ratio) - logging.debug(f"Cropping to: {img.size[0]}*{desired_width}") + logger.debug(f"Cropping to: {img.size[0]}*{desired_width}") img = img.crop((0, 0, img.size[0], desired_width)) - logging.debug(f"Resizing to {size}") - img = img.resize(size, Image.ANTIALIAS) + logger.debug(f"Resizing to {thumb_size}") + img = img.resize(thumb_size, Image.ANTIALIAS) new_img = BytesIO() if output != "png": img = img.convert("RGB") diff --git a/superset/views/chart/api.py b/superset/views/chart/api.py index af9cf0145376..80403ee8b7ce 100644 --- a/superset/views/chart/api.py +++ b/superset/views/chart/api.py @@ -24,7 +24,7 @@ from sqlalchemy.orm.exc import NoResultFound from werkzeug.wsgi import FileWrapper -from superset import appbuilder, is_feature_enabled, thumbnail_cache +from superset import is_feature_enabled, thumbnail_cache from superset.connectors.connector_registry import ConnectorRegistry from superset.constants import RouteMethod from superset.exceptions import SupersetException @@ -227,6 +227,7 @@ def thumbnail(self, pk, sha): # pylint: disable=invalid-name return self.response_404() # fetch the chart screenshot using the current user and cache if set screenshot = ChartScreenshot(pk).get_from_cache(cache=thumbnail_cache) + # If not screenshot then send request to compute thumb to celery if not screenshot: cache_chart_thumbnail.delay(chart.id, force=True) return self.response(202, message="OK Async") diff --git a/tests/thumbnails_tests.py b/tests/thumbnails_tests.py index d6b2224143e4..6d2df00f1767 100644 --- a/tests/thumbnails_tests.py +++ b/tests/thumbnails_tests.py @@ -24,13 +24,10 @@ class ThumbnailsTests(SupersetTestCase): - @patch.dict( - "superset.extensions.feature_flag_manager._feature_flags", - {"THUMBNAILS": True}, - clear=True, - ) + def test_simple_get_screenshot(self): """ Thumbnails: Simple get screen shot """ - self.assertTrue(is_feature_enabled("THUMBNAILS")) + if is_feature_enabled("THUMBNAILS"): + self.assertTrue(False) diff --git a/tox.ini b/tox.ini index 0a7dcfffaa9e..e37f488424f7 100644 --- a/tox.ini +++ b/tox.ini @@ -33,6 +33,14 @@ setenv = whitelist_externals = npm +[testenv:thumbnails] +setenv = + SUPERSET_CONFIG = tests.superset_test_config_thumbnails +deps = + -rrequirements.txt + -rrequirements-dev.txt + .[postgres] + [testenv:black] commands = black --check setup.py superset tests From 0eae4286265ab3b10b69e02fa416312f6ec06bc0 Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Wed, 26 Feb 2020 11:22:21 +0000 Subject: [PATCH 19/28] [thumbnails] improved isolations of the feature flag --- docs/installation.rst | 16 +++++++++++++--- superset/models/core.py | 9 --------- superset/models/dashboard.py | 13 +++++++++++++ superset/models/slice.py | 12 ++++++++++++ superset/tasks/thumbnails.py | 6 ++++-- tests/thumbnails_tests.py | 27 +++++++++++++++++++++++---- 6 files changed, 65 insertions(+), 18 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index e580730227de..5534c07a630b 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -643,7 +643,17 @@ For other strategies, check the `superset/tasks/cache.py` file. Caching Thumbnails ------------------ -Thumbnails are store on cache, an example config could be: +This is an optional feature that can be turned on by activating it's feature flag on config: + +.. code-block:: python + + FEATURE_FLAGS = {"THUMBNAILS": True} + + +For this feature you will need a cache system and celery workers. All thumbnails are store on cache and are processed +asynchronously by the workers. + +An example config could be: .. code-block:: python @@ -681,8 +691,8 @@ You can override the base URL for selenium using: Additional selenium web drive config can be set using `WEBDRIVER_CONFIGURATION` -You can implement a custom function to authenticate selenium by default will use flask-login session cookie. -Your custom function signature: +You can implement a custom function to authenticate selenium, the default uses flask-login session cookie. +An example of a custom function signature: .. code-block:: python diff --git a/superset/models/core.py b/superset/models/core.py index 75346d3dc928..b5f94fd87632 100755 --- a/superset/models/core.py +++ b/superset/models/core.py @@ -56,7 +56,6 @@ from superset.models.dashboard import Dashboard from superset.models.helpers import AuditMixinNullable, ImportMixin from superset.models.tags import DashboardUpdater, FavStarUpdater -from superset.tasks.thumbnails import cache_dashboard_thumbnail from superset.utils import cache as cache_util, core as utils config = app.config @@ -682,11 +681,3 @@ class FavStar(Model): # pylint: disable=too-few-public-methods sqla.event.listen(Dashboard, "after_delete", DashboardUpdater.after_delete) sqla.event.listen(FavStar, "after_insert", FavStarUpdater.after_insert) sqla.event.listen(FavStar, "after_delete", FavStarUpdater.after_delete) - - -def event_after_dashboard_changed(mapper, connection, target): - cache_dashboard_thumbnail.delay(target.id, force=True) - - -sqla.event.listen(Dashboard, "after_insert", event_after_dashboard_changed) -sqla.event.listen(Dashboard, "after_update", event_after_dashboard_changed) diff --git a/superset/models/dashboard.py b/superset/models/dashboard.py index 96668dc2b572..cc4a0e26c409 100644 --- a/superset/models/dashboard.py +++ b/superset/models/dashboard.py @@ -43,6 +43,7 @@ from superset.models.slice import Slice as Slice from superset.models.tags import DashboardUpdater from superset.models.user_attributes import UserAttribute +from superset.tasks.thumbnails import cache_dashboard_thumbnail from superset.utils import core as utils from superset.utils.dashboard_filter_scopes_converter import ( convert_filter_scopes, @@ -473,8 +474,20 @@ def export_dashboards( # pylint: disable=too-many-locals ) +def event_after_dashboard_changed( # pylint: disable=unused-argument + mapper, connection, target +): + cache_dashboard_thumbnail.delay(target.id, force=True) + + # events for updating tags if is_feature_enabled("TAGGING_SYSTEM"): sqla.event.listen(Dashboard, "after_insert", DashboardUpdater.after_insert) sqla.event.listen(Dashboard, "after_update", DashboardUpdater.after_update) sqla.event.listen(Dashboard, "after_delete", DashboardUpdater.after_delete) + + +# events for updating tags +if is_feature_enabled("THUMBNAILS"): + sqla.event.listen(Dashboard, "after_insert", event_after_dashboard_changed) + sqla.event.listen(Dashboard, "after_update", event_after_dashboard_changed) diff --git a/superset/models/slice.py b/superset/models/slice.py index 46ebf307c2b2..4a83d504366f 100644 --- a/superset/models/slice.py +++ b/superset/models/slice.py @@ -30,6 +30,7 @@ from superset.legacy import update_time_range from superset.models.helpers import AuditMixinNullable, ImportMixin from superset.models.tags import ChartUpdater +from superset.tasks.thumbnails import cache_chart_thumbnail from superset.utils import core as utils from superset.viz import BaseViz, viz_types @@ -313,6 +314,12 @@ def set_related_perm(mapper, connection, target): target.schema_perm = ds.schema_perm +def event_after_dashboard_changed( # pylint: disable=unused-argument + mapper, connection, target +): + cache_chart_thumbnail.delay(target.id, force=True) + + sqla.event.listen(Slice, "before_insert", set_related_perm) sqla.event.listen(Slice, "before_update", set_related_perm) @@ -321,3 +328,8 @@ def set_related_perm(mapper, connection, target): sqla.event.listen(Slice, "after_insert", ChartUpdater.after_insert) sqla.event.listen(Slice, "after_update", ChartUpdater.after_update) sqla.event.listen(Slice, "after_delete", ChartUpdater.after_delete) + +# events for updating tags +if is_feature_enabled("THUMBNAILS"): + sqla.event.listen(Slice, "after_insert", event_after_dashboard_changed) + sqla.event.listen(Slice, "after_update", event_after_dashboard_changed) diff --git a/superset/tasks/thumbnails.py b/superset/tasks/thumbnails.py index b02cbc946fb5..0495cec3c12f 100644 --- a/superset/tasks/thumbnails.py +++ b/superset/tasks/thumbnails.py @@ -26,12 +26,14 @@ from superset.extensions import celery_app from superset.utils.selenium import ChartScreenshot, DashboardScreenshot +logger = logging.getLogger(__name__) + @celery_app.task(name="cache_chart_thumbnail", soft_time_limit=300) def cache_chart_thumbnail(chart_id: int, force: bool = False): with app.app_context(): if not thumbnail_cache: - logging.warning("No cache set, refusing to compute") + logger.warning("No cache set, refusing to compute") return None logging.info(f"Caching chart {chart_id}") screenshot = ChartScreenshot(model_id=chart_id) @@ -45,7 +47,7 @@ def cache_dashboard_thumbnail(dashboard_id: int, force: bool = False): if not thumbnail_cache: logging.warning("No cache set, refusing to compute") return None - logging.info(f"Caching dashboard {dashboard_id}") + logger.info(f"Caching dashboard {dashboard_id}") screenshot = DashboardScreenshot(model_id=dashboard_id) user = security_manager.find_user(current_app.config["THUMBNAIL_SELENIUM_USER"]) screenshot.compute_and_cache(user=user, cache=thumbnail_cache, force=force) diff --git a/tests/thumbnails_tests.py b/tests/thumbnails_tests.py index 6d2df00f1767..e77197572b4c 100644 --- a/tests/thumbnails_tests.py +++ b/tests/thumbnails_tests.py @@ -16,18 +16,37 @@ # under the License. # from superset import db # from superset.models.dashboard import Dashboard -from unittest.mock import patch +import subprocess +from unittest import skipUnless -from superset import is_feature_enabled +from superset import app, is_feature_enabled +from tests.test_app import app from .base_tests import SupersetTestCase class ThumbnailsTests(SupersetTestCase): + @classmethod + def setUpClass(cls): + with app.app_context(): + base_dir = app.config["BASE_DIR"] + worker_command = base_dir + "/bin/superset worker -w 2" + subprocess.Popen(worker_command, shell=True, stdout=subprocess.PIPE) + + @classmethod + def tearDownClass(cls): + subprocess.call( + "ps auxww | grep 'celeryd' | awk '{print $2}' | xargs kill -9", shell=True + ) + subprocess.call( + "ps auxww | grep 'superset worker' | awk '{print $2}' | xargs kill -9", + shell=True, + ) + + @skipUnless((is_feature_enabled("THUMBNAILS")), "Thumbnails feature") def test_simple_get_screenshot(self): """ Thumbnails: Simple get screen shot """ - if is_feature_enabled("THUMBNAILS"): - self.assertTrue(False) + pass From 01cfb5b8acd767c3c6d0c0e226e6c9e03adf5298 Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Thu, 27 Feb 2020 18:31:41 +0000 Subject: [PATCH 20/28] [thumbnails] tests and fixes --- docs/installation.rst | 4 +- superset/models/dashboard.py | 29 ++-- superset/models/slice.py | 23 ++- superset/utils/core.py | 5 + superset/utils/selenium.py | 2 +- superset/views/chart/api.py | 26 +++- superset/views/dashboard/api.py | 142 +++++++++-------- tests/superset_test_config_thumbnails.py | 8 +- tests/thumbnails_tests.py | 188 ++++++++++++++++++++++- 9 files changed, 330 insertions(+), 97 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index 5534c07a630b..0f8de56ce0a6 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -673,14 +673,14 @@ An example config could be: CELERY_CONFIG = CeleryConfig def init_thumbnail_cache(app: Flask) -> RedisCache: - return RedisCache(host='localhost', port=6379, key_prefix='superset_thumbnails_') + return RedisCache(host='localhost', port=6379, key_prefix='superset_') THUMBNAIL_CACHE_CONFIG = init_thumbnail_cache # Async selenium thumbnail task will use the following user THUMBNAIL_SELENIUM_USER = "Admin" -Using the above example cache keys for dashboards will be `superset_thumbnails_thumb__dashboard__{ID}` +Using the above example cache keys for dashboards will be `superset_thumb__dashboard__{ID}` You can override the base URL for selenium using: diff --git a/superset/models/dashboard.py b/superset/models/dashboard.py index cc4a0e26c409..26c83562bbb1 100644 --- a/superset/models/dashboard.py +++ b/superset/models/dashboard.py @@ -186,25 +186,20 @@ def dashboard_link(self) -> Markup: return Markup(f'{title}') @property - def thumbnail_url(self): - # SHA here is to force bypassing the browser cache when chart has changed - # sha = utils.md5_hex(self.params, 6) - sha = 1 - return f"/api/v1/dashboard/{self.id}/thumbnail/{sha}/" - - @property - def thumbnail_img(self): - return Markup(f'') + def unique_value(self) -> str: + """ + Returns a MD5 HEX digest that makes this dashboard unique + """ + unique_string = f"{self.position_json}.{self.css}.{self.json_metadata}" + return utils.md5_hex(unique_string) @property - def thumbnail_link(self): - return Markup( - f""" - - {self.thumbnail_img} - + def thumbnail_url(self) -> str: """ - ) + Returns a thumbnail URL with a HEX digest. We want to avoid browser cache + if the dashboard has changed + """ + return f"/api/v1/dashboard/{self.id}/thumbnail/{self.unique_value}/" @property def changed_by_name(self): @@ -488,6 +483,6 @@ def event_after_dashboard_changed( # pylint: disable=unused-argument # events for updating tags -if is_feature_enabled("THUMBNAILS"): +if is_feature_enabled("THUMBNAILS_SQLA_LISTENERS"): sqla.event.listen(Dashboard, "after_insert", event_after_dashboard_changed) sqla.event.listen(Dashboard, "after_update", event_after_dashboard_changed) diff --git a/superset/models/slice.py b/superset/models/slice.py index 4a83d504366f..830bc3e84e42 100644 --- a/superset/models/slice.py +++ b/superset/models/slice.py @@ -164,6 +164,21 @@ def data(self) -> Dict[str, Any]: "changed_on": self.changed_on.isoformat(), } + @property + def unique_value(self) -> str: + """ + Returns a MD5 HEX digest that makes this dashboard unique + """ + return utils.md5_hex(self.params) + + @property + def thumbnail_url(self) -> str: + """ + Returns a thumbnail URL with a HEX digest. We want to avoid browser cache + if the dashboard has changed + """ + return f"/api/v1/chart/{self.id}/thumbnail/{self.unique_value}/" + @property def json_data(self) -> str: return json.dumps(self.data) @@ -314,7 +329,7 @@ def set_related_perm(mapper, connection, target): target.schema_perm = ds.schema_perm -def event_after_dashboard_changed( # pylint: disable=unused-argument +def event_after_chart_changed( # pylint: disable=unused-argument mapper, connection, target ): cache_chart_thumbnail.delay(target.id, force=True) @@ -330,6 +345,6 @@ def event_after_dashboard_changed( # pylint: disable=unused-argument sqla.event.listen(Slice, "after_delete", ChartUpdater.after_delete) # events for updating tags -if is_feature_enabled("THUMBNAILS"): - sqla.event.listen(Slice, "after_insert", event_after_dashboard_changed) - sqla.event.listen(Slice, "after_update", event_after_dashboard_changed) +if is_feature_enabled("THUMBNAILS_SQLA_LISTENERS"): + sqla.event.listen(Slice, "after_insert", event_after_chart_changed) + sqla.event.listen(Slice, "after_update", event_after_chart_changed) diff --git a/superset/utils/core.py b/superset/utils/core.py index 55c2395cd95f..9356383c55e1 100644 --- a/superset/utils/core.py +++ b/superset/utils/core.py @@ -19,6 +19,7 @@ import decimal import errno import functools +import hashlib import json import logging import os @@ -250,6 +251,10 @@ def dttm_from_timetuple(d: struct_time) -> datetime: return datetime(d.tm_year, d.tm_mon, d.tm_mday, d.tm_hour, d.tm_min, d.tm_sec) +def md5_hex(data: str) -> str: + return hashlib.md5(data.encode()).hexdigest() + + class DashboardEncoder(json.JSONEncoder): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) diff --git a/superset/utils/selenium.py b/superset/utils/selenium.py index 86fff3eb5d4b..61d410bb6139 100644 --- a/superset/utils/selenium.py +++ b/superset/utils/selenium.py @@ -36,7 +36,7 @@ try: from PIL import Image except ModuleNotFoundError: - logger.info("No PIL instalation found") + logger.info("No PIL installation found") if TYPE_CHECKING: # pylint: disable=unused-import diff --git a/superset/views/chart/api.py b/superset/views/chart/api.py index 80403ee8b7ce..019d985a0d44 100644 --- a/superset/views/chart/api.py +++ b/superset/views/chart/api.py @@ -14,10 +14,11 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +import logging from typing import Dict, List, Optional from flask import current_app, Response -from flask_appbuilder.api import expose, protect, safe +from flask_appbuilder.api import expose, protect, rison, safe from flask_appbuilder.models.sqla.interface import SQLAInterface from marshmallow import fields, post_load, validates_schema, ValidationError from marshmallow.validate import Length @@ -37,6 +38,13 @@ from superset.views.base_schemas import BaseOwnedSchema, validate_owner from superset.views.chart.mixin import SliceMixin +logger = logging.getLogger(__name__) + +thumbnail_query_schema = { + "type": "object", + "properties": {"force": {"type": "boolean"}}, +} + def validate_json(value): try: @@ -165,6 +173,7 @@ class ChartRestApi(SliceMixin, BaseOwnedModelRestApi): "viz_type", "params", "cache_timeout", + "thumbnail_url", ] # Will just affect _info endpoint edit_columns = ["slice_name"] @@ -186,10 +195,11 @@ def __init__(self, *args, **kwargs): self.include_route_methods = self.include_route_methods | {"thumbnail"} super().__init__(*args, **kwargs) - @expose("//thumbnail//", methods=["GET"]) + @expose("//thumbnail//", methods=["GET"]) @protect() + @rison(thumbnail_query_schema) @safe - def thumbnail(self, pk, sha): # pylint: disable=invalid-name + def thumbnail(self, pk, digest, **kwargs): # pylint: disable=invalid-name """Get Chart thumbnail --- get: @@ -220,17 +230,21 @@ def thumbnail(self, pk, sha): # pylint: disable=invalid-name 500: $ref: '#/components/responses/500' """ - print(sha) - query = self.datamodel.session.query(Slice) - chart = self._base_filters.apply_all(query).get(pk) + chart = self.datamodel.get(pk, self._base_filters) if not chart: return self.response_404() + if kwargs["rison"].get("force", False): + cache_chart_thumbnail.delay(chart.id, force=True) + return self.response(202, message="OK Async") # fetch the chart screenshot using the current user and cache if set screenshot = ChartScreenshot(pk).get_from_cache(cache=thumbnail_cache) # If not screenshot then send request to compute thumb to celery if not screenshot: cache_chart_thumbnail.delay(chart.id, force=True) return self.response(202, message="OK Async") + # If digests + if chart.unique_value != digest: + logger.info("Requested thumbnail digest differs from actual digest") return Response( FileWrapper(screenshot), mimetype="image/png", direct_passthrough=True ) diff --git a/superset/views/dashboard/api.py b/superset/views/dashboard/api.py index 03365e7bad1f..0f57f172d0a4 100644 --- a/superset/views/dashboard/api.py +++ b/superset/views/dashboard/api.py @@ -42,7 +42,13 @@ from .mixin import DashboardMixin logger = logging.getLogger(__name__) -get_delete_ids_schema = {"type": "array", "items": {"type": "integer"}} + +bulk_delete_query_schema = {"type": "array", "items": {"type": "integer"}} +thumbnail_query_schema = { + "type": "object", + "properties": {"force": {"type": "boolean"}}, +} +export_query_schema = {"type": "array", "items": {"type": "integer"}} class DashboardJSONMetadataSchema(Schema): @@ -131,9 +137,6 @@ def make_object(self, data: Dict, discard: Optional[List[str]] = None) -> Dashbo return self.instance -get_export_ids_schema = {"type": "array", "items": {"type": "integer"}} - - class DashboardRestApi(DashboardMixin, BaseOwnedModelRestApi): datamodel = SQLAInterface(Dashboard) include_route_methods = RouteMethod.REST_MODEL_VIEW_CRUD_SET | { @@ -168,6 +171,7 @@ class DashboardRestApi(DashboardMixin, BaseOwnedModelRestApi): "published", "slug", "url", + "thumbnail_url", ] add_model_schema = DashboardPostSchema() @@ -186,8 +190,8 @@ def __init__(self, *args, **kwargs): @expose("/", methods=["DELETE"]) @protect() + @rison(bulk_delete_query_schema) @safe - @rison(get_delete_ids_schema) def bulk_delete(self, **kwargs): # pylint: disable=arguments-differ """Delete bulk Dashboards --- @@ -271,10 +275,60 @@ def bulk_delete(self, **kwargs): # pylint: disable=arguments-differ ), ) - @expose("//thumbnail//", methods=["GET"]) + @expose("/export/", methods=["GET"]) @protect() + @rison(export_query_schema) @safe - def thumbnail(self, pk, sha): # pylint: disable=invalid-name + def export(self, **kwargs): + """Export dashboards + --- + get: + parameters: + - in: query + name: q + content: + application/json: + schema: + type: array + items: + type: integer + responses: + 200: + description: Dashboard export + content: + text/plain: + schema: + type: string + 400: + $ref: '#/components/responses/400' + 401: + $ref: '#/components/responses/401' + 404: + $ref: '#/components/responses/404' + 422: + $ref: '#/components/responses/422' + 500: + $ref: '#/components/responses/500' + """ + query = self.datamodel.session.query(Dashboard).filter( + Dashboard.id.in_(kwargs["rison"]) + ) + query = self._base_filters.apply_all(query) + ids = [item.id for item in query.all()] + if not ids: + return self.response_404() + export = Dashboard.export_dashboards(ids) + resp = make_response(export, 200) + resp.headers["Content-Disposition"] = generate_download_headers("json")[ + "Content-Disposition" + ] + return resp + + @expose("//thumbnail//", methods=["GET"]) + @protect() + @safe + @rison(thumbnail_query_schema) + def thumbnail(self, pk, digest, **kwargs): # pylint: disable=invalid-name """Get Dashboard thumbnail --- get: @@ -286,9 +340,20 @@ def thumbnail(self, pk, sha): # pylint: disable=invalid-name type: integer name: pk - in: path + name: digest + description: A hex digest that makes this dashboard unique schema: type: string - name: sha + - in: query + name: q + content: + application/json: + schema: + type: object + properties: + force: + type: boolean + default: false responses: 200: description: Dashboard thumbnail image @@ -315,65 +380,22 @@ def thumbnail(self, pk, sha): # pylint: disable=invalid-name 500: $ref: '#/components/responses/500' """ - print(sha) - query = self.datamodel.session.query(Dashboard) - dashboard = self._base_filters.apply_all(query).get(pk) + dashboard = self.datamodel.get(pk, self._base_filters) if not dashboard: return self.response_404() + # If force, request a screenshot from the workers + if kwargs["rison"].get("force", False): + cache_dashboard_thumbnail.delay(dashboard.id, force=True) + return self.response(202, message="OK Async") # fetch the dashboard screenshot using the current user and cache if set screenshot = DashboardScreenshot(pk).get_from_cache(cache=thumbnail_cache) + # If the screenshot does not exist, request one from the workers if not screenshot: cache_dashboard_thumbnail.delay(dashboard.id, force=True) return self.response(202, message="OK Async") + # If digests + if dashboard.unique_value != digest: + logger.info("Requested thumbnail digest differs from actual digest") return Response( FileWrapper(screenshot), mimetype="image/png", direct_passthrough=True ) - - @expose("/export/", methods=["GET"]) - @protect() - @safe - @rison(get_export_ids_schema) - def export(self, **kwargs): - """Export dashboards - --- - get: - parameters: - - in: query - name: q - content: - application/json: - schema: - type: array - items: - type: integer - responses: - 200: - description: Dashboard export - content: - text/plain: - schema: - type: string - 400: - $ref: '#/components/responses/400' - 401: - $ref: '#/components/responses/401' - 404: - $ref: '#/components/responses/404' - 422: - $ref: '#/components/responses/422' - 500: - $ref: '#/components/responses/500' - """ - query = self.datamodel.session.query(Dashboard).filter( - Dashboard.id.in_(kwargs["rison"]) - ) - query = self._base_filters.apply_all(query) - ids = [item.id for item in query.all()] - if not ids: - return self.response_404() - export = Dashboard.export_dashboards(ids) - resp = make_response(export, 200) - resp.headers["Content-Disposition"] = generate_download_headers("json")[ - "Content-Disposition" - ] - return resp diff --git a/tests/superset_test_config_thumbnails.py b/tests/superset_test_config_thumbnails.py index 88f309568318..355b2d0e9447 100644 --- a/tests/superset_test_config_thumbnails.py +++ b/tests/superset_test_config_thumbnails.py @@ -59,7 +59,13 @@ class CeleryConfig(object): CELERY_CONFIG = CeleryConfig -FEATURE_FLAGS = {"THUMBNAILS": True} +FEATURE_FLAGS = { + "foo": "bar", + "KV_STORE": False, + "SHARE_QUERIES_VIA_KV_STORE": False, + "THUMBNAILS": True, + "THUMBNAILS_SQLA_LISTENERS": False, +} def init_thumbnail_cache(app: Flask) -> RedisCache: diff --git a/tests/thumbnails_tests.py b/tests/thumbnails_tests.py index e77197572b4c..308663a62046 100644 --- a/tests/thumbnails_tests.py +++ b/tests/thumbnails_tests.py @@ -17,22 +17,56 @@ # from superset import db # from superset.models.dashboard import Dashboard import subprocess +import urllib.request from unittest import skipUnless +from unittest.mock import patch -from superset import app, is_feature_enabled +from flask_testing import LiveServerTestCase +from sqlalchemy.sql import func + +from superset import db, is_feature_enabled, security_manager, thumbnail_cache +from superset.models.dashboard import Dashboard +from superset.models.slice import Slice +from superset.utils.selenium import ( + ChartScreenshot, + DashboardScreenshot, + get_auth_cookies, +) from tests.test_app import app from .base_tests import SupersetTestCase -class ThumbnailsTests(SupersetTestCase): +class CeleryStartMixin: @classmethod def setUpClass(cls): with app.app_context(): + from werkzeug.contrib.cache import RedisCache + + class CeleryConfig(object): + BROKER_URL = "redis://localhost" + CELERY_IMPORTS = ("superset.tasks.thumbnails",) + CONCURRENCY = 1 + + app.config["CELERY_CONFIG"] = CeleryConfig + + def init_thumbnail_cache(app) -> RedisCache: + return RedisCache( + host="localhost", + key_prefix="superset_thumbnails_", + default_timeout=10000, + ) + + app.config["THUMBNAIL_CACHE_CONFIG"] = init_thumbnail_cache base_dir = app.config["BASE_DIR"] worker_command = base_dir + "/bin/superset worker -w 2" - subprocess.Popen(worker_command, shell=True, stdout=subprocess.PIPE) + subprocess.Popen( + worker_command, + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) @classmethod def tearDownClass(cls): @@ -44,9 +78,151 @@ def tearDownClass(cls): shell=True, ) + +class ThumbnailsSeleniumLive(CeleryStartMixin, LiveServerTestCase): + def create_app(self): + return app + + def url_open_auth(self, username: str, url: str): + admin_user = security_manager.find_user(username=username) + cookies = {} + for cookie in get_auth_cookies(admin_user): + cookies["session"] = cookie + + opener = urllib.request.build_opener() + opener.addheaders.append(("Cookie", f"session={cookies['session']}")) + return opener.open(f"{self.get_server_url()}/{url}") + + @skipUnless((is_feature_enabled("THUMBNAILS")), "Thumbnails feature") + def test_get_async_dashboard_screenshot(self): + """ + Thumbnails: Simple get async dashboard screenshot + """ + dashboard_id = db.session.query(Dashboard).all()[0].id + with patch("superset.views.dashboard.api.DashboardRestApi.get") as mock_get: + response = self.url_open_auth( + "admin", f"api/v1/dashboard/{dashboard_id}/thumbnail/1234/" + ) + self.assertEqual(response.getcode(), 202) + + +class ThumbnailsTests(CeleryStartMixin, SupersetTestCase): + + mock_image = b"bytes mock image" + + def test_dashboard_thumbnail_disabled(self): + """ + Thumbnails: Dashboard thumbnail disabled + """ + if is_feature_enabled("THUMBNAILS"): + return + dashboard_id = db.session.query(Dashboard).all()[0].id + self.login(username="admin") + uri = f"api/v1/dashboard/{dashboard_id}/thumbnail/1234/" + rv = self.client.get(uri) + self.assertEqual(rv.status_code, 404) + + def test_chart_thumbnail_disabled(self): + """ + Thumbnails: Chart thumbnail disabled + """ + if is_feature_enabled("THUMBNAILS"): + return + chart_id = db.session.query(Slice).all()[0].id + self.login(username="admin") + uri = f"api/v1/chart/{chart_id}/thumbnail/1234/" + rv = self.client.get(uri) + self.assertEqual(rv.status_code, 404) + + @skipUnless((is_feature_enabled("THUMBNAILS")), "Thumbnails feature") + def test_get_async_dashboard_screenshot(self): + """ + Thumbnails: Simple get async dashboard screenshot + """ + dashboard_id = db.session.query(Dashboard).all()[0].id + self.login(username="admin") + uri = f"api/v1/dashboard/{dashboard_id}/thumbnail/1234/" + with patch( + "superset.tasks.thumbnails.cache_dashboard_thumbnail.delay" + ) as mock_task: + rv = self.client.get(uri) + self.assertEqual(rv.status_code, 202) + mock_task.assert_called_with(dashboard_id, force=True) + + @skipUnless((is_feature_enabled("THUMBNAILS")), "Thumbnails feature") + def test_get_async_dashboard_notfound(self): + """ + Thumbnails: Simple get async dashboard not found + """ + max_id = db.session.query(func.max(Dashboard.id)).scalar() + self.login(username="admin") + uri = f"api/v1/dashboard/{max_id + 1}/thumbnail/1234/" + rv = self.client.get(uri) + self.assertEqual(rv.status_code, 404) + + @skipUnless((is_feature_enabled("THUMBNAILS")), "Thumbnails feature") + def test_get_async_dashboard_not_allowed(self): + """ + Thumbnails: Simple get async dashboard not allowed + """ + dashboard_id = db.session.query(Dashboard).all()[0].id + self.login(username="gamma") + uri = f"api/v1/dashboard/{dashboard_id}/thumbnail/1234/" + rv = self.client.get(uri) + self.assertEqual(rv.status_code, 404) + + @skipUnless((is_feature_enabled("THUMBNAILS")), "Thumbnails feature") + def test_get_async_chart_screenshot(self): + """ + Thumbnails: Simple get async chart screenshot + """ + chart_id = db.session.query(Slice).all()[0].id + self.login(username="admin") + uri = f"api/v1/chart/{chart_id}/thumbnail/1234/" + with patch( + "superset.tasks.thumbnails.cache_chart_thumbnail.delay" + ) as mock_task: + rv = self.client.get(uri) + self.assertEqual(rv.status_code, 202) + mock_task.assert_called_with(chart_id, force=True) + + @skipUnless((is_feature_enabled("THUMBNAILS")), "Thumbnails feature") + def test_get_async_chart_notfound(self): + """ + Thumbnails: Simple get async chart not found + """ + max_id = db.session.query(func.max(Slice.id)).scalar() + self.login(username="admin") + uri = f"api/v1/chart/{max_id + 1}/thumbnail/1234/" + rv = self.client.get(uri) + self.assertEqual(rv.status_code, 404) + + @skipUnless((is_feature_enabled("THUMBNAILS")), "Thumbnails feature") + def test_get_cached_dashboard_screenshot(self): + """ + Thumbnails: Simple get cached dashboard screenshot + """ + dashboard_id = db.session.query(Dashboard).all()[0].id + # Cache a test "image" + screenshot = DashboardScreenshot(model_id=dashboard_id) + thumbnail_cache.set(screenshot.cache_key, self.mock_image) + self.login(username="admin") + uri = f"api/v1/dashboard/{dashboard_id}/thumbnail/1234/" + rv = self.client.get(uri) + self.assertEqual(rv.status_code, 200) + self.assertEqual(rv.data, self.mock_image) + @skipUnless((is_feature_enabled("THUMBNAILS")), "Thumbnails feature") - def test_simple_get_screenshot(self): + def test_get_cached_chart_screenshot(self): """ - Thumbnails: Simple get screen shot + Thumbnails: Simple get cached chart screenshot """ - pass + chart_id = db.session.query(Slice).all()[0].id + # Cache a test "image" + screenshot = ChartScreenshot(model_id=chart_id) + thumbnail_cache.set(screenshot.cache_key, self.mock_image) + self.login(username="admin") + uri = f"api/v1/chart/{chart_id}/thumbnail/1234/" + rv = self.client.get(uri) + self.assertEqual(rv.status_code, 200) + self.assertEqual(rv.data, self.mock_image) From e246e8c85f07e8074065cbfaff8b6b12bf578109 Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Fri, 28 Feb 2020 11:13:18 +0000 Subject: [PATCH 21/28] [thumbnails] Update docs --- docs/installation.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/installation.rst b/docs/installation.rst index 0f8de56ce0a6..7fb41b3c22e6 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -647,7 +647,10 @@ This is an optional feature that can be turned on by activating it's feature fla .. code-block:: python - FEATURE_FLAGS = {"THUMBNAILS": True} + FEATURE_FLAGS = { + "THUMBNAILS": True, + "THUMBNAILS_SQLA_LISTENERS": True, + } For this feature you will need a cache system and celery workers. All thumbnails are store on cache and are processed From 429bff2fe9e26c747e7e0fb5fd0b1a7f4359742f Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Thu, 12 Mar 2020 13:32:20 +0000 Subject: [PATCH 22/28] [thumbnails] address comments --- superset/models/dashboard.py | 4 ++-- superset/models/slice.py | 4 ++-- superset/views/chart/api.py | 3 ++- superset/views/dashboard/api.py | 3 ++- tests/thumbnails_tests.py | 39 +++++++++++++++++---------------- 5 files changed, 28 insertions(+), 25 deletions(-) diff --git a/superset/models/dashboard.py b/superset/models/dashboard.py index 26c83562bbb1..56ed048762b2 100644 --- a/superset/models/dashboard.py +++ b/superset/models/dashboard.py @@ -186,7 +186,7 @@ def dashboard_link(self) -> Markup: return Markup(f'{title}') @property - def unique_value(self) -> str: + def digest(self) -> str: """ Returns a MD5 HEX digest that makes this dashboard unique """ @@ -199,7 +199,7 @@ def thumbnail_url(self) -> str: Returns a thumbnail URL with a HEX digest. We want to avoid browser cache if the dashboard has changed """ - return f"/api/v1/dashboard/{self.id}/thumbnail/{self.unique_value}/" + return f"/api/v1/dashboard/{self.id}/thumbnail/{self.digest}/" @property def changed_by_name(self): diff --git a/superset/models/slice.py b/superset/models/slice.py index 830bc3e84e42..c5d75f9e483b 100644 --- a/superset/models/slice.py +++ b/superset/models/slice.py @@ -165,7 +165,7 @@ def data(self) -> Dict[str, Any]: } @property - def unique_value(self) -> str: + def digest(self) -> str: """ Returns a MD5 HEX digest that makes this dashboard unique """ @@ -177,7 +177,7 @@ def thumbnail_url(self) -> str: Returns a thumbnail URL with a HEX digest. We want to avoid browser cache if the dashboard has changed """ - return f"/api/v1/chart/{self.id}/thumbnail/{self.unique_value}/" + return f"/api/v1/chart/{self.id}/thumbnail/{self.digest}/" @property def json_data(self) -> str: diff --git a/superset/views/chart/api.py b/superset/views/chart/api.py index 019d985a0d44..6537eb1908a2 100644 --- a/superset/views/chart/api.py +++ b/superset/views/chart/api.py @@ -243,8 +243,9 @@ def thumbnail(self, pk, digest, **kwargs): # pylint: disable=invalid-name cache_chart_thumbnail.delay(chart.id, force=True) return self.response(202, message="OK Async") # If digests - if chart.unique_value != digest: + if chart.digest != digest: logger.info("Requested thumbnail digest differs from actual digest") + return self.response(304, message="Digest differs") return Response( FileWrapper(screenshot), mimetype="image/png", direct_passthrough=True ) diff --git a/superset/views/dashboard/api.py b/superset/views/dashboard/api.py index 0f57f172d0a4..29a62fc2c9ae 100644 --- a/superset/views/dashboard/api.py +++ b/superset/views/dashboard/api.py @@ -394,8 +394,9 @@ def thumbnail(self, pk, digest, **kwargs): # pylint: disable=invalid-name cache_dashboard_thumbnail.delay(dashboard.id, force=True) return self.response(202, message="OK Async") # If digests - if dashboard.unique_value != digest: + if dashboard.digest != digest: logger.info("Requested thumbnail digest differs from actual digest") + return self.response(304, message="Digest differs") return Response( FileWrapper(screenshot), mimetype="image/png", direct_passthrough=True ) diff --git a/tests/thumbnails_tests.py b/tests/thumbnails_tests.py index 308663a62046..e7adf784ac16 100644 --- a/tests/thumbnails_tests.py +++ b/tests/thumbnails_tests.py @@ -98,10 +98,11 @@ def test_get_async_dashboard_screenshot(self): """ Thumbnails: Simple get async dashboard screenshot """ - dashboard_id = db.session.query(Dashboard).all()[0].id + dashboard = db.session.query(Dashboard).all()[0] with patch("superset.views.dashboard.api.DashboardRestApi.get") as mock_get: response = self.url_open_auth( - "admin", f"api/v1/dashboard/{dashboard_id}/thumbnail/1234/" + "admin", + f"api/v1/dashboard/{dashboard.id}/thumbnail/{dashboard.digest}/", ) self.assertEqual(response.getcode(), 202) @@ -116,9 +117,9 @@ def test_dashboard_thumbnail_disabled(self): """ if is_feature_enabled("THUMBNAILS"): return - dashboard_id = db.session.query(Dashboard).all()[0].id + dashboard = db.session.query(Dashboard).all()[0] self.login(username="admin") - uri = f"api/v1/dashboard/{dashboard_id}/thumbnail/1234/" + uri = f"api/v1/dashboard/{dashboard.id}/thumbnail/{dashboard.digest}/" rv = self.client.get(uri) self.assertEqual(rv.status_code, 404) @@ -128,9 +129,9 @@ def test_chart_thumbnail_disabled(self): """ if is_feature_enabled("THUMBNAILS"): return - chart_id = db.session.query(Slice).all()[0].id + chart = db.session.query(Slice).all()[0] self.login(username="admin") - uri = f"api/v1/chart/{chart_id}/thumbnail/1234/" + uri = f"api/v1/chart/{chart}/thumbnail/{chart.digest}/" rv = self.client.get(uri) self.assertEqual(rv.status_code, 404) @@ -139,9 +140,9 @@ def test_get_async_dashboard_screenshot(self): """ Thumbnails: Simple get async dashboard screenshot """ - dashboard_id = db.session.query(Dashboard).all()[0].id + dashboard = db.session.query(Dashboard).all()[0] self.login(username="admin") - uri = f"api/v1/dashboard/{dashboard_id}/thumbnail/1234/" + uri = f"api/v1/dashboard/{dashboard.id}/thumbnail/{dashboard.digest}/" with patch( "superset.tasks.thumbnails.cache_dashboard_thumbnail.delay" ) as mock_task: @@ -165,9 +166,9 @@ def test_get_async_dashboard_not_allowed(self): """ Thumbnails: Simple get async dashboard not allowed """ - dashboard_id = db.session.query(Dashboard).all()[0].id + dashboard = db.session.query(Dashboard).all()[0] self.login(username="gamma") - uri = f"api/v1/dashboard/{dashboard_id}/thumbnail/1234/" + uri = f"api/v1/dashboard/{dashboard.id}/thumbnail/{dashboard.digest}/" rv = self.client.get(uri) self.assertEqual(rv.status_code, 404) @@ -176,15 +177,15 @@ def test_get_async_chart_screenshot(self): """ Thumbnails: Simple get async chart screenshot """ - chart_id = db.session.query(Slice).all()[0].id + chart = db.session.query(Slice).all()[0] self.login(username="admin") - uri = f"api/v1/chart/{chart_id}/thumbnail/1234/" + uri = f"api/v1/chart/{chart.id}/thumbnail/{chart.digest}/" with patch( "superset.tasks.thumbnails.cache_chart_thumbnail.delay" ) as mock_task: rv = self.client.get(uri) self.assertEqual(rv.status_code, 202) - mock_task.assert_called_with(chart_id, force=True) + mock_task.assert_called_with(chart.id, force=True) @skipUnless((is_feature_enabled("THUMBNAILS")), "Thumbnails feature") def test_get_async_chart_notfound(self): @@ -202,12 +203,12 @@ def test_get_cached_dashboard_screenshot(self): """ Thumbnails: Simple get cached dashboard screenshot """ - dashboard_id = db.session.query(Dashboard).all()[0].id + dashboard = db.session.query(Dashboard).all()[0] # Cache a test "image" - screenshot = DashboardScreenshot(model_id=dashboard_id) + screenshot = DashboardScreenshot(model_id=dashboard.id) thumbnail_cache.set(screenshot.cache_key, self.mock_image) self.login(username="admin") - uri = f"api/v1/dashboard/{dashboard_id}/thumbnail/1234/" + uri = f"api/v1/dashboard/{dashboard.id}/thumbnail/{dashboard.digest}/" rv = self.client.get(uri) self.assertEqual(rv.status_code, 200) self.assertEqual(rv.data, self.mock_image) @@ -217,12 +218,12 @@ def test_get_cached_chart_screenshot(self): """ Thumbnails: Simple get cached chart screenshot """ - chart_id = db.session.query(Slice).all()[0].id + chart = db.session.query(Slice).all()[0] # Cache a test "image" - screenshot = ChartScreenshot(model_id=chart_id) + screenshot = ChartScreenshot(model_id=chart.id) thumbnail_cache.set(screenshot.cache_key, self.mock_image) self.login(username="admin") - uri = f"api/v1/chart/{chart_id}/thumbnail/1234/" + uri = f"api/v1/chart/{chart.id}/thumbnail/{chart.digest}/" rv = self.client.get(uri) self.assertEqual(rv.status_code, 200) self.assertEqual(rv.data, self.mock_image) From 4f8cd83d9c340892c2c8b1f9b3d43705802afc3e Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Mon, 23 Mar 2020 13:24:13 +0000 Subject: [PATCH 23/28] [thumbnails] Docs use S3 cache --- docs/installation.rst | 8 ++++---- superset/dashboards/api.py | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index 6143d06afb24..1ad8a2a4ed66 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -663,12 +663,12 @@ This is an optional feature that can be turned on by activating it's feature fla For this feature you will need a cache system and celery workers. All thumbnails are store on cache and are processed asynchronously by the workers. -An example config could be: +An example config where images are stored on S3 could be: .. code-block:: python from flask import Flask - from werkzeug.contrib.cache import RedisCache + from s3cache.s3cache import S3Cache ... @@ -682,8 +682,8 @@ An example config could be: CELERY_CONFIG = CeleryConfig - def init_thumbnail_cache(app: Flask) -> RedisCache: - return RedisCache(host='localhost', port=6379, key_prefix='superset_') + def init_thumbnail_cache(app: Flask) -> S3Cache: + return S3Cache("bucket_name", 'thumbs_cache/') THUMBNAIL_CACHE_CONFIG = init_thumbnail_cache diff --git a/superset/dashboards/api.py b/superset/dashboards/api.py index dd500da9943a..cd1e6adc4964 100644 --- a/superset/dashboards/api.py +++ b/superset/dashboards/api.py @@ -94,6 +94,7 @@ class DashboardRestApi(BaseSupersetModelRestApi): "published", "slug", "url", + "thumbnail_url", ] edit_columns = [ "dashboard_title", From dfd7045471fe514d8f7a3c10ff666dbf2c398537 Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Mon, 23 Mar 2020 13:31:35 +0000 Subject: [PATCH 24/28] [thumbnails] Added thumbs URL to GET HTTP endpoint (show) --- superset/dashboards/api.py | 1 + superset/views/chart/api.py | 1 + 2 files changed, 2 insertions(+) diff --git a/superset/dashboards/api.py b/superset/dashboards/api.py index cd1e6adc4964..b09061549f85 100644 --- a/superset/dashboards/api.py +++ b/superset/dashboards/api.py @@ -82,6 +82,7 @@ class DashboardRestApi(BaseSupersetModelRestApi): "url", "slug", "table_names", + "thumbnail_url", ] order_columns = ["dashboard_title", "changed_on", "published", "changed_by_fk"] list_columns = [ diff --git a/superset/views/chart/api.py b/superset/views/chart/api.py index d06cf1f872f0..42b2739d29f4 100644 --- a/superset/views/chart/api.py +++ b/superset/views/chart/api.py @@ -158,6 +158,7 @@ class ChartRestApi(SliceMixin, BaseOwnedModelRestApi): "viz_type", "params", "cache_timeout", + "thumbnail_url", ] list_columns = [ "id", From 3bd3626456d8dab84d417952a8dcbedbf43a054c Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Mon, 23 Mar 2020 14:56:45 +0000 Subject: [PATCH 25/28] [thumbnails] Fix, tests --- tests/dashboards/api_tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/dashboards/api_tests.py b/tests/dashboards/api_tests.py index 1c8a050f2a77..1c48c3187517 100644 --- a/tests/dashboards/api_tests.py +++ b/tests/dashboards/api_tests.py @@ -102,6 +102,7 @@ def test_get_dashboard(self): "url": f"/superset/dashboard/slug1/", "slug": "slug1", "table_names": "", + "thumbnail_url": dashboard.thumbnail_url, } data = json.loads(rv.data.decode("utf-8")) self.assertIn("changed_on", data["result"]) From 86ba9db03c45e182c2b4d30270129324b8ffb5d1 Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Thu, 9 Apr 2020 16:19:26 +0100 Subject: [PATCH 26/28] change 304 to 302 and address comments --- superset/charts/api.py | 17 +++++---- superset/dashboards/api.py | 19 ++++++---- superset/tasks/thumbnails.py | 2 +- .../utils/{selenium.py => screenshots.py} | 0 superset/views/base_api.py | 2 +- tests/thumbnails_tests.py | 36 ++++++++++++++++++- 6 files changed, 61 insertions(+), 15 deletions(-) rename superset/utils/{selenium.py => screenshots.py} (100%) diff --git a/superset/charts/api.py b/superset/charts/api.py index cdab2e7a39db..ec365a49262f 100644 --- a/superset/charts/api.py +++ b/superset/charts/api.py @@ -18,7 +18,7 @@ from typing import Any, Dict import simplejson -from flask import g, make_response, request, Response +from flask import g, make_response, redirect, request, Response, url_for from flask_appbuilder.api import expose, protect, rison, safe from flask_appbuilder.models.sqla.interface import SQLAInterface from flask_babel import ngettext @@ -52,7 +52,7 @@ from superset.models.slice import Slice from superset.tasks.thumbnails import cache_chart_thumbnail from superset.utils.core import json_int_dttm_ser -from superset.utils.selenium import ChartScreenshot +from superset.utils.screenshots import ChartScreenshot from superset.views.base_api import BaseSupersetModelRestApi, RelatedFieldFilter from superset.views.filters import FilterRelatedOwners @@ -496,12 +496,14 @@ def thumbnail(self, pk: int, digest: str, **kwargs: Dict[str, bool]) -> Response schema: type: string format: binary + 302: + description: Redirects to the current digest + 400: + $ref: '#/components/responses/400' 401: $ref: '#/components/responses/401' 404: $ref: '#/components/responses/404' - 422: - $ref: '#/components/responses/422' 500: $ref: '#/components/responses/500' """ @@ -519,8 +521,11 @@ def thumbnail(self, pk: int, digest: str, **kwargs: Dict[str, bool]) -> Response return self.response(202, message="OK Async") # If digests if chart.digest != digest: - logger.info("Requested thumbnail digest differs from actual digest") - return self.response(304, message="Digest differs") + return redirect( + url_for( + f"{self.__class__.__name__}.thumbnail", pk=pk, digest=chart.digest + ) + ) return Response( FileWrapper(screenshot), mimetype="image/png", direct_passthrough=True ) diff --git a/superset/dashboards/api.py b/superset/dashboards/api.py index 6b358660dacc..cfa22527250a 100644 --- a/superset/dashboards/api.py +++ b/superset/dashboards/api.py @@ -17,7 +17,7 @@ import logging from typing import Any, Dict -from flask import g, make_response, request, Response +from flask import g, make_response, redirect, request, Response, url_for from flask_appbuilder.api import expose, protect, rison, safe from flask_appbuilder.models.sqla.interface import SQLAInterface from flask_babel import ngettext @@ -48,7 +48,7 @@ ) from superset.models.dashboard import Dashboard from superset.tasks.thumbnails import cache_dashboard_thumbnail -from superset.utils.selenium import DashboardScreenshot +from superset.utils.screenshots import DashboardScreenshot from superset.views.base import generate_download_headers from superset.views.base_api import BaseSupersetModelRestApi, RelatedFieldFilter from superset.views.filters import FilterRelatedOwners @@ -163,12 +163,14 @@ def post(self) -> Response: type: number result: $ref: '#/components/schemas/{{self.__class__.__name__}}.post' + 302: + description: Redirects to the current digest 400: $ref: '#/components/responses/400' 401: $ref: '#/components/responses/401' - 422: - $ref: '#/components/responses/422' + 404: + $ref: '#/components/responses/404' 500: $ref: '#/components/responses/500' """ @@ -482,8 +484,13 @@ def thumbnail(self, pk: int, digest: str, **kwargs: Dict[str, bool]) -> Response return self.response(202, message="OK Async") # If digests if dashboard.digest != digest: - logger.info("Requested thumbnail digest differs from actual digest") - return self.response(304, message="Digest differs") + return redirect( + url_for( + f"{self.__class__.__name__}.thumbnail", + pk=pk, + digest=dashboard.digest, + ) + ) return Response( FileWrapper(screenshot), mimetype="image/png", direct_passthrough=True ) diff --git a/superset/tasks/thumbnails.py b/superset/tasks/thumbnails.py index 0495cec3c12f..72c7bdaf673f 100644 --- a/superset/tasks/thumbnails.py +++ b/superset/tasks/thumbnails.py @@ -24,7 +24,7 @@ from superset import app, security_manager, thumbnail_cache from superset.extensions import celery_app -from superset.utils.selenium import ChartScreenshot, DashboardScreenshot +from superset.utils.screenshots import ChartScreenshot, DashboardScreenshot logger = logging.getLogger(__name__) diff --git a/superset/utils/selenium.py b/superset/utils/screenshots.py similarity index 100% rename from superset/utils/selenium.py rename to superset/utils/screenshots.py diff --git a/superset/views/base_api.py b/superset/views/base_api.py index bd50bf73de87..a0673cff8273 100644 --- a/superset/views/base_api.py +++ b/superset/views/base_api.py @@ -25,8 +25,8 @@ from flask_appbuilder.models.sqla.filters import FilterStartsWith from sqlalchemy.exc import SQLAlchemyError -from superset.stats_logger import BaseStatsLogger from superset.exceptions import SupersetSecurityException +from superset.stats_logger import BaseStatsLogger from superset.views.base import check_ownership logger = logging.getLogger(__name__) diff --git a/tests/thumbnails_tests.py b/tests/thumbnails_tests.py index ff90bba7fee6..3a67ebe692e0 100644 --- a/tests/thumbnails_tests.py +++ b/tests/thumbnails_tests.py @@ -27,7 +27,7 @@ from superset import db, is_feature_enabled, security_manager, thumbnail_cache from superset.models.dashboard import Dashboard from superset.models.slice import Slice -from superset.utils.selenium import ( +from superset.utils.screenshots import ( ChartScreenshot, DashboardScreenshot, get_auth_cookies, @@ -198,6 +198,23 @@ def test_get_async_chart_notfound(self): rv = self.client.get(uri) self.assertEqual(rv.status_code, 404) + @skipUnless((is_feature_enabled("THUMBNAILS")), "Thumbnails feature") + def test_get_cached_chart_wrong_digest(self): + """ + Thumbnails: Simple get chart with wrong digest + """ + chart = db.session.query(Slice).all()[0] + # Cache a test "image" + screenshot = ChartScreenshot(model_id=chart.id) + thumbnail_cache.set(screenshot.cache_key, self.mock_image) + self.login(username="admin") + uri = f"api/v1/chart/{chart.id}/thumbnail/1234/" + rv = self.client.get(uri) + self.assertEqual(rv.status_code, 302) + self.assertRedirects( + rv, f"api/v1/chart/{chart.id}/thumbnail/{chart.digest}/" + ) + @skipUnless((is_feature_enabled("THUMBNAILS")), "Thumbnails feature") def test_get_cached_dashboard_screenshot(self): """ @@ -227,3 +244,20 @@ def test_get_cached_chart_screenshot(self): rv = self.client.get(uri) self.assertEqual(rv.status_code, 200) self.assertEqual(rv.data, self.mock_image) + + @skipUnless((is_feature_enabled("THUMBNAILS")), "Thumbnails feature") + def test_get_cached_dashboard_wrong_digest(self): + """ + Thumbnails: Simple get dashboard with wrong digest + """ + dashboard = db.session.query(Dashboard).all()[0] + # Cache a test "image" + screenshot = DashboardScreenshot(model_id=dashboard.id) + thumbnail_cache.set(screenshot.cache_key, self.mock_image) + self.login(username="admin") + uri = f"api/v1/dashboard/{dashboard.id}/thumbnail/1234/" + rv = self.client.get(uri) + self.assertEqual(rv.status_code, 302) + self.assertRedirects( + rv, f"api/v1/dashboard/{dashboard.id}/thumbnail/{dashboard.digest}/" + ) From 823fe554f19dc22100ec6c7fe4abfd6df0f58da5 Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Mon, 13 Apr 2020 12:24:34 +0100 Subject: [PATCH 27/28] Fix lint --- superset/charts/api.py | 5 ++++- superset/dashboards/api.py | 5 ++++- tests/thumbnails_tests.py | 4 +--- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/superset/charts/api.py b/superset/charts/api.py index ec365a49262f..1b0b56a5a39a 100644 --- a/superset/charts/api.py +++ b/superset/charts/api.py @@ -22,6 +22,7 @@ from flask_appbuilder.api import expose, protect, rison, safe from flask_appbuilder.models.sqla.interface import SQLAInterface from flask_babel import ngettext +from werkzeug.wrappers import Response as WerkzeugResponse from werkzeug.wsgi import FileWrapper from superset import is_feature_enabled, thumbnail_cache @@ -474,7 +475,9 @@ def data(self) -> Response: @protect() @rison(thumbnail_query_schema) @safe - def thumbnail(self, pk: int, digest: str, **kwargs: Dict[str, bool]) -> Response: + def thumbnail( + self, pk: int, digest: str, **kwargs: Dict[str, bool] + ) -> WerkzeugResponse: """Get Chart thumbnail --- get: diff --git a/superset/dashboards/api.py b/superset/dashboards/api.py index cfa22527250a..993795142b2d 100644 --- a/superset/dashboards/api.py +++ b/superset/dashboards/api.py @@ -21,6 +21,7 @@ from flask_appbuilder.api import expose, protect, rison, safe from flask_appbuilder.models.sqla.interface import SQLAInterface from flask_babel import ngettext +from werkzeug.wrappers import Response as WerkzeugResponse from werkzeug.wsgi import FileWrapper from superset import is_feature_enabled, thumbnail_cache @@ -417,7 +418,9 @@ def export(self, **kwargs: Any) -> Response: @protect() @safe @rison(thumbnail_query_schema) - def thumbnail(self, pk: int, digest: str, **kwargs: Dict[str, bool]) -> Response: + def thumbnail( + self, pk: int, digest: str, **kwargs: Dict[str, bool] + ) -> WerkzeugResponse: """Get Dashboard thumbnail --- get: diff --git a/tests/thumbnails_tests.py b/tests/thumbnails_tests.py index 3a67ebe692e0..ac8f16b2709c 100644 --- a/tests/thumbnails_tests.py +++ b/tests/thumbnails_tests.py @@ -211,9 +211,7 @@ def test_get_cached_chart_wrong_digest(self): uri = f"api/v1/chart/{chart.id}/thumbnail/1234/" rv = self.client.get(uri) self.assertEqual(rv.status_code, 302) - self.assertRedirects( - rv, f"api/v1/chart/{chart.id}/thumbnail/{chart.digest}/" - ) + self.assertRedirects(rv, f"api/v1/chart/{chart.id}/thumbnail/{chart.digest}/") @skipUnless((is_feature_enabled("THUMBNAILS")), "Thumbnails feature") def test_get_cached_dashboard_screenshot(self): From e44571c0221fc6f6431e54fc4814419a1724e556 Mon Sep 17 00:00:00 2001 From: Daniel Gaspar Date: Mon, 13 Apr 2020 12:30:30 +0100 Subject: [PATCH 28/28] ignore new test config --- tests/superset_test_config_thumbnails.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/superset_test_config_thumbnails.py b/tests/superset_test_config_thumbnails.py index 355b2d0e9447..bf68df5e05d6 100644 --- a/tests/superset_test_config_thumbnails.py +++ b/tests/superset_test_config_thumbnails.py @@ -14,6 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +# type: ignore from copy import copy from flask import Flask