diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 7e26e6fe53..cd40e1c492 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -173,6 +173,18 @@ Improvements Contributed by @Kami. +* Add new ``?max_result_size`` query parameter filter to the ``GET /v1/executiond/`` API + endpoint. + + This query parameter allows clients to implement conditional execution result retrieval and + only retrieve the result field if it's smaller than the provided value. + + This comes handy in the various client scenarios (such as st2web) where we don't display and + render very large results directly since it allows to speed things up and decrease amount of + data retrieved and parsed. (improvement) #5197 + + Contributed by @Kami. + Fixed ~~~~~ diff --git a/st2api/st2api/controllers/resource.py b/st2api/st2api/controllers/resource.py index a2391ff9aa..24ea57df76 100644 --- a/st2api/st2api/controllers/resource.py +++ b/st2api/st2api/controllers/resource.py @@ -299,16 +299,23 @@ def _get_one_by_id( exclude_fields=None, include_fields=None, from_model_kwargs=None, + get_by_id_kwargs=None, ): """ :param exclude_fields: A list of object fields to exclude. :type exclude_fields: ``list`` :param include_fields: A list of object fields to include. :type include_fields: ``list`` + :param get_by_id_kwargs: Additional keyword arguments which are passed to the + "_get_by_id()" method. + :type get_by_id_kwargs: ``dict`` or ``None`` """ instance = self._get_by_id( - resource_id=id, exclude_fields=exclude_fields, include_fields=include_fields + resource_id=id, + exclude_fields=exclude_fields, + include_fields=include_fields, + **get_by_id_kwargs or {}, ) if permission_type: diff --git a/st2api/st2api/controllers/v1/actionexecutions.py b/st2api/st2api/controllers/v1/actionexecutions.py index 0282ecbe3e..1c2ea1a6e1 100644 --- a/st2api/st2api/controllers/v1/actionexecutions.py +++ b/st2api/st2api/controllers/v1/actionexecutions.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Optional + import copy import re import sys @@ -24,6 +26,7 @@ import jsonschema from oslo_config import cfg from six.moves import http_client +from mongoengine.queryset.visitor import Q from st2api.controllers.base import BaseRestControllerMixin from st2api.controllers.resource import ResourceController @@ -728,6 +731,7 @@ def get_one( exclude_attributes=None, include_attributes=None, show_secrets=False, + max_result_size=None, ): """ Retrieve a single execution. @@ -751,6 +755,10 @@ def get_one( ) } + max_result_size = self._validate_max_result_size( + max_result_size=max_result_size + ) + # Special case for id == "last" if id == "last": execution_db = ( @@ -769,6 +777,7 @@ def get_one( requester_user=requester_user, from_model_kwargs=from_model_kwargs, permission_type=PermissionType.EXECUTION_VIEW, + get_by_id_kwargs={"max_result_size": max_result_size}, ) def post( @@ -980,6 +989,94 @@ def delete(self, id, requester_user, show_secrets=False): execution_db, mask_secrets=from_model_kwargs["mask_secrets"] ) + def _validate_max_result_size( + self, max_result_size: Optional[int] + ) -> Optional[int]: + """ + Validate value of the ?max_result_size query parameter (if provided). + """ + # Maximum limit for MongoDB collection document is 16 MB and the field itself can't be + # larger than that obviously. And in reality due to the other fields, overhead, etc, + # 14 is the upper limit. + if not max_result_size: + return max_result_size + + if max_result_size <= 0: + raise ValueError("max_result_size must be a positive number") + + if max_result_size > 14 * 1024 * 1024: + raise ValueError( + "max_result_size query parameter must be smaller than 14 MB" + ) + + return max_result_size + + def _get_by_id( + self, + resource_id, + exclude_fields=None, + include_fields=None, + max_result_size=None, + ): + """ + Custom version of _get_by_id() which supports ?max_result_size pre-filtering and not + returning result field for executions which result size exceeds this threshold. + + This functionality allows us to implement fast and efficient retrievals in st2web. + """ + exclude_fields = exclude_fields or [] + include_fields = include_fields or [] + + if not max_result_size: + # If max_result_size is not provided we don't perform any prefiltering and directly + # call parent method + execution_db = super(ActionExecutionsController, self)._get_by_id( + resource_id=resource_id, + exclude_fields=exclude_fields, + include_fields=include_fields, + ) + return execution_db + + # Special query where we check if result size is smaller than pre-defined or that field + # doesn't not exist (old executions) and only return the result if the condition is met. + # This allows us to implement fast and efficient retrievals of executions on the client + # st2web side where we don't want to retrieve and display result directly for executions + # with large results + # Keep in mind that the query itself is very fast and adds almost no overhead for API + # operations which pass this query parameter because we first filter on the ID (indexed + # field) and perform projection query with two tiny fields (based on real life testing it + # takes less than 3 ms in most scenarios). + execution_db = self.access.get( + Q(id=resource_id) + & (Q(result_size__lte=max_result_size) | Q(result_size__not__exists=True)), + only_fields=["id", "result_size"], + ) + + # if result is empty, this means that execution either doesn't exist or the result is + # larger than threshold which means we don't want to retrieve and return result to + # the end user to we set exclude_fields accordingly + if not execution_db: + LOG.debug( + "Execution with id %s and result_size < %s not found. This means " + "execution with this ID doesn't exist or result_size exceeds the " + "threshold. Result field will be excluded from the retrieval and " + "the response." % (resource_id, max_result_size) + ) + + if include_fields and "result" in include_fields: + include_fields.remove("result") + elif not include_fields: + exclude_fields += ["result"] + + # Now call parent get by id with potentially modified include / exclude fields in case + # result should not be included + execution_db = super(ActionExecutionsController, self)._get_by_id( + resource_id=resource_id, + exclude_fields=exclude_fields, + include_fields=include_fields, + ) + return execution_db + def _get_action_executions( self, exclude_fields=None, diff --git a/st2api/tests/unit/controllers/v1/test_executions.py b/st2api/tests/unit/controllers/v1/test_executions.py index 3309d73db3..beb6b8a529 100644 --- a/st2api/tests/unit/controllers/v1/test_executions.py +++ b/st2api/tests/unit/controllers/v1/test_executions.py @@ -45,6 +45,7 @@ from st2common.util import crypto as crypto_utils from st2common.util import date as date_utils from st2common.util import isotime +from st2common.util.jsonify import json_encode from st2api.controllers.v1.actionexecutions import ActionExecutionsController import st2common.validators.api.action as action_validator from st2tests.api import BaseActionExecutionControllerTestCase @@ -351,6 +352,103 @@ def test_get_one(self): self.assertEqual(get_resp.status_int, 200) self.assertEqual(self._get_actionexecution_id(get_resp), actionexecution_id) + def test_get_one_max_result_size_query_parameter(self): + data = copy.deepcopy(LIVE_ACTION_1) + post_resp = self._do_post(LIVE_ACTION_1) + + actionexecution_id = self._get_actionexecution_id(post_resp) + + # Update it with the result (this populates result and result size attributes) + data = { + "result": {"fooo": "a" * 1000}, + "status": "succeeded", + } + actual_result_size = len(json_encode(data["result"])) + + # NOTE: In real-life result_size is populdated in update_execution() method which is + # called in the end with the actual result + put_resp = self._do_put(actionexecution_id, data) + self.assertEqual(put_resp.json["result_size"], actual_result_size) + self.assertEqual(put_resp.json["result"], data["result"]) + + # 1. ?max_result_size query filter not provided + get_resp = self._do_get_one(actionexecution_id) + self.assertEqual(get_resp.status_int, 200) + self.assertEqual(get_resp.json["result"], data["result"]) + self.assertEqual(get_resp.json["result_size"], actual_result_size) + self.assertEqual(self._get_actionexecution_id(get_resp), actionexecution_id) + + # 2. ?max_result_size > actual result size + get_resp = self._do_get_one( + actionexecution_id + "?max_result_size=%s" % (actual_result_size + 1) + ) + self.assertEqual(get_resp.status_int, 200) + self.assertEqual(get_resp.json["result_size"], actual_result_size) + self.assertEqual(get_resp.json["result"], data["result"]) + self.assertEqual(self._get_actionexecution_id(get_resp), actionexecution_id) + + # 3. ?max_result_size < actual result size - result field should not be returned + get_resp = self._do_get_one( + actionexecution_id + "?max_result_size=%s" % (actual_result_size - 1) + ) + self.assertEqual(get_resp.status_int, 200) + self.assertEqual(get_resp.json["result_size"], actual_result_size) + self.assertTrue("result" not in get_resp.json) + self.assertEqual(self._get_actionexecution_id(get_resp), actionexecution_id) + + # 4. ?max_result_size < actual result size and ?include_attributes=result - result field + # should not be returned + get_resp = self._do_get_one( + actionexecution_id + + "?include_attributes=result,result_size&max_result_size=%s" + % (actual_result_size - 1) + ) + self.assertEqual(get_resp.status_int, 200) + self.assertEqual(get_resp.json["result_size"], actual_result_size) + self.assertTrue("result" not in get_resp.json) + self.assertEqual(self._get_actionexecution_id(get_resp), actionexecution_id) + + # 5. ?max_result_size > actual result size and ?exclude_attributes=result - result field + # should not be returned + get_resp = self._do_get_one( + actionexecution_id + + "?include_attributes=result_size&exclude_attriubtes=result&max_result_size=%s" + % (actual_result_size - 1) + ) + self.assertEqual(get_resp.status_int, 200) + self.assertEqual(get_resp.json["result_size"], actual_result_size) + self.assertTrue("result" not in get_resp.json) + self.assertEqual(self._get_actionexecution_id(get_resp), actionexecution_id) + + # 6. max_result_size is not a positive number + get_resp = self._do_get_one( + actionexecution_id + "?max_result_size=-100", expect_errors=True + ) + self.assertEqual(get_resp.status_int, 400) + self.assertEqual( + get_resp.json["faultstring"], "max_result_size must be a positive number" + ) + + # 7. max_result_size is > max possible value + get_resp = self._do_get_one( + actionexecution_id + "?max_result_size=%s" % ((14 * 1024 * 1024) + 1), + expect_errors=True, + ) + self.assertEqual(get_resp.status_int, 400) + self.assertEqual( + get_resp.json["faultstring"], + "max_result_size query parameter must be smaller than 14 MB", + ) + + # 8. ?max_result_size == actual result size - result should be returned + get_resp = self._do_get_one( + actionexecution_id + "?max_result_size=%s" % (actual_result_size) + ) + self.assertEqual(get_resp.status_int, 200) + self.assertEqual(get_resp.json["result_size"], actual_result_size) + self.assertEqual(get_resp.json["result"], data["result"]) + self.assertEqual(self._get_actionexecution_id(get_resp), actionexecution_id) + def test_get_all_id_query_param_filtering_success(self): post_resp = self._do_post(LIVE_ACTION_1) actionexecution_id = self._get_actionexecution_id(post_resp) diff --git a/st2common/st2common/openapi.yaml b/st2common/st2common/openapi.yaml index 69e254f48d..723c6f5235 100644 --- a/st2common/st2common/openapi.yaml +++ b/st2common/st2common/openapi.yaml @@ -1095,6 +1095,10 @@ paths: in: query description: Show secrets in plain text type: boolean + - name: max_result_size + in: query + type: integer + description: True to exclude result field from the response for executions which result field exceeds the provided size in bytes. x-parameters: - name: user in: context diff --git a/st2common/st2common/openapi.yaml.j2 b/st2common/st2common/openapi.yaml.j2 index d45c1720fe..b41602587b 100644 --- a/st2common/st2common/openapi.yaml.j2 +++ b/st2common/st2common/openapi.yaml.j2 @@ -1091,6 +1091,10 @@ paths: in: query description: Show secrets in plain text type: boolean + - name: max_result_size + in: query + type: integer + description: True to exclude result field from the response for executions which result field exceeds the provided size in bytes. x-parameters: - name: user in: context