From d55b8b655703a6b8ca058be1973e2c54c698d3a4 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sat, 20 Mar 2021 15:09:49 +0100 Subject: [PATCH 1/7] Add new ?max_result_size query parameter to the GET /v1/executions/ API endpoint. With this query parameter, user can decide to exclude execution.result field from the response in case it exceeds this size. This allows us to implement efficient execution retrieval in the client (st2web) side and avoid additional queries / API operations on the client side - aka win-win situation (reduce the load on the client and the server). --- st2api/st2api/controllers/resource.py | 9 ++- .../st2api/controllers/v1/actionexecutions.py | 77 +++++++++++++++++++ st2common/st2common/openapi.yaml.j2 | 4 + 3 files changed, 89 insertions(+), 1 deletion(-) diff --git a/st2api/st2api/controllers/resource.py b/st2api/st2api/controllers/resource.py index a2391ff9aa..24ea57df76 100644 --- a/st2api/st2api/controllers/resource.py +++ b/st2api/st2api/controllers/resource.py @@ -299,16 +299,23 @@ def _get_one_by_id( exclude_fields=None, include_fields=None, from_model_kwargs=None, + get_by_id_kwargs=None, ): """ :param exclude_fields: A list of object fields to exclude. :type exclude_fields: ``list`` :param include_fields: A list of object fields to include. :type include_fields: ``list`` + :param get_by_id_kwargs: Additional keyword arguments which are passed to the + "_get_by_id()" method. + :type get_by_id_kwargs: ``dict`` or ``None`` """ instance = self._get_by_id( - resource_id=id, exclude_fields=exclude_fields, include_fields=include_fields + resource_id=id, + exclude_fields=exclude_fields, + include_fields=include_fields, + **get_by_id_kwargs or {}, ) if permission_type: diff --git a/st2api/st2api/controllers/v1/actionexecutions.py b/st2api/st2api/controllers/v1/actionexecutions.py index 0282ecbe3e..f171162f5f 100644 --- a/st2api/st2api/controllers/v1/actionexecutions.py +++ b/st2api/st2api/controllers/v1/actionexecutions.py @@ -24,6 +24,7 @@ import jsonschema from oslo_config import cfg from six.moves import http_client +from mongoengine.queryset.visitor import Q from st2api.controllers.base import BaseRestControllerMixin from st2api.controllers.resource import ResourceController @@ -728,6 +729,7 @@ def get_one( exclude_attributes=None, include_attributes=None, show_secrets=False, + max_result_size=None, ): """ Retrieve a single execution. @@ -751,6 +753,14 @@ def get_one( ) } + # Maximum limit for MongoDB collection document is 16 MB and the field itself can't be + # larger than that obviously. And in reality due to the other fields, overhead, etc, + # 14 is the upper limit. + if max_result_size and max_result_size > 14 * 1024 * 1024: + raise ValueError( + "?max_result_size query parameter must be smaller than 14 MB" + ) + # Special case for id == "last" if id == "last": execution_db = ( @@ -769,6 +779,7 @@ def get_one( requester_user=requester_user, from_model_kwargs=from_model_kwargs, permission_type=PermissionType.EXECUTION_VIEW, + get_by_id_kwargs={"max_result_size": max_result_size}, ) def post( @@ -980,6 +991,72 @@ def delete(self, id, requester_user, show_secrets=False): execution_db, mask_secrets=from_model_kwargs["mask_secrets"] ) + def _get_by_id( + self, + resource_id, + exclude_fields=None, + include_fields=None, + max_result_size=None, + ): + """ + Custom version of _get_by_id() which supports ?max_result_size pre-filtering and not + returning result field for executions which result size exceeds this threshold. + + This functionality allows us to implement fast and efficient retrievals in st2web. + """ + exclude_fields = exclude_fields or [] + include_fields = include_fields or [] + + if not max_result_size: + # If max_result_size is not provided we don't perform any prefiltering and directly + # call parent method + execution_db = super(ActionExecutionsController, self)._get_by_id( + resource_id=resource_id, + exclude_fields=exclude_fields, + include_fields=include_fields, + ) + return execution_db + + # Special query where we check if result size is smaller than pre-defined or that field + # doesn't not exist (old executions) and only return the result if the condition is met. + # This allows us to implement fast and efficient retrievals of executions on the client + # st2web side where we don't want to retrieve and display result directly for executions + # with large results + # Keep in mind that the query itself is very fast and adds almost no overhead for API + # operations which pass this query parameter because we first filter on the ID (indexed + # field) and perform projection query with two tiny fields (based on real life testing it + # takes less than 3 ms in most scenarios). + execution_db = self.access.get( + Q(id=resource_id) + & (Q(result_size__lte=max_result_size) | Q(result_size__not__exists=True)), + only_fields=["id", "result_size"], + ) + + # if result is empty, this means that execution either doesn't exist or the result is + # larger than threshold which means we don't want to retrieve and return result to + # the end user to we set exclude_fields accordingly + if not execution_db: + LOG.debug( + "Execution with id %s and result_size < %s not found. This means " + "execution with this ID doesn't exist or result_size exceeds the " + "threshold. Result field will be excluded from the retrieval and " + "the response." % (resource_id, max_result_size) + ) + + if include_fields: + include_fields.remove("result") + else: + exclude_fields += ["result"] + + # Now call parent get by id with potentially modified include / exclude fields in case + # result should not be included + execution_db = super(ActionExecutionsController, self)._get_by_id( + resource_id=resource_id, + exclude_fields=exclude_fields, + include_fields=include_fields, + ) + return execution_db + def _get_action_executions( self, exclude_fields=None, diff --git a/st2common/st2common/openapi.yaml.j2 b/st2common/st2common/openapi.yaml.j2 index d45c1720fe..94a8154ec1 100644 --- a/st2common/st2common/openapi.yaml.j2 +++ b/st2common/st2common/openapi.yaml.j2 @@ -1091,6 +1091,10 @@ paths: in: query description: Show secrets in plain text type: boolean + - name: max_result_size + in: query + type: integer + description: True to exclude result field from the response for executions which result field exceeds the provided size. x-parameters: - name: user in: context From 94f1fd5aea4ec4ef6c57de2936b8b6527240b2dd Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sat, 20 Mar 2021 16:18:40 +0100 Subject: [PATCH 2/7] Add tests for the new API query param filter. --- .../unit/controllers/v1/test_executions.py | 45 +++++++++++++++++++ st2common/st2common/openapi.yaml | 4 ++ 2 files changed, 49 insertions(+) diff --git a/st2api/tests/unit/controllers/v1/test_executions.py b/st2api/tests/unit/controllers/v1/test_executions.py index 3309d73db3..622cf1fa6c 100644 --- a/st2api/tests/unit/controllers/v1/test_executions.py +++ b/st2api/tests/unit/controllers/v1/test_executions.py @@ -45,6 +45,7 @@ from st2common.util import crypto as crypto_utils from st2common.util import date as date_utils from st2common.util import isotime +from st2common.util.jsonify import json_encode from st2api.controllers.v1.actionexecutions import ActionExecutionsController import st2common.validators.api.action as action_validator from st2tests.api import BaseActionExecutionControllerTestCase @@ -351,6 +352,50 @@ def test_get_one(self): self.assertEqual(get_resp.status_int, 200) self.assertEqual(self._get_actionexecution_id(get_resp), actionexecution_id) + def test_get_one_max_result_size_query_parameter(self): + data = copy.deepcopy(LIVE_ACTION_1) + post_resp = self._do_post(LIVE_ACTION_1) + + actionexecution_id = self._get_actionexecution_id(post_resp) + + # Update it with the result (this populates result and result size attributes) + data = { + "result": {"fooo": "a" * 1000}, + "status": "succeeded", + } + actual_result_size = len(json_encode(data["result"])) + + # NOTE: In real-life result_size is populdated in update_execution() method which is + # called in the end with the actual result + put_resp = self._do_put(actionexecution_id, data) + self.assertEqual(put_resp.json["result_size"], actual_result_size) + self.assertEqual(put_resp.json["result"], data["result"]) + + # 1. ?max_result_size query filter not provided + get_resp = self._do_get_one(actionexecution_id) + self.assertEqual(get_resp.status_int, 200) + self.assertEqual(get_resp.json["result"], data["result"]) + self.assertEqual(get_resp.json["result_size"], actual_result_size) + self.assertEqual(self._get_actionexecution_id(get_resp), actionexecution_id) + + # 2. ?max_result_size > actual result size + get_resp = self._do_get_one( + actionexecution_id + "?max_result_size=%s" % (actual_result_size + 1) + ) + self.assertEqual(get_resp.status_int, 200) + self.assertEqual(get_resp.json["result_size"], actual_result_size) + self.assertEqual(get_resp.json["result"], data["result"]) + self.assertEqual(self._get_actionexecution_id(get_resp), actionexecution_id) + + # 3. ?max_result_size > actual result size - resilt field should not be returned + get_resp = self._do_get_one( + actionexecution_id + "?max_result_size=%s" % (actual_result_size - 1) + ) + self.assertEqual(get_resp.status_int, 200) + self.assertEqual(get_resp.json["result_size"], actual_result_size) + self.assertTrue("result" not in get_resp.json) + self.assertEqual(self._get_actionexecution_id(get_resp), actionexecution_id) + def test_get_all_id_query_param_filtering_success(self): post_resp = self._do_post(LIVE_ACTION_1) actionexecution_id = self._get_actionexecution_id(post_resp) diff --git a/st2common/st2common/openapi.yaml b/st2common/st2common/openapi.yaml index 69e254f48d..23b70aec2b 100644 --- a/st2common/st2common/openapi.yaml +++ b/st2common/st2common/openapi.yaml @@ -1095,6 +1095,10 @@ paths: in: query description: Show secrets in plain text type: boolean + - name: max_result_size + in: query + type: integer + description: True to exclude result field from the response for executions which result field exceeds the provided size. x-parameters: - name: user in: context From baee94b0e525dd9feaadb551d38df26c38161bde Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sat, 20 Mar 2021 16:29:40 +0100 Subject: [PATCH 3/7] Make the check more robust. --- st2api/st2api/controllers/v1/actionexecutions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/st2api/st2api/controllers/v1/actionexecutions.py b/st2api/st2api/controllers/v1/actionexecutions.py index f171162f5f..fcecbed6df 100644 --- a/st2api/st2api/controllers/v1/actionexecutions.py +++ b/st2api/st2api/controllers/v1/actionexecutions.py @@ -1043,9 +1043,9 @@ def _get_by_id( "the response." % (resource_id, max_result_size) ) - if include_fields: + if include_fields and "result" in include_fields: include_fields.remove("result") - else: + elif not include_fields: exclude_fields += ["result"] # Now call parent get by id with potentially modified include / exclude fields in case From 9457a9d7a73a886cbeaaf671d6cde7ba652b0c34 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sat, 20 Mar 2021 16:35:04 +0100 Subject: [PATCH 4/7] Add tests for more edge cases. --- .../unit/controllers/v1/test_executions.py | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/st2api/tests/unit/controllers/v1/test_executions.py b/st2api/tests/unit/controllers/v1/test_executions.py index 622cf1fa6c..1cd3e84ba6 100644 --- a/st2api/tests/unit/controllers/v1/test_executions.py +++ b/st2api/tests/unit/controllers/v1/test_executions.py @@ -387,7 +387,7 @@ def test_get_one_max_result_size_query_parameter(self): self.assertEqual(get_resp.json["result"], data["result"]) self.assertEqual(self._get_actionexecution_id(get_resp), actionexecution_id) - # 3. ?max_result_size > actual result size - resilt field should not be returned + # 3. ?max_result_size > actual result size - result field should not be returned get_resp = self._do_get_one( actionexecution_id + "?max_result_size=%s" % (actual_result_size - 1) ) @@ -396,6 +396,30 @@ def test_get_one_max_result_size_query_parameter(self): self.assertTrue("result" not in get_resp.json) self.assertEqual(self._get_actionexecution_id(get_resp), actionexecution_id) + # 4. ?max_result_size > actual result size and ?include_attributes=result - result field + # should not be returned + get_resp = self._do_get_one( + actionexecution_id + + "?include_attributes=result,result_size&max_result_size=%s" + % (actual_result_size - 1) + ) + self.assertEqual(get_resp.status_int, 200) + self.assertEqual(get_resp.json["result_size"], actual_result_size) + self.assertTrue("result" not in get_resp.json) + self.assertEqual(self._get_actionexecution_id(get_resp), actionexecution_id) + + # 5. ?max_result_size > actual result size and ?exclude_attributes=result - result field + # should not be returned + get_resp = self._do_get_one( + actionexecution_id + + "?include_attributes=result_size&exclude_attriubtes=result&max_result_size=%s" + % (actual_result_size - 1) + ) + self.assertEqual(get_resp.status_int, 200) + self.assertEqual(get_resp.json["result_size"], actual_result_size) + self.assertTrue("result" not in get_resp.json) + self.assertEqual(self._get_actionexecution_id(get_resp), actionexecution_id) + def test_get_all_id_query_param_filtering_success(self): post_resp = self._do_post(LIVE_ACTION_1) actionexecution_id = self._get_actionexecution_id(post_resp) From d2835d87c5ba736c4b8f573e2cad027f83a56407 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sat, 20 Mar 2021 16:35:36 +0100 Subject: [PATCH 5/7] Add changelog entry. --- CHANGELOG.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c86827565e..ab43204e43 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -156,6 +156,18 @@ Improvements Contributed by @Kami. +* Add new ``?max_result_size`` query parameter filter to the ``GET /v1/executiond/`` API + endpoint. + + This query parameter allows clients to implement conditional execution result retrieval and + only retrieve the result field if it's smaller than the provided value. + + This comes handy in the various client scenarios (such as st2web) where we don't display and + render very large results directly since it allows to speed things up and decrease amount of + data retrieved and parsed. (improvement) #5197 + + Contributed by @Kami. + Fixed ~~~~~ From 2e5233dc40f4bd17fdde5c60b1d5ae9fee3030c5 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sat, 20 Mar 2021 19:26:00 +0100 Subject: [PATCH 6/7] Add additional validation checks and tests. --- .../st2api/controllers/v1/actionexecutions.py | 34 +++++++++++++++---- .../unit/controllers/v1/test_executions.py | 20 +++++++++++ 2 files changed, 47 insertions(+), 7 deletions(-) diff --git a/st2api/st2api/controllers/v1/actionexecutions.py b/st2api/st2api/controllers/v1/actionexecutions.py index fcecbed6df..1c2ea1a6e1 100644 --- a/st2api/st2api/controllers/v1/actionexecutions.py +++ b/st2api/st2api/controllers/v1/actionexecutions.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Optional + import copy import re import sys @@ -753,13 +755,9 @@ def get_one( ) } - # Maximum limit for MongoDB collection document is 16 MB and the field itself can't be - # larger than that obviously. And in reality due to the other fields, overhead, etc, - # 14 is the upper limit. - if max_result_size and max_result_size > 14 * 1024 * 1024: - raise ValueError( - "?max_result_size query parameter must be smaller than 14 MB" - ) + max_result_size = self._validate_max_result_size( + max_result_size=max_result_size + ) # Special case for id == "last" if id == "last": @@ -991,6 +989,28 @@ def delete(self, id, requester_user, show_secrets=False): execution_db, mask_secrets=from_model_kwargs["mask_secrets"] ) + def _validate_max_result_size( + self, max_result_size: Optional[int] + ) -> Optional[int]: + """ + Validate value of the ?max_result_size query parameter (if provided). + """ + # Maximum limit for MongoDB collection document is 16 MB and the field itself can't be + # larger than that obviously. And in reality due to the other fields, overhead, etc, + # 14 is the upper limit. + if not max_result_size: + return max_result_size + + if max_result_size <= 0: + raise ValueError("max_result_size must be a positive number") + + if max_result_size > 14 * 1024 * 1024: + raise ValueError( + "max_result_size query parameter must be smaller than 14 MB" + ) + + return max_result_size + def _get_by_id( self, resource_id, diff --git a/st2api/tests/unit/controllers/v1/test_executions.py b/st2api/tests/unit/controllers/v1/test_executions.py index 1cd3e84ba6..114e9ee470 100644 --- a/st2api/tests/unit/controllers/v1/test_executions.py +++ b/st2api/tests/unit/controllers/v1/test_executions.py @@ -420,6 +420,26 @@ def test_get_one_max_result_size_query_parameter(self): self.assertTrue("result" not in get_resp.json) self.assertEqual(self._get_actionexecution_id(get_resp), actionexecution_id) + # 6. max_result_size is not a positive number + get_resp = self._do_get_one( + actionexecution_id + "?max_result_size=-100", expect_errors=True + ) + self.assertEqual(get_resp.status_int, 400) + self.assertEqual( + get_resp.json["faultstring"], "max_result_size must be a positive number" + ) + + # 7. max_result_size is > max possible value + get_resp = self._do_get_one( + actionexecution_id + "?max_result_size=%s" % ((14 * 1024 * 1024) + 1), + expect_errors=True, + ) + self.assertEqual(get_resp.status_int, 400) + self.assertEqual( + get_resp.json["faultstring"], + "max_result_size query parameter must be smaller than 14 MB", + ) + def test_get_all_id_query_param_filtering_success(self): post_resp = self._do_post(LIVE_ACTION_1) actionexecution_id = self._get_actionexecution_id(post_resp) From 0c80a0078a6c445d27841a47024074e3f7a01446 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Tue, 30 Mar 2021 23:22:23 +0200 Subject: [PATCH 7/7] Address review feedback - fix comments, add another test case for result_size == max_result_size, clarify query parameter value represents bytes. --- st2api/tests/unit/controllers/v1/test_executions.py | 13 +++++++++++-- st2common/st2common/openapi.yaml | 2 +- st2common/st2common/openapi.yaml.j2 | 2 +- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/st2api/tests/unit/controllers/v1/test_executions.py b/st2api/tests/unit/controllers/v1/test_executions.py index 114e9ee470..beb6b8a529 100644 --- a/st2api/tests/unit/controllers/v1/test_executions.py +++ b/st2api/tests/unit/controllers/v1/test_executions.py @@ -387,7 +387,7 @@ def test_get_one_max_result_size_query_parameter(self): self.assertEqual(get_resp.json["result"], data["result"]) self.assertEqual(self._get_actionexecution_id(get_resp), actionexecution_id) - # 3. ?max_result_size > actual result size - result field should not be returned + # 3. ?max_result_size < actual result size - result field should not be returned get_resp = self._do_get_one( actionexecution_id + "?max_result_size=%s" % (actual_result_size - 1) ) @@ -396,7 +396,7 @@ def test_get_one_max_result_size_query_parameter(self): self.assertTrue("result" not in get_resp.json) self.assertEqual(self._get_actionexecution_id(get_resp), actionexecution_id) - # 4. ?max_result_size > actual result size and ?include_attributes=result - result field + # 4. ?max_result_size < actual result size and ?include_attributes=result - result field # should not be returned get_resp = self._do_get_one( actionexecution_id @@ -440,6 +440,15 @@ def test_get_one_max_result_size_query_parameter(self): "max_result_size query parameter must be smaller than 14 MB", ) + # 8. ?max_result_size == actual result size - result should be returned + get_resp = self._do_get_one( + actionexecution_id + "?max_result_size=%s" % (actual_result_size) + ) + self.assertEqual(get_resp.status_int, 200) + self.assertEqual(get_resp.json["result_size"], actual_result_size) + self.assertEqual(get_resp.json["result"], data["result"]) + self.assertEqual(self._get_actionexecution_id(get_resp), actionexecution_id) + def test_get_all_id_query_param_filtering_success(self): post_resp = self._do_post(LIVE_ACTION_1) actionexecution_id = self._get_actionexecution_id(post_resp) diff --git a/st2common/st2common/openapi.yaml b/st2common/st2common/openapi.yaml index 23b70aec2b..723c6f5235 100644 --- a/st2common/st2common/openapi.yaml +++ b/st2common/st2common/openapi.yaml @@ -1098,7 +1098,7 @@ paths: - name: max_result_size in: query type: integer - description: True to exclude result field from the response for executions which result field exceeds the provided size. + description: True to exclude result field from the response for executions which result field exceeds the provided size in bytes. x-parameters: - name: user in: context diff --git a/st2common/st2common/openapi.yaml.j2 b/st2common/st2common/openapi.yaml.j2 index 94a8154ec1..b41602587b 100644 --- a/st2common/st2common/openapi.yaml.j2 +++ b/st2common/st2common/openapi.yaml.j2 @@ -1094,7 +1094,7 @@ paths: - name: max_result_size in: query type: integer - description: True to exclude result field from the response for executions which result field exceeds the provided size. + description: True to exclude result field from the response for executions which result field exceeds the provided size in bytes. x-parameters: - name: user in: context