From b9fc5e0aa9824dc4a78021bf69a5df0d939254ea Mon Sep 17 00:00:00 2001
From: Tomaz Muraus
Date: Fri, 7 May 2021 21:30:32 +0200
Subject: [PATCH 01/34] Add WIP change for migrating old executions field data
 which utilizes the old and very slow field type to the new field type.

This is really an optional script since all the new objects will already
utilize the new field type and most users only care about new / recent
executions (old executions are not retrieved that often so taking a bit longer
is not the end of the world).
---
 .../v3.5/migrate-db-dict-field-values | 135 ++++++++++++++++++
 1 file changed, 135 insertions(+)
 create mode 100755 st2common/bin/migrations/v3.5/migrate-db-dict-field-values

diff --git a/st2common/bin/migrations/v3.5/migrate-db-dict-field-values b/st2common/bin/migrations/v3.5/migrate-db-dict-field-values
new file mode 100755
index 0000000000..96309b820c
--- /dev/null
+++ b/st2common/bin/migrations/v3.5/migrate-db-dict-field-values
@@ -0,0 +1,135 @@
+#!/usr/bin/env python
+# Copyright 2021 The StackStorm Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Migration which migrates data for existing objects in the database which utilize
+EscapedDictField or EscapedDynamicField and have been updated to use the new JsonDictField.
+
+Migration step is idempotent and can be retried on failures.
+
+Keep in mind that running this migration script is optional and it may take a long time if you have
+a lot of very large objects in the database (aka executions) - reading a lot of data from the
+database using the old field types is slow and CPU intensive.
+
+The new field type is automatically used for all the new objects when upgrading to v3.5 so migration is
+optional because in most cases users are viewing recent / new executions and not old ones which may
+still utilize the old field type which is slow to read / write.
+
+Right now the script utilizes no concurrency and performs migration one object at a time. That's done
+for simplicity reasons and also to avoid massive CPU usage spikes when running this script with
+large concurrency on large objects.
+
+Keep in mind that only "completed" objects are processed - this means Executions in "final" states
+(succeeded, failed, timeout, etc.).
+
+We determine that an execution is utilizing the old format if it doesn't contain the "result_size" field which
+was added along with the new field type.
+
+The actual migration simply involves reading + re-saving the whole object to the database - everything
+is handled by mongoengine and the new field abstraction.
+
+TODO: Also add support for migrating trigger instances and workflow related objects - low
+priority, and for those objects we don't have a "result_size" attribute so it's not totally trivial
+to determine if an object utilizes the old field type (we could simply use some date threshold and migrate
+everything before that or execute a raw pymongo query which searches for a special string in the
+field value).
+""" + +import sys +import traceback + +from mongoengine.queryset.visitor import Q + +from st2common import config +from st2common.service_setup import db_setup +from st2common.service_setup import db_teardown +from st2common.models.db.execution import ActionExecutionDB +from st2common.models.db.liveaction import LiveActionDB +from st2common.persistence.execution import ActionExecution +from st2common.persistence.liveaction import LiveAction +from st2common.constants import action as action_constants + + +def migrate_executions() -> None: + """ + Perform migrations for execution related objects (ActionExecutionDB, LiveActionDB). + """ + # 1. Migrate ActionExecutionDB objects + execution_dbs = ActionExecution.query( + Q(result_size__not__exists=True) + & Q(status__in=action_constants.LIVEACTION_COMPLETED_STATES) + ) + + if not execution_dbs: + print("Found no ActionExecutionDB objects to migrate.") + return None + + print("Will migrate %s ActionExecutionDB objects" % (len(execution_dbs))) + + for execution_db in execution_dbs: + # Migrate corresponding LiveAction object + print("Migrating ActionExecutionDB with id %s" % (execution_db.id)) + + # This is a bit of a "hack", but it's the easiest way to tell mongoengine that a specific + # field has been updated and should be saved. If we don't do, nothing will be re-saved on + # .save() call due to mongoengine only trying to save what has changed to make it more + # efficient instead of always re-saving the whole object. + execution_db._mark_as_changed("result") + execution_db._mark_as_changed("result_size") + + # print(getattr(execution_db, "_changed_fields", [])) + execution_db.save() + print("ActionExecutionDB with id %s has been migrated" % (execution_db.id)) + + try: + liveaction_db = LiveAction.get_by_id(execution_db.liveaction["id"]) + except Exception: + # If liveaction for some reason doesn't exist (would likely represent corrupted data) we + # simply ignore that error since it's not fatal. + continue + + liveaction_db._mark_as_changed("result") + + # print(getattr(liveaction_db, "_changed_fields", [])) + liveaction_db.save() + print("Related LiveActionDB with id %s has been migrated" % (liveaction_db.id)) + print("") + + +def migrate_objects() -> None: + print("Migrating affected database objects to utilize new field type") + migrate_executions() + + +def main(): + config.parse_args() + + db_setup() + + try: + migrate_objects() + print("SUCCESS: All database objects migrated successfully.") + exit_code = 0 + except Exception as e: + print("ABORTED: Objects migration aborted on first failure: %s" % (str(e))) + traceback.print_exc() + exit_code = 1 + + db_teardown() + sys.exit(exit_code) + + +if __name__ == "__main__": + main() From 7fe8da367ab1fd71f1d60113b6f4a609de0472b1 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Fri, 7 May 2021 21:48:37 +0200 Subject: [PATCH 02/34] Bump apt cache key. 
---
 .github/workflows/ci.yaml                         | 4 ++--
 .github/workflows/microbenchmarks.yaml            | 4 ++--
 .github/workflows/orquesta-integration-tests.yaml | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 1433fce714..1719d5b476 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -95,9 +95,9 @@ jobs:
         with:
           path: |
             ~/apt_cache
-          key: ${{ runner.os }}-apt-v5-${{ hashFiles('scripts/github/apt-packages.txt') }}
+          key: ${{ runner.os }}-apt-v6-${{ hashFiles('scripts/github/apt-packages.txt') }}
          restore-keys: |
-            ${{ runner.os }}-apt-v5-
+            ${{ runner.os }}-apt-v6-
      - name: Install APT Depedencies
        env:
          CACHE_HIT: ${{steps.cache-apt-deps.outputs.cache-hit}}
diff --git a/.github/workflows/microbenchmarks.yaml b/.github/workflows/microbenchmarks.yaml
index 72d050d38b..af46dace15 100644
--- a/.github/workflows/microbenchmarks.yaml
+++ b/.github/workflows/microbenchmarks.yaml
@@ -91,9 +91,9 @@ jobs:
         with:
           path: |
             ~/apt_cache
-          key: ${{ runner.os }}-apt-v5-${{ hashFiles('scripts/github/apt-packages.txt') }}
+          key: ${{ runner.os }}-apt-v6-${{ hashFiles('scripts/github/apt-packages.txt') }}
          restore-keys: |
-            ${{ runner.os }}-apt-v5-
+            ${{ runner.os }}-apt-v6-
      - name: Install APT Dependencies
        env:
          CACHE_HIT: ${{steps.cache-apt-deps.outputs.cache-hit}}
diff --git a/.github/workflows/orquesta-integration-tests.yaml b/.github/workflows/orquesta-integration-tests.yaml
index 36782b2199..d7c2471f0c 100644
--- a/.github/workflows/orquesta-integration-tests.yaml
+++ b/.github/workflows/orquesta-integration-tests.yaml
@@ -144,9 +144,9 @@ jobs:
         with:
           path: |
             ~/apt_cache
-          key: ${{ runner.os }}-apt-v5-${{ hashFiles('scripts/github/apt-packages.txt') }}
+          key: ${{ runner.os }}-apt-v6-${{ hashFiles('scripts/github/apt-packages.txt') }}
          restore-keys: |
-            ${{ runner.os }}-apt-v5-
+            ${{ runner.os }}-apt-v6-
      - name: Install APT Depedencies
        env:
          CACHE_HIT: ${{steps.cache-apt-deps.outputs.cache-hit}}

From c22fa561ce9021ae5ded034f6291416f2b598d79 Mon Sep 17 00:00:00 2001
From: Tomaz Muraus
Date: Sat, 8 May 2021 10:48:25 +0200
Subject: [PATCH 03/34] Add support for migrating other objects which utilize
 the new data type and use a mongodb field $type query to determine if a
 particular object / field should be migrated.
---
 .../v3.5/migrate-db-dict-field-values     | 155 ++++++++++++++++--
 st2common/st2common/constants/triggers.py |   7 +
 2 files changed, 144 insertions(+), 18 deletions(-)

diff --git a/st2common/bin/migrations/v3.5/migrate-db-dict-field-values b/st2common/bin/migrations/v3.5/migrate-db-dict-field-values
index 96309b820c..3bb6fccf6f 100755
--- a/st2common/bin/migrations/v3.5/migrate-db-dict-field-values
+++ b/st2common/bin/migrations/v3.5/migrate-db-dict-field-values
@@ -17,7 +17,7 @@
 Migration which migrates data for existing objects in the database which utilize
 EscapedDictField or EscapedDynamicField and have been updated to use the new JsonDictField.
 
-Migration step is idempotent and can be retried on failures.
+Migration step is idempotent and can be retried on failures / partial runs.
 
 Keep in mind that running this migration script is optional and it may take a long time if you have
 a lot of very large objects in the database (aka executions) - reading a lot of data from the
 database using the old field types is slow and CPU intensive.
@@ -34,17 +34,8 @@ large concurrency on large objects.
 Keep in mind that only "completed" objects are processed - this means Executions in "final" states
 (succeeded, failed, timeout, etc.).
 
-We determine that an execution is utilizing the old format if it doesn't contain the "result_size" field which
-was added along with the new field type.
-
-The actual migration simply involves reading + re-saving the whole object to the database - everything
-is handled by mongoengine and the new field abstraction.
-
-TODO: Also add support for migrating trigger instances and workflow related objects - low
-priority, and for those objects we don't have a "result_size" attribute so it's not totally trivial
-to determine if an object utilizes the old field type (we could simply use some date threshold and migrate
-everything before that or execute a raw pymongo query which searches for a special string in the
-field value).
+We determine if an object should be migrated using a mongodb $type query (for execution objects we
+could also determine that based on the presence of result_size field).
 """
 
 import sys
@@ -55,21 +46,37 @@
 from mongoengine.queryset.visitor import Q
 
 from st2common import config
 from st2common.service_setup import db_setup
 from st2common.service_setup import db_teardown
-from st2common.models.db.execution import ActionExecutionDB
-from st2common.models.db.liveaction import LiveActionDB
 from st2common.persistence.execution import ActionExecution
+from st2common.persistence.workflow import WorkflowExecution
+from st2common.persistence.workflow import TaskExecution
 from st2common.persistence.liveaction import LiveAction
-from st2common.constants import action as action_constants
+from st2common.persistence.trigger import TriggerInstance
+from st2common.constants.action import LIVEACTION_COMPLETED_STATES
+from st2common.constants.triggers import TRIGGER_INSTANCE_COMPLETED_STATES
 
 
 def migrate_executions() -> None:
     """
     Perform migrations for execution related objects (ActionExecutionDB, LiveActionDB).
     """
+    print("Migrating execution objects")
+
     # 1. Migrate ActionExecutionDB objects
     execution_dbs = ActionExecution.query(
-        Q(result_size__not__exists=True)
-        & Q(status__in=action_constants.LIVEACTION_COMPLETED_STATES)
+        Q(result_size__not__exists=True) & Q(status__in=LIVEACTION_COMPLETED_STATES)
+    )
+
+    execution_dbs = ActionExecution.query(
+        __raw__={
+            "result": {
+                "not": {
+                    "$type": "binData",
+                },
+            },
+            "status": {
+                "in": LIVEACTION_COMPLETED_STATES,
+            },
+        }
     )
 
     if not execution_dbs:
@@ -79,7 +86,6 @@ def migrate_executions() -> None:
     print("Will migrate %s ActionExecutionDB objects" % (len(execution_dbs)))
 
     for execution_db in execution_dbs:
-        # Migrate corresponding LiveAction object
         print("Migrating ActionExecutionDB with id %s" % (execution_db.id))
 
         # This is a bit of a "hack", but it's the easiest way to tell mongoengine that a specific
@@ -93,6 +99,7 @@ def migrate_executions() -> None:
         execution_db.save()
         print("ActionExecutionDB with id %s has been migrated" % (execution_db.id))
 
+        # Migrate corresponding LiveAction object
         try:
             liveaction_db = LiveAction.get_by_id(execution_db.liveaction["id"])
         except Exception:
             # If liveaction for some reason doesn't exist (would likely represent corrupted data) we
@@ -108,9 +115,121 @@ def migrate_executions() -> None:
     print("")
 
 
+def migrate_workflow_objects() -> None:
+    print("Migrating workflow objects")
+
+    # 1. Migrate WorkflowExecutionDB
+    workflow_execution_dbs = WorkflowExecution.query(
+        __raw__={
+            "output": {
+                "not": {
+                    "$type": "binData",
+                },
+            },
+            "status": {
+                "in": LIVEACTION_COMPLETED_STATES,
+            },
+        }
+    )
+
+    if not workflow_execution_dbs:
+        print("Found no WorkflowExecutionDB objects to migrate.")
+    else:
+        print(
+            "Will migrate %s WorkflowExecutionDB objects"
+            % (len(workflow_execution_dbs))
+        )
+
+    for workflow_execution_db in workflow_execution_dbs or []:
+        print("Migrating ActionExecutionDB with id %s" % (workflow_execution_db.id))
+
+        # This is a bit of a "hack", but it's the easiest way to tell mongoengine that a specific
+        # field has been updated and should be saved. If we don't do this, nothing will be re-saved on
+        # .save() call due to mongoengine only trying to save what has changed to make it more
+        # efficient instead of always re-saving the whole object.
+        workflow_execution_db._mark_as_changed("input")
+        workflow_execution_db._mark_as_changed("context")
+        workflow_execution_db._mark_as_changed("state")
+        workflow_execution_db._mark_as_changed("output")
+
+        workflow_execution_db.save()
+        print(
+            "WorkflowExecutionDB with id %s has been migrated"
+            % (workflow_execution_db.id)
+        )
+        print("")
+
+    # 2. Migrate TaskExecutionDB
+    task_execution_dbs = TaskExecution.query(
+        __raw__={
+            "result": {
+                "not": {
+                    "$type": "binData",
+                },
+            },
+            "status": {
+                "in": LIVEACTION_COMPLETED_STATES,
+            },
+        }
+    )
+
+    if not task_execution_dbs:
+        print("Found no TaskExecutionDB objects to migrate.")
+    else:
+        print("Will migrate %s TaskExecutionDB objects" % (len(task_execution_dbs)))
+
+    for task_execution_db in task_execution_dbs or []:
+        print("Migrating TaskExecutionDB with id %s" % (task_execution_db.id))
+
+        task_execution_db._mark_as_changed("task_spec")
+        task_execution_db._mark_as_changed("context")
+        task_execution_db._mark_as_changed("result")
+
+        task_execution_db.save()
+        print("TaskExecutionDB with id %s has been migrated" % (task_execution_db.id))
+        print("")
+
+
+def migrate_triggers() -> None:
+    print("Migrating trigger objects")
+
+    trigger_instance_dbs = TriggerInstance.query(
+        __raw__={
+            "payload": {
+                "not": {
+                    "$type": "binData",
+                },
+            },
+            "status": {
+                "in": TRIGGER_INSTANCE_COMPLETED_STATES,
+            },
+        }
+    )
+
+    if not trigger_instance_dbs:
+        print("Found no TriggerInstanceDB objects to migrate.")
+        return None
+
+    print("Will migrate %s ActionExecutionDB objects" % (len(trigger_instance_dbs)))
+
+    for trigger_instance_db in trigger_instance_dbs:
+        print("Migrating TriggerInstanceDB with id %s" % (trigger_instance_db.id))
+
+        trigger_instance_db._mark_as_changed("payload")
+
+        trigger_instance_db.save()
+        print(
+            "TriggerInstanceDB with id %s has been migrated" % (trigger_instance_db.id)
+        )
+        print("")
+
+
 def migrate_objects() -> None:
     print("Migrating affected database objects to utilize new field type")
+
     migrate_executions()
+    migrate_workflow_objects()
+    migrate_triggers()
 
 
 def main():
diff --git a/st2common/st2common/constants/triggers.py b/st2common/st2common/constants/triggers.py
index 14ab861fd5..4751ec372e 100644
--- a/st2common/st2common/constants/triggers.py
+++ b/st2common/st2common/constants/triggers.py
@@ -41,6 +41,7 @@
     "TRIGGER_INSTANCE_PROCESSING",
     "TRIGGER_INSTANCE_PROCESSED",
     "TRIGGER_INSTANCE_PROCESSING_FAILED",
+    "TRIGGER_INSTANCE_COMPLETED_STATES",
 ]
 
 # Action resource triggers
@@ -351,3 +352,9 @@
     TRIGGER_INSTANCE_PROCESSED,
     TRIGGER_INSTANCE_PROCESSING_FAILED,
 ]
+
+
+TRIGGER_INSTANCE_COMPLETED_STATES = [
+    TRIGGER_INSTANCE_PROCESSED,
+    TRIGGER_INSTANCE_PROCESSING_FAILED,
+]

From 70261e0613981cee9abf6e571b65fe409470a1e1 Mon Sep 17 00:00:00 2001
From: Tomaz Muraus
Date: Sat, 8 May 2021 21:20:03 +0200
Subject: [PATCH 04/34] Fix query syntax, remove unnecessary variable.
---
 .../v3.5/migrate-db-dict-field-values | 20 ++++++++-----------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/st2common/bin/migrations/v3.5/migrate-db-dict-field-values b/st2common/bin/migrations/v3.5/migrate-db-dict-field-values
index 3bb6fccf6f..0181df742a 100755
--- a/st2common/bin/migrations/v3.5/migrate-db-dict-field-values
+++ b/st2common/bin/migrations/v3.5/migrate-db-dict-field-values
@@ -62,19 +62,15 @@ def migrate_executions() -> None:
     print("Migrating execution objects")
 
     # 1. Migrate ActionExecutionDB objects
-    execution_dbs = ActionExecution.query(
-        Q(result_size__not__exists=True) & Q(status__in=LIVEACTION_COMPLETED_STATES)
-    )
-
     execution_dbs = ActionExecution.query(
         __raw__={
             "result": {
-                "not": {
+                "$not": {
                     "$type": "binData",
                 },
             },
             "status": {
-                "in": LIVEACTION_COMPLETED_STATES,
+                "$in": LIVEACTION_COMPLETED_STATES,
             },
         }
     )
@@ -122,12 +118,12 @@ def migrate_workflow_objects() -> None:
     workflow_execution_dbs = WorkflowExecution.query(
         __raw__={
             "output": {
-                "not": {
+                "$not": {
                     "$type": "binData",
                 },
             },
             "status": {
-                "in": LIVEACTION_COMPLETED_STATES,
+                "$in": LIVEACTION_COMPLETED_STATES,
             },
         }
     )
@@ -163,12 +159,12 @@ def migrate_workflow_objects() -> None:
     task_execution_dbs = TaskExecution.query(
         __raw__={
             "result": {
-                "not": {
+                "$not": {
                     "$type": "binData",
                 },
             },
             "status": {
-                "in": LIVEACTION_COMPLETED_STATES,
+                "$in": LIVEACTION_COMPLETED_STATES,
             },
         }
     )
@@ -196,12 +192,12 @@ def migrate_triggers() -> None:
     trigger_instance_dbs = TriggerInstance.query(
         __raw__={
             "payload": {
-                "not": {
+                "$not": {
                     "$type": "binData",
                 },
             },
             "status": {
-                "in": TRIGGER_INSTANCE_COMPLETED_STATES,
+                "$in": TRIGGER_INSTANCE_COMPLETED_STATES,
             },
         }
     )

From 3a3693d9b7fb00589ddc7816bb5c835c22b659e8 Mon Sep 17 00:00:00 2001
From: Tomaz Muraus
Date: Sat, 8 May 2021 21:31:07 +0200
Subject: [PATCH 05/34] Print message if we fail to migrate "related"
 LiveAction (non-fatal error).
---
 .../bin/migrations/v3.5/migrate-db-dict-field-values | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/st2common/bin/migrations/v3.5/migrate-db-dict-field-values b/st2common/bin/migrations/v3.5/migrate-db-dict-field-values
index 0181df742a..58fd1bbaf9 100755
--- a/st2common/bin/migrations/v3.5/migrate-db-dict-field-values
+++ b/st2common/bin/migrations/v3.5/migrate-db-dict-field-values
@@ -41,8 +41,6 @@ could also determine that based on the presence of result_size field).
 import sys
 import traceback
 
-from mongoengine.queryset.visitor import Q
-
 from st2common import config
 from st2common.service_setup import db_setup
 from st2common.service_setup import db_teardown
@@ -97,8 +95,13 @@ def migrate_executions() -> None:
 
         # Migrate corresponding LiveAction object
         try:
-            liveaction_db = LiveAction.get_by_id(execution_db.liveaction["id"])
-        except Exception:
+            liveaction_id = execution_db.liveaction.get("id", None)
+            liveaction_db = LiveAction.get_by_id(liveaction_id)
+        except Exception as e:
+            print(
+                "Failed to migrate LiveActionDB with id %s, ignoring error (%s)"
+                % (liveaction_id, str(e))
+            )
             # If liveaction for some reason doesn't exist (would likely represent corrupted data) we
             # simply ignore that error since it's not fatal.
continue From 10b920c9eda42635586823399349dfaaa35d8661 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sat, 8 May 2021 22:06:49 +0200 Subject: [PATCH 06/34] Fix log statements. --- .../bin/migrations/v3.5/migrate-db-dict-field-values | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/st2common/bin/migrations/v3.5/migrate-db-dict-field-values b/st2common/bin/migrations/v3.5/migrate-db-dict-field-values index 58fd1bbaf9..a6637d1201 100755 --- a/st2common/bin/migrations/v3.5/migrate-db-dict-field-values +++ b/st2common/bin/migrations/v3.5/migrate-db-dict-field-values @@ -45,9 +45,9 @@ from st2common import config from st2common.service_setup import db_setup from st2common.service_setup import db_teardown from st2common.persistence.execution import ActionExecution +from st2common.persistence.liveaction import LiveAction from st2common.persistence.workflow import WorkflowExecution from st2common.persistence.workflow import TaskExecution -from st2common.persistence.liveaction import LiveAction from st2common.persistence.trigger import TriggerInstance from st2common.constants.action import LIVEACTION_COMPLETED_STATES from st2common.constants.triggers import TRIGGER_INSTANCE_COMPLETED_STATES @@ -78,6 +78,7 @@ def migrate_executions() -> None: return None print("Will migrate %s ActionExecutionDB objects" % (len(execution_dbs))) + print("") for execution_db in execution_dbs: print("Migrating ActionExecutionDB with id %s" % (execution_db.id)) @@ -138,9 +139,10 @@ def migrate_workflow_objects() -> None: "Will migrate %s WorkflowExecutionDB objects" % (len(workflow_execution_dbs)) ) + print("") for workflow_execution_db in workflow_execution_dbs or []: - print("Migrating ActionExecutionDB with id %s" % (workflow_execution_db.id)) + print("Migrating WorkflowExecutionDB with id %s" % (workflow_execution_db.id)) # This is a bit of a "hack", but it's the easiest way to tell mongoengine that a specific # field has been updated and should be saved. If we don't do, nothing will be re-saved on @@ -176,6 +178,7 @@ def migrate_workflow_objects() -> None: print("Found no TaskExecutionDB objects to migrate.") else: print("Will migrate %s TaskExecutionDB objects" % (len(task_execution_dbs))) + print("") for task_execution_db in task_execution_dbs or []: print("Migrating TaskExecutionDB with id %s" % (task_execution_db.id)) @@ -209,7 +212,7 @@ def migrate_triggers() -> None: print("Found no TriggerInstanceDB objects to migrate.") return None - print("Will migrate %s ActionExecutionDB objects" % (len(trigger_instance_dbs))) + print("Will migrate %s TriggerInstanceDB objects" % (len(trigger_instance_dbs))) for trigger_instance_db in trigger_instance_dbs: print("Migrating TriggerInstanceDB with id %s" % (trigger_instance_db.id)) From f84976315fbf62b532cdba47506cbf27a7108030 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sat, 8 May 2021 22:07:01 +0200 Subject: [PATCH 07/34] Add symlink needed to be able to import module in tests. 
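
For reference, the $type-based detection added in the patches above can also be
expressed as a raw pymongo query: values written by the old EscapedDictField /
EscapedDynamicField types are stored as regular BSON documents (type "object"),
while the new JsonDictField stores serialized bytes (type "binData"). A hedged
sketch - the connection details, collection name and status list below are
illustrative assumptions, not taken from the patch:

    # Sketch only - counts execution documents still stored in the old format.
    import pymongo

    client = pymongo.MongoClient("mongodb://127.0.0.1:27017")
    collection = client["st2"]["action_execution_d_b"]

    query = {
        # Old format: "result" is a plain BSON document rather than binData.
        "result": {"$not": {"$type": "binData"}},
        # Only completed executions are eligible for migration.
        "status": {"$in": ["succeeded", "failed", "timeout", "canceled", "abandoned"]},
    }
    print(collection.count_documents(query))
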
---
 st2common/bin/migrations/v3.5/__init__.py                     | 0
 st2common/bin/migrations/v3.5/migrate_db_dict_field_values.py | 1 +
 2 files changed, 1 insertion(+)
 create mode 100644 st2common/bin/migrations/v3.5/__init__.py
 create mode 120000 st2common/bin/migrations/v3.5/migrate_db_dict_field_values.py

diff --git a/st2common/bin/migrations/v3.5/__init__.py b/st2common/bin/migrations/v3.5/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/st2common/bin/migrations/v3.5/migrate_db_dict_field_values.py b/st2common/bin/migrations/v3.5/migrate_db_dict_field_values.py
new file mode 120000
index 0000000000..457ed3b569
--- /dev/null
+++ b/st2common/bin/migrations/v3.5/migrate_db_dict_field_values.py
@@ -0,0 +1 @@
+migrate-db-dict-field-values
\ No newline at end of file

From 9c600fd28503f087b53135425cb297e9eac67307 Mon Sep 17 00:00:00 2001
From: Tomaz Muraus
Date: Sat, 8 May 2021 22:09:53 +0200
Subject: [PATCH 08/34] Remove duplicated comment.
---
 st2common/bin/migrations/v3.5/migrate-db-dict-field-values | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/st2common/bin/migrations/v3.5/migrate-db-dict-field-values b/st2common/bin/migrations/v3.5/migrate-db-dict-field-values
index a6637d1201..ee9042c667 100755
--- a/st2common/bin/migrations/v3.5/migrate-db-dict-field-values
+++ b/st2common/bin/migrations/v3.5/migrate-db-dict-field-values
@@ -144,10 +144,6 @@ def migrate_workflow_objects() -> None:
     for workflow_execution_db in workflow_execution_dbs or []:
         print("Migrating WorkflowExecutionDB with id %s" % (workflow_execution_db.id))
 
-        # This is a bit of a "hack", but it's the easiest way to tell mongoengine that a specific
-        # field has been updated and should be saved. If we don't do this, nothing will be re-saved on
-        # .save() call due to mongoengine only trying to save what has changed to make it more
-        # efficient instead of always re-saving the whole object.
         workflow_execution_db._mark_as_changed("input")
         workflow_execution_db._mark_as_changed("context")
         workflow_execution_db._mark_as_changed("state")

From 928add0261b9edb572dabe065db9fe469f315122 Mon Sep 17 00:00:00 2001
From: Tomaz Muraus
Date: Sat, 8 May 2021 22:10:08 +0200
Subject: [PATCH 09/34] Add basic unit tests for migration script.
---
 .../test_migrate_db_dict_field_values.py | 501 ++++++++++++++++++
 1 file changed, 501 insertions(+)
 create mode 100644 st2common/tests/unit/migrations/test_migrate_db_dict_field_values.py

diff --git a/st2common/tests/unit/migrations/test_migrate_db_dict_field_values.py b/st2common/tests/unit/migrations/test_migrate_db_dict_field_values.py
new file mode 100644
index 0000000000..509a6f0f0e
--- /dev/null
+++ b/st2common/tests/unit/migrations/test_migrate_db_dict_field_values.py
@@ -0,0 +1,501 @@
+# Copyright 2021 The StackStorm Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +import sys + +sys.path.append("/home/vagrant/st2/st2common/bin/migrations/v3.5/") + +from st2common.constants import action as action_constants +from st2common.models.db import stormbase +from st2common.models.db.execution import ActionExecutionDB +from st2common.models.db.liveaction import LiveActionDB +from st2common.models.db.workflow import WorkflowExecutionDB +from st2common.models.db.workflow import TaskExecutionDB +from st2common.models.db.trigger import TriggerInstanceDB +from st2common.persistence.execution import ActionExecution +from st2common.persistence.liveaction import LiveAction +from st2common.persistence.workflow import WorkflowExecution +from st2common.persistence.workflow import TaskExecution +from st2common.persistence.trigger import TriggerInstance +from st2common.constants.triggers import TRIGGER_INSTANCE_PROCESSED +from st2common.constants.triggers import TRIGGER_INSTANCE_PENDING + +from st2tests import DbTestCase + +import migrate_db_dict_field_values as migration_module + + +MOCK_RESULT_1 = { + "foo": "bar1", + "bar": 1, + "baz": None, +} + +MOCK_RESULT_2 = { + "foo": "bar2", + "bar": 2, + "baz": False, +} + +MOCK_PAYLOAD_1 = {"yaaaas": "le payload!"} + + +MOCK_PAYLOAD_2 = {"yaaaas": "le payload! 2"} + +# Needed so we can subclass it +ActionExecutionDB._meta["allow_inheritance"] = True +LiveActionDB._meta["allow_inheritance"] = True + +WorkflowExecutionDB._meta["allow_inheritance"] = True +TaskExecutionDB._meta["allow_inheritance"] = True + +TriggerInstanceDB._meta["allow_inheritance"] = True + + +class ActionExecutionDB_OldFieldType(ActionExecutionDB): + result = stormbase.EscapedDynamicField(default={}) + + +class LiveActionDB_OldFieldType(LiveActionDB): + result = stormbase.EscapedDynamicField(default={}) + + +class WorkflowExecutionDB_OldFieldType(WorkflowExecutionDB): + input = stormbase.EscapedDictField() + context = stormbase.EscapedDictField() + state = stormbase.EscapedDictField() + output = stormbase.EscapedDictField() + + +class TaskExecutionDB_OldFieldType(TaskExecutionDB): + task_spec = stormbase.EscapedDictField() + context = stormbase.EscapedDictField() + result = stormbase.EscapedDictField() + + +class TriggerInstanceDB_OldFieldType(TriggerInstanceDB): + payload = stormbase.EscapedDictField() + + +class DBFieldsMigrationScriptTestCase(DbTestCase): + def test_migrate_executions(self): + execution_dbs = ActionExecution.query( + __raw__={ + "result": { + "not": { + "$type": "binData", + }, + } + } + ) + self.assertEqual(len(execution_dbs), 0) + execution_dbs = ActionExecution.query( + __raw__={ + "result": { + "$type": "object", + }, + } + ) + self.assertEqual(len(execution_dbs), 0) + + # 1. 
Insert data in old format + liveaction_1_db = LiveActionDB_OldFieldType() + liveaction_1_db.action = "foo.bar" + liveaction_1_db.status = action_constants.LIVEACTION_STATUS_FAILED + liveaction_1_db.result = MOCK_RESULT_1 + liveaction_1_db = LiveAction.add_or_update(liveaction_1_db) + + execution_1_db = ActionExecutionDB_OldFieldType() + execution_1_db.action = {"a": 1} + execution_1_db.runner = {"a": 1} + execution_1_db.liveaction = {"id": liveaction_1_db.id} + execution_1_db.status = action_constants.LIVEACTION_STATUS_FAILED + execution_1_db.result = MOCK_RESULT_1 + execution_1_db = ActionExecution.add_or_update(execution_1_db) + + # This execution is not in a final state yet so it should not be migrated + liveaction_2_db = LiveActionDB_OldFieldType() + liveaction_2_db.action = "foo.bar2" + liveaction_2_db.status = action_constants.LIVEACTION_STATUS_RUNNING + liveaction_2_db.result = MOCK_RESULT_2 + liveaction_2_db = LiveAction.add_or_update(liveaction_2_db) + + execution_2_db = ActionExecutionDB_OldFieldType() + execution_2_db.action = {"a": 2} + execution_2_db.runner = {"a": 2} + execution_2_db.liveaction = {"id": liveaction_2_db.id} + execution_2_db.status = action_constants.LIVEACTION_STATUS_RUNNING + execution_2_db.result = MOCK_RESULT_2 + execution_2_db = ActionExecution.add_or_update(execution_2_db) + + # Verify data has been inserted in old format + execution_dbs = ActionExecution.query( + __raw__={ + "result": { + "$type": "object", + }, + } + ) + self.assertEqual(len(execution_dbs), 2) + execution_dbs = ActionExecution.query( + __raw__={ + "result": { + "$not": { + "$type": "binData", + }, + } + } + ) + self.assertEqual(len(execution_dbs), 2) + execution_dbs = ActionExecution.query( + __raw__={ + "result": { + "$type": "binData", + }, + } + ) + self.assertEqual(len(execution_dbs), 0) + + liveaction_dbs = LiveAction.query( + __raw__={ + "result": { + "$type": "object", + }, + } + ) + self.assertEqual(len(liveaction_dbs), 2) + liveaction_dbs = LiveAction.query( + __raw__={ + "result": { + "$not": { + "$type": "binData", + }, + } + } + ) + self.assertEqual(len(liveaction_dbs), 2) + liveaction_dbs = LiveAction.query( + __raw__={ + "result": { + "$type": "binData", + }, + } + ) + self.assertEqual(len(liveaction_dbs), 0) + + # Update inserted documents and remove special _cls field added by mongoengine. We need to + # do that here due to how mongoengine works with subclasses. + ActionExecution.query( + __raw__={ + "result": { + "$type": "object", + }, + } + ).update(set___cls="ActionExecutionDB") + + LiveAction.query( + __raw__={ + "result": { + "$type": "object", + }, + } + ).update(set___cls="LiveActionDB") + + # 2. Run migration + migration_module.migrate_executions() + + # 3. 
Verify data has been migrated - only 1 item should have been migrated since it's in a + # completed state + execution_dbs = ActionExecution.query( + __raw__={ + "result": { + "$type": "object", + }, + } + ) + self.assertEqual(len(execution_dbs), 1) + execution_dbs = ActionExecution.query( + __raw__={ + "result": { + "$type": "binData", + }, + } + ) + self.assertEqual(len(execution_dbs), 1) + + execution_db_1_retrieved = ActionExecution.get_by_id(execution_1_db.id) + self.assertEqual(execution_db_1_retrieved.result, MOCK_RESULT_1) + + execution_db_2_retrieved = ActionExecution.get_by_id(execution_2_db.id) + self.assertEqual(execution_db_2_retrieved.result, MOCK_RESULT_2) + + liveaction_db_1_retrieved = LiveAction.get_by_id(liveaction_1_db.id) + self.assertEqual(liveaction_db_1_retrieved.result, MOCK_RESULT_1) + + liveaction_db_2_retrieved = LiveAction.get_by_id(liveaction_2_db.id) + self.assertEqual(liveaction_db_2_retrieved.result, MOCK_RESULT_2) + + def test_migrate_workflows(self): + workflow_execution_dbs = WorkflowExecution.query( + __raw__={ + "output": { + "not": { + "$type": "binData", + }, + } + } + ) + self.assertEqual(len(workflow_execution_dbs), 0) + workflow_execution_dbs = WorkflowExecution.query( + __raw__={ + "output": { + "$type": "object", + }, + } + ) + self.assertEqual(len(workflow_execution_dbs), 0) + + task_execution_dbs = TaskExecution.query( + __raw__={ + "result": { + "not": { + "$type": "binData", + }, + } + } + ) + self.assertEqual(len(task_execution_dbs), 0) + task_execution_dbs = TaskExecution.query( + __raw__={ + "result": { + "$type": "object", + }, + } + ) + self.assertEqual(len(task_execution_dbs), 0) + + # 1. Insert data in old format + workflow_execution_1_db = WorkflowExecutionDB_OldFieldType() + workflow_execution_1_db.input = MOCK_RESULT_1 + workflow_execution_1_db.context = MOCK_RESULT_1 + workflow_execution_1_db.state = MOCK_RESULT_1 + workflow_execution_1_db.output = MOCK_RESULT_1 + workflow_execution_1_db.status = action_constants.LIVEACTION_STATUS_SUCCEEDED + workflow_execution_1_db.action_execution = "a" + workflow_execution_1_db = WorkflowExecution.add_or_update( + workflow_execution_1_db + ) + + task_execution_1_db = TaskExecutionDB_OldFieldType() + task_execution_1_db.task_spec = MOCK_RESULT_1 + task_execution_1_db.context = MOCK_RESULT_1 + task_execution_1_db.result = MOCK_RESULT_1 + task_execution_1_db.status = action_constants.LIVEACTION_STATUS_SUCCEEDED + task_execution_1_db.workflow_execution = "a" + task_execution_1_db.task_name = "a" + task_execution_1_db.task_id = "a" + task_execution_1_db.task_route = 1 + task_execution_1_db = TaskExecution.add_or_update(task_execution_1_db) + + workflow_execution_2_db = WorkflowExecutionDB_OldFieldType() + workflow_execution_2_db.input = MOCK_RESULT_2 + workflow_execution_2_db.context = MOCK_RESULT_2 + workflow_execution_2_db.state = MOCK_RESULT_2 + workflow_execution_2_db.output = MOCK_RESULT_2 + workflow_execution_2_db.status = action_constants.LIVEACTION_STATUS_RUNNING + workflow_execution_2_db.action_execution = "b" + workflow_execution_2_db = WorkflowExecution.add_or_update( + workflow_execution_2_db + ) + + task_execution_2_db = TaskExecutionDB_OldFieldType() + task_execution_2_db.task_spec = MOCK_RESULT_2 + task_execution_2_db.context = MOCK_RESULT_2 + task_execution_2_db.result = MOCK_RESULT_2 + task_execution_2_db.status = action_constants.LIVEACTION_STATUS_RUNNING + task_execution_2_db.workflow_execution = "b" + task_execution_2_db.task_name = "b" + task_execution_2_db.task_id = "b" + 
task_execution_2_db.task_route = 2 + task_execution_2_db = TaskExecution.add_or_update(task_execution_2_db) + + # Update inserted documents and remove special _cls field added by mongoengine. We need to + # do that here due to how mongoengine works with subclasses. + WorkflowExecution.query( + __raw__={ + "input": { + "$type": "object", + }, + } + ).update(set___cls="WorkflowExecutionDB") + + TaskExecution.query( + __raw__={ + "result": { + "$type": "object", + }, + } + ).update(set___cls="TaskExecutionDB") + + # 2. Run migration + migration_module.migrate_workflow_objects() + + # 3. Verify data has been migrated - only 1 item should have been migrated since it's in a + # completed state + workflow_execution_dbs = WorkflowExecution.query( + __raw__={ + "output": { + "$type": "binData", + }, + } + ) + self.assertEqual(len(workflow_execution_dbs), 1) + workflow_execution_dbs = WorkflowExecution.query( + __raw__={ + "output": { + "$type": "object", + }, + } + ) + self.assertEqual(len(workflow_execution_dbs), 1) + + task_execution_dbs = TaskExecution.query( + __raw__={ + "result": { + "$type": "binData", + }, + } + ) + self.assertEqual(len(task_execution_dbs), 1) + task_execution_dbs = TaskExecution.query( + __raw__={ + "result": { + "$type": "object", + }, + } + ) + self.assertEqual(len(task_execution_dbs), 1) + + workflow_execution_1_db_retrieved = WorkflowExecution.get_by_id( + workflow_execution_1_db.id + ) + self.assertEqual(workflow_execution_1_db_retrieved.input, MOCK_RESULT_1) + self.assertEqual(workflow_execution_1_db_retrieved.context, MOCK_RESULT_1) + self.assertEqual(workflow_execution_1_db_retrieved.state, MOCK_RESULT_1) + self.assertEqual(workflow_execution_1_db_retrieved.output, MOCK_RESULT_1) + + workflow_execution_2_db_retrieved = WorkflowExecution.get_by_id( + workflow_execution_2_db.id + ) + self.assertEqual(workflow_execution_2_db_retrieved.input, MOCK_RESULT_2) + self.assertEqual(workflow_execution_2_db_retrieved.context, MOCK_RESULT_2) + self.assertEqual(workflow_execution_2_db_retrieved.state, MOCK_RESULT_2) + self.assertEqual(workflow_execution_2_db_retrieved.output, MOCK_RESULT_2) + + def test_migrate_triggers(self): + trigger_instance_dbs = TriggerInstance.query( + __raw__={ + "payload": { + "$not": { + "$type": "binData", + }, + } + } + ) + self.assertEqual(len(trigger_instance_dbs), 0) + trigger_instance_dbs = TriggerInstance.query( + __raw__={ + "payload": { + "$type": "object", + }, + } + ) + self.assertEqual(len(trigger_instance_dbs), 0) + + # 1. Insert data in old format + trigger_instance_1_db = TriggerInstanceDB_OldFieldType() + trigger_instance_1_db.payload = MOCK_PAYLOAD_1 + trigger_instance_1_db.status = TRIGGER_INSTANCE_PROCESSED + + trigger_instance_1_db = TriggerInstance.add_or_update(trigger_instance_1_db) + + trigger_instance_2_db = TriggerInstanceDB_OldFieldType() + trigger_instance_2_db.payload = MOCK_PAYLOAD_2 + trigger_instance_2_db.status = TRIGGER_INSTANCE_PENDING + + trigger_instance_2_db = TriggerInstance.add_or_update(trigger_instance_2_db) + + # Verify data has been inserted in old format + trigger_instance_dbs = TriggerInstance.query( + __raw__={ + "payload": { + "$not": { + "$type": "binData", + }, + } + } + ) + self.assertEqual(len(trigger_instance_dbs), 2) + trigger_instance_dbs = TriggerInstance.query( + __raw__={ + "payload": { + "$type": "object", + }, + } + ) + self.assertEqual(len(trigger_instance_dbs), 2) + + # Update inserted documents and remove special _cls field added by mongoengine. 
We need to + # do that here due to how mongoengine works with subclasses. + TriggerInstance.query( + __raw__={ + "payload": { + "$type": "object", + }, + } + ).update(set___cls="TriggerInstanceDB") + + # 2. Run migration + migration_module.migrate_triggers() + + # 3. Verify data has been migrated - only 1 item should have been migrated since it's in a + # completed state + trigger_instance_dbs = TriggerInstance.query( + __raw__={ + "payload": { + "$not": { + "$type": "binData", + }, + } + } + ) + self.assertEqual(len(trigger_instance_dbs), 1) + trigger_instance_dbs = TriggerInstance.query( + __raw__={ + "payload": { + "$type": "object", + }, + } + ) + self.assertEqual(len(trigger_instance_dbs), 1) + + trigger_instance_1_db_retrieved = TriggerInstance.get_by_id( + trigger_instance_1_db.id + ) + self.assertEqual(trigger_instance_1_db_retrieved.payload, MOCK_PAYLOAD_1) + + trigger_instance_2_db_retrieved = TriggerInstance.get_by_id( + trigger_instance_2_db.id + ) + self.assertEqual(trigger_instance_2_db_retrieved.payload, MOCK_PAYLOAD_2) From a917e4d31483f6df95642a99503534eb6e49ccaa Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sat, 8 May 2021 22:12:29 +0200 Subject: [PATCH 10/34] Fix operator syntax. --- .../unit/migrations/test_migrate_db_dict_field_values.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/st2common/tests/unit/migrations/test_migrate_db_dict_field_values.py b/st2common/tests/unit/migrations/test_migrate_db_dict_field_values.py index 509a6f0f0e..f3bb1a9d36 100644 --- a/st2common/tests/unit/migrations/test_migrate_db_dict_field_values.py +++ b/st2common/tests/unit/migrations/test_migrate_db_dict_field_values.py @@ -93,7 +93,7 @@ def test_migrate_executions(self): execution_dbs = ActionExecution.query( __raw__={ "result": { - "not": { + "$not": { "$type": "binData", }, } @@ -250,7 +250,7 @@ def test_migrate_workflows(self): workflow_execution_dbs = WorkflowExecution.query( __raw__={ "output": { - "not": { + "$not": { "$type": "binData", }, } @@ -269,7 +269,7 @@ def test_migrate_workflows(self): task_execution_dbs = TaskExecution.query( __raw__={ "result": { - "not": { + "$not": { "$type": "binData", }, } From 3f0c18470b518b352a1a69f7251a342fa8381416 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sat, 8 May 2021 22:16:25 +0200 Subject: [PATCH 11/34] Update changelog. --- CHANGELOG.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 4232e28ffc..6b87edfce8 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -167,7 +167,11 @@ Changed triggers with larger payloads. This should address a long standing issue where StackStorm was reported to be slow and CPU - inefficient with handling large executions. (improvement) #4846 + inefficient with handling large executions. + + If you want to migrate existing database objects to utilize the new type, you can use + ``st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values`` migration + script. (improvement) #4846 Contributed by @Kami. From f2bd2248e1ce3ee351987cac6d225351052ba450 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sat, 8 May 2021 22:18:26 +0200 Subject: [PATCH 12/34] Update script name. 
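
As an aside on the symlink + rename dance above: the executable script has a
hyphenated filename, which is not a valid Python module name, so the tests
import it through a ".py" symlink added to sys.path. An importlib-based
alternative that loads the script directly would look roughly like this (the
paths and module name are illustrative):

    # Sketch only - loads the hyphen-named script without needing a symlink.
    import importlib.util
    from importlib.machinery import SourceFileLoader

    path = "st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values"
    loader = SourceFileLoader("st2_migrate_db_dict_field_values", path)
    spec = importlib.util.spec_from_loader(loader.name, loader)
    migration_module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(migration_module)

    # migration_module.migrate_executions() etc. are now available.
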
--- st2common/bin/migrations/v3.5/migrate_db_dict_field_values.py | 1 - ...te-db-dict-field-values => st2-migrate-db-dict-field-values} | 0 .../bin/migrations/v3.5/st2_migrate_db_dict_field_values.py | 1 + .../tests/unit/migrations/test_migrate_db_dict_field_values.py | 2 +- 4 files changed, 2 insertions(+), 2 deletions(-) delete mode 120000 st2common/bin/migrations/v3.5/migrate_db_dict_field_values.py rename st2common/bin/migrations/v3.5/{migrate-db-dict-field-values => st2-migrate-db-dict-field-values} (100%) create mode 120000 st2common/bin/migrations/v3.5/st2_migrate_db_dict_field_values.py diff --git a/st2common/bin/migrations/v3.5/migrate_db_dict_field_values.py b/st2common/bin/migrations/v3.5/migrate_db_dict_field_values.py deleted file mode 120000 index 457ed3b569..0000000000 --- a/st2common/bin/migrations/v3.5/migrate_db_dict_field_values.py +++ /dev/null @@ -1 +0,0 @@ -migrate-db-dict-field-values \ No newline at end of file diff --git a/st2common/bin/migrations/v3.5/migrate-db-dict-field-values b/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values similarity index 100% rename from st2common/bin/migrations/v3.5/migrate-db-dict-field-values rename to st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values diff --git a/st2common/bin/migrations/v3.5/st2_migrate_db_dict_field_values.py b/st2common/bin/migrations/v3.5/st2_migrate_db_dict_field_values.py new file mode 120000 index 0000000000..495261f46c --- /dev/null +++ b/st2common/bin/migrations/v3.5/st2_migrate_db_dict_field_values.py @@ -0,0 +1 @@ +st2-migrate-db-dict-field-values \ No newline at end of file diff --git a/st2common/tests/unit/migrations/test_migrate_db_dict_field_values.py b/st2common/tests/unit/migrations/test_migrate_db_dict_field_values.py index f3bb1a9d36..1604891732 100644 --- a/st2common/tests/unit/migrations/test_migrate_db_dict_field_values.py +++ b/st2common/tests/unit/migrations/test_migrate_db_dict_field_values.py @@ -33,7 +33,7 @@ from st2tests import DbTestCase -import migrate_db_dict_field_values as migration_module +import st2_migrate_db_dict_field_values as migration_module MOCK_RESULT_1 = { From 8c79a143d09c7e06c4527788a3f4f5cba3e4ded6 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sat, 8 May 2021 22:21:19 +0200 Subject: [PATCH 13/34] Use correct path, use better test module name. --- ...d_values.py => test_v35_migrate_db_dict_field_values.py} | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) rename st2common/tests/unit/migrations/{test_migrate_db_dict_field_values.py => test_v35_migrate_db_dict_field_values.py} (99%) diff --git a/st2common/tests/unit/migrations/test_migrate_db_dict_field_values.py b/st2common/tests/unit/migrations/test_v35_migrate_db_dict_field_values.py similarity index 99% rename from st2common/tests/unit/migrations/test_migrate_db_dict_field_values.py rename to st2common/tests/unit/migrations/test_v35_migrate_db_dict_field_values.py index 1604891732..d5dd14b4dc 100644 --- a/st2common/tests/unit/migrations/test_migrate_db_dict_field_values.py +++ b/st2common/tests/unit/migrations/test_v35_migrate_db_dict_field_values.py @@ -12,10 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import os
 import sys
 
-sys.path.append("/home/vagrant/st2/st2common/bin/migrations/v3.5/")
-
 from st2common.constants import action as action_constants
 from st2common.models.db import stormbase
 from st2common.models.db.execution import ActionExecutionDB
@@ -33,6 +32,9 @@
 from st2tests import DbTestCase
 
+BASE_DIR = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(os.path.abspath(os.path.join(BASE_DIR, "../../../bin/migrations/v3.5/")))
+
 import st2_migrate_db_dict_field_values as migration_module

From e48aca45a6799a2add72983ed7e443e815aa5b61 Mon Sep 17 00:00:00 2001
From: Tomaz Muraus
Date: Sat, 8 May 2021 22:51:59 +0200
Subject: [PATCH 14/34] Re-organize the class definitions + allow_inheritance
 override to avoid cross test pollution and related failures.
---
 .../test_v35_migrate_db_dict_field_values.py | 72 ++++++++++---------
 1 file changed, 39 insertions(+), 33 deletions(-)

diff --git a/st2common/tests/unit/migrations/test_v35_migrate_db_dict_field_values.py b/st2common/tests/unit/migrations/test_v35_migrate_db_dict_field_values.py
index d5dd14b4dc..c12ecfd703 100644
--- a/st2common/tests/unit/migrations/test_v35_migrate_db_dict_field_values.py
+++ b/st2common/tests/unit/migrations/test_v35_migrate_db_dict_field_values.py
@@ -33,7 +33,9 @@
 from st2tests import DbTestCase
 
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-sys.path.append(os.path.abspath(os.path.join(BASE_DIR, "../../../bin/migrations/v3.5/")))
+sys.path.append(
+    os.path.abspath(os.path.join(BASE_DIR, "../../../bin/migrations/v3.5/"))
+)
 
 import st2_migrate_db_dict_field_values as migration_module
 
@@ -52,46 +54,31 @@
 MOCK_PAYLOAD_1 = {"yaaaas": "le payload!"}
 
-
 MOCK_PAYLOAD_2 = {"yaaaas": "le payload! 2"}
 
-# Needed so we can subclass it
-ActionExecutionDB._meta["allow_inheritance"] = True
-LiveActionDB._meta["allow_inheritance"] = True
-
-WorkflowExecutionDB._meta["allow_inheritance"] = True
-TaskExecutionDB._meta["allow_inheritance"] = True
-
-TriggerInstanceDB._meta["allow_inheritance"] = True
-
-
-class ActionExecutionDB_OldFieldType(ActionExecutionDB):
-    result = stormbase.EscapedDynamicField(default={})
-
-
-class LiveActionDB_OldFieldType(LiveActionDB):
-    result = stormbase.EscapedDynamicField(default={})
-
-
-class WorkflowExecutionDB_OldFieldType(WorkflowExecutionDB):
-    input = stormbase.EscapedDictField()
-    context = stormbase.EscapedDictField()
-    state = stormbase.EscapedDictField()
-    output = stormbase.EscapedDictField()
+# NOTE: We define those classes and set allow_inheritance inside the methods so importing this
+# module doesn't have side effects and break / affect other tests
 
-class TaskExecutionDB_OldFieldType(TaskExecutionDB):
-    task_spec = stormbase.EscapedDictField()
-    context = stormbase.EscapedDictField()
-    result = stormbase.EscapedDictField()
+class DBFieldsMigrationScriptTestCase(DbTestCase):
+    @classmethod
+    def tearDownClass(cls):
+        ActionExecutionDB._meta["allow_inheritance"] = False
+        LiveActionDB._meta["allow_inheritance"] = False
+        WorkflowExecutionDB._meta["allow_inheritance"] = False
+        TaskExecutionDB._meta["allow_inheritance"] = False
+        TriggerInstanceDB._meta["allow_inheritance"] = False
 
+    def test_migrate_executions(self):
+        ActionExecutionDB._meta["allow_inheritance"] = True
+        LiveActionDB._meta["allow_inheritance"] = True
 
-class TriggerInstanceDB_OldFieldType(TriggerInstanceDB):
-    payload = stormbase.EscapedDictField()
+        class ActionExecutionDB_OldFieldType(ActionExecutionDB):
+            result = stormbase.EscapedDynamicField(default={})
 
+        class
LiveActionDB_OldFieldType(LiveActionDB): + result = stormbase.EscapedDynamicField(default={}) -class DBFieldsMigrationScriptTestCase(DbTestCase): - def test_migrate_executions(self): execution_dbs = ActionExecution.query( __raw__={ "result": { @@ -249,6 +236,20 @@ def test_migrate_executions(self): self.assertEqual(liveaction_db_2_retrieved.result, MOCK_RESULT_2) def test_migrate_workflows(self): + WorkflowExecutionDB._meta["allow_inheritance"] = True + TaskExecutionDB._meta["allow_inheritance"] = True + + class WorkflowExecutionDB_OldFieldType(WorkflowExecutionDB): + input = stormbase.EscapedDictField() + context = stormbase.EscapedDictField() + state = stormbase.EscapedDictField() + output = stormbase.EscapedDictField() + + class TaskExecutionDB_OldFieldType(TaskExecutionDB): + task_spec = stormbase.EscapedDictField() + context = stormbase.EscapedDictField() + result = stormbase.EscapedDictField() + workflow_execution_dbs = WorkflowExecution.query( __raw__={ "output": { @@ -406,6 +407,11 @@ def test_migrate_workflows(self): self.assertEqual(workflow_execution_2_db_retrieved.output, MOCK_RESULT_2) def test_migrate_triggers(self): + TriggerInstanceDB._meta["allow_inheritance"] = True + + class TriggerInstanceDB_OldFieldType(TriggerInstanceDB): + payload = stormbase.EscapedDictField() + trigger_instance_dbs = TriggerInstance.query( __raw__={ "payload": { From 05e3c047caa3aa9dc0bef92fe7e8af807bccea7d Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sun, 9 May 2021 19:54:49 +0200 Subject: [PATCH 15/34] Pin python version to a full version including patch version. Previously setup-virtualenv step would fail in case Python version got upgraded since we used cache per partial Python version and not a complete one. I believe this change should fix that issue. --- .github/workflows/ci.yaml | 32 ++++++++++--------- .github/workflows/microbenchmarks.yaml | 6 ++-- .../workflows/orquesta-integration-tests.yaml | 6 ++-- 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 1719d5b476..4586c7aff7 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -48,19 +48,21 @@ jobs: matrix: # NOTE: To speed the CI run, we split unit and integration tests into multiple jobs where # each job runs subset of tests. + # NOTE: We need to use full Python version as part of Python deps cache key otherwise + # setup virtualenv step will fail. 
include: - name: 'Lint Checks (black, flake8, etc.)' task: 'ci-checks' - python-version: '3.6' + python-version: '3.6.13' - name: 'Compile (pip deps, pylint, etc.)' task: 'ci-compile' - python-version: '3.6' + python-version: '3.6.13' - name: 'Lint Checks (black, flake8, etc.)' task: 'ci-checks' - python-version: '3.8' + python-version: '3.8.10' - name: 'Compile (pip deps, pylint, etc.)' task: 'ci-compile' - python-version: '3.8' + python-version: '3.8.10' env: TASK: '${{ matrix.task }}' @@ -142,26 +144,26 @@ jobs: task: 'ci-unit' nosetests_node_total: 2 nosetests_node_index: 0 - python-version: '3.6' + python-version: '3.6.13' - name: 'Unit Tests (chunk 2)' task: 'ci-unit' nosetests_node_total: 2 nosetests_node_index: 1 - python-version: '3.6' + python-version: '3.6.13' - name: 'Unit Tests (chunk 1)' task: 'ci-unit' nosetests_node_total: 2 nosetests_node_index: 0 - python-version: '3.8' + python-version: '3.8.10' - name: 'Unit Tests (chunk 2)' task: 'ci-unit' nosetests_node_total: 2 nosetests_node_index: 1 - python-version: '3.8' + python-version: '3.8.10' # This job is slow so we only run in on a daily basis # - name: 'Micro Benchmarks' # task: 'micro-benchmarks' - # python-version: '3.6' + # python-version: '3.6.13' # nosetests_node_total: 1 # nosetests_node_ index: 0 services: @@ -309,32 +311,32 @@ jobs: task: 'ci-packs-tests' nosetests_node_total: 1 nosetests_node_index: 0 - python-version: '3.6' + python-version: '3.6.13' - name: 'Integration Tests (chunk 1)' task: 'ci-integration' nosetests_node_total: 2 nosetests_node_index: 0 - python-version: '3.6' + python-version: '3.6.13' - name: 'Integration Tests (chunk 2)' task: 'ci-integration' nosetests_node_total: 2 nosetests_node_index: 1 - python-version: '3.6' + python-version: '3.6.13' - name: 'Pack Tests' task: 'ci-packs-tests' nosetests_node_total: 1 nosetests_node_index: 0 - python-version: '3.8' + python-version: '3.8.10' - name: 'Integration Tests (chunk 1)' task: 'ci-integration' nosetests_node_total: 2 nosetests_node_index: 0 - python-version: '3.8' + python-version: '3.8.10' - name: 'Integration Tests (chunk 2)' task: 'ci-integration' nosetests_node_total: 2 nosetests_node_index: 1 - python-version: '3.8' + python-version: '3.8.10' services: mongo: image: mongo:4.4 diff --git a/.github/workflows/microbenchmarks.yaml b/.github/workflows/microbenchmarks.yaml index af46dace15..6d9e6abbf9 100644 --- a/.github/workflows/microbenchmarks.yaml +++ b/.github/workflows/microbenchmarks.yaml @@ -31,17 +31,19 @@ jobs: strategy: fail-fast: false matrix: + # NOTE: We need to use full Python version as part of Python deps cache key otherwise + # setup virtualenv step will fail. include: - name: 'Microbenchmarks' task: 'micro-benchmarks' nosetests_node_total: 1 nosetests_node_index: 0 - python-version: '3.6' + python-version: '3.6.13' - name: 'Microbenchmarks' task: 'micro-benchmarks' nosetests_node_total: 1 nosetests_node_index: 0 - python-version: '3.8' + python-version: '3.8.10' services: mongo: image: mongo:4.4 diff --git a/.github/workflows/orquesta-integration-tests.yaml b/.github/workflows/orquesta-integration-tests.yaml index d7c2471f0c..99ead63bfd 100644 --- a/.github/workflows/orquesta-integration-tests.yaml +++ b/.github/workflows/orquesta-integration-tests.yaml @@ -49,17 +49,19 @@ jobs: strategy: fail-fast: false matrix: + # NOTE: We need to use full Python version as part of Python deps cache key otherwise + # setup virtualenv step will fail. 
include: - name: 'Integration Tests (Orquesta)' task: 'ci-orquesta' nosetests_node_total: 1 nosetests_node_index: 0 - python-version: '3.6' + python-version: '3.6.13' - name: 'Integration Tests (Orquesta)' task: 'ci-orquesta' nosetests_node_total: 1 nosetests_node_index: 0 - python-version: '3.8' + python-version: '3.8.10' services: mongo: image: mongo:4.4 From 9fc2af96331b03662aa9e828447c3a8f84b201bd Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sun, 9 May 2021 19:59:24 +0200 Subject: [PATCH 16/34] Bump apt cache version. --- .github/workflows/ci.yaml | 4 ++-- .github/workflows/microbenchmarks.yaml | 4 ++-- .github/workflows/orquesta-integration-tests.yaml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 4586c7aff7..ab64a3d663 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -97,9 +97,9 @@ jobs: with: path: | ~/apt_cache - key: ${{ runner.os }}-apt-v6-${{ hashFiles('scripts/github/apt-packages.txt') }} + key: ${{ runner.os }}-apt-v7-${{ hashFiles('scripts/github/apt-packages.txt') }} restore-keys: | - ${{ runner.os }}-apt-v6- + ${{ runner.os }}-apt-v7- - name: Install APT Depedencies env: CACHE_HIT: ${{steps.cache-apt-deps.outputs.cache-hit}} diff --git a/.github/workflows/microbenchmarks.yaml b/.github/workflows/microbenchmarks.yaml index 6d9e6abbf9..1c11527d6a 100644 --- a/.github/workflows/microbenchmarks.yaml +++ b/.github/workflows/microbenchmarks.yaml @@ -93,9 +93,9 @@ jobs: with: path: | ~/apt_cache - key: ${{ runner.os }}-apt-v6-${{ hashFiles('scripts/github/apt-packages.txt') }} + key: ${{ runner.os }}-apt-v7-${{ hashFiles('scripts/github/apt-packages.txt') }} restore-keys: | - ${{ runner.os }}-apt-v6- + ${{ runner.os }}-apt-v7- - name: Install APT Dependencies env: CACHE_HIT: ${{steps.cache-apt-deps.outputs.cache-hit}} diff --git a/.github/workflows/orquesta-integration-tests.yaml b/.github/workflows/orquesta-integration-tests.yaml index 99ead63bfd..a6841b7633 100644 --- a/.github/workflows/orquesta-integration-tests.yaml +++ b/.github/workflows/orquesta-integration-tests.yaml @@ -146,9 +146,9 @@ jobs: with: path: | ~/apt_cache - key: ${{ runner.os }}-apt-v6-${{ hashFiles('scripts/github/apt-packages.txt') }} + key: ${{ runner.os }}-apt-v7-${{ hashFiles('scripts/github/apt-packages.txt') }} restore-keys: | - ${{ runner.os }}-apt-v6- + ${{ runner.os }}-apt-v7- - name: Install APT Depedencies env: CACHE_HIT: ${{steps.cache-apt-deps.outputs.cache-hit}} From 86afbb116c4c899ce55fce3427ad79e0cc73665c Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sun, 9 May 2021 20:01:16 +0200 Subject: [PATCH 17/34] Bump eventlet version due to security vulnerability (which doesn't directly affect us since we don't utilize websockets functionality). 
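
Because the eventlet pin is duplicated across fixed-requirements.txt and each
component's requirements.txt, a bump like this touches nine files. A small
consistency check along the following lines (a hypothetical helper, not part of
the repo) can catch a file that was missed during a bump:

    # Sketch only - verifies the eventlet pin is identical in all
    # *requirements.txt files under the current directory.
    import re
    from pathlib import Path

    pins = {}
    for path in Path(".").glob("**/*requirements.txt"):
        for line in path.read_text().splitlines():
            match = re.match(r"eventlet==(\S+)$", line.strip())
            if match:
                pins[str(path)] = match.group(1)

    if len(set(pins.values())) > 1:
        raise SystemExit("Inconsistent eventlet pins: %s" % pins)
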
--- fixed-requirements.txt | 2 +- requirements.txt | 2 +- st2actions/requirements.txt | 2 +- st2api/requirements.txt | 2 +- st2auth/requirements.txt | 2 +- st2common/requirements.txt | 2 +- st2exporter/requirements.txt | 2 +- st2reactor/requirements.txt | 2 +- st2stream/requirements.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fixed-requirements.txt b/fixed-requirements.txt index 7793ccab22..e544271763 100644 --- a/fixed-requirements.txt +++ b/fixed-requirements.txt @@ -9,7 +9,7 @@ dnspython>=1.16.0,<2.0.0 cryptography==3.4.7 # Note: 0.20.0 removed select.poll() on which some of our code and libraries we # depend on rely -eventlet==0.30.2 +eventlet==0.31.0 flex==6.14.1 gitpython==3.1.15 # Needed by gitpython, old versions used to bundle it diff --git a/requirements.txt b/requirements.txt index ae3e7a01f2..3005e70001 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,7 +13,7 @@ bcrypt==3.2.0 chardet<3.1.0 cryptography==3.4.7 dnspython>=1.16.0,<2.0.0 -eventlet==0.30.2 +eventlet==0.31.0 flex==6.14.1 git+https://github.com/StackStorm/logshipper.git@stackstorm_patched#egg=logshipper git+https://github.com/StackStorm/orquesta.git@v1.3.0#egg=orquesta diff --git a/st2actions/requirements.txt b/st2actions/requirements.txt index c2cbd0b00f..747762adb2 100644 --- a/st2actions/requirements.txt +++ b/st2actions/requirements.txt @@ -7,7 +7,7 @@ # update the component requirements.txt apscheduler==3.7.0 chardet<3.1.0 -eventlet==0.30.2 +eventlet==0.31.0 git+https://github.com/StackStorm/logshipper.git@stackstorm_patched#egg=logshipper gitpython==3.1.15 jinja2==2.11.3 diff --git a/st2api/requirements.txt b/st2api/requirements.txt index c9f385ab4d..332e3af57f 100644 --- a/st2api/requirements.txt +++ b/st2api/requirements.txt @@ -5,7 +5,7 @@ # If you want to update depdencies for a single component, modify the # in-requirements.txt for that component and then run 'make requirements' to # update the component requirements.txt -eventlet==0.30.2 +eventlet==0.31.0 gunicorn==20.1.0 jsonschema==2.6.0 kombu==5.0.2 diff --git a/st2auth/requirements.txt b/st2auth/requirements.txt index 6b6016bcb6..4dd511c3d2 100644 --- a/st2auth/requirements.txt +++ b/st2auth/requirements.txt @@ -6,7 +6,7 @@ # in-requirements.txt for that component and then run 'make requirements' to # update the component requirements.txt bcrypt==3.2.0 -eventlet==0.30.2 +eventlet==0.31.0 git+https://github.com/StackStorm/st2-auth-backend-flat-file.git@master#egg=st2-auth-backend-flat-file git+https://github.com/StackStorm/st2-auth-ldap.git@master#egg=st2-auth-ldap gunicorn==20.1.0 diff --git a/st2common/requirements.txt b/st2common/requirements.txt index d01bd35036..5ea7521ca4 100644 --- a/st2common/requirements.txt +++ b/st2common/requirements.txt @@ -10,7 +10,7 @@ apscheduler==3.7.0 chardet<3.1.0 cryptography==3.4.7 dnspython>=1.16.0,<2.0.0 -eventlet==0.30.2 +eventlet==0.31.0 flex==6.14.1 git+https://github.com/StackStorm/orquesta.git@v1.3.0#egg=orquesta git+https://github.com/StackStorm/st2-rbac-backend.git@master#egg=st2-rbac-backend diff --git a/st2exporter/requirements.txt b/st2exporter/requirements.txt index 9e00c8a9b0..eaa526e7a9 100644 --- a/st2exporter/requirements.txt +++ b/st2exporter/requirements.txt @@ -5,7 +5,7 @@ # If you want to update depdencies for a single component, modify the # in-requirements.txt for that component and then run 'make requirements' to # update the component requirements.txt -eventlet==0.30.2 +eventlet==0.31.0 kombu==5.0.2 oslo.config>=1.12.1,<1.13 six==1.13.0 diff --git 
a/st2reactor/requirements.txt b/st2reactor/requirements.txt index 3388a8214b..ad6f27cc5a 100644 --- a/st2reactor/requirements.txt +++ b/st2reactor/requirements.txt @@ -6,7 +6,7 @@ # in-requirements.txt for that component and then run 'make requirements' to # update the component requirements.txt apscheduler==3.7.0 -eventlet==0.30.2 +eventlet==0.31.0 jsonpath-rw==1.4.0 jsonschema==2.6.0 kombu==5.0.2 diff --git a/st2stream/requirements.txt b/st2stream/requirements.txt index a7bb19887a..7064a47412 100644 --- a/st2stream/requirements.txt +++ b/st2stream/requirements.txt @@ -5,7 +5,7 @@ # If you want to update depdencies for a single component, modify the # in-requirements.txt for that component and then run 'make requirements' to # update the component requirements.txt -eventlet==0.30.2 +eventlet==0.31.0 gunicorn==20.1.0 jsonschema==2.6.0 kombu==5.0.2 From e8361983b117849e1f5f74225662e8744de450cb Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sun, 9 May 2021 20:15:58 +0200 Subject: [PATCH 18/34] Temporarily disable apt cache since it seems to be failing the build. --- scripts/github/install-apt-packages-use-cache.sh | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/scripts/github/install-apt-packages-use-cache.sh b/scripts/github/install-apt-packages-use-cache.sh index 66c7e5cba5..38a77fc84e 100755 --- a/scripts/github/install-apt-packages-use-cache.sh +++ b/scripts/github/install-apt-packages-use-cache.sh @@ -17,6 +17,14 @@ echo "" echo "CACHE_HIT=${CACHE_HIT}" echo "" +# TODO: Recently using cached dependency started failing so I (@Kami) temporary disabled cache. +# We should investigate why it's failing and try to fix it. +sudo apt-get -y update +# shellcheck disable=SC2086 +sudo apt-get -f -y --reinstall install ${APT_PACKAGES} +sudo dpkg -l +exit 0 + # Directory where installed package files will be copied - should match directory specified for # cache target in github actions workflow CACHE_DIRECTORY="${HOME}/apt_cache" @@ -38,7 +46,7 @@ if [[ "$CACHE_HIT" != 'true' ]]; then fi # shellcheck disable=SC2086 -sudo apt-get -f -y install ${APT_PACKAGES} +sudo apt-get -f -y --reinstall install ${APT_PACKAGES} ls -la "${APT_STATE_LISTS}" ls -la "${APT_CACHE_ARCHIVES}" From 23ddc7e5e1e3d17a8319b048c33721cbf7ec5a75 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sun, 9 May 2021 20:40:44 +0200 Subject: [PATCH 19/34] Revert "Bump eventlet version due to security vulnerability (which doesn't" This reverts commit 86afbb116c4c899ce55fce3427ad79e0cc73665c. Revert eventlet upgrade since it seems to fail with gunicorn eventlet worker.
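The worker failure is easiest to investigate in isolation before re-attempting the upgrade. A minimal sketch using a hypothetical wsgi_smoke.py module (not part of the tree) served by gunicorn's eventlet worker class:

# wsgi_smoke.py - hypothetical module for smoke-testing the eventlet worker.
# Run with: gunicorn -k eventlet wsgi_smoke:app
# If the worker fails to boot with eventlet 0.31.0 but works with 0.30.2, the
# regression lies in the eventlet / gunicorn interaction and not in st2 code.
def app(environ, start_response):
    # Trivial WSGI callable which just returns a 200 plain text response.
    start_response("200 OK", [("Content-Type", "text/plain")])
    return [b"OK\n"]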
--- fixed-requirements.txt | 2 +- requirements.txt | 2 +- st2actions/requirements.txt | 2 +- st2api/requirements.txt | 2 +- st2auth/requirements.txt | 2 +- st2common/requirements.txt | 2 +- st2exporter/requirements.txt | 2 +- st2reactor/requirements.txt | 2 +- st2stream/requirements.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fixed-requirements.txt b/fixed-requirements.txt index e544271763..7793ccab22 100644 --- a/fixed-requirements.txt +++ b/fixed-requirements.txt @@ -9,7 +9,7 @@ dnspython>=1.16.0,<2.0.0 cryptography==3.4.7 # Note: 0.20.0 removed select.poll() on which some of our code and libraries we # depend on rely -eventlet==0.31.0 +eventlet==0.30.2 flex==6.14.1 gitpython==3.1.15 # Needed by gitpython, old versions used to bundle it diff --git a/requirements.txt b/requirements.txt index 3005e70001..ae3e7a01f2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,7 +13,7 @@ bcrypt==3.2.0 chardet<3.1.0 cryptography==3.4.7 dnspython>=1.16.0,<2.0.0 -eventlet==0.31.0 +eventlet==0.30.2 flex==6.14.1 git+https://github.com/StackStorm/logshipper.git@stackstorm_patched#egg=logshipper git+https://github.com/StackStorm/orquesta.git@v1.3.0#egg=orquesta diff --git a/st2actions/requirements.txt b/st2actions/requirements.txt index 747762adb2..c2cbd0b00f 100644 --- a/st2actions/requirements.txt +++ b/st2actions/requirements.txt @@ -7,7 +7,7 @@ # update the component requirements.txt apscheduler==3.7.0 chardet<3.1.0 -eventlet==0.31.0 +eventlet==0.30.2 git+https://github.com/StackStorm/logshipper.git@stackstorm_patched#egg=logshipper gitpython==3.1.15 jinja2==2.11.3 diff --git a/st2api/requirements.txt b/st2api/requirements.txt index 332e3af57f..c9f385ab4d 100644 --- a/st2api/requirements.txt +++ b/st2api/requirements.txt @@ -5,7 +5,7 @@ # If you want to update depdencies for a single component, modify the # in-requirements.txt for that component and then run 'make requirements' to # update the component requirements.txt -eventlet==0.31.0 +eventlet==0.30.2 gunicorn==20.1.0 jsonschema==2.6.0 kombu==5.0.2 diff --git a/st2auth/requirements.txt b/st2auth/requirements.txt index 4dd511c3d2..6b6016bcb6 100644 --- a/st2auth/requirements.txt +++ b/st2auth/requirements.txt @@ -6,7 +6,7 @@ # in-requirements.txt for that component and then run 'make requirements' to # update the component requirements.txt bcrypt==3.2.0 -eventlet==0.31.0 +eventlet==0.30.2 git+https://github.com/StackStorm/st2-auth-backend-flat-file.git@master#egg=st2-auth-backend-flat-file git+https://github.com/StackStorm/st2-auth-ldap.git@master#egg=st2-auth-ldap gunicorn==20.1.0 diff --git a/st2common/requirements.txt b/st2common/requirements.txt index 5ea7521ca4..d01bd35036 100644 --- a/st2common/requirements.txt +++ b/st2common/requirements.txt @@ -10,7 +10,7 @@ apscheduler==3.7.0 chardet<3.1.0 cryptography==3.4.7 dnspython>=1.16.0,<2.0.0 -eventlet==0.31.0 +eventlet==0.30.2 flex==6.14.1 git+https://github.com/StackStorm/orquesta.git@v1.3.0#egg=orquesta git+https://github.com/StackStorm/st2-rbac-backend.git@master#egg=st2-rbac-backend diff --git a/st2exporter/requirements.txt b/st2exporter/requirements.txt index eaa526e7a9..9e00c8a9b0 100644 --- a/st2exporter/requirements.txt +++ b/st2exporter/requirements.txt @@ -5,7 +5,7 @@ # If you want to update depdencies for a single component, modify the # in-requirements.txt for that component and then run 'make requirements' to # update the component requirements.txt -eventlet==0.31.0 +eventlet==0.30.2 kombu==5.0.2 oslo.config>=1.12.1,<1.13 six==1.13.0 diff --git 
a/st2reactor/requirements.txt b/st2reactor/requirements.txt index ad6f27cc5a..3388a8214b 100644 --- a/st2reactor/requirements.txt +++ b/st2reactor/requirements.txt @@ -6,7 +6,7 @@ # in-requirements.txt for that component and then run 'make requirements' to # update the component requirements.txt apscheduler==3.7.0 -eventlet==0.31.0 +eventlet==0.30.2 jsonpath-rw==1.4.0 jsonschema==2.6.0 kombu==5.0.2 diff --git a/st2stream/requirements.txt b/st2stream/requirements.txt index 7064a47412..a7bb19887a 100644 --- a/st2stream/requirements.txt +++ b/st2stream/requirements.txt @@ -5,7 +5,7 @@ # If you want to update depdencies for a single component, modify the # in-requirements.txt for that component and then run 'make requirements' to # update the component requirements.txt -eventlet==0.31.0 +eventlet==0.30.2 gunicorn==20.1.0 jsonschema==2.6.0 kombu==5.0.2 From 7a117bc04bb19cd5e588db5f6388ddd4eb5f0dbe Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Tue, 11 May 2021 20:45:33 +0200 Subject: [PATCH 20/34] Update migration script so we only load a maximum of one whole database object in memory at once - we now first retrieve ids for all the affected objects and then retrieve and process them one by one. Also add a prompt and run in interactive mode by default with some warnings. --- .../v3.5/st2-migrate-db-dict-field-values | 98 ++++++++++++++----- 1 file changed, 71 insertions(+), 27 deletions(-) diff --git a/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values b/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values index ee9042c667..95829aae80 100755 --- a/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values +++ b/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values @@ -44,6 +44,10 @@ import traceback from st2common import config from st2common.service_setup import db_setup from st2common.service_setup import db_teardown +from st2common.models.db.execution import ActionExecutionDB +from st2common.models.db.workflow import WorkflowExecutionDB +from st2common.models.db.workflow import TaskExecutionDB +from st2common.models.db.trigger import TriggerInstanceDB from st2common.persistence.execution import ActionExecution from st2common.persistence.liveaction import LiveAction from st2common.persistence.workflow import WorkflowExecution @@ -57,10 +61,17 @@ def migrate_executions() -> None: """ Perform migrations for execution related objects (ActionExecutionDB, LiveActionDB). """ - print("Migration execution objects") + print("Migrating execution objects") + + # NOTE: We first only retrieve the IDs because there could be a lot of objects in the database + # and this could result in massive ram use. Technically, mongoengine loads querysets lazily, + # but this is not always the case so it's better to first retrieve all the IDs and then retrieve + # objects one by one. + # Keep in mind we need to use ModelClass.objects and not PersistanceClass.query() so .only() + # works correctly - with PersistanceClass.query().only() all the fields will still be retrieved. # 1.
Migrate ActionExecutionDB objects - execution_dbs = ActionExecution.query( + execution_dbs = ActionExecutionDB.objects( __raw__={ "result": { "$not": { @@ -70,18 +81,20 @@ def migrate_executions() -> None: "status": { "$in": LIVEACTION_COMPLETED_STATES, }, - } - ) + }, + ).only("id") + execution_ids = [str(execution_db.id) for execution_db in execution_dbs] - if not execution_dbs: + if not execution_ids: print("Found no ActionExecutionDB objects to migrate.") return None - print("Will migrate %s ActionExecutionDB objects" % (len(execution_dbs))) + print("Will migrate %s ActionExecutionDB objects" % (len(execution_ids))) print("") - for execution_db in execution_dbs: - print("Migrating ActionExecutionDB with id %s" % (execution_db.id)) + for execution_id in execution_ids: + execution_db = ActionExecution.get_by_id(execution_id) + print("Migrating ActionExecutionDB with id %s" % (execution_id)) # This is a bit of a "hack", but it's the easiest way to tell mongoengine that a specific # field has been updated and should be saved. If we don't do, nothing will be re-saved on @@ -119,7 +132,7 @@ def migrate_workflow_objects() -> None: print("Migrating workflow objects") # 1. Migrate WorkflowExecutionDB - workflow_execution_dbs = WorkflowExecution.query( + workflow_execution_dbs = WorkflowExecutionDB.objects( __raw__={ "output": { "$not": { @@ -130,18 +143,23 @@ def migrate_workflow_objects() -> None: "$in": LIVEACTION_COMPLETED_STATES, }, } - ) + ).only("id") + workflow_execution_ids = [ + str(workflow_execution_db.id) + for workflow_execution_db in workflow_execution_dbs + ] - if not workflow_execution_dbs: + if not workflow_execution_ids: print("Found no WorkflowExecutionDB objects to migrate.") else: print( "Will migrate %s WorkflowExecutionDB objects" - % (len(workflow_execution_dbs)) + % (len(workflow_execution_ids)) ) print("") - for workflow_execution_db in workflow_execution_dbs or []: + for workflow_execution_id in workflow_execution_ids or []: + workflow_execution_db = WorkflowExecution.get_by_id(workflow_execution_id) print("Migrating WorkflowExecutionDB with id %s" % (workflow_execution_db.id)) workflow_execution_db._mark_as_changed("input") @@ -157,7 +175,7 @@ def migrate_workflow_objects() -> None: print("") # 2. 
Migrate TaskExecutionDB - task_execution_dbs = TaskExecution.query( + task_execution_dbs = TaskExecutionDB.objects( __raw__={ "result": { "$not": { @@ -168,15 +186,19 @@ def migrate_workflow_objects() -> None: "$in": LIVEACTION_COMPLETED_STATES, }, } - ) + ).only("id") + task_execution_ids = [ + str(task_execution_db.id) for task_execution_db in task_execution_dbs + ] - if not task_execution_dbs: + if not task_execution_ids: print("Found no TaskExecutionDB objects to migrate.") else: - print("Will migrate %s TaskExecutionDB objects" % (len(task_execution_dbs))) + print("Will migrate %s TaskExecutionDB objects" % (len(task_execution_ids))) print("") - for task_execution_db in task_execution_dbs or []: + for task_execution_id in task_execution_ids or []: + task_execution_db = TaskExecution.get_by_id(task_execution_id) print("Migrating TaskExecutionDB with id %s" % (task_execution_db.id)) task_execution_db._mark_as_changed("task_spec") @@ -189,9 +211,9 @@ def migrate_workflow_objects() -> None: def migrate_triggers() -> None: - print("Migratting trigger objects") + print("Migrating trigger objects") - trigger_instance_dbs = TriggerInstance.query( + trigger_instance_dbs = TriggerInstanceDB.objects( __raw__={ "payload": { "$not": { @@ -202,15 +224,19 @@ def migrate_triggers() -> None: "$in": TRIGGER_INSTANCE_COMPLETED_STATES, }, } - ) + ).only("id") + trigger_instance_ids = [ + str(trigger_instance_db.id) for trigger_instance_db in trigger_instance_dbs + ] - if not trigger_instance_dbs: + if not trigger_instance_ids: print("Found no TriggerInstanceDB objects to migrate.") return None - print("Will migrate %s TriggerInstanceDB objects" % (len(trigger_instance_dbs))) + print("Will migrate %s TriggerInstanceDB objects" % (len(trigger_instance_ids))) - for trigger_instance_db in trigger_instance_dbs: + for trigger_instance_id in trigger_instance_ids or []: + trigger_instance_db = TriggerInstance.get_by_id(trigger_instance_id) print("Migrating TriggerInstanceDB with id %s" % (trigger_instance_db.id)) trigger_instance_db._mark_as_changed("payload") @@ -222,8 +248,21 @@ def migrate_triggers() -> None: print("") -def migrate_objects() -> None: +def migrate_objects(display_prompt: bool = True) -> None: + print("StackStorm v3.5 database field data migration script\n") + + if display_prompt: + input( + "You are stronly recommended to create database backup before running the " + "migration script.\n\nDepending on the number of the objects in the database, " + "migration may take multiple hours or more. You are recommended to start the the " + "script in a screen session or similar. \n\n" + "To proceed with the migration, press enter and to cancel it, press CTRL+C." + ) + print("") + print("Migrating affected database objects to utilize new field type") + print("") migrate_executions() migrate_workflow_objects() @@ -231,12 +270,17 @@ def migrate_objects() -> None: def main(): - config.parse_args() + if "--yes" in sys.argv: + sys.argv.remove("--yes") + display_prompt = False + else: + display_prompt = True + config.parse_args() db_setup() try: - migrate_objects() + migrate_objects(display_prompt=display_prompt) print("SUCCESS: All database objects migrated successfully.") exit_code = 0 except Exception as e: From 3e3743fd99b47f4003559b4094343c04532e15fe Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Tue, 11 May 2021 21:06:13 +0200 Subject: [PATCH 21/34] Also print duration at the end. 
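For context, the ids-first access pattern the migration script now follows (fetch only ObjectIds up front, then hydrate and re-save one full document at a time) is a general mongoengine idiom. A condensed, self-contained sketch - the model, connection name and query below are illustrative placeholders, not st2 code:

import mongoengine as me

class SomeModelDB(me.Document):
    # Placeholder document class standing in for ActionExecutionDB and friends.
    status = me.StringField()
    result = me.DictField()

me.connect("some-test-db")  # assumes a MongoDB instance on localhost

# 1. Cheap query - retrieve only the ObjectIds of the affected documents.
ids = [str(doc.id) for doc in SomeModelDB.objects(status="succeeded").only("id")]

# 2. Hydrate and re-save one full document at a time so at most one large
# object is held in memory at any given moment.
for doc_id in ids:
    doc = SomeModelDB.objects.get(id=doc_id)
    doc._mark_as_changed("result")  # force mongoengine to persist this field
    doc.save()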
--- .../migrations/v3.5/st2-migrate-db-dict-field-values | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values b/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values index 95829aae80..24ee3abb53 100755 --- a/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values +++ b/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values @@ -39,6 +39,7 @@ could also determine that based on the presence of result_size field). """ import sys +import time import traceback from st2common import config @@ -264,9 +265,19 @@ def migrate_objects(display_prompt: bool = True) -> None: print("Migrating affected database objects to utilize new field type") print("") + start_ts = int(time.time()) migrate_executions() migrate_workflow_objects() migrate_triggers() + end_ts = int(time.time()) + + duration = end_ts - start_ts + + print("") + print( + "SUCCESS: All database objects migrated successfully (duration: %s seconds)." + % (duration) + ) def main(): @@ -281,7 +292,6 @@ def main(): try: migrate_objects(display_prompt=display_prompt) - print("SUCCESS: All database objects migrated successfully.") exit_code = 0 except Exception as e: print("ABORTED: Objects migration aborted on first failure: %s" % (str(e))) From b5376cbf01ed2443a678af2e768422591c0ba80c Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Tue, 11 May 2021 22:15:48 +0200 Subject: [PATCH 22/34] Ignore DB object doesn't exist errors since those should not be fatal - we simply proceed with migrating other objects. --- .../v3.5/st2-migrate-db-dict-field-values | 58 +++++++++++++++---- 1 file changed, 48 insertions(+), 10 deletions(-) diff --git a/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values b/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values index 24ee3abb53..7a6101c40a 100755 --- a/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values +++ b/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values @@ -54,6 +54,7 @@ from st2common.persistence.liveaction import LiveAction from st2common.persistence.workflow import WorkflowExecution from st2common.persistence.workflow import TaskExecution from st2common.persistence.trigger import TriggerInstance +from st2common.exceptions.db import StackStormDBObjectNotFoundError from st2common.constants.action import LIVEACTION_COMPLETED_STATES from st2common.constants.triggers import TRIGGER_INSTANCE_COMPLETED_STATES @@ -94,7 +95,15 @@ def migrate_executions() -> None: print("") for execution_id in execution_ids: - execution_db = ActionExecution.get_by_id(execution_id) + try: + execution_db = ActionExecution.get_by_id(execution_id) + except StackStormDBObjectNotFoundError: + print( + "Skipping ActionExecutionDB with id %s which is missing in the database" + % (execution_id) + ) + continue + print("Migrating ActionExecutionDB with id %s" % (execution_id)) # This is a bit of a "hack", but it's the easiest way to tell mongoengine that a specific @@ -109,16 +118,21 @@ def migrate_executions() -> None: print("ActionExecutionDB with id %s has been migrated" % (execution_db.id)) # Migrate corresponding LiveAction object + liveaction = execution_db.liveaction or {} + liveaction_id = liveaction.get("id", None) + + if not liveaction_id: + continue + try: - liveaction_id = execution_db.liveaction.get("id", None) liveaction_db = LiveAction.get_by_id(liveaction_id) - except Exception as e: - print( - "Failed to migrate LiveActionDB with id %s, ignoring error (%s)" - % 
(liveaction_id, str(e)) - ) + except StackStormDBObjectNotFoundError: # If liveaction for some reason doesn't exist (would likely represent corrupted data) we # simply ignore that error since it's not fatal. + print( + "Skipping LiveActionDB with id %s which is missing in the database" + % (liveaction_db) + ) continue liveaction_db._mark_as_changed("result") @@ -160,7 +174,15 @@ def migrate_workflow_objects() -> None: print("") for workflow_execution_id in workflow_execution_ids or []: - workflow_execution_db = WorkflowExecution.get_by_id(workflow_execution_id) + try: + workflow_execution_db = WorkflowExecution.get_by_id(workflow_execution_id) + except StackStormDBObjectNotFoundError as e: + print( + "Skipping WorkflowExecutionDB with id %s which is missing in the database" + % (workflow_execution_id) + ) + continue + print("Migrating WorkflowExecutionDB with id %s" % (workflow_execution_db.id)) workflow_execution_db._mark_as_changed("input") @@ -199,7 +221,15 @@ def migrate_workflow_objects() -> None: print("") for task_execution_id in task_execution_ids or []: - task_execution_db = TaskExecution.get_by_id(task_execution_id) + try: + task_execution_db = TaskExecution.get_by_id(task_execution_id) + except StackStormDBObjectNotFoundError as e: + print( + "Skipping TaskExecutionDB with id %s which is missing in the database" + % (task_execution_db) + ) + continue + print("Migrating TaskExecutionDB with id %s" % (task_execution_db.id)) task_execution_db._mark_as_changed("task_spec") @@ -237,7 +267,15 @@ def migrate_triggers() -> None: print("Will migrate %s TriggerInstanceDB objects" % (len(trigger_instance_ids))) for trigger_instance_id in trigger_instance_ids or []: - trigger_instance_db = TriggerInstance.get_by_id(trigger_instance_id) + try: + trigger_instance_db = TriggerInstance.get_by_id(trigger_instance_id) + except StackStormDBObjectNotFoundError as e: + print( + "Skipping TriggerInstanceDB with id %s which is missing in the database" + % (trigger_instance_id) + ) + continue + print("Migrating TriggerInstanceDB with id %s" % (trigger_instance_db.id)) trigger_instance_db._mark_as_changed("payload") From a5964e2167e94603f5cf01941f7b4acb2808d22a Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Tue, 11 May 2021 22:36:10 +0200 Subject: [PATCH 23/34] Update the output to make it easier to see how far along we are. 
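The progress output added below boils down to the standard enumerate-with-start idiom; a tiny self-contained sketch of the "[current/total]" format:

# Sketch: 1-based progress counter of the "[current/total]" form.
object_ids = ["a", "b", "c"]  # stand-ins for the migrated object ids
objects_count = len(object_ids)

for index, object_id in enumerate(object_ids, 1):  # start counting at 1
    print("[%s/%s] Migrating object with id %s" % (index, objects_count, object_id))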
--- .../v3.5/st2-migrate-db-dict-field-values | 54 ++++++++++++------- 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values b/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values index 7a6101c40a..6d853b5881 100755 --- a/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values +++ b/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values @@ -86,15 +86,17 @@ def migrate_executions() -> None: }, ).only("id") execution_ids = [str(execution_db.id) for execution_db in execution_dbs] + objects_count = len(execution_ids) if not execution_ids: print("Found no ActionExecutionDB objects to migrate.") + print("") return None - print("Will migrate %s ActionExecutionDB objects" % (len(execution_ids))) + print("Will migrate %s ActionExecutionDB objects" % (objects_count)) print("") - for execution_id in execution_ids: + for index, execution_id in enumerate(execution_ids, 1): try: execution_db = ActionExecution.get_by_id(execution_id) except StackStormDBObjectNotFoundError: @@ -104,7 +106,10 @@ def migrate_executions() -> None: ) continue - print("Migrating ActionExecutionDB with id %s" % (execution_id)) + print( + "[%s/%s] Migrating ActionExecutionDB with id %s" + % (index, objects_count, execution_id) + ) # This is a bit of a "hack", but it's the easiest way to tell mongoengine that a specific # field has been updated and should be saved. If we don't do, nothing will be re-saved on @@ -163,27 +168,29 @@ def migrate_workflow_objects() -> None: str(workflow_execution_db.id) for workflow_execution_db in workflow_execution_dbs ] + objects_count = len(workflow_execution_ids) if not workflow_execution_ids: print("Found no WorkflowExecutionDB objects to migrate.") + print("") else: - print( - "Will migrate %s WorkflowExecutionDB objects" - % (len(workflow_execution_ids)) - ) + print("Will migrate %s WorkflowExecutionDB objects" % (objects_count)) print("") - for workflow_execution_id in workflow_execution_ids or []: + for index, workflow_execution_id in enumerate(workflow_execution_ids, 1): try: workflow_execution_db = WorkflowExecution.get_by_id(workflow_execution_id) - except StackStormDBObjectNotFoundError as e: + except StackStormDBObjectNotFoundError: print( "Skipping WorkflowExecutionDB with id %s which is missing in the database" % (workflow_execution_id) ) continue - print("Migrating WorkflowExecutionDB with id %s" % (workflow_execution_db.id)) + print( + "[%s/%s] Migrating WorkflowExecutionDB with id %s" + % (index, objects_count, workflow_execution_id) + ) workflow_execution_db._mark_as_changed("input") workflow_execution_db._mark_as_changed("context") @@ -213,24 +220,29 @@ def migrate_workflow_objects() -> None: task_execution_ids = [ str(task_execution_db.id) for task_execution_db in task_execution_dbs ] + objects_count = len(task_execution_ids) if not task_execution_ids: print("Found no TaskExecutionDB objects to migrate.") + print("") else: - print("Will migrate %s TaskExecutionDB objects" % (len(task_execution_ids))) + print("Will migrate %s TaskExecutionDB objects" % (objects_count)) print("") - for task_execution_id in task_execution_ids or []: + for index, task_execution_id in enumerate(task_execution_ids, 1): try: task_execution_db = TaskExecution.get_by_id(task_execution_id) - except StackStormDBObjectNotFoundError as e: + except StackStormDBObjectNotFoundError: print( "Skipping TaskExecutionDB with id %s which is missing in the database" % (task_execution_db) ) continue - print("Migrating 
TaskExecutionDB with id %s" % (task_execution_db.id)) + print( + "[%s/%s] Migrating TaskExecutionDB with id %s" + % (index, objects_count, task_execution_id) + ) task_execution_db._mark_as_changed("task_spec") task_execution_db._mark_as_changed("context") @@ -259,24 +271,29 @@ def migrate_triggers() -> None: trigger_instance_ids = [ str(trigger_instance_db.id) for trigger_instance_db in trigger_instance_dbs ] + objects_count = len(trigger_instance_dbs) if not trigger_instance_ids: print("Found no TriggerInstanceDB objects to migrate.") + print("") return None - print("Will migrate %s TriggerInstanceDB objects" % (len(trigger_instance_ids))) + print("Will migrate %s TriggerInstanceDB objects" % (objects_count)) - for trigger_instance_id in trigger_instance_ids or []: + for index, trigger_instance_id in enumerate(trigger_instance_ids, 1): try: trigger_instance_db = TriggerInstance.get_by_id(trigger_instance_id) - except StackStormDBObjectNotFoundError as e: + except StackStormDBObjectNotFoundError: print( "Skipping TriggerInstanceDB with id %s which is missing in the database" % (trigger_instance_id) ) continue - print("Migrating TriggerInstanceDB with id %s" % (trigger_instance_db.id)) + print( + "[%s/%s] Migrating TriggerInstanceDB with id %s" + % (index, objects_count, trigger_instance_id) + ) trigger_instance_db._mark_as_changed("payload") @@ -311,7 +328,6 @@ def migrate_objects(display_prompt: bool = True) -> None: duration = end_ts - start_ts - print("") print( "SUCCESS: All database objects migrated successfully (duration: %s seconds)." % (duration) From 94ed6b0838371466471aa12c8391d3d182fc98f2 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Tue, 11 May 2021 22:40:48 +0200 Subject: [PATCH 24/34] Address review feedback. --- .../migrations/v3.5/st2-migrate-db-dict-field-values | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values b/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values index 6d853b5881..73108a0f0e 100755 --- a/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values +++ b/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values @@ -118,7 +118,8 @@ def migrate_executions() -> None: execution_db._mark_as_changed("result") execution_db._mark_as_changed("result_size") - # print(getattr(execution_db, "_changed_fields", [])) + # NOTE: If you want to view changed fields, you can access execution_db._changed_fields + execution_db.save() print("ActionExecutionDB with id %s has been migrated" % (execution_db.id)) @@ -142,7 +143,6 @@ def migrate_executions() -> None: liveaction_db._mark_as_changed("result") - # print(getattr(liveaction_db, "_changed_fields", [])) liveaction_db.save() print("Related LiveActionDB with id %s has been migrated" % (liveaction_db.id)) print("") @@ -309,10 +309,10 @@ def migrate_objects(display_prompt: bool = True) -> None: if display_prompt: input( - "You are stronly recommended to create database backup before running the " + "You are strongly recommended to create database backup before running the " "migration script.\n\nDepending on the number of the objects in the database, " - "migration may take multiple hours or more. You are recommended to start the the " - "script in a screen session or similar. \n\n" + "migration may take multiple hours or more. You are recommended to start the " + "script in a screen session, tmux or similar. \n\n" "To proceed with the migration, press enter and to cancel it, press CTRL+C." 
) print("") From db132ac75bb75093a441554743e5f112209e8479 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Tue, 11 May 2021 22:46:28 +0200 Subject: [PATCH 25/34] Don't include patch version in the job name. --- .github/workflows/ci.yaml | 20 ++++++++++++++++--- .github/workflows/microbenchmarks.yaml | 4 +++- .../workflows/orquesta-integration-tests.yaml | 4 +++- 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index ab64a3d663..4e4f6c0b07 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -41,7 +41,7 @@ jobs: # NOTE: We always want to run job on master since we run some additional checks there (code # coverage, etc) if: ${{ needs.pre_job.outputs.should_skip != 'true' || github.ref == 'refs/heads/master' }} - name: '${{ matrix.name }} - Python ${{ matrix.python-version }}' + name: '${{ matrix.name }} - Python ${{ matrix.python-version-short }}' runs-on: ubuntu-latest strategy: fail-fast: false @@ -53,15 +53,19 @@ jobs: include: - name: 'Lint Checks (black, flake8, etc.)' task: 'ci-checks' + python-version-short: '3.6' python-version: '3.6.13' - name: 'Compile (pip deps, pylint, etc.)' task: 'ci-compile' + python-version-short: '3.6' python-version: '3.6.13' - name: 'Lint Checks (black, flake8, etc.)' task: 'ci-checks' + python-version-short: '3.8' python-version: '3.8.10' - name: 'Compile (pip deps, pylint, etc.)' task: 'ci-compile' + python-version-short: '3.8' python-version: '3.8.10' env: @@ -132,7 +136,7 @@ jobs: # NOTE: We always want to run job on master since we run some additional checks there (code # coverage, etc) if: ${{ needs.pre_job.outputs.should_skip != 'true' || github.ref == 'refs/heads/master' }} - name: '${{ matrix.name }} - Python ${{ matrix.python-version }}' + name: '${{ matrix.name }} - Python ${{ matrix.python-version-short }}' runs-on: ubuntu-latest strategy: fail-fast: false @@ -144,21 +148,25 @@ jobs: task: 'ci-unit' nosetests_node_total: 2 nosetests_node_index: 0 + python-version-short: '3.6' python-version: '3.6.13' - name: 'Unit Tests (chunk 2)' task: 'ci-unit' nosetests_node_total: 2 nosetests_node_index: 1 + python-version-short: '3.6' python-version: '3.6.13' - name: 'Unit Tests (chunk 1)' task: 'ci-unit' nosetests_node_total: 2 nosetests_node_index: 0 + python-version-short: '3.8' python-version: '3.8.10' - name: 'Unit Tests (chunk 2)' task: 'ci-unit' nosetests_node_total: 2 nosetests_node_index: 1 + python-version-short: '3.8' python-version: '3.8.10' # This job is slow so we only run in on a daily basis # - name: 'Micro Benchmarks' @@ -297,7 +305,7 @@ jobs: # NOTE: We always want to run job on master since we run some additional checks there (code # coverage, etc) if: ${{ needs.pre_job.outputs.should_skip != 'true' || github.ref == 'refs/heads/master' }} - name: '${{ matrix.name }} - Python ${{ matrix.python-version }}' + name: '${{ matrix.name }} - Python ${{ matrix.python-version-short }}' runs-on: ubuntu-latest strategy: fail-fast: false @@ -311,31 +319,37 @@ jobs: task: 'ci-packs-tests' nosetests_node_total: 1 nosetests_node_index: 0 + python-version-short: '3.6' python-version: '3.6.13' - name: 'Integration Tests (chunk 1)' task: 'ci-integration' nosetests_node_total: 2 nosetests_node_index: 0 + python-version-short: '3.6' python-version: '3.6.13' - name: 'Integration Tests (chunk 2)' task: 'ci-integration' nosetests_node_total: 2 nosetests_node_index: 1 + python-version-short: '3.6' python-version: '3.6.13' - name: 'Pack Tests' task: 'ci-packs-tests' 
nosetests_node_total: 1 nosetests_node_index: 0 + python-version-short: '3.8' python-version: '3.8.10' - name: 'Integration Tests (chunk 1)' task: 'ci-integration' nosetests_node_total: 2 nosetests_node_index: 0 + python-version-short: '3.8' python-version: '3.8.10' - name: 'Integration Tests (chunk 2)' task: 'ci-integration' nosetests_node_total: 2 nosetests_node_index: 1 + python-version-short: '3.8' python-version: '3.8.10' services: mongo: diff --git a/.github/workflows/microbenchmarks.yaml b/.github/workflows/microbenchmarks.yaml index 1c11527d6a..7480c13b3a 100644 --- a/.github/workflows/microbenchmarks.yaml +++ b/.github/workflows/microbenchmarks.yaml @@ -26,7 +26,7 @@ jobs: # NOTE: We always want to run job on master since we run some additional checks there (code # coverage, etc) if: ${{ needs.pre_job.outputs.should_skip != 'true' || github.ref == 'refs/heads/master' }} - name: '${{ matrix.name }} - Python ${{ matrix.python-version }}' + name: '${{ matrix.name }} - Python ${{ matrix.python-version-short }}' runs-on: ubuntu-latest strategy: fail-fast: false @@ -38,11 +38,13 @@ jobs: task: 'micro-benchmarks' nosetests_node_total: 1 nosetests_node_index: 0 + python-version-short: '3.6' python-version: '3.6.13' - name: 'Microbenchmarks' task: 'micro-benchmarks' nosetests_node_total: 1 nosetests_node_index: 0 + python-version-short: '3.8' python-version: '3.8.10' services: mongo: diff --git a/.github/workflows/orquesta-integration-tests.yaml b/.github/workflows/orquesta-integration-tests.yaml index a6841b7633..98159afe8f 100644 --- a/.github/workflows/orquesta-integration-tests.yaml +++ b/.github/workflows/orquesta-integration-tests.yaml @@ -44,7 +44,7 @@ jobs: # NOTE: We always want to run job on master since we run some additional checks there (code # coverage, etc) if: ${{ needs.pre_job.outputs.should_skip != 'true' || github.ref == 'refs/heads/master' }} - name: '${{ matrix.name }} - Python ${{ matrix.python-version }}' + name: '${{ matrix.name }} - Python ${{ matrix.python-version-short }}' runs-on: ubuntu-latest strategy: fail-fast: false @@ -57,10 +57,12 @@ jobs: nosetests_node_total: 1 nosetests_node_index: 0 python-version: '3.6.13' + python-version-short: '3.6' - name: 'Integration Tests (Orquesta)' task: 'ci-orquesta' nosetests_node_total: 1 nosetests_node_index: 0 + python-version-short: '3.8' python-version: '3.8.10' services: mongo: From ab93e8688e84936765f6cb14872e77a4b9b436c9 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Tue, 11 May 2021 22:59:58 +0200 Subject: [PATCH 26/34] Optimize the script and operate with raw pymongo values when retrieving just the ids - this way we avoid unnecessary object churn and instantiating model class for a single field item. --- .../v3.5/st2-migrate-db-dict-field-values | 134 ++++++++++-------- 1 file changed, 76 insertions(+), 58 deletions(-) diff --git a/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values b/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values index 73108a0f0e..54a6082535 100755 --- a/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values +++ b/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values @@ -58,6 +58,15 @@ from st2common.exceptions.db import StackStormDBObjectNotFoundError from st2common.constants.action import LIVEACTION_COMPLETED_STATES from st2common.constants.triggers import TRIGGER_INSTANCE_COMPLETED_STATES +# We retrieve and process up to this amount of object IDs in a single MongoDB query. 
This is done +# to avoid potentially retrieving many 10s of millions of object ids in a single query +BATCH_SIZE = 1000 + + +# NOTE: To avoid unncessary mongoengine object churn when retrieving only object ids (aka to avoid +# instantiating model class with a single field), we use raw pymongo value which is a dict with a +# single value + def migrate_executions() -> None: """ @@ -73,20 +82,24 @@ def migrate_executions() -> None: # works correctly - with PersistanceClass.query().only() all the fields will still be retrieved. # 1. Migrate ActionExecutionDB objects - execution_dbs = ActionExecutionDB.objects( - __raw__={ - "result": { - "$not": { - "$type": "binData", + result = ( + ActionExecutionDB.objects( + __raw__={ + "result": { + "$not": { + "$type": "binData", + }, + }, + "status": { + "$in": LIVEACTION_COMPLETED_STATES, }, }, - "status": { - "$in": LIVEACTION_COMPLETED_STATES, - }, - }, - ).only("id") - execution_ids = [str(execution_db.id) for execution_db in execution_dbs] - objects_count = len(execution_ids) + ) + .only("id") + .as_pymongo() + ) + execution_ids = [str(item["_id"]) for item in result] + objects_count = result.count() if not execution_ids: print("Found no ActionExecutionDB objects to migrate.") @@ -152,23 +165,24 @@ def migrate_workflow_objects() -> None: print("Migrating workflow objects") # 1. Migrate WorkflowExecutionDB - workflow_execution_dbs = WorkflowExecutionDB.objects( - __raw__={ - "output": { - "$not": { - "$type": "binData", + result = ( + WorkflowExecutionDB.objects( + __raw__={ + "output": { + "$not": { + "$type": "binData", + }, }, - }, - "status": { - "$in": LIVEACTION_COMPLETED_STATES, - }, - } - ).only("id") - workflow_execution_ids = [ - str(workflow_execution_db.id) - for workflow_execution_db in workflow_execution_dbs - ] - objects_count = len(workflow_execution_ids) + "status": { + "$in": LIVEACTION_COMPLETED_STATES, + }, + } + ) + .only("id") + .as_pymongo() + ) + workflow_execution_ids = [str(item["_id"]) for item in result] + objects_count = result.count() if not workflow_execution_ids: print("Found no WorkflowExecutionDB objects to migrate.") @@ -205,22 +219,24 @@ def migrate_workflow_objects() -> None: print("") # 2. 
Migrate TaskExecutionDB - task_execution_dbs = TaskExecutionDB.objects( - __raw__={ - "result": { - "$not": { - "$type": "binData", + result = ( + TaskExecutionDB.objects( + __raw__={ + "result": { + "$not": { + "$type": "binData", + }, }, - }, - "status": { - "$in": LIVEACTION_COMPLETED_STATES, - }, - } - ).only("id") - task_execution_ids = [ - str(task_execution_db.id) for task_execution_db in task_execution_dbs - ] - objects_count = len(task_execution_ids) + "status": { + "$in": LIVEACTION_COMPLETED_STATES, + }, + } + ) + .only("id") + .as_pymongo() + ) + task_execution_ids = [str(item["_id"]) for item in result] + objects_count = result.count() if not task_execution_ids: print("Found no TaskExecutionDB objects to migrate.") @@ -256,22 +272,24 @@ def migrate_workflow_objects() -> None: def migrate_triggers() -> None: print("Migrating trigger objects") - trigger_instance_dbs = TriggerInstanceDB.objects( - __raw__={ - "payload": { - "$not": { - "$type": "binData", + result = ( + TriggerInstanceDB.objects( + __raw__={ + "payload": { + "$not": { + "$type": "binData", + }, }, - }, - "status": { - "$in": TRIGGER_INSTANCE_COMPLETED_STATES, - }, - } - ).only("id") - trigger_instance_ids = [ - str(trigger_instance_db.id) for trigger_instance_db in trigger_instance_dbs - ] - objects_count = len(trigger_instance_dbs) + "status": { + "$in": TRIGGER_INSTANCE_COMPLETED_STATES, + }, + } + ) + .only("id") + .as_pymongo() + ) + trigger_instance_ids = [str(item["_id"]) for item in result] + objects_count = result.count() if not trigger_instance_ids: print("Found no TriggerInstanceDB objects to migrate.") From 85149e787ce8a89f8d618d4f11bb05279aa41b0d Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Tue, 11 May 2021 23:56:48 +0200 Subject: [PATCH 27/34] Add new --start-ts= argument and default it to now - 30 days. --- .../v3.5/st2-migrate-db-dict-field-values | 71 ++++++++++++++----- 1 file changed, 53 insertions(+), 18 deletions(-) diff --git a/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values b/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values index 54a6082535..80a1a32a74 100755 --- a/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values +++ b/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values @@ -40,8 +40,11 @@ could also determine that based on the presence of result_size field). import sys import time +import datetime import traceback +from oslo_config import cfg + from st2common import config from st2common.service_setup import db_setup from st2common.service_setup import db_teardown @@ -60,15 +63,15 @@ from st2common.constants.triggers import TRIGGER_INSTANCE_COMPLETED_STATES # We retrieve and process up to this amount of object IDs in a single MongoDB query. This is done # to avoid potentially retrieving many 10s of millions of object ids in a single query +# TODO: Implement batching --since-ts, --model=action,trigger,workflow flag BATCH_SIZE = 1000 - # NOTE: To avoid unncessary mongoengine object churn when retrieving only object ids (aka to avoid # instantiating model class with a single field), we use raw pymongo value which is a dict with a # single value -def migrate_executions() -> None: +def migrate_executions(start_dt: datetime.datetime) -> None: """ Perform migrations for execution related objects (ActionExecutionDB, LiveActionDB). 
""" @@ -94,6 +97,7 @@ def migrate_executions() -> None: "$in": LIVEACTION_COMPLETED_STATES, }, }, + start_timestamp__gte=start_dt, ) .only("id") .as_pymongo() @@ -161,7 +165,7 @@ def migrate_executions() -> None: print("") -def migrate_workflow_objects() -> None: +def migrate_workflow_objects(start_dt: datetime.datetime) -> None: print("Migrating workflow objects") # 1. Migrate WorkflowExecutionDB @@ -176,7 +180,8 @@ def migrate_workflow_objects() -> None: "status": { "$in": LIVEACTION_COMPLETED_STATES, }, - } + }, + start_timestamp__gte=start_dt, ) .only("id") .as_pymongo() @@ -230,7 +235,8 @@ def migrate_workflow_objects() -> None: "status": { "$in": LIVEACTION_COMPLETED_STATES, }, - } + }, + start_timestamp__gte=start_dt, ) .only("id") .as_pymongo() @@ -269,7 +275,7 @@ def migrate_workflow_objects() -> None: print("") -def migrate_triggers() -> None: +def migrate_triggers(start_dt: datetime.datetime) -> None: print("Migrating trigger objects") result = ( @@ -283,7 +289,8 @@ def migrate_triggers() -> None: "status": { "$in": TRIGGER_INSTANCE_COMPLETED_STATES, }, - } + }, + occurrence_time__gte=start_dt, ) .only("id") .as_pymongo() @@ -322,7 +329,7 @@ def migrate_triggers() -> None: print("") -def migrate_objects(display_prompt: bool = True) -> None: +def migrate_objects(start_dt: datetime.datetime, display_prompt: bool = True) -> None: print("StackStorm v3.5 database field data migration script\n") if display_prompt: @@ -335,13 +342,18 @@ def migrate_objects(display_prompt: bool = True) -> None: ) print("") - print("Migrating affected database objects to utilize new field type") + start_dt_str = start_dt.strftime("%Y-%m-%d %H:%M:%S") + + print( + "Migrating affected database objects newer than %s to utilize new field type" + % (start_dt_str) + ) print("") start_ts = int(time.time()) - migrate_executions() - migrate_workflow_objects() - migrate_triggers() + migrate_executions(start_dt=start_dt) + migrate_workflow_objects(start_dt=start_dt) + migrate_triggers(start_dt=start_dt) end_ts = int(time.time()) duration = end_ts - start_ts @@ -352,18 +364,41 @@ def migrate_objects(display_prompt: bool = True) -> None: ) +def _register_cli_opts(): + cfg.CONF.register_cli_opt( + cfg.BoolOpt( + "yes", + short="y", + required=False, + default=False, + ) + ) + + # Unix timestamp which indicates how far we should go when migrating data. We default to + # now - 30 days. Keep in mind that using longer period may take a long time in case there are + # many objects in the database + default_ts = int(time.time()) - (30 * 86400) + cfg.CONF.register_cli_opt( + cfg.IntOpt( + "start-ts", + required=False, + default=default_ts, + ) + ) + + def main(): - if "--yes" in sys.argv: - sys.argv.remove("--yes") - display_prompt = False - else: - display_prompt = True + _register_cli_opts() config.parse_args() db_setup() + start_dt = datetime.datetime.utcfromtimestamp(cfg.CONF.start_ts).replace( + tzinfo=datetime.timezone.utc + ) + try: - migrate_objects(display_prompt=display_prompt) + migrate_objects(start_dt=start_dt, display_prompt=not cfg.CONF.yes) exit_code = 0 except Exception as e: print("ABORTED: Objects migration aborted on first failure: %s" % (str(e))) From 2a0969f6458ac8fdbe9bf02c5e197cd102f327bf Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Wed, 12 May 2021 18:40:44 +0200 Subject: [PATCH 28/34] Make sure we also set execution.result_size field on migration and add temporary workaround for AJ which will be removed after he runs this script and confirms it works. 
--- .../v3.5/st2-migrate-db-dict-field-values | 42 ++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values b/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values index 80a1a32a74..8990c3f776 100755 --- a/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values +++ b/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values @@ -85,6 +85,7 @@ def migrate_executions(start_dt: datetime.datetime) -> None: # works correctly - with PersistanceClass.query().only() all the fields will still be retrieved. # 1. Migrate ActionExecutionDB objects + # 1.1 Find all executions where field type is not binData result = ( ActionExecutionDB.objects( __raw__={ @@ -102,7 +103,36 @@ def migrate_executions(start_dt: datetime.datetime) -> None: .only("id") .as_pymongo() ) - execution_ids = [str(item["_id"]) for item in result] + execution_ids = set([str(item["_id"]) for item in result]) + + # 1.2 Find all executions which don't contain "result_size" attribute, but field type is + # aready binData + # This is just aa workaround for migration script issue during development which has been + # resolved. + # https://github.com/StackStorm/st2/pull/5255#issuecomment-839920397 + # This code path can be removed once @AJ runs that code on his setup. + result = ( + ActionExecutionDB.objects( + __raw__={ + "result": { + "$type": "binData", + }, + "status": { + "$in": LIVEACTION_COMPLETED_STATES, + }, + }, + result_size__not__exists=True, + start_timestamp__gte=start_dt, + ) + .only("id") + .as_pymongo() + ) + + for item in result: + execution_ids.add(str(item["_id"])) + + execution_ids = list(execution_ids) + objects_count = result.count() if not execution_ids: @@ -135,6 +165,16 @@ def migrate_executions(start_dt: datetime.datetime) -> None: execution_db._mark_as_changed("result") execution_db._mark_as_changed("result_size") + # We need to explicitly set result_size attribute since Document.save() code path doesn't + # populate it (but other code paths we utilize elsewhere do). + # Technically we could do it on document clean() / validate() method, but we don't want that + # since execution update code in action runner and elsewhere is optimized to make partial + # updates more efficient. + result_size = len( + ActionExecutionDB.result._serialize_field_value(execution_db.result or {}) + ) + execution_db.result_size = result_size + # NOTE: If you want to view changed fields, you can access execution_db._changed_fields execution_db.save() From 3bc46f04c15d94c19958a692f30414c459a58b32 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Wed, 12 May 2021 21:47:52 +0200 Subject: [PATCH 29/34] Add --start-dt, --end-dt argument to the migration script, update affected tests. 
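Both new flags take ISO 8601 UTC timestamps, so a typical non-interactive invocation would pass something like --start-dt 2020-03-13T19:01:27Z together with --yes. A small sketch of the parsing round-trip the script performs, shown in isolation:

import datetime

# Sketch: parse a --start-dt / --end-dt style value. strptime() returns a
# naive datetime, so it is explicitly pinned to UTC to allow comparisons
# against timezone-aware fields such as start_timestamp and occurrence_time.
raw_value = "2020-03-13T19:01:27Z"
parsed_dt = datetime.datetime.strptime(raw_value, "%Y-%m-%dT%H:%M:%SZ")
parsed_dt = parsed_dt.replace(tzinfo=datetime.timezone.utc)
print(parsed_dt.isoformat())  # -> 2020-03-13T19:01:27+00:00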
--- .../v3.5/st2-migrate-db-dict-field-values | 71 ++++++--- .../test_v35_migrate_db_dict_field_values.py | 142 +++++++++++++++--- 2 files changed, 171 insertions(+), 42 deletions(-) diff --git a/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values b/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values index 8990c3f776..46b2178514 100755 --- a/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values +++ b/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values @@ -71,7 +71,7 @@ BATCH_SIZE = 1000 # single value -def migrate_executions(start_dt: datetime.datetime) -> None: +def migrate_executions(start_dt: datetime.datetime, end_dt: datetime.datetime) -> None: """ Perform migrations for execution related objects (ActionExecutionDB, LiveActionDB). """ @@ -99,6 +99,7 @@ def migrate_executions(start_dt: datetime.datetime) -> None: }, }, start_timestamp__gte=start_dt, + start_timestamp__lte=end_dt, ) .only("id") .as_pymongo() @@ -123,6 +124,7 @@ def migrate_executions(start_dt: datetime.datetime) -> None: }, result_size__not__exists=True, start_timestamp__gte=start_dt, + start_timestamp__lte=end_dt, ) .only("id") .as_pymongo() @@ -205,7 +207,9 @@ def migrate_executions(start_dt: datetime.datetime) -> None: print("") -def migrate_workflow_objects(start_dt: datetime.datetime) -> None: +def migrate_workflow_objects( + start_dt: datetime.datetime, end_dt: datetime.datetime +) -> None: print("Migrating workflow objects") # 1. Migrate WorkflowExecutionDB @@ -222,6 +226,7 @@ def migrate_workflow_objects(start_dt: datetime.datetime) -> None: }, }, start_timestamp__gte=start_dt, + start_timestamp__lte=end_dt, ) .only("id") .as_pymongo() @@ -277,6 +282,7 @@ def migrate_workflow_objects(start_dt: datetime.datetime) -> None: }, }, start_timestamp__gte=start_dt, + start_timestamp__lte=end_dt, ) .only("id") .as_pymongo() @@ -315,7 +321,7 @@ def migrate_workflow_objects(start_dt: datetime.datetime) -> None: print("") -def migrate_triggers(start_dt: datetime.datetime) -> None: +def migrate_triggers(start_dt: datetime.datetime, end_dt: datetime.datetime) -> None: print("Migrating trigger objects") result = ( @@ -331,6 +337,7 @@ def migrate_triggers(start_dt: datetime.datetime) -> None: }, }, occurrence_time__gte=start_dt, + occurrence_time__lte=end_dt, ) .only("id") .as_pymongo() @@ -369,7 +376,9 @@ def migrate_triggers(start_dt: datetime.datetime) -> None: print("") -def migrate_objects(start_dt: datetime.datetime, display_prompt: bool = True) -> None: +def migrate_objects( + start_dt: datetime.datetime, end_dt: datetime.datetime, display_prompt: bool = True +) -> None: print("StackStorm v3.5 database field data migration script\n") if display_prompt: @@ -383,17 +392,18 @@ def migrate_objects(start_dt: datetime.datetime, display_prompt: bool = True) -> print("") start_dt_str = start_dt.strftime("%Y-%m-%d %H:%M:%S") + end_dt_str = end_dt.strftime("%Y-%m-%d %H:%M:%S") print( - "Migrating affected database objects newer than %s to utilize new field type" - % (start_dt_str) + "Migrating affected database objects between %s and %s" + % (start_dt_str, end_dt_str) ) print("") start_ts = int(time.time()) - migrate_executions(start_dt=start_dt) - migrate_workflow_objects(start_dt=start_dt) - migrate_triggers(start_dt=start_dt) + migrate_executions(start_dt=start_dt, end_dt=end_dt) + migrate_workflow_objects(start_dt=start_dt, end_dt=end_dt) + migrate_triggers(start_dt=start_dt, end_dt=end_dt) end_ts = int(time.time()) duration = end_ts - start_ts @@ -414,15 +424,33 @@ def 
_register_cli_opts(): ) ) - # Unix timestamp which indicates how far we should go when migrating data. We default to - # now - 30 days. Keep in mind that using longer period may take a long time in case there are - # many objects in the database - default_ts = int(time.time()) - (30 * 86400) + # We default to past 30 days. Keep in mind that using longer period may take a long time in + # case there are many objects in the database. + now_dt = datetime.datetime.utcnow() + start_dt = now_dt - datetime.timedelta(days=30) + cfg.CONF.register_cli_opt( - cfg.IntOpt( - "start-ts", + cfg.StrOpt( + "start-dt", required=False, - default=default_ts, + help=( + "Start cut off ISO UTC iso date time string for objects which will be migrated. " + "Defaults to now - 30 days." + "Example value: 2020-03-13T19:01:27Z" + ), + default=start_dt.strftime("%Y-%m-%dT%H:%M:%SZ"), + ) + ) + cfg.CONF.register_cli_opt( + cfg.StrOpt( + "end-dt", + required=False, + help=( + "End cut off UTC ISO date time string for objects which will be migrated." + "Defaults to now." + "Example value: 2020-03-13T19:01:27Z" + ), + default=now_dt.strftime("%Y-%m-%dT%H:%M:%SZ"), ) ) @@ -433,12 +461,15 @@ def main(): config.parse_args() db_setup() - start_dt = datetime.datetime.utcfromtimestamp(cfg.CONF.start_ts).replace( - tzinfo=datetime.timezone.utc - ) + start_dt = datetime.datetime.strptime(cfg.CONF.start_dt, "%Y-%m-%dT%H:%M:%SZ") + start_dt = start_dt.replace(tzinfo=datetime.timezone.utc) + end_dt = datetime.datetime.strptime(cfg.CONF.end_dt, "%Y-%m-%dT%H:%M:%SZ") + end_dt = end_dt.replace(tzinfo=datetime.timezone.utc) try: - migrate_objects(start_dt=start_dt, display_prompt=not cfg.CONF.yes) + migrate_objects( + start_dt=start_dt, end_dt=end_dt, display_prompt=not cfg.CONF.yes + ) exit_code = 0 except Exception as e: print("ABORTED: Objects migration aborted on first failure: %s" % (str(e))) diff --git a/st2common/tests/unit/migrations/test_v35_migrate_db_dict_field_values.py b/st2common/tests/unit/migrations/test_v35_migrate_db_dict_field_values.py index c12ecfd703..84347fcbab 100644 --- a/st2common/tests/unit/migrations/test_v35_migrate_db_dict_field_values.py +++ b/st2common/tests/unit/migrations/test_v35_migrate_db_dict_field_values.py @@ -15,6 +15,8 @@ import os import sys +import datetime + from st2common.constants import action as action_constants from st2common.models.db import stormbase from st2common.models.db.execution import ActionExecutionDB @@ -69,6 +71,9 @@ def tearDownClass(cls): TaskExecutionDB._meta["allow_inheritance"] = False TriggerInstanceDB._meta["allow_inheritance"] = False + def test_migrate_executions_related_liveaction_doesnt_exist(self): + pass + def test_migrate_executions(self): ActionExecutionDB._meta["allow_inheritance"] = True LiveActionDB._meta["allow_inheritance"] = True @@ -103,7 +108,10 @@ class LiveActionDB_OldFieldType(LiveActionDB): liveaction_1_db.action = "foo.bar" liveaction_1_db.status = action_constants.LIVEACTION_STATUS_FAILED liveaction_1_db.result = MOCK_RESULT_1 - liveaction_1_db = LiveAction.add_or_update(liveaction_1_db) + liveaction_1_db.start_timestamp = datetime.datetime.utcnow().replace( + tzinfo=datetime.timezone.utc + ) + liveaction_1_db = LiveAction.add_or_update(liveaction_1_db, publish=False) execution_1_db = ActionExecutionDB_OldFieldType() execution_1_db.action = {"a": 1} @@ -111,14 +119,22 @@ class LiveActionDB_OldFieldType(LiveActionDB): execution_1_db.liveaction = {"id": liveaction_1_db.id} execution_1_db.status = action_constants.LIVEACTION_STATUS_FAILED 
execution_1_db.result = MOCK_RESULT_1 - execution_1_db = ActionExecution.add_or_update(execution_1_db) + execution_1_db.start_timestamp = datetime.datetime.utcnow().replace( + tzinfo=datetime.timezone.utc + ) + + execution_1_db = ActionExecution.add_or_update(execution_1_db, publish=False) # This execution is not in a final state yet so it should not be migrated liveaction_2_db = LiveActionDB_OldFieldType() liveaction_2_db.action = "foo.bar2" liveaction_2_db.status = action_constants.LIVEACTION_STATUS_RUNNING liveaction_2_db.result = MOCK_RESULT_2 - liveaction_2_db = LiveAction.add_or_update(liveaction_2_db) + liveaction_2_db.start_timestamp = datetime.datetime.utcnow().replace( + tzinfo=datetime.timezone.utc + ) + + liveaction_2_db = LiveAction.add_or_update(liveaction_2_db, publish=False) execution_2_db = ActionExecutionDB_OldFieldType() execution_2_db.action = {"a": 2} @@ -126,7 +142,24 @@ class LiveActionDB_OldFieldType(LiveActionDB): execution_2_db.liveaction = {"id": liveaction_2_db.id} execution_2_db.status = action_constants.LIVEACTION_STATUS_RUNNING execution_2_db.result = MOCK_RESULT_2 - execution_2_db = ActionExecution.add_or_update(execution_2_db) + execution_2_db.start_timestamp = datetime.datetime.utcnow().replace( + tzinfo=datetime.timezone.utc + ) + + execution_2_db = ActionExecution.add_or_update(execution_2_db, publish=False) + + # This object is older than the default threshold so it should not be migrated + execution_3_db = ActionExecutionDB_OldFieldType() + execution_3_db.action = {"a": 2} + execution_3_db.runner = {"a": 2} + execution_3_db.liveaction = {"id": liveaction_2_db.id} + execution_3_db.status = action_constants.LIVEACTION_STATUS_SUCCEEDED + execution_3_db.result = MOCK_RESULT_1 + execution_3_db.start_timestamp = datetime.datetime.utcfromtimestamp(0).replace( + tzinfo=datetime.timezone.utc + ) + + execution_3_db = ActionExecution.add_or_update(execution_3_db, publish=False) # Verify data has been inserted in old format execution_dbs = ActionExecution.query( @@ -136,7 +169,7 @@ class LiveActionDB_OldFieldType(LiveActionDB): }, } ) - self.assertEqual(len(execution_dbs), 2) + self.assertEqual(len(execution_dbs), 3) execution_dbs = ActionExecution.query( __raw__={ "result": { @@ -146,7 +179,7 @@ class LiveActionDB_OldFieldType(LiveActionDB): } } ) - self.assertEqual(len(execution_dbs), 2) + self.assertEqual(len(execution_dbs), 3) execution_dbs = ActionExecution.query( __raw__={ "result": { @@ -202,7 +235,11 @@ class LiveActionDB_OldFieldType(LiveActionDB): ).update(set___cls="LiveActionDB") # 2. Run migration - migration_module.migrate_executions() + start_dt = datetime.datetime.utcnow().replace( + tzinfo=datetime.timezone.utc + ) - datetime.timedelta(hours=2) + end_dt = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc) + migration_module.migrate_executions(start_dt=start_dt, end_dt=end_dt) # 3. 
Verify data has been migrated - only 1 item should have been migrated since it's in a # completed state @@ -213,7 +250,7 @@ class LiveActionDB_OldFieldType(LiveActionDB): }, } ) - self.assertEqual(len(execution_dbs), 1) + self.assertEqual(len(execution_dbs), 2) execution_dbs = ActionExecution.query( __raw__={ "result": { @@ -297,7 +334,7 @@ class TaskExecutionDB_OldFieldType(TaskExecutionDB): workflow_execution_1_db.status = action_constants.LIVEACTION_STATUS_SUCCEEDED workflow_execution_1_db.action_execution = "a" workflow_execution_1_db = WorkflowExecution.add_or_update( - workflow_execution_1_db + workflow_execution_1_db, publish=False ) task_execution_1_db = TaskExecutionDB_OldFieldType() @@ -309,7 +346,9 @@ class TaskExecutionDB_OldFieldType(TaskExecutionDB): task_execution_1_db.task_name = "a" task_execution_1_db.task_id = "a" task_execution_1_db.task_route = 1 - task_execution_1_db = TaskExecution.add_or_update(task_execution_1_db) + task_execution_1_db = TaskExecution.add_or_update( + task_execution_1_db, publish=False + ) workflow_execution_2_db = WorkflowExecutionDB_OldFieldType() workflow_execution_2_db.input = MOCK_RESULT_2 @@ -319,7 +358,7 @@ class TaskExecutionDB_OldFieldType(TaskExecutionDB): workflow_execution_2_db.status = action_constants.LIVEACTION_STATUS_RUNNING workflow_execution_2_db.action_execution = "b" workflow_execution_2_db = WorkflowExecution.add_or_update( - workflow_execution_2_db + workflow_execution_2_db, publish=False ) task_execution_2_db = TaskExecutionDB_OldFieldType() @@ -331,7 +370,40 @@ class TaskExecutionDB_OldFieldType(TaskExecutionDB): task_execution_2_db.task_name = "b" task_execution_2_db.task_id = "b" task_execution_2_db.task_route = 2 - task_execution_2_db = TaskExecution.add_or_update(task_execution_2_db) + task_execution_2_db = TaskExecution.add_or_update( + task_execution_2_db, publish=False + ) + + # This object is older than the default threshold so it should not be migrated + workflow_execution_3_db = WorkflowExecutionDB_OldFieldType() + workflow_execution_3_db.input = MOCK_RESULT_2 + workflow_execution_3_db.context = MOCK_RESULT_2 + workflow_execution_3_db.state = MOCK_RESULT_2 + workflow_execution_3_db.output = MOCK_RESULT_2 + workflow_execution_3_db.status = action_constants.LIVEACTION_STATUS_SUCCEEDED + workflow_execution_3_db.action_execution = "b" + workflow_execution_3_db.start_timestamp = datetime.datetime.utcfromtimestamp( + 0 + ).replace(tzinfo=datetime.timezone.utc) + workflow_execution_3_db = WorkflowExecution.add_or_update( + workflow_execution_3_db, publish=False + ) + + task_execution_3_db = TaskExecutionDB_OldFieldType() + task_execution_3_db.task_spec = MOCK_RESULT_2 + task_execution_3_db.context = MOCK_RESULT_2 + task_execution_3_db.result = MOCK_RESULT_2 + task_execution_3_db.status = action_constants.LIVEACTION_STATUS_SUCCEEDED + task_execution_3_db.workflow_execution = "b" + task_execution_3_db.task_name = "b" + task_execution_3_db.task_id = "b" + task_execution_3_db.task_route = 2 + task_execution_3_db.start_timestamp = datetime.datetime.utcfromtimestamp( + 0 + ).replace(tzinfo=datetime.timezone.utc) + task_execution_3_db = TaskExecution.add_or_update( + task_execution_3_db, publish=False + ) # Update inserted documents and remove special _cls field added by mongoengine. We need to # do that here due to how mongoengine works with subclasses. @@ -352,7 +424,11 @@ class TaskExecutionDB_OldFieldType(TaskExecutionDB): ).update(set___cls="TaskExecutionDB") # 2. 
Run migration - migration_module.migrate_workflow_objects() + start_dt = datetime.datetime.utcnow().replace( + tzinfo=datetime.timezone.utc + ) - datetime.timedelta(hours=2) + end_dt = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc) + migration_module.migrate_workflow_objects(start_dt=start_dt, end_dt=end_dt) # 3. Verify data has been migrated - only 1 item should have been migrated since it's in a # completed state @@ -371,7 +447,7 @@ class TaskExecutionDB_OldFieldType(TaskExecutionDB): }, } ) - self.assertEqual(len(workflow_execution_dbs), 1) + self.assertEqual(len(workflow_execution_dbs), 2) task_execution_dbs = TaskExecution.query( __raw__={ @@ -388,7 +464,7 @@ class TaskExecutionDB_OldFieldType(TaskExecutionDB): }, } ) - self.assertEqual(len(task_execution_dbs), 1) + self.assertEqual(len(task_execution_dbs), 2) workflow_execution_1_db_retrieved = WorkflowExecution.get_by_id( workflow_execution_1_db.id @@ -435,14 +511,30 @@ class TriggerInstanceDB_OldFieldType(TriggerInstanceDB): trigger_instance_1_db = TriggerInstanceDB_OldFieldType() trigger_instance_1_db.payload = MOCK_PAYLOAD_1 trigger_instance_1_db.status = TRIGGER_INSTANCE_PROCESSED + trigger_instance_1_db.occurrence_time = datetime.datetime.utcnow() - trigger_instance_1_db = TriggerInstance.add_or_update(trigger_instance_1_db) + trigger_instance_1_db = TriggerInstance.add_or_update( + trigger_instance_1_db, publish=False + ) trigger_instance_2_db = TriggerInstanceDB_OldFieldType() trigger_instance_2_db.payload = MOCK_PAYLOAD_2 trigger_instance_2_db.status = TRIGGER_INSTANCE_PENDING + trigger_instance_2_db.occurrence_time = datetime.datetime.utcnow() + + trigger_instance_2_db = TriggerInstance.add_or_update( + trigger_instance_2_db, publish=False + ) + + # This object is older than the default threshold so it should not be migrated + trigger_instance_3_db = TriggerInstanceDB_OldFieldType() + trigger_instance_3_db.payload = MOCK_PAYLOAD_2 + trigger_instance_3_db.status = TRIGGER_INSTANCE_PROCESSED + trigger_instance_3_db.occurrence_time = datetime.datetime.utcfromtimestamp(0) - trigger_instance_2_db = TriggerInstance.add_or_update(trigger_instance_2_db) + trigger_instance_3_db = TriggerInstance.add_or_update( + trigger_instance_3_db, publish=False + ) # Verify data has been inserted in old format trigger_instance_dbs = TriggerInstance.query( @@ -454,7 +546,7 @@ class TriggerInstanceDB_OldFieldType(TriggerInstanceDB): } } ) - self.assertEqual(len(trigger_instance_dbs), 2) + self.assertEqual(len(trigger_instance_dbs), 3) trigger_instance_dbs = TriggerInstance.query( __raw__={ "payload": { @@ -462,7 +554,7 @@ class TriggerInstanceDB_OldFieldType(TriggerInstanceDB): }, } ) - self.assertEqual(len(trigger_instance_dbs), 2) + self.assertEqual(len(trigger_instance_dbs), 3) # Update inserted documents and remove special _cls field added by mongoengine. We need to # do that here due to how mongoengine works with subclasses. @@ -475,7 +567,11 @@ class TriggerInstanceDB_OldFieldType(TriggerInstanceDB): ).update(set___cls="TriggerInstanceDB") # 2. Run migration - migration_module.migrate_triggers() + start_dt = datetime.datetime.utcnow().replace( + tzinfo=datetime.timezone.utc + ) - datetime.timedelta(hours=2) + end_dt = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc) + migration_module.migrate_triggers(start_dt=start_dt, end_dt=end_dt) # 3. 
Verify data has been migrated - only 1 item should have been migrated since it's in a # completed state @@ -488,7 +584,9 @@ class TriggerInstanceDB_OldFieldType(TriggerInstanceDB): } } ) - self.assertEqual(len(trigger_instance_dbs), 1) + + # TODO: Also verify raw as_pymongo() bin field value + self.assertEqual(len(trigger_instance_dbs), 2) trigger_instance_dbs = TriggerInstance.query( __raw__={ "payload": { @@ -496,7 +594,7 @@ class TriggerInstanceDB_OldFieldType(TriggerInstanceDB): }, } ) - self.assertEqual(len(trigger_instance_dbs), 1) + self.assertEqual(len(trigger_instance_dbs), 2) trigger_instance_1_db_retrieved = TriggerInstance.get_by_id( trigger_instance_1_db.id From f9c3536f4c89a0ceba39b644b7fff54daddae643 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Wed, 12 May 2021 22:19:46 +0200 Subject: [PATCH 30/34] Remove temporary change which is not needed anymore. --- .../v3.5/st2-migrate-db-dict-field-values | 31 ------------------- 1 file changed, 31 deletions(-) diff --git a/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values b/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values index 46b2178514..c40c064f85 100755 --- a/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values +++ b/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values @@ -85,7 +85,6 @@ def migrate_executions(start_dt: datetime.datetime, end_dt: datetime.datetime) - # works correctly - with PersistanceClass.query().only() all the fields will still be retrieved. # 1. Migrate ActionExecutionDB objects - # 1.1 Find all executions where field type is not binData result = ( ActionExecutionDB.objects( __raw__={ @@ -105,36 +104,6 @@ def migrate_executions(start_dt: datetime.datetime, end_dt: datetime.datetime) - .as_pymongo() ) execution_ids = set([str(item["_id"]) for item in result]) - - # 1.2 Find all executions which don't contain "result_size" attribute, but field type is - # aready binData - # This is just aa workaround for migration script issue during development which has been - # resolved. - # https://github.com/StackStorm/st2/pull/5255#issuecomment-839920397 - # This code path can be removed once @AJ runs that code on his setup. - result = ( - ActionExecutionDB.objects( - __raw__={ - "result": { - "$type": "binData", - }, - "status": { - "$in": LIVEACTION_COMPLETED_STATES, - }, - }, - result_size__not__exists=True, - start_timestamp__gte=start_dt, - start_timestamp__lte=end_dt, - ) - .only("id") - .as_pymongo() - ) - - for item in result: - execution_ids.add(str(item["_id"])) - - execution_ids = list(execution_ids) - objects_count = result.count() if not execution_ids: From 5f24607755972cf33d83cfacdecc10996411be84 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Wed, 12 May 2021 22:26:13 +0200 Subject: [PATCH 31/34] Update formatting. 
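(Editor's note on PATCH 30/34 above: with the interim workaround removed, old-format documents
are detected purely by BSON type - a migrated "result" field is stored as binData, so anything
else still uses the old, slow-to-read field type. The sketch below mirrors the detection query
the patch retains, reusing only models and constants the script already imports; it adds
nothing beyond what is in the diff itself.)

    result = (
        ActionExecutionDB.objects(
            __raw__={
                # Migrated documents store "result" as binData (the new JSON-serialized
                # field type); everything else is still in the old format
                "result": {"$not": {"$type": "binData"}},
                # Only touch executions in a final state
                "status": {"$in": LIVEACTION_COMPLETED_STATES},
            },
            start_timestamp__gte=start_dt,
            start_timestamp__lte=end_dt,
        )
        # .only("id") + .as_pymongo() avoids instantiating full mongoengine model
        # objects when all we need is the list of ids to migrate
        .only("id")
        .as_pymongo()
    )
    execution_ids = [str(item["_id"]) for item in result]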
--- st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values | 1 + 1 file changed, 1 insertion(+) diff --git a/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values b/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values index c40c064f85..b44b3ecf34 100755 --- a/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values +++ b/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values @@ -320,6 +320,7 @@ def migrate_triggers(start_dt: datetime.datetime, end_dt: datetime.datetime) -> return None print("Will migrate %s TriggerInstanceDB objects" % (objects_count)) + print("") for index, trigger_instance_id in enumerate(trigger_instance_ids, 1): try: From 613bf2d8ec62bb03f31ed13d00324b31f478ba2a Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Wed, 12 May 2021 22:28:17 +0200 Subject: [PATCH 32/34] Support special now notation for --end-dt. --- .../bin/migrations/v3.5/st2-migrate-db-dict-field-values | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values b/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values index b44b3ecf34..5c4938f478 100755 --- a/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values +++ b/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values @@ -433,8 +433,13 @@ def main(): start_dt = datetime.datetime.strptime(cfg.CONF.start_dt, "%Y-%m-%dT%H:%M:%SZ") start_dt = start_dt.replace(tzinfo=datetime.timezone.utc) - end_dt = datetime.datetime.strptime(cfg.CONF.end_dt, "%Y-%m-%dT%H:%M:%SZ") - end_dt = end_dt.replace(tzinfo=datetime.timezone.utc) + + if cfg.CONF.end_dt == "now": + end_dt = datetime.datetime.utcnow() + end_dt = end_dt.replace(tzinfo=datetime.timezone.utc) + else: + end_dt = datetime.datetime.strptime(cfg.CONF.end_dt, "%Y-%m-%dT%H:%M:%SZ") + end_dt = end_dt.replace(tzinfo=datetime.timezone.utc) try: migrate_objects( From 9d8d981b28d739d77d13d432b37ea80418efea9d Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Wed, 12 May 2021 22:35:32 +0200 Subject: [PATCH 33/34] Use isotime.parse() since it supports more formats. 
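(Editor's note: a hedged illustration of the motivation for this patch. st2common.util.isotime.parse()
is meant to accept more ISO 8601 variants than the single strptime() format string it replaces,
so inputs like the following should all resolve to equivalent timezone-aware datetimes. The
exact set of accepted variants is whatever isotime.parse() implements and is not verified here.)

    from st2common.util import isotime

    # Only the first form parses with strptime("%Y-%m-%dT%H:%M:%SZ"); the
    # fractional-seconds and explicit-offset forms previously raised ValueError
    isotime.parse("2020-03-13T19:01:27Z")
    isotime.parse("2020-03-13T19:01:27.000Z")
    isotime.parse("2020-03-13T19:01:27+00:00")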
--- .../v3.5/st2-migrate-db-dict-field-values | 21 ++++++++++---------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values b/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values
index 5c4938f478..b4b38bfb14 100755
--- a/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values
+++ b/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values
@@ -48,6 +48,7 @@
 from oslo_config import cfg
 from st2common import config
 from st2common.service_setup import db_setup
 from st2common.service_setup import db_teardown
+from st2common.util import isotime
 from st2common.models.db.execution import ActionExecutionDB
 from st2common.models.db.workflow import WorkflowExecutionDB
 from st2common.models.db.workflow import TaskExecutionDB
@@ -349,21 +350,23 @@ def migrate_triggers(start_dt: datetime.datetime, end_dt: datetime.datetime) ->
 def migrate_objects(
     start_dt: datetime.datetime, end_dt: datetime.datetime, display_prompt: bool = True
 ) -> None:
+    start_dt_str = start_dt.strftime("%Y-%m-%d %H:%M:%S")
+    end_dt_str = end_dt.strftime("%Y-%m-%d %H:%M:%S")
+
     print("StackStorm v3.5 database field data migration script\n")
 
     if display_prompt:
         input(
-            "You are strongly recommended to create database backup before running the "
-            "migration script.\n\nDepending on the number of the objects in the database, "
+            "Will migrate objects with a creation date between %s UTC and %s UTC.\n\n"
+            "You are strongly recommended to create a database backup before proceeding.\n\n"
+            "Depending on the number of objects in the database, "
             "migration may take multiple hours or more. You are recommended to start the "
             "script in a screen session, tmux or similar. \n\n"
-            "To proceed with the migration, press enter and to cancel it, press CTRL+C."
+            "To proceed with the migration, press enter and to cancel it, press CTRL+C.\n"
+            % (start_dt_str, end_dt_str)
        )
        print("")
 
-    start_dt_str = start_dt.strftime("%Y-%m-%d %H:%M:%S")
-    end_dt_str = end_dt.strftime("%Y-%m-%d %H:%M:%S")
-
     print(
         "Migrating affected database objects between %s and %s"
         % (start_dt_str, end_dt_str)
@@ -431,15 +434,13 @@ def main():
     config.parse_args()
 
     db_setup()
 
-    start_dt = datetime.datetime.strptime(cfg.CONF.start_dt, "%Y-%m-%dT%H:%M:%SZ")
-    start_dt = start_dt.replace(tzinfo=datetime.timezone.utc)
+    start_dt = isotime.parse(cfg.CONF.start_dt)
 
     if cfg.CONF.end_dt == "now":
         end_dt = datetime.datetime.utcnow()
         end_dt = end_dt.replace(tzinfo=datetime.timezone.utc)
     else:
-        end_dt = datetime.datetime.strptime(cfg.CONF.end_dt, "%Y-%m-%dT%H:%M:%SZ")
-        end_dt = end_dt.replace(tzinfo=datetime.timezone.utc)
+        end_dt = isotime.parse(cfg.CONF.end_dt)
 
     try:
         migrate_objects(

From ff37b4fb277c74db1813ae4036574d863fb31e11 Mon Sep 17 00:00:00 2001
From: Tomaz Muraus
Date: Thu, 13 May 2021 09:48:41 +0200
Subject: [PATCH 34/34] Remove unused constant.
--- .../bin/migrations/v3.5/st2-migrate-db-dict-field-values | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values b/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values index b4b38bfb14..0f82738809 100755 --- a/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values +++ b/st2common/bin/migrations/v3.5/st2-migrate-db-dict-field-values @@ -62,12 +62,7 @@ from st2common.exceptions.db import StackStormDBObjectNotFoundError from st2common.constants.action import LIVEACTION_COMPLETED_STATES from st2common.constants.triggers import TRIGGER_INSTANCE_COMPLETED_STATES -# We retrieve and process up to this amount of object IDs in a single MongoDB query. This is done -# to avoid potentially retrieving many 10s of millions of object ids in a single query -# TODO: Implement batching --since-ts, --model=action,trigger,workflow flag -BATCH_SIZE = 1000 - -# NOTE: To avoid unncessary mongoengine object churn when retrieving only object ids (aka to avoid +# NOTE: To avoid unnecessary mongoengine object churn when retrieving only object ids (aka to avoid # instantiating model class with a single field), we use raw pymongo value which is a dict with a # single value
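(Editor's note: taken together, the --start-dt / --end-dt patches leave the window handling
looking roughly like the sketch below. This is a paraphrase of the final main(), not a
verbatim excerpt; a typical invocation would then be something along the lines of
"st2-migrate-db-dict-field-values --start-dt=2021-04-01T00:00:00Z --end-dt=now", with any
config-file flag omitted here.)

    import datetime

    from oslo_config import cfg

    from st2common.util import isotime

    def resolve_migration_window():
        # --start-dt is always parsed as an ISO 8601 timestamp
        start_dt = isotime.parse(cfg.CONF.start_dt)

        # --end-dt additionally accepts the literal string "now" (PATCH 32/34)
        if cfg.CONF.end_dt == "now":
            end_dt = datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc)
        else:
            end_dt = isotime.parse(cfg.CONF.end_dt)

        return start_dt, end_dt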