1 change: 1 addition & 0 deletions Makefile
@@ -569,6 +569,7 @@ micro-benchmarks: requirements .micro-benchmarks
. $(VIRTUALENV_DIR)/bin/activate; pytest --benchmark-histogram=benchmark_histograms/benchmark --benchmark-only --benchmark-name=short --benchmark-columns=min,max,mean,stddev,median,ops,rounds --benchmark-group-by=group,param:fixture_file -s -v st2common/benchmarks/micro/test_fast_deepcopy.py -k "test_fast_deepcopy_with_json_fixture_file"
. $(VIRTUALENV_DIR)/bin/activate; pytest --benchmark-histogram=benchmark_histograms/benchmark --benchmark-only --benchmark-name=short --benchmark-columns=min,max,mean,stddev,median,ops,rounds --benchmark-group-by=group,param:fixture_file,param:indent_sort_keys_tuple -s -v st2common/benchmarks/micro/test_json_serialization_and_deserialization.py -k "test_json_dumps"
. $(VIRTUALENV_DIR)/bin/activate; pytest --benchmark-histogram=benchmark_histograms/benchmark --benchmark-only --benchmark-name=short --benchmark-columns=min,max,mean,stddev,median,ops,rounds --benchmark-group-by=group,param:fixture_file -s -v st2common/benchmarks/micro/test_json_serialization_and_deserialization.py -k "test_json_loads"
. $(VIRTUALENV_DIR)/bin/activate; pytest --benchmark-histogram=benchmark_histograms/benchmark --benchmark-only --benchmark-name=short --benchmark-columns=min,max,mean,stddev,median,ops,rounds --benchmark-group-by=group,param:fixture_file -s -v st2common/benchmarks/micro/test_json_serialization_and_deserialization.py -k "test_orjson_dumps"
. $(VIRTUALENV_DIR)/bin/activate; pytest --benchmark-histogram=benchmark_histograms/benchmark --benchmark-only --benchmark-name=short --benchmark-columns=min,max,mean,stddev,median,ops,rounds --benchmark-group-by=group,param:fixture_file -s -v st2common/benchmarks/micro/test_publisher_compression.py -k "test_pickled_object_compression"
. $(VIRTUALENV_DIR)/bin/activate; pytest --benchmark-histogram=benchmark_histograms/benchmark --benchmark-only --benchmark-name=short --benchmark-columns=min,max,mean,stddev,median,ops,rounds --benchmark-group-by=group,param:fixture_file -s -v st2common/benchmarks/micro/test_publisher_compression.py -k "test_pickled_object_compression_publish"

@@ -132,3 +132,47 @@ def run_benchmark():

    result = benchmark(run_benchmark)
    assert result == content_loaded


def default_handle_sets(obj):
    if isinstance(obj, set):
        return list(obj)
    raise TypeError


@pytest.mark.parametrize(
    "fixture_file",
    [
        "rows.json",
        "json_4mb.json",
    ],
    ids=[
        "rows.json",
        "json_4mb.json",
    ],
)
@pytest.mark.parametrize(
    "options",
    [
        {},
        {"default": default_handle_sets},
    ],
    ids=[
        "none",
        "custom_default_function",
    ],
)
def test_orjson_dumps(benchmark, fixture_file, options):
    with open(os.path.join(FIXTURES_DIR, fixture_file), "r") as fp:
        content = fp.read()

    content_loaded = orjson.loads(content)

    if options:
        content_loaded["fooo_set"] = set([1, 2, 3, 3, 4, 5])

    def run_benchmark():
        return orjson.dumps(content_loaded, **options)

    result = benchmark(run_benchmark)
    assert len(result) >= 100
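For context, the behavior the "custom_default_function" benchmark variant exercises can be reproduced in isolation. A minimal sketch, independent of the fixture files above; the payload here is made up for illustration:

import orjson


def default_handle_sets(obj):
    # orjson invokes this callback for types it cannot serialize natively;
    # convert sets to lists and re-raise TypeError for everything else.
    if isinstance(obj, set):
        return list(obj)
    raise TypeError


payload = {"a": 1, "fooo_set": {1, 2, 3, 3, 4, 5}}

# Without a default handler, orjson rejects the set outright.
try:
    orjson.dumps(payload)
except TypeError:
    pass

# With the handler, the set is emitted as a JSON array (duplicates collapsed).
data = orjson.dumps(payload, default=default_handle_sets)
assert sorted(orjson.loads(data)["fooo_set"]) == [1, 2, 3, 4, 5]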
20 changes: 18 additions & 2 deletions st2common/st2common/fields.py
@@ -458,10 +458,26 @@ def _serialize_field_value(self, value: dict) -> bytes:
"""
Serialize and encode the provided field value.
"""
# Orquesta workflows support the toSet() YAQL operator, which returns a set that the
# mongoengine DictField used to serialize to a list.
#
# For backward compatibility reasons, we need to support serializing a set to a list
# here as well.
#
# Based on micro benchmarks, using a default function adds very little overhead (1%),
# so it should be safe to use it for every operation.
#
# If this turns out not to be true, or it adds more overhead in other scenarios, we
# should revisit this decision and only pass the "default" argument where it is needed
# (i.e. Workflow models).
def default(obj):
    if isinstance(obj, set):
        return list(obj)
    raise TypeError

if not self.use_header:
    return orjson.dumps(value)
    return orjson.dumps(value, default=default)

data = orjson.dumps(value)
data = orjson.dumps(value, default=default)

if self.compression_algorithm == "zstandard":
    # NOTE: At this point zstandard is only test dependency
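The compression branch is truncated in this view. As an illustration only, and not the repository's actual implementation, a minimal sketch of how a zstandard-compressed orjson payload can round-trip, assuming the standalone zstandard package is installed:

import orjson
import zstandard


def default(obj):
    # Same set-to-list fallback as in _serialize_field_value above.
    if isinstance(obj, set):
        return list(obj)
    raise TypeError


value = {"a": 1, "tags": {1, 2, 3}}

# Serialize first, then compress the serialized bytes.
serialized = orjson.dumps(value, default=default)
compressed = zstandard.ZstdCompressor().compress(serialized)

# Decompressing and deserializing reverses both steps; the set comes back as a plain list.
restored = orjson.loads(zstandard.ZstdDecompressor().decompress(compressed))
assert sorted(restored["tags"]) == [1, 2, 3]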
20 changes: 20 additions & 0 deletions st2common/tests/unit/test_db_fields.py
@@ -73,6 +73,16 @@ class ModelWithJSONDictFieldDB(stormbase.StormFoundationDB):


class JSONDictFieldTestCase(unittest2.TestCase):
    def test_set_to_mongo(self):
        field = JSONDictField(use_header=False)
        result = field.to_mongo({"test": {1, 2}})
Review comment (Member): A round-trip test would also be good, i.e. to ensure that when we deserialize the value, we get a list back.
        self.assertTrue(isinstance(result, bytes))

    def test_header_set_to_mongo(self):
        field = JSONDictField(use_header=True)
        result = field.to_mongo({"test": {1, 2}})
        self.assertTrue(isinstance(result, bytes))

    def test_to_mongo(self):
        field = JSONDictField(use_header=False)
        result = field.to_mongo(MOCK_DATA_DICT)
@@ -96,6 +106,16 @@ def test_roundtrip(self):

        self.assertEqual(result_to_python, MOCK_DATA_DICT)

        # sets get serialized to a list
        input_dict = {"a": 1, "set": {1, 2, 3, 4, 4, 4, 5, 5}}
        result = {"a": 1, "set": [1, 2, 3, 4, 5]}

        field = JSONDictField(use_header=False)
        result_to_mongo = field.to_mongo(input_dict)
        result_to_python = field.to_python(result_to_mongo)

        self.assertEqual(result_to_python, result)

    def test_parse_field_value(self):
        # 1. Value not provided, should use default one
        field = JSONDictField(use_header=False, default={})
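Following up on the review comment above, a hypothetical additional test for the header variant. A minimal sketch, assuming to_python accepts the bytes produced by to_mongo when use_header=True; the method name is made up, field and class names are as in the diff:

    def test_header_set_roundtrip(self):
        # Hypothetical follow-up to the review suggestion: verify a set survives
        # a to_mongo / to_python round trip when the header is enabled.
        field = JSONDictField(use_header=True)

        serialized = field.to_mongo({"test": {1, 2}})
        deserialized = field.to_python(serialized)

        # The set is expected to come back as a JSON array, i.e. a Python list.
        self.assertEqual(sorted(deserialized["test"]), [1, 2])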