diff --git a/Makefile b/Makefile
index c6ce74e937..699cbb947d 100644
--- a/Makefile
+++ b/Makefile
@@ -569,6 +569,7 @@ micro-benchmarks: requirements .micro-benchmarks
 	. $(VIRTUALENV_DIR)/bin/activate; pytest --benchmark-histogram=benchmark_histograms/benchmark --benchmark-only --benchmark-name=short --benchmark-columns=min,max,mean,stddev,median,ops,rounds --benchmark-group-by=group,param:fixture_file -s -v st2common/benchmarks/micro/test_fast_deepcopy.py -k "test_fast_deepcopy_with_json_fixture_file"
 	. $(VIRTUALENV_DIR)/bin/activate; pytest --benchmark-histogram=benchmark_histograms/benchmark --benchmark-only --benchmark-name=short --benchmark-columns=min,max,mean,stddev,median,ops,rounds --benchmark-group-by=group,param:fixture_file,param:indent_sort_keys_tuple -s -v st2common/benchmarks/micro/test_json_serialization_and_deserialization.py -k "test_json_dumps"
 	. $(VIRTUALENV_DIR)/bin/activate; pytest --benchmark-histogram=benchmark_histograms/benchmark --benchmark-only --benchmark-name=short --benchmark-columns=min,max,mean,stddev,median,ops,rounds --benchmark-group-by=group,param:fixture_file -s -v st2common/benchmarks/micro/test_json_serialization_and_deserialization.py -k "test_json_loads"
+	. $(VIRTUALENV_DIR)/bin/activate; pytest --benchmark-histogram=benchmark_histograms/benchmark --benchmark-only --benchmark-name=short --benchmark-columns=min,max,mean,stddev,median,ops,rounds --benchmark-group-by=group,param:fixture_file -s -v st2common/benchmarks/micro/test_json_serialization_and_deserialization.py -k "test_orjson_dumps"
 	. $(VIRTUALENV_DIR)/bin/activate; pytest --benchmark-histogram=benchmark_histograms/benchmark --benchmark-only --benchmark-name=short --benchmark-columns=min,max,mean,stddev,median,ops,rounds --benchmark-group-by=group,param:fixture_file -s -v st2common/benchmarks/micro/test_publisher_compression.py -k "test_pickled_object_compression"
 	. $(VIRTUALENV_DIR)/bin/activate; pytest --benchmark-histogram=benchmark_histograms/benchmark --benchmark-only --benchmark-name=short --benchmark-columns=min,max,mean,stddev,median,ops,rounds --benchmark-group-by=group,param:fixture_file -s -v st2common/benchmarks/micro/test_publisher_compression.py -k "test_pickled_object_compression_publish"
diff --git a/st2common/benchmarks/micro/test_json_serialization_and_deserialization.py b/st2common/benchmarks/micro/test_json_serialization_and_deserialization.py
index e90626ae21..13b6da0bd6 100644
--- a/st2common/benchmarks/micro/test_json_serialization_and_deserialization.py
+++ b/st2common/benchmarks/micro/test_json_serialization_and_deserialization.py
@@ -132,3 +132,47 @@ def run_benchmark():
 
     result = benchmark(run_benchmark)
     assert result == content_loaded
+
+
+def default_handle_sets(obj):
+    if isinstance(obj, set):
+        return list(obj)
+    raise TypeError
+
+
+@pytest.mark.parametrize(
+    "fixture_file",
+    [
+        "rows.json",
+        "json_4mb.json",
+    ],
+    ids=[
+        "rows.json",
+        "json_4mb.json",
+    ],
+)
+@pytest.mark.parametrize(
+    "options",
+    [
+        {},
+        {"default": default_handle_sets},
+    ],
+    ids=[
+        "none",
+        "custom_default_function",
+    ],
+)
+def test_orjson_dumps(benchmark, fixture_file, options):
+    with open(os.path.join(FIXTURES_DIR, fixture_file), "r") as fp:
+        content = fp.read()
+
+    content_loaded = orjson.loads(content)
+
+    if options:
+        content_loaded["fooo_set"] = set([1, 2, 3, 3, 4, 5])
+
+    def run_benchmark():
+        return orjson.dumps(content_loaded, **options)
+
+    result = benchmark(run_benchmark)
+    assert len(result) >= 100
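As a quick aside on the new test_orjson_dumps benchmark above (this snippet is illustrative only and not part of the patch): orjson.dumps() raises TypeError for values it cannot encode natively, such as set, and the default callable is consulted only for those values, which is why the hook stays off the hot path for plain JSON data.

    import orjson

    def default_handle_sets(obj):
        # Invoked only for values orjson cannot serialize natively.
        if isinstance(obj, set):
            return list(obj)
        raise TypeError

    # Without a default hook, a set is rejected outright.
    try:
        orjson.dumps({"tags": {"a", "b"}})
    except TypeError:
        print("orjson refuses sets without a default hook")

    # With the hook, the set is converted to a list before encoding.
    print(orjson.dumps({"tags": {"a", "b"}}, default=default_handle_sets))
    # e.g. b'{"tags":["a","b"]}' (element order is not guaranteed)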
diff --git a/st2common/st2common/fields.py b/st2common/st2common/fields.py
index 6322d231d3..0e94f11f85 100644
--- a/st2common/st2common/fields.py
+++ b/st2common/st2common/fields.py
@@ -458,10 +458,26 @@ def _serialize_field_value(self, value: dict) -> bytes:
         """
         Serialize and encode the provided field value.
         """
+        # Orquesta workflows support the toSet() YAQL operator which returns a set, which used to
+        # get serialized to a list by the mongoengine DictField.
+        #
+        # For backward compatibility reasons, we need to support serializing a set to a list as
+        # well.
+        #
+        # Based on micro benchmarks, using a default function adds very little overhead (1%) so it
+        # should be safe to use default for every operation.
+        #
+        # If this turns out not to be true or it adds more overhead in other scenarios, we should
+        # revisit this decision and only use "default" argument where needed (aka Workflow models).
+        def default(obj):
+            if isinstance(obj, set):
+                return list(obj)
+            raise TypeError
+
         if not self.use_header:
-            return orjson.dumps(value)
+            return orjson.dumps(value, default=default)
 
-        data = orjson.dumps(value)
+        data = orjson.dumps(value, default=default)
 
         if self.compression_algorithm == "zstandard":
             # NOTE: At this point zstandard is only test dependency
diff --git a/st2common/tests/unit/test_db_fields.py b/st2common/tests/unit/test_db_fields.py
index e44a51561f..4da8400a78 100644
--- a/st2common/tests/unit/test_db_fields.py
+++ b/st2common/tests/unit/test_db_fields.py
@@ -73,6 +73,16 @@ class ModelWithJSONDictFieldDB(stormbase.StormFoundationDB):
 
 
 class JSONDictFieldTestCase(unittest2.TestCase):
+    def test_set_to_mongo(self):
+        field = JSONDictField(use_header=False)
+        result = field.to_mongo({"test": {1, 2}})
+        self.assertTrue(isinstance(result, bytes))
+
+    def test_header_set_to_mongo(self):
+        field = JSONDictField(use_header=True)
+        result = field.to_mongo({"test": {1, 2}})
+        self.assertTrue(isinstance(result, bytes))
+
     def test_to_mongo(self):
         field = JSONDictField(use_header=False)
         result = field.to_mongo(MOCK_DATA_DICT)
@@ -96,6 +106,16 @@ def test_roundtrip(self):
 
         self.assertEqual(result_to_python, MOCK_DATA_DICT)
 
+        # sets get serialized to a list
+        input_dict = {"a": 1, "set": {1, 2, 3, 4, 4, 4, 5, 5}}
+        result = {"a": 1, "set": [1, 2, 3, 4, 5]}
+
+        field = JSONDictField(use_header=False)
+        result_to_mongo = field.to_mongo(input_dict)
+        result_to_python = field.to_python(result_to_mongo)
+
+        self.assertEqual(result_to_python, result)
+
     def test_parse_field_value(self):
         # 1. Value not provided, should use default one
         field = JSONDictField(use_header=False, default={})
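For context (and not part of the patch itself), the round-trip behaviour the new tests pin down can be sketched as follows. The import path and method names are taken from the files touched above, so treat them as assumptions rather than a definitive usage example:

    from st2common.fields import JSONDictField

    field = JSONDictField(use_header=False)

    # to_mongo() serializes the dict with orjson; the default() hook added in
    # fields.py coerces any set values to lists.
    serialized = field.to_mongo({"a": 1, "set": {1, 2, 3}})
    assert isinstance(serialized, bytes)

    # to_python() decodes the bytes back into a plain dict; the set comes back
    # as a list, matching the pre-orjson mongoengine DictField behaviour.
    deserialized = field.to_python(serialized)
    assert deserialized == {"a": 1, "set": [1, 2, 3]}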