From 7ff2b1f04d3fe1d4690a267c130f46939e5789cc Mon Sep 17 00:00:00 2001 From: guzzijones12 Date: Thu, 13 May 2021 20:08:27 +0000 Subject: [PATCH 1/7] orjson handle set --- st2common/st2common/fields.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/st2common/st2common/fields.py b/st2common/st2common/fields.py index 6322d231d3..8dbbe46cda 100644 --- a/st2common/st2common/fields.py +++ b/st2common/st2common/fields.py @@ -458,10 +458,14 @@ def _serialize_field_value(self, value: dict) -> bytes: """ Serialize and encode the provided field value. """ + def default(obj): + if isinstance(obj, set): + return list(obj) + raise TypeError if not self.use_header: - return orjson.dumps(value) + return orjson.dumps(value, default=default) - data = orjson.dumps(value) + data = orjson.dumps(value, default=default) if self.compression_algorithm == "zstandard": # NOTE: At this point zstandard is only test dependency From b182108ae6d4ea985723e52176d95b781e37a78b Mon Sep 17 00:00:00 2001 From: guzzijones12 Date: Thu, 13 May 2021 20:32:25 +0000 Subject: [PATCH 2/7] unit test for set fix to to_mongo --- st2common/tests/unit/test_db_fields.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/st2common/tests/unit/test_db_fields.py b/st2common/tests/unit/test_db_fields.py index e44a51561f..6aaa44393f 100644 --- a/st2common/tests/unit/test_db_fields.py +++ b/st2common/tests/unit/test_db_fields.py @@ -73,6 +73,11 @@ class ModelWithJSONDictFieldDB(stormbase.StormFoundationDB): class JSONDictFieldTestCase(unittest2.TestCase): + def test_set_to_mongo(self): + field = JSONDictField(use_header=False) + result = field.to_mongo({"test":{1,2}}) + self.assertTrue(isinstance(result, bytes)) + def test_to_mongo(self): field = JSONDictField(use_header=False) result = field.to_mongo(MOCK_DATA_DICT) From eb810ef85451f0e3825fe841003f6b902667fe43 Mon Sep 17 00:00:00 2001 From: guzzijones12 Date: Fri, 14 May 2021 13:07:19 +0000 Subject: [PATCH 3/7] black fixes --- st2common/st2common/fields.py | 2 ++ st2common/tests/unit/test_db_fields.py | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/st2common/st2common/fields.py b/st2common/st2common/fields.py index 8dbbe46cda..b0fd075b96 100644 --- a/st2common/st2common/fields.py +++ b/st2common/st2common/fields.py @@ -458,10 +458,12 @@ def _serialize_field_value(self, value: dict) -> bytes: """ Serialize and encode the provided field value. """ + def default(obj): if isinstance(obj, set): return list(obj) raise TypeError + if not self.use_header: return orjson.dumps(value, default=default) diff --git a/st2common/tests/unit/test_db_fields.py b/st2common/tests/unit/test_db_fields.py index 6aaa44393f..9b0d59f5c9 100644 --- a/st2common/tests/unit/test_db_fields.py +++ b/st2common/tests/unit/test_db_fields.py @@ -75,9 +75,9 @@ class ModelWithJSONDictFieldDB(stormbase.StormFoundationDB): class JSONDictFieldTestCase(unittest2.TestCase): def test_set_to_mongo(self): field = JSONDictField(use_header=False) - result = field.to_mongo({"test":{1,2}}) + result = field.to_mongo({"test": {1, 2}}) self.assertTrue(isinstance(result, bytes)) - + def test_to_mongo(self): field = JSONDictField(use_header=False) result = field.to_mongo(MOCK_DATA_DICT) From 62f7e35dc2907c70aa3e1d554a3ce5d668b089e2 Mon Sep 17 00:00:00 2001 From: guzzijones12 Date: Fri, 14 May 2021 13:14:27 +0000 Subject: [PATCH 4/7] add no header test for serialize set --- st2common/tests/unit/test_db_fields.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/st2common/tests/unit/test_db_fields.py b/st2common/tests/unit/test_db_fields.py index 9b0d59f5c9..cc03b5448f 100644 --- a/st2common/tests/unit/test_db_fields.py +++ b/st2common/tests/unit/test_db_fields.py @@ -78,6 +78,11 @@ def test_set_to_mongo(self): result = field.to_mongo({"test": {1, 2}}) self.assertTrue(isinstance(result, bytes)) + def test_header_set_to_mongo(self): + field = JSONDictField(use_header=True) + result = field.to_mongo({"test": {1, 2}}) + self.assertTrue(isinstance(result, bytes)) + def test_to_mongo(self): field = JSONDictField(use_header=False) result = field.to_mongo(MOCK_DATA_DICT) From fb2dce41e17a9ef503ccac2edcf283acbffa78ec Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sat, 22 May 2021 21:04:23 +0200 Subject: [PATCH 5/7] Add micro-benchmark which measures the overhead of using default function for orjson.dumps. --- Makefile | 1 + ..._json_serialization_and_deserialization.py | 44 +++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/Makefile b/Makefile index c6ce74e937..699cbb947d 100644 --- a/Makefile +++ b/Makefile @@ -569,6 +569,7 @@ micro-benchmarks: requirements .micro-benchmarks . $(VIRTUALENV_DIR)/bin/activate; pytest --benchmark-histogram=benchmark_histograms/benchmark --benchmark-only --benchmark-name=short --benchmark-columns=min,max,mean,stddev,median,ops,rounds --benchmark-group-by=group,param:fixture_file -s -v st2common/benchmarks/micro/test_fast_deepcopy.py -k "test_fast_deepcopy_with_json_fixture_file" . $(VIRTUALENV_DIR)/bin/activate; pytest --benchmark-histogram=benchmark_histograms/benchmark --benchmark-only --benchmark-name=short --benchmark-columns=min,max,mean,stddev,median,ops,rounds --benchmark-group-by=group,param:fixture_file,param:indent_sort_keys_tuple -s -v st2common/benchmarks/micro/test_json_serialization_and_deserialization.py -k "test_json_dumps" . $(VIRTUALENV_DIR)/bin/activate; pytest --benchmark-histogram=benchmark_histograms/benchmark --benchmark-only --benchmark-name=short --benchmark-columns=min,max,mean,stddev,median,ops,rounds --benchmark-group-by=group,param:fixture_file -s -v st2common/benchmarks/micro/test_json_serialization_and_deserialization.py -k "test_json_loads" + . $(VIRTUALENV_DIR)/bin/activate; pytest --benchmark-histogram=benchmark_histograms/benchmark --benchmark-only --benchmark-name=short --benchmark-columns=min,max,mean,stddev,median,ops,rounds --benchmark-group-by=group,param:fixture_file -s -v st2common/benchmarks/micro/test_json_serialization_and_deserialization.py -k "test_orjson_dumps" . $(VIRTUALENV_DIR)/bin/activate; pytest --benchmark-histogram=benchmark_histograms/benchmark --benchmark-only --benchmark-name=short --benchmark-columns=min,max,mean,stddev,median,ops,rounds --benchmark-group-by=group,param:fixture_file -s -v st2common/benchmarks/micro/test_publisher_compression.py -k "test_pickled_object_compression" . $(VIRTUALENV_DIR)/bin/activate; pytest --benchmark-histogram=benchmark_histograms/benchmark --benchmark-only --benchmark-name=short --benchmark-columns=min,max,mean,stddev,median,ops,rounds --benchmark-group-by=group,param:fixture_file -s -v st2common/benchmarks/micro/test_publisher_compression.py -k "test_pickled_object_compression_publish" diff --git a/st2common/benchmarks/micro/test_json_serialization_and_deserialization.py b/st2common/benchmarks/micro/test_json_serialization_and_deserialization.py index e90626ae21..13b6da0bd6 100644 --- a/st2common/benchmarks/micro/test_json_serialization_and_deserialization.py +++ b/st2common/benchmarks/micro/test_json_serialization_and_deserialization.py @@ -132,3 +132,47 @@ def run_benchmark(): result = benchmark(run_benchmark) assert result == content_loaded + + +def default_handle_sets(obj): + if isinstance(obj, set): + return list(obj) + raise TypeError + + +@pytest.mark.parametrize( + "fixture_file", + [ + "rows.json", + "json_4mb.json", + ], + ids=[ + "rows.json", + "json_4mb.json", + ], +) +@pytest.mark.parametrize( + "options", + [ + {}, + {"default": default_handle_sets}, + ], + ids=[ + "none", + "custom_default_function", + ], +) +def test_orjson_dumps(benchmark, fixture_file, options): + with open(os.path.join(FIXTURES_DIR, fixture_file), "r") as fp: + content = fp.read() + + content_loaded = orjson.loads(content) + + if options: + content_loaded["fooo_set"] = set([1, 2, 3, 3, 4, 5]) + + def run_benchmark(): + return orjson.dumps(content_loaded, **options) + + result = benchmark(run_benchmark) + assert len(result) >= 100 From eec022e5a8c569d9adfccea9bfa69d46f4952d35 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sat, 22 May 2021 21:14:45 +0200 Subject: [PATCH 6/7] Add a comment on why we need custom default function for orjson.dumps. --- st2common/st2common/fields.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/st2common/st2common/fields.py b/st2common/st2common/fields.py index b0fd075b96..0e94f11f85 100644 --- a/st2common/st2common/fields.py +++ b/st2common/st2common/fields.py @@ -458,7 +458,17 @@ def _serialize_field_value(self, value: dict) -> bytes: """ Serialize and encode the provided field value. """ - + # Orquesta workflows support toSet() YAQL operator which returns a set which used to get + # serialized to list by mongoengine DictField. + # + # For backward compatibility reasons, we need to support serializing set to a list as + # well. + # + # Based on micro benchmarks, using default function adds very little overhead (1%) so it + # should be safe to use default for every operation. + # + # If this turns out to be not true or it adds more overhead in other scenarios, we should + # revisit this decision and only use "default" argument where needed (aka Workflow models). def default(obj): if isinstance(obj, set): return list(obj) From bdbce41ecdaa1a12a581f8f279da9f88711d3286 Mon Sep 17 00:00:00 2001 From: Tomaz Muraus Date: Sat, 22 May 2021 21:18:58 +0200 Subject: [PATCH 7/7] Also add round-trip test. --- st2common/tests/unit/test_db_fields.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/st2common/tests/unit/test_db_fields.py b/st2common/tests/unit/test_db_fields.py index cc03b5448f..4da8400a78 100644 --- a/st2common/tests/unit/test_db_fields.py +++ b/st2common/tests/unit/test_db_fields.py @@ -106,6 +106,16 @@ def test_roundtrip(self): self.assertEqual(result_to_python, MOCK_DATA_DICT) + # sets get serialized to a list + input_dict = {"a": 1, "set": {1, 2, 3, 4, 4, 4, 5, 5}} + result = {"a": 1, "set": [1, 2, 3, 4, 5]} + + field = JSONDictField(use_header=False) + result_to_mongo = field.to_mongo(input_dict) + result_to_python = field.to_python(result_to_mongo) + + self.assertEqual(result_to_python, result) + def test_parse_field_value(self): # 1. Value not provided, should use default one field = JSONDictField(use_header=False, default={})