diff --git a/sdk/monitor/azure-monitor-opentelemetry-exporter/CHANGELOG.md b/sdk/monitor/azure-monitor-opentelemetry-exporter/CHANGELOG.md index 8494c6ad9a2f..567b24599dff 100644 --- a/sdk/monitor/azure-monitor-opentelemetry-exporter/CHANGELOG.md +++ b/sdk/monitor/azure-monitor-opentelemetry-exporter/CHANGELOG.md @@ -3,7 +3,8 @@ ## 1.0.0b49 (Unreleased) ### Features Added - +- Added a 256 KiB (262,144 characters) custom properties truncation limit on specific GenAI attributes + ([#45749](https://github.com/Azure/azure-sdk-for-python/pull/45749)) - Add Browser SDK loader SDK Stats feature bit ([#42904](https://github.com/Azure/azure-sdk-for-python/pull/44162)) ### Breaking Changes diff --git a/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_utils.py b/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_utils.py index 5b8d016eaf76..a01120e5a8fa 100644 --- a/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_utils.py +++ b/sdk/monitor/azure-monitor-opentelemetry-exporter/azure/monitor/opentelemetry/exporter/_utils.py @@ -352,6 +352,7 @@ def _is_any_synthetic_source(properties: Optional[Any]) -> bool: # pylint: disable=W0622 def _filter_custom_properties(properties: Attributes, filter=None) -> Dict[str, str]: max_length = 64 * 1024 + max_length_for_gen_ai_attributes = 256 * 1024 processed_properties: Dict[str, str] = {} if not properties: return processed_properties @@ -365,7 +366,7 @@ def _filter_custom_properties(properties: Attributes, filter=None) -> Dict[str, if not key or len(key) > 150 or val is None: continue if key in _GEN_AI_ATTRIBUTES: - processed_properties[key] = str(val) + processed_properties[key] = str(val)[:max_length_for_gen_ai_attributes] else: processed_properties[key] = str(val)[:max_length] return processed_properties diff --git a/sdk/monitor/azure-monitor-opentelemetry-exporter/tests/test_utils.py 
b/sdk/monitor/azure-monitor-opentelemetry-exporter/tests/test_utils.py index 3101992e8f25..984c056014dd 100644 --- a/sdk/monitor/azure-monitor-opentelemetry-exporter/tests/test_utils.py +++ b/sdk/monitor/azure-monitor-opentelemetry-exporter/tests/test_utils.py @@ -54,8 +54,8 @@ def test_filter_custom_properties_drops_invalid_entries(self): self.assertEqual(filtered["short"], "ok") self.assertNotIn("k" * 151, filtered) - def test_custom_properties_gen_ai_attributes_not_truncated(self): - # All values in _GEN_AI_ATTRIBUTES should not be truncated even when > 64KiB + def test_custom_properties_gen_ai_attributes_not_truncated_at_64kb(self): + # All values in _GEN_AI_ATTRIBUTES should not be truncated at > 64kb but at > 256kb large_value = "x" * (64 * 1024 + 1000) properties = {key: large_value for key in _GEN_AI_ATTRIBUTES} filtered = _utils._filter_custom_properties(properties) @@ -65,7 +65,7 @@ def test_custom_properties_gen_ai_attributes_not_truncated(self): self.assertEqual(len(filtered[key]), 64 * 1024 + 1000) def test_filter_custom_properties_non_gen_ai_truncated_at_64kb(self): - # Regular properties exceeding 64KiB should be truncated + # Regular properties exceeding 64kb should be truncated max_length = 64 * 1024 large_value = "y" * (max_length + 2000) properties = { @@ -81,9 +81,10 @@ def test_filter_custom_properties_non_gen_ai_truncated_at_64kb(self): self.assertEqual(len(filtered[key]), max_length) def test_filter_custom_properties_mixed_gen_ai_and_regular(self): - # Gen AI attributes keep full value, regular ones are truncated + # Gen AI attributes truncated at 256kb, regular ones are truncated at 64kb max_length = 64 * 1024 - large_value = "z" * (max_length + 3000) + max_length_for_gen_ai_attributes = 256 * 1024 + large_value = "z" * (1024 * 1024 + 3000) properties = { "gen_ai.input.messages": large_value, "gen_ai.output.messages": large_value, @@ -92,14 +93,25 @@ def test_filter_custom_properties_mixed_gen_ai_and_regular(self): "db.statement": 
large_value, } filtered = _utils._filter_custom_properties(properties) - # Gen AI attributes — not truncated - self.assertEqual(len(filtered["gen_ai.input.messages"]), max_length + 3000) - self.assertEqual(len(filtered["gen_ai.output.messages"]), max_length + 3000) - # Regular attributes — truncated + + self.assertEqual(len(filtered["gen_ai.input.messages"]), max_length_for_gen_ai_attributes) + self.assertEqual(len(filtered["gen_ai.output.messages"]), max_length_for_gen_ai_attributes) + self.assertEqual(len(filtered["gen_ai.agent.version"]), max_length) self.assertEqual(len(filtered["span_kind"]), max_length) self.assertEqual(len(filtered["db.statement"]), max_length) + def test_custom_properties_gen_ai_attributes_truncated_at_256kb(self): + # All values in _GEN_AI_ATTRIBUTES should be truncated when > 256kb + max_length_for_gen_ai_attributes = 256 * 1024 + large_value = "x" * (256 * 1024 + 1000) + properties = {key: large_value for key in _GEN_AI_ATTRIBUTES} + filtered = _utils._filter_custom_properties(properties) + for key in _GEN_AI_ATTRIBUTES: + with self.subTest(key=key): + self.assertIn(key, filtered) + self.assertEqual(len(filtered[key]), max_length_for_gen_ai_attributes) + def test_nanoseconds_to_duration(self): ns_to_duration = _utils.ns_to_duration self.assertEqual(ns_to_duration(0), "0.00:00:00.000")