From 5114a373f030152400c116d2359d53a7fbb4c879 Mon Sep 17 00:00:00 2001 From: Daniel Thorn Date: Mon, 5 May 2025 11:58:30 -0700 Subject: [PATCH] Add new experiments_column_type to support events_stream tables --- src/mozanalysis/metrics.py | 38 +++++++++++++++++++++++++++++++++++++- tests/test_metrics.py | 4 +++- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/src/mozanalysis/metrics.py b/src/mozanalysis/metrics.py index 82889bc4..aae09e33 100644 --- a/src/mozanalysis/metrics.py +++ b/src/mozanalysis/metrics.py @@ -116,7 +116,7 @@ class DataSource: converter=group_id_column_converter, ) - EXPERIMENT_COLUMN_TYPES = (None, "simple", "native", "glean") + EXPERIMENT_COLUMN_TYPES = (None, "simple", "native", "glean", "events_stream") @experiments_column_type.validator def _check_experiments_column_type(self, attribute, value): @@ -177,6 +177,16 @@ def experiments_column_expr(self) -> str: ).branch IS NOT NULL )""" + elif self.experiments_column_type == "events_stream": + return """AND ( + ds.{submission_date} != e.enrollment_date + OR IF( + JSON_VALUE(ds.event_extra, '$.experiment') = '{experiment_slug}', + JSON_VALUE(ds.event_extra, '$.branch'), + NULL + ) IS NOT NULL + )""" # noqa:E501 + else: raise ValueError @@ -413,6 +423,32 @@ def get_sanity_metrics(self, experiment_slug: str) -> list[Metric]: ), ] + elif self.experiments_column_type == "events_stream": + return [ + Metric( + name=self.name + "_has_contradictory_branch", + data_source=self, + select_expr=agg_any( + """IF( + JSON_VALUE(ds.event_extra, '$.experiment') = '{experiment_slug}', + JSON_VALUE(ds.event_extra, '$.branch'), + NULL + ) != e.branch """ + ), + ), + Metric( + name=self.name + "_has_non_enrolled_data", + data_source=self, + select_expr=agg_any( + f"""IF( + JSON_VALUE(ds.event_extra, '$.experiment') = '{experiment_slug}', + JSON_VALUE(ds.event_extra, '$.branch'), + NULL + ) IS NULL""" + ), + ), + ] + else: raise ValueError diff --git a/tests/test_metrics.py b/tests/test_metrics.py index 8a56cd10..1f50faf4 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -7,7 +7,9 @@ from mozanalysis.metrics import AnalysisBasis, DataSource, Metric -@pytest.mark.parametrize("experiments_column_type", [None, "simple", "native", "glean"]) +@pytest.mark.parametrize( + "experiments_column_type", [None, "simple", "native", "glean", "events_stream"] +) def test_datasource_constructor_succeeds(experiments_column_type): DataSource( name="foo",