From a5afc050d48cb2489039c1e3d3eae03b1eecd8c0 Mon Sep 17 00:00:00 2001 From: Mike Williams Date: Mon, 13 Apr 2026 14:48:57 -0400 Subject: [PATCH 1/2] feat: compute sample_id dynamically for enrollments --- src/mozanalysis/experiment.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/mozanalysis/experiment.py b/src/mozanalysis/experiment.py index 5acbb0a3..fce0b0e0 100644 --- a/src/mozanalysis/experiment.py +++ b/src/mozanalysis/experiment.py @@ -844,6 +844,7 @@ def _build_enrollments_query_normandy( return f""" SELECT e.{self.analysis_unit.value} AS analysis_id, + udf.safe_sample_id({self.analysis_unit.value}) AS sample_id, `mozfun.map.get_key`(e.event_map_values, 'branch') AS branch, MIN(e.submission_date) AS enrollment_date, @@ -877,6 +878,7 @@ def _build_enrollments_query_fenix_baseline( return """ SELECT b.client_info.client_id AS analysis_id, + udf.safe_sample_id(b.client_info.client_id) AS sample_id, mozfun.map.get_key( b.ping_info.experiments, '{experiment_slug}' @@ -918,6 +920,7 @@ def _build_enrollments_query_glean_events_stream( return f""" SELECT {analysis_id} AS analysis_id, + udf.safe_sample_id({analysis_id}) AS sample_id, JSON_VALUE(event_extra, '$.branch') AS branch, DATE(MIN(submission_timestamp)) AS enrollment_date, COUNT(submission_timestamp) AS num_enrollment_events @@ -947,6 +950,7 @@ def _build_enrollments_query_glean_events_stream_enrollment_status( return f""" SELECT client_id AS analysis_id, + udf.safe_sample_id(analysis_id) AS sample_id, JSON_VALUE(event_extra, '$.branch') AS branch, DATE(MIN(submission_timestamp)) AS enrollment_date, COUNT(submission_timestamp) AS num_enrollment_events @@ -980,6 +984,7 @@ def _build_enrollments_query_cirrus( return f""" SELECT mozfun.map.get_key(e.extra, "nimbus_user_id") AS analysis_id, + 0 AS sample_id, mozfun.map.get_key( e.extra, 'branch' @@ -1004,6 +1009,7 @@ def _build_exposure_query_normandy(self, time_limits: TimeLimits) -> str: return f""" SELECT e.analysis_id, + 
udf.safe_sample_id(e.analysis_id) AS sample_id, e.branch, min(e.submission_date) AS exposure_date, COUNT(e.submission_date) AS num_exposure_events @@ -1039,6 +1045,7 @@ def _build_exposure_query_glean_event( return f""" SELECT exposures.analysis_id AS analysis_id, + udf.safe_sample_id(exposures.analysis_id) AS sample_id, exposures.branch, DATE(MIN(exposures.submission_date)) AS exposure_date, COUNT(exposures.submission_date) AS num_exposure_events @@ -1077,6 +1084,7 @@ def _build_exposure_query_glean_events_stream( return f""" SELECT exposures.analysis_id AS analysis_id, + udf.safe_sample_id(exposures.analysis_id) AS sample_id, exposures.branch, DATE(MIN(exposures.submission_date)) AS exposure_date, COUNT(exposures.submission_date) AS num_exposure_events From 33be13494b1ec9c74abae52b9de607c720341c3c Mon Sep 17 00:00:00 2001 From: Mike Williams Date: Thu, 16 Apr 2026 12:16:32 -0400 Subject: [PATCH 2/2] fix: remove sample_id from exposure queries and update enrollments test --- src/mozanalysis/experiment.py | 3 --- tests/test_experiment.py | 1 + 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/mozanalysis/experiment.py b/src/mozanalysis/experiment.py index fce0b0e0..0a9eede4 100644 --- a/src/mozanalysis/experiment.py +++ b/src/mozanalysis/experiment.py @@ -1009,7 +1009,6 @@ def _build_exposure_query_normandy(self, time_limits: TimeLimits) -> str: return f""" SELECT e.analysis_id, - udf.safe_sample_id(e.analysis_id) AS sample_id, e.branch, min(e.submission_date) AS exposure_date, COUNT(e.submission_date) AS num_exposure_events @@ -1045,7 +1044,6 @@ def _build_exposure_query_glean_event( return f""" SELECT exposures.analysis_id AS analysis_id, - udf.safe_sample_id(exposures.analysis_id) AS sample_id, exposures.branch, DATE(MIN(exposures.submission_date)) AS exposure_date, COUNT(exposures.submission_date) AS num_exposure_events @@ -1084,7 +1082,6 @@ def _build_exposure_query_glean_events_stream( return f""" SELECT exposures.analysis_id AS analysis_id, - udf.safe_sample_id(exposures.analysis_id) AS sample_id, 
exposures.branch, DATE(MIN(exposures.submission_date)) AS exposure_date, COUNT(exposures.submission_date) AS num_exposure_events diff --git a/tests/test_experiment.py b/tests/test_experiment.py index dddd39c9..f648f2ee 100644 --- a/tests/test_experiment.py +++ b/tests/test_experiment.py @@ -1062,6 +1062,7 @@ def test_enrollments_query_analysis_unit(analysis_unit): WITH raw_enrollments AS ( SELECT e.{analysis_unit.value} AS analysis_id, + udf.safe_sample_id({analysis_unit.value}) AS sample_id, `mozfun.map.get_key`(e.event_map_values, 'branch') AS branch, MIN(e.submission_date) AS enrollment_date,