diff --git a/requirements.in b/requirements.in index 11d93a6f..3c9503e2 100644 --- a/requirements.in +++ b/requirements.in @@ -107,7 +107,7 @@ mizani==0.14.4 # via plotnine mock==5.2.0 # via mozanalysis -mozilla-metric-config-parser==2026.1.1 +mozilla-metric-config-parser==2026.4.1 # via mozanalysis mozilla-nimbus-schemas==3001.0.0 # via mozilla-metric-config-parser diff --git a/requirements.txt b/requirements.txt index e9f2baf5..18ab6aa2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1159,8 +1159,8 @@ mock==5.2.0 \ --hash=sha256:4e460e818629b4b173f32d08bf30d3af8123afbb8e04bb5707a1fd4799e503f0 \ --hash=sha256:7ba87f72ca0e915175596069dbbcc7c75af7b5e9b9bc107ad6349ede0819982f # via -r requirements.in -mozilla-metric-config-parser==2026.1.1 \ - --hash=sha256:0e74d8a453a839cd5eb82799655ddc1d2c79c461ce099375749c23b6ffdb4903 +mozilla-metric-config-parser==2026.4.1 \ + --hash=sha256:b349016cae82b59427b69f800f162a77ce80dc82693ef2a8718675e1b6198e90 # via -r requirements.in mozilla-nimbus-schemas==3001.0.0 \ --hash=sha256:25b89f8ce1e547b383e067023027b84bdbd01cc32b0375dcc5446df737ded533 \ diff --git a/src/mozanalysis/experiment.py b/src/mozanalysis/experiment.py index 5acbb0a3..afbabb29 100644 --- a/src/mozanalysis/experiment.py +++ b/src/mozanalysis/experiment.py @@ -4,11 +4,11 @@ from __future__ import annotations import logging -from enum import StrEnum from typing import TYPE_CHECKING, assert_never, cast import attr from metric_config_parser import AnalysisUnit +from metric_config_parser.experiment import EnrollmentsQueryType from mozanalysis import APPS from mozanalysis.bq import BigQueryContext, sanitize_table_name_for_bq @@ -26,13 +26,6 @@ logger = logging.getLogger(__name__) -class EnrollmentsQueryType(StrEnum): - CIRRUS = "cirrus" - FENIX_FALLBACK = "fenix-fallback" - NORMANDY = "normandy" - GLEAN_EVENT = "glean-event" - - def partition_segments_by_data_source( segment_list: list[Segment], ) -> dict[SegmentDataSource, list[Segment]]: @@ -782,6 +775,10 @@ def 
_build_enrollments_query( "Cirrus enrollments currently only support client_id analysis units" ) return self._build_enrollments_query_cirrus(time_limits, self.app_id) + elif enrollments_query_type == EnrollmentsQueryType.BACKGROUND_UPDATE: + return self._build_enrollments_query_background_update( + time_limits, sample_size + ) else: assert_never(enrollments_query_type) @@ -792,7 +789,12 @@ def _build_exposure_query( use_glean_ids: bool = False, ) -> str: """Return SQL to query a list of exposures and their branches""" - if exposure_query_type == EnrollmentsQueryType.NORMANDY: + # try to get exposure events from typical normandy sources + # even for background-update + if ( + exposure_query_type == EnrollmentsQueryType.NORMANDY + or exposure_query_type == EnrollmentsQueryType.BACKGROUND_UPDATE + ): if use_glean_ids: return self._build_exposure_query_glean_events_stream( time_limits, @@ -999,6 +1001,109 @@ def _build_enrollments_query_cirrus( GROUP BY ALL """ # noqa:E501 + def _build_enrollments_query_background_update( + self, time_limits: TimeLimits, sample_size: int = 100 + ) -> str: + """Return SQL to query enrollments for background-update experiments. + + These experiments do not send enrollment events through normal telemetry; + instead, they have their own datasets. + """ + return f""" + SELECT * FROM ( + ( + SELECT + JSON_VALUE( + metrics, '$.uuid.background_update_client_id' + ) AS analysis_id, + JSON_VALUE(event_extra, '$.branch') AS branch, + MIN(DATE(events.submission_timestamp)) AS enrollment_date, + COUNT(events.submission_timestamp) AS num_enrollment_events + FROM + `moz-fx-data-shared-prod.firefox_desktop_background_update.events_stream` + events + WHERE + DATE(submission_timestamp) BETWEEN + '{time_limits.first_enrollment_date}' + AND '{time_limits.last_enrollment_date}' + AND event_category = 'nimbus_events' + AND event_name = 'enrollment' + -- The background update experiment slug is exact.
+ AND JSON_VALUE(event_extra, '$.experiment') = '{self.experiment_slug}' + -- This should never happen, but belt-and-braces. + AND JSON_VALUE( + metrics, '$.uuid.background_update_client_id' + ) IS NOT NULL + AND sample_id < {sample_size} + GROUP BY analysis_id, branch + ) + + UNION ALL + + ( + SELECT + m.metrics.uuid.background_update_client_id AS analysis_id, + experiment.value.branch AS branch, + MIN(DATE(submission_timestamp)) AS enrollment_date, + -- These are not discrete events, it makes no sense to count them. + 1 AS num_enrollment_events + -- We need to query from the Glean `background_update` table because + -- pre-[Bug 1794053](https://bugzilla.mozilla.org/show_bug.cgi?id=1794053) + -- (scheduled for Firefox 109) we do not have the legacy client ID in + -- `mozdata.firefox_desktop_background_update.events`. + FROM `mozdata.firefox_desktop_background_update.background_update` AS m + CROSS JOIN + UNNEST(ping_info.experiments) AS experiment + WHERE + -- Background update telemetry can be delayed, so we accept enrollment + -- _submission_ dates during the elongated enrollment period. It is + -- safer to compare submission dates generated server-side than internal + -- ping dates generated client-side. + DATE(submission_timestamp) BETWEEN + '{time_limits.first_enrollment_date}' + AND '{time_limits.last_enrollment_date}' + -- The background update experiment slug is exact. 
+ AND experiment.key = '{self.experiment_slug}' + AND sample_id < {sample_size} + GROUP BY analysis_id, branch + ) + + UNION ALL + + ( + SELECT + client_id AS analysis_id, + SPLIT( + mozfun.map.get_key(event_map_values, 'name'), ':' + )[SAFE_OFFSET(1)] AS branch, + MIN(submission_date) AS enrollment_date, + COUNT(submission_date) AS num_enrollment_events + FROM + `mozdata.telemetry.events` + WHERE + submission_date BETWEEN + '{time_limits.first_enrollment_date}' + AND '{time_limits.last_enrollment_date}' + AND event_category = 'browser.launched_to_handle' + AND event_method = 'system_notification' + AND event_object = 'toast' + -- Post [Bug 1804988](https://bugzilla.mozilla.org/show_bug.cgi?id=1804988), + -- this name looks like 'slug:branch'. + AND STARTS_WITH( + mozfun.map.get_key(event_map_values, 'name'), + '{self.experiment_slug}:' + ) + AND sample_id < {sample_size} + GROUP BY + analysis_id, branch + ) + + ) + QUALIFY ROW_NUMBER() OVER ( + PARTITION BY analysis_id ORDER BY enrollment_date ASC + ) = 1 + """ + def _build_exposure_query_normandy(self, time_limits: TimeLimits) -> str: """Return SQL to query exposures for a normandy experiment""" return f""" diff --git a/tests/test_experiment.py b/tests/test_experiment.py index dddd39c9..2b7f85b0 100644 --- a/tests/test_experiment.py +++ b/tests/test_experiment.py @@ -11,11 +11,11 @@ klar_ios_metrics, ) from metric_config_parser import AnalysisUnit +from metric_config_parser.experiment import EnrollmentsQueryType from mozanalysis.config import ApplicationNotFound, ConfigLoader from mozanalysis.experiment import ( AnalysisWindow, - EnrollmentsQueryType, Experiment, IncompatibleAnalysisUnit, TimeLimits, @@ -289,10 +289,18 @@ def test_analysis_window_validates_end(): @pytest.mark.parametrize( - "analysis_unit", [AnalysisUnit.CLIENT, AnalysisUnit.PROFILE_GROUP] + ("analysis_unit", "enrollments_query_type"), + [ + (AnalysisUnit.CLIENT, EnrollmentsQueryType.BACKGROUND_UPDATE), + (AnalysisUnit.PROFILE_GROUP, 
EnrollmentsQueryType.NORMANDY), + ], ) -def test_query_not_detectably_malformed(analysis_unit: AnalysisUnit): - exp = Experiment("slug", "2019-01-01", 8, analysis_unit=analysis_unit) +def test_query_not_detectably_malformed( + analysis_unit: AnalysisUnit, enrollments_query_type: EnrollmentsQueryType +): + exp = Experiment( + "experiment-test-slug", "2019-01-01", 8, analysis_unit=analysis_unit + ) tl = TimeLimits.for_ts( first_enrollment_date="2019-01-01", @@ -303,14 +311,21 @@ def test_query_not_detectably_malformed(analysis_unit: AnalysisUnit): enrollments_sql = exp.build_enrollments_query( time_limits=tl, - enrollments_query_type=EnrollmentsQueryType.NORMANDY, + enrollments_query_type=enrollments_query_type, sample_size=None, ) sql_lint(enrollments_sql) assert "sample_id < None" not in enrollments_sql - assert enrollments_sql.count(analysis_unit.value) == 2 + if enrollments_query_type == EnrollmentsQueryType.BACKGROUND_UPDATE: + assert enrollments_sql.count(analysis_unit.value) == 5 + assert enrollments_sql.count("experiment-test-slug") == 4 + assert enrollments_sql.count("sample_id <") == 3 + else: + assert enrollments_sql.count(analysis_unit.value) == 2 + assert enrollments_sql.count("experiment-test-slug") == 2 + assert enrollments_sql.count("sample_id <") == 1 metrics_sql = exp.build_metrics_query( metric_list=[],