Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ mizani==0.14.4
# via plotnine
mock==5.2.0
# via mozanalysis
mozilla-metric-config-parser==2026.1.1
mozilla-metric-config-parser==2026.4.1
# via mozanalysis
mozilla-nimbus-schemas==3001.0.0
# via mozilla-metric-config-parser
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1159,8 +1159,8 @@ mock==5.2.0 \
--hash=sha256:4e460e818629b4b173f32d08bf30d3af8123afbb8e04bb5707a1fd4799e503f0 \
--hash=sha256:7ba87f72ca0e915175596069dbbcc7c75af7b5e9b9bc107ad6349ede0819982f
# via -r requirements.in
mozilla-metric-config-parser==2026.1.1 \
--hash=sha256:0e74d8a453a839cd5eb82799655ddc1d2c79c461ce099375749c23b6ffdb4903
mozilla-metric-config-parser==2026.4.1 \
--hash=sha256:b349016cae82b59427b69f800f162a77ce80dc82693ef2a8718675e1b6198e90
# via -r requirements.in
mozilla-nimbus-schemas==3001.0.0 \
--hash=sha256:25b89f8ce1e547b383e067023027b84bdbd01cc32b0375dcc5446df737ded533 \
Expand Down
123 changes: 114 additions & 9 deletions src/mozanalysis/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
from __future__ import annotations

import logging
from enum import StrEnum
from typing import TYPE_CHECKING, assert_never, cast

import attr
from metric_config_parser import AnalysisUnit
from metric_config_parser.experiment import EnrollmentsQueryType

from mozanalysis import APPS
from mozanalysis.bq import BigQueryContext, sanitize_table_name_for_bq
Expand All @@ -26,13 +26,6 @@
logger = logging.getLogger(__name__)


class EnrollmentsQueryType(StrEnum):
    """String-valued enum naming the kinds of enrollments query."""

    # NOTE(review): this diff also imports ``EnrollmentsQueryType`` from
    # ``metric_config_parser.experiment`` and dispatches on a
    # ``BACKGROUND_UPDATE`` member that is not defined here, so this local
    # class is presumably the removed side of the hunk -- confirm.
    CIRRUS = "cirrus"
    FENIX_FALLBACK = "fenix-fallback"
    NORMANDY = "normandy"
    GLEAN_EVENT = "glean-event"


def partition_segments_by_data_source(
segment_list: list[Segment],
) -> dict[SegmentDataSource, list[Segment]]:
Expand Down Expand Up @@ -782,6 +775,10 @@ def _build_enrollments_query(
"Cirrus enrollments currently only support client_id analysis units"
)
return self._build_enrollments_query_cirrus(time_limits, self.app_id)
elif enrollments_query_type == EnrollmentsQueryType.BACKGROUND_UPDATE:
return self._build_enrollments_query_background_update(
time_limits, sample_size
)
else:
assert_never(enrollments_query_type)

Expand All @@ -792,7 +789,12 @@ def _build_exposure_query(
use_glean_ids: bool = False,
) -> str:
"""Return SQL to query a list of exposures and their branches"""
if exposure_query_type == EnrollmentsQueryType.NORMANDY:
# try to get exposure events from typical normandy sources
# even for background-update
if (
exposure_query_type == EnrollmentsQueryType.NORMANDY
or exposure_query_type == EnrollmentsQueryType.BACKGROUND_UPDATE
):
if use_glean_ids:
return self._build_exposure_query_glean_events_stream(
time_limits,
Expand Down Expand Up @@ -999,6 +1001,109 @@ def _build_enrollments_query_cirrus(
GROUP BY ALL
""" # noqa:E501

def _build_enrollments_query_background_update(
    self, time_limits: TimeLimits, sample_size: int = 100
) -> str:
    """Return SQL to query enrollments for background-update experiments.

    These experiments do not send enrollment events in the normal telemetry,
    rather they have their own datasets.

    The query is a ``UNION ALL`` of three sources, each filtered to the
    enrollment period, to this experiment's slug, and to
    ``sample_id < sample_size``:

    * ``nimbus_events`` / ``enrollment`` events in the background-update
      ``events_stream`` table, keyed by the ``background_update_client_id``
      found in the ping's ``metrics`` JSON;
    * experiment annotations (``ping_info.experiments``) on the Glean
      ``background_update`` table, where every matching client is given
      a constant ``num_enrollment_events`` of 1;
    * ``browser.launched_to_handle`` toast-notification events in
      ``mozdata.telemetry.events`` whose ``name`` starts with
      ``<slug>:``; the branch is the text after the first ``:``.

    Every source yields the columns ``analysis_id``, ``branch``,
    ``enrollment_date`` and ``num_enrollment_events``. The outer
    ``QUALIFY`` then keeps a single row per ``analysis_id``: the one with
    the earliest ``enrollment_date``.

    Args:
        time_limits: Supplies ``first_enrollment_date`` and
            ``last_enrollment_date``, used as the bounds of the
            ``BETWEEN`` date filter in each source.
        sample_size: Exclusive upper bound applied to ``sample_id`` in
            each source.

    Returns:
        The enrollments query as a SQL string.
    """
    # NOTE(review): the final QUALIFY orders only by enrollment_date, so when
    # one analysis_id has several rows sharing the earliest date (different
    # sources or branches) the surviving row is not pinned down by the query
    # text -- confirm whether a tie-breaker is wanted.
    return f"""
SELECT * FROM (
(
SELECT
JSON_VALUE(
metrics, '$.uuid.background_update_client_id'
) AS analysis_id,
JSON_VALUE(event_extra, '$.branch') AS branch,
MIN(DATE(events.submission_timestamp)) AS enrollment_date,
COUNT(events.submission_timestamp) AS num_enrollment_events
FROM
`moz-fx-data-shared-prod.firefox_desktop_background_update.events_stream`
events
WHERE
DATE(submission_timestamp) BETWEEN
'{time_limits.first_enrollment_date}'
AND '{time_limits.last_enrollment_date}'
AND event_category = 'nimbus_events'
AND event_name = 'enrollment'
-- The background update experiment slug is exact.
AND JSON_VALUE(event_extra, '$.experiment') = '{self.experiment_slug}'
-- This should never happen, but belt-and-braces.
AND JSON_VALUE(
metrics, '$.uuid.background_update_client_id'
) IS NOT NULL
AND sample_id < {sample_size}
GROUP BY analysis_id, branch
)

UNION ALL

(
SELECT
m.metrics.uuid.background_update_client_id AS analysis_id,
experiment.value.branch AS branch,
MIN(DATE(submission_timestamp)) AS enrollment_date,
-- These are not discrete events, it makes no sense to count them.
1 AS num_enrollment_events
-- We need to query from the Glean `background_update` table because
-- pre-[Bug 1794053](https://bugzilla.mozilla.org/show_bug.cgi?id=1794053)
-- (scheduled for Firefox 109) we do not have the legacy client ID in
-- `mozdata.firefox_desktop_background_update.events`.
FROM `mozdata.firefox_desktop_background_update.background_update` AS m
CROSS JOIN
UNNEST(ping_info.experiments) AS experiment
WHERE
-- Background update telemetry can be delayed, so we accept enrollment
-- _submission_ dates during the elongated enrollment period. It is
-- safer to compare submission dates generated server-side than internal
-- ping dates generated client-side.
DATE(submission_timestamp) BETWEEN
'{time_limits.first_enrollment_date}'
AND '{time_limits.last_enrollment_date}'
-- The background update experiment slug is exact.
AND experiment.key = '{self.experiment_slug}'
AND sample_id < {sample_size}
GROUP BY analysis_id, branch
)

UNION ALL

(
SELECT
client_id AS analysis_id,
SPLIT(
mozfun.map.get_key(event_map_values, 'name'), ':'
)[SAFE_OFFSET(1)] AS branch,
MIN(submission_date) AS enrollment_date,
COUNT(submission_date) AS num_enrollment_events
FROM
`mozdata.telemetry.events`
WHERE
submission_date BETWEEN
'{time_limits.first_enrollment_date}'
AND '{time_limits.last_enrollment_date}'
AND event_category = 'browser.launched_to_handle'
AND event_method = 'system_notification'
AND event_object = 'toast'
-- Post [Bug 1804988](https://bugzilla.mozilla.org/show_bug.cgi?id=1804988),
-- this name looks like 'slug:branch'.
AND STARTS_WITH(
mozfun.map.get_key(event_map_values, 'name'),
'{self.experiment_slug}:'
)
AND sample_id < {sample_size}
GROUP BY
analysis_id, branch
)

)
QUALIFY ROW_NUMBER() OVER (
PARTITION BY analysis_id ORDER BY enrollment_date ASC
) = 1
"""

def _build_exposure_query_normandy(self, time_limits: TimeLimits) -> str:
"""Return SQL to query exposures for a normandy experiment"""
return f"""
Expand Down
27 changes: 21 additions & 6 deletions tests/test_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@
klar_ios_metrics,
)
from metric_config_parser import AnalysisUnit
from metric_config_parser.experiment import EnrollmentsQueryType

from mozanalysis.config import ApplicationNotFound, ConfigLoader
from mozanalysis.experiment import (
AnalysisWindow,
EnrollmentsQueryType,
Experiment,
IncompatibleAnalysisUnit,
TimeLimits,
Expand Down Expand Up @@ -289,10 +289,18 @@ def test_analysis_window_validates_end():


@pytest.mark.parametrize(
"analysis_unit", [AnalysisUnit.CLIENT, AnalysisUnit.PROFILE_GROUP]
("analysis_unit", "enrollments_query_type"),
[
(AnalysisUnit.CLIENT, EnrollmentsQueryType.BACKGROUND_UPDATE),
(AnalysisUnit.PROFILE_GROUP, EnrollmentsQueryType.NORMANDY),
],
)
def test_query_not_detectably_malformed(analysis_unit: AnalysisUnit):
exp = Experiment("slug", "2019-01-01", 8, analysis_unit=analysis_unit)
def test_query_not_detectably_malformed(
analysis_unit: AnalysisUnit, enrollments_query_type: EnrollmentsQueryType
):
exp = Experiment(
"experiment-test-slug", "2019-01-01", 8, analysis_unit=analysis_unit
)

tl = TimeLimits.for_ts(
first_enrollment_date="2019-01-01",
Expand All @@ -303,14 +311,21 @@ def test_query_not_detectably_malformed(analysis_unit: AnalysisUnit):

enrollments_sql = exp.build_enrollments_query(
time_limits=tl,
enrollments_query_type=EnrollmentsQueryType.NORMANDY,
enrollments_query_type=enrollments_query_type,
sample_size=None,
)

sql_lint(enrollments_sql)
assert "sample_id < None" not in enrollments_sql

assert enrollments_sql.count(analysis_unit.value) == 2
if enrollments_query_type == EnrollmentsQueryType.BACKGROUND_UPDATE:
assert enrollments_sql.count(analysis_unit.value) == 5
assert enrollments_sql.count("experiment-test-slug") == 4
assert enrollments_sql.count("sample_id <") == 3
else:
assert enrollments_sql.count(analysis_unit.value) == 2
assert enrollments_sql.count("experiment-test-slug") == 2
assert enrollments_sql.count("sample_id <") == 1

metrics_sql = exp.build_metrics_query(
metric_list=[],
Expand Down
Loading