diff --git a/jobs/kpi-forecasting/README.md b/jobs/kpi-forecasting/README.md index 31231cf8..beb9c70e 100644 --- a/jobs/kpi-forecasting/README.md +++ b/jobs/kpi-forecasting/README.md @@ -85,8 +85,87 @@ The tests can be run locally with `python -m pytest` in the root directory of th # YAML Configs -Each of the sections in the YAML files contains a list of arguments that are passed to their relevant objects or methods. -Definitions should be documented in the code. +Configuration for each forecast is found in the `configs` folder. Below is an example config file with sample values and a description of what the field means as a comment when it is not self-evident + +``` +metric_hub: # this configures the observed data fed to the model which is obtained via metrichub + app_name: "multi_product" # metric-hub app name + slug: "search_forecasting_ad_clicks" # metric-hub slug + alias: "search_forecasting_ad_clicks" # metric-hub alias + start_date: "2018-01-01" # date at which the observed data should start + end_date: "last complete month" + # date at which the observed data will end, can be a date or "last complete month" + # which uses `utils.parse_end_date` to determine the last complete month + segments: + # this section is optional and currently only used in funnel forecast, + # specifies which segments are used to partition the data, + # enabling separate models to be fit for each partition. 
+ # Values underneath are a map of column names to be output by the + # metric-hub call and the SQL queries to populate those columns + device: "device" + channel: "'all'" + country: "CASE WHEN country = 'US' THEN 'US' ELSE 'ROW' END" + partner: "partner" + where: "partner = 'Google'" # filter to apply to the metric hub pull + +forecast_model: # this section configures the model + model_type: "funnel" + # type of model object to use, current options are "funnel" for FunnelForecast and "prophet" for ProphetForecast + start_date: NULL + # starting date for the predicted data (unless predict_historical_dates is set), + # if unset, value depends on predict_historical_dates. + end_date: NULL + # final date for the predicted data + use_all_us_holidays: False + # For prophet-based models, when true, call `model.add_country_holidays(country_name="US")` on model + predict_historical_dates: True + # if predict_historical_dates is True, set to first date of the observed data + # if predict_historical_dates is False, defaults to the day after the last day in the observed data + number_of_simulations: 1000 + # for prophet-based models, number of simulations to run + parameters: + # this section can be a map or a list. + # If it's a map, these parameters are used for all models + # (recall multiple models are trained if there is a metric_hub.segments) + # If it's a list, it will set different parameters + # for different subsets of the partition specified in `metric_hub.segments`. 
+ - segment: + # specifies which subset of the partitions this applies to + # key is a column specified in metric_hub.segments + # value is a value that column can take to which the configuration is applied + device: desktop + start_date: "2018-01-01" # only applies to FunnelForecast, allows one to set start date for each sub-model + end_date: NULL # only applies to FunnelForecast, allows one to set end date for each sub-model + holidays: ["easter", "covid_sip11"] # holidays specified in `configs.model_inputs.holidays` to use. + regressors: ["post_esr_migration", "in_covid", "ad_click_bug"] # regressors specified in `configs.model_inputs.regressors` + grid_parameters: + # sets grid for hyperparameter tuning + changepoint_prior_scale: [0.001, 0.01, 0.1, 0.2, 0.5] # parameter of prior distribution controlling how much the trend fluctuates at changepoints + changepoint_range: [0.8, 0.9] # the proportion of the time series over which the changepoints are distributed + n_changepoints: [25, 50] # number of trend changepoints, equally spaced over the time series + weekly_seasonality: True # if weekly seasonality is included in the model + yearly_seasonality: True # if yearly seasonality is included in the model + cv_settings: + # sets parameters for prophet cross-validation used in FunnelForecast + initial: "1296 days" # the initial training period, used to train the first iteration of the model for CV + period: "30 days" # spacing between cutoff dates, the sliding window over which each round of cross validation is performed + horizon: "30 days" # forecast horizon used to make predictions and calculate model fit metrics for optimization + parallel: "processes" # how parallelization is performed by Prophet, or None if no parallelization is used + ... 
+ +summarize: + # parameters used to summarize and aggregate the predictions + periods: ["day", "month"] # periods to aggregate up to + numpy_aggregations: ["mean"] # numpy aggregation functions to use when aggregating predictions + percentiles: [10, 50, 90] # percentiles to calculate on aggregation + +write_results: + # set the project, dataset and table for output data + project: "moz-fx-data-shared-prod" + dataset: "search_derived" + table: "search_funnel_forecasts_v1" + components_table: "search_forecast_model_components_v1" +``` # Development diff --git a/jobs/kpi-forecasting/kpi_forecasting.py b/jobs/kpi-forecasting/kpi_forecasting.py index e7dcca7c..d8c3f04c 100644 --- a/jobs/kpi-forecasting/kpi_forecasting.py +++ b/jobs/kpi-forecasting/kpi_forecasting.py @@ -1,4 +1,4 @@ -from kpi_forecasting.inputs import CLI, YAML +from kpi_forecasting.inputs import CLI, load_yaml from kpi_forecasting.models.prophet_forecast import ProphetForecast from kpi_forecasting.models.funnel_forecast import FunnelForecast from kpi_forecasting.metric_hub import MetricHub @@ -13,17 +13,17 @@ def main() -> None: # Load the config - config = YAML(filepath=CLI().args.config).data - model_type = config.forecast_model.model_type + config = load_yaml(filepath=CLI().args.config) + model_type = config["forecast_model"]["model_type"] if model_type in MODELS: - metric_hub = MetricHub(**config.metric_hub) - model = MODELS[model_type](metric_hub=metric_hub, **config.forecast_model) + metric_hub = MetricHub(**config["metric_hub"]) + model = MODELS[model_type](metric_hub=metric_hub, **config["forecast_model"]) model.fit() model.predict() - model.summarize(**config.summarize) - model.write_results(**config.write_results) + model.summarize(**config["summarize"]) + model.write_results(**config["write_results"]) else: raise ValueError(f"Don't know how to forecast using {model_type}.") diff --git a/jobs/kpi-forecasting/kpi_forecasting/configs/dau_desktop.yaml 
b/jobs/kpi-forecasting/kpi_forecasting/configs/dau_desktop.yaml index 5ba432ea..0b8966f2 100644 --- a/jobs/kpi-forecasting/kpi_forecasting/configs/dau_desktop.yaml +++ b/jobs/kpi-forecasting/kpi_forecasting/configs/dau_desktop.yaml @@ -10,7 +10,9 @@ forecast_model: model_type: "prophet" start_date: NULL end_date: NULL - use_holidays: False + use_all_us_holidays: False + predict_historical_dates: False + number_of_simulations: 1000 parameters: seasonality_prior_scale: 0.00825 changepoint_prior_scale: 0.15983 diff --git a/jobs/kpi-forecasting/kpi_forecasting/configs/dau_mobile.yaml b/jobs/kpi-forecasting/kpi_forecasting/configs/dau_mobile.yaml index 74889971..c9288408 100644 --- a/jobs/kpi-forecasting/kpi_forecasting/configs/dau_mobile.yaml +++ b/jobs/kpi-forecasting/kpi_forecasting/configs/dau_mobile.yaml @@ -10,7 +10,9 @@ forecast_model: model_type: "prophet" start_date: NULL end_date: NULL - use_holidays: True + use_all_us_holidays: True + predict_historical_dates: False + number_of_simulations: 1000 parameters: seasonality_prior_scale: 0.01 changepoint_prior_scale: 0.01 diff --git a/jobs/kpi-forecasting/kpi_forecasting/configs/model_inputs/__init__.py b/jobs/kpi-forecasting/kpi_forecasting/configs/model_inputs/__init__.py index 1ebd482e..caacc611 100644 --- a/jobs/kpi-forecasting/kpi_forecasting/configs/model_inputs/__init__.py +++ b/jobs/kpi-forecasting/kpi_forecasting/configs/model_inputs/__init__.py @@ -3,15 +3,15 @@ from pathlib import Path -from kpi_forecasting.inputs import YAML +from kpi_forecasting.inputs import load_yaml PARENT_PATH = Path(__file__).parent HOLIDAY_PATH = PARENT_PATH / "holidays.yaml" REGRESSOR_PATH = PARENT_PATH / "regressors.yaml" -holiday_collection = YAML(HOLIDAY_PATH) -regressor_collection = YAML(REGRESSOR_PATH) +holiday_collection = load_yaml(HOLIDAY_PATH) +regressor_collection = load_yaml(REGRESSOR_PATH) @attr.s(auto_attribs=True, frozen=False) diff --git 
a/jobs/kpi-forecasting/kpi_forecasting/configs/search_forecasting_ad_clicks.yaml b/jobs/kpi-forecasting/kpi_forecasting/configs/search_forecasting_ad_clicks.yaml index a756b518..7a01aa15 100644 --- a/jobs/kpi-forecasting/kpi_forecasting/configs/search_forecasting_ad_clicks.yaml +++ b/jobs/kpi-forecasting/kpi_forecasting/configs/search_forecasting_ad_clicks.yaml @@ -16,43 +16,45 @@ forecast_model: model_type: "funnel" start_date: NULL end_date: NULL - use_holidays: False + use_all_us_holidays: False + predict_historical_dates: True + number_of_simulations: 1000 parameters: - model_setting_split_dim: "device" - segment_settings: - desktop: - start_date: "2018-01-01" - end_date: NULL - holidays: ["easter", "covid_sip11"] - regressors: ["post_esr_migration", "in_covid", "ad_click_bug"] - grid_parameters: - changepoint_prior_scale: [0.001, 0.01, 0.1, 0.2, 0.5] - changepoint_range: [0.8, 0.9] - n_changepoints: [25, 50] - weekly_seasonality: True - yearly_seasonality: True - cv_settings: - initial: "1296 days" - period: "30 days" - horizon: "30 days" - parallel: "processes" - mobile: - start_date: "2022-01-01" - end_date: NULL - holidays: ["easter"] - regressors: ["after_fenix", "in_covid"] - grid_parameters: - changepoint_prior_scale: [.01, .1, .15, .2] - changepoint_range: [0.8, 0.9, 1] - n_changepoints: [30] - weekly_seasonality: True - yearly_seasonality: True - growth: "logistic" - cv_settings: - initial: "366 days" - period: "30 days" - horizon: "30 days" - parallel: "processes" + - segment: + device: desktop + start_date: "2018-01-01" + end_date: NULL + holidays: ["easter", "covid_sip11"] + regressors: ["post_esr_migration", "in_covid", "ad_click_bug"] + grid_parameters: + changepoint_prior_scale: [0.001, 0.01, 0.1, 0.2, 0.5] + changepoint_range: [0.8, 0.9] + n_changepoints: [25, 50] + weekly_seasonality: True + yearly_seasonality: True + cv_settings: + initial: "1296 days" + period: "30 days" + horizon: "30 days" + parallel: "processes" + - segment: + device: 
mobile + start_date: "2022-01-01" + end_date: NULL + holidays: ["easter"] + regressors: ["after_fenix", "in_covid"] + grid_parameters: + changepoint_prior_scale: [.01, .1, .15, .2] + changepoint_range: [0.8, 0.9, 1] + n_changepoints: [30] + weekly_seasonality: True + yearly_seasonality: True + growth: "logistic" + cv_settings: + initial: "366 days" + period: "30 days" + horizon: "30 days" + parallel: "processes" summarize: periods: ["day", "month"] diff --git a/jobs/kpi-forecasting/kpi_forecasting/configs/search_forecasting_daily_active_users.yaml b/jobs/kpi-forecasting/kpi_forecasting/configs/search_forecasting_daily_active_users.yaml index b6643c4a..dfb7bb49 100644 --- a/jobs/kpi-forecasting/kpi_forecasting/configs/search_forecasting_daily_active_users.yaml +++ b/jobs/kpi-forecasting/kpi_forecasting/configs/search_forecasting_daily_active_users.yaml @@ -16,40 +16,42 @@ forecast_model: model_type: "funnel" start_date: NULL end_date: NULL - use_holidays: False + use_all_us_holidays: False + predict_historical_dates: True + number_of_simulations: 1000 parameters: - model_setting_split_dim: "device" - segment_settings: - desktop: - start_date: "2018-01-01" - end_date: NULL - holidays: ["easter", "covid_sip11"] - regressors: ["post_esr_migration", "in_covid"] - grid_parameters: - changepoint_prior_scale: [0.001, 0.01, 0.1, 0.2, 0.5] - changepoint_range: [0.8, 0.9] - weekly_seasonality: True - yearly_seasonality: True - cv_settings: - initial: "1296 days" - period: "30 days" - horizon: "30 days" - parallel: "processes" - mobile: - start_date: "2021-01-01" - end_date: NULL - holidays: ["easter"] - regressors: ["after_fenix", "in_covid"] - grid_parameters: - changepoint_prior_scale: [0.001, 0.01, 0.1] - weekly_seasonality: True - yearly_seasonality: True - growth: "logistic" - cv_settings: - initial: "366 days" - period: "30 days" - horizon: "30 days" - parallel: "processes" + - segment: + device: desktop + start_date: "2018-01-01" + end_date: NULL + holidays: ["easter", 
"covid_sip11"] + regressors: ["post_esr_migration", "in_covid"] + grid_parameters: + changepoint_prior_scale: [0.001, 0.01, 0.1, 0.2, 0.5] + changepoint_range: [0.8, 0.9] + weekly_seasonality: True + yearly_seasonality: True + cv_settings: + initial: "1296 days" + period: "30 days" + horizon: "30 days" + parallel: "processes" + - segment: + device: mobile + start_date: "2021-01-01" + end_date: NULL + holidays: ["easter"] + regressors: ["after_fenix", "in_covid"] + grid_parameters: + changepoint_prior_scale: [0.001, 0.01, 0.1] + weekly_seasonality: True + yearly_seasonality: True + growth: "logistic" + cv_settings: + initial: "366 days" + period: "30 days" + horizon: "30 days" + parallel: "processes" summarize: periods: ["day", "month"] diff --git a/jobs/kpi-forecasting/kpi_forecasting/configs/search_forecasting_search_count.yaml b/jobs/kpi-forecasting/kpi_forecasting/configs/search_forecasting_search_count.yaml index 8dd8f811..17431247 100644 --- a/jobs/kpi-forecasting/kpi_forecasting/configs/search_forecasting_search_count.yaml +++ b/jobs/kpi-forecasting/kpi_forecasting/configs/search_forecasting_search_count.yaml @@ -16,40 +16,42 @@ forecast_model: model_type: "funnel" start_date: NULL end_date: NULL - use_holidays: False + use_all_us_holidays: False + predict_historical_dates: True + number_of_simulations: 1000 parameters: - model_setting_split_dim: "device" - segment_settings: - desktop: - start_date: "2018-01-01" - end_date: NULL - holidays: ["easter", "covid_sip11"] - regressors: ["post_esr_migration", "in_covid"] - grid_parameters: - changepoint_prior_scale: [0.001, 0.01, 0.1, 0.2, 0.5] - changepoint_range: [0.8, 0.9] - weekly_seasonality: True - yearly_seasonality: True - cv_settings: - initial: "1296 days" - period: "30 days" - horizon: "30 days" - parallel: "processes" - mobile: - start_date: "2020-01-01" - end_date: NULL - holidays: ["easter"] - regressors: ["after_fenix", "in_covid"] - grid_parameters: - changepoint_prior_scale: [0.001, 0.01, 0.1] - 
weekly_seasonality: True - yearly_seasonality: True - growth: "logistic" - cv_settings: - initial: "366 days" - period: "30 days" - horizon: "30 days" - parallel: "processes" + - segment: + device: desktop + start_date: "2018-01-01" + end_date: NULL + holidays: ["easter", "covid_sip11"] + regressors: ["post_esr_migration", "in_covid"] + grid_parameters: + changepoint_prior_scale: [0.001, 0.01, 0.1, 0.2, 0.5] + changepoint_range: [0.8, 0.9] + weekly_seasonality: True + yearly_seasonality: True + cv_settings: + initial: "1296 days" + period: "30 days" + horizon: "30 days" + parallel: "processes" + - segment: + device: mobile + start_date: "2020-01-01" + end_date: NULL + holidays: ["easter"] + regressors: ["after_fenix", "in_covid"] + grid_parameters: + changepoint_prior_scale: [0.001, 0.01, 0.1] + weekly_seasonality: True + yearly_seasonality: True + growth: "logistic" + cv_settings: + initial: "366 days" + period: "30 days" + horizon: "30 days" + parallel: "processes" summarize: periods: ["day", "month"] diff --git a/jobs/kpi-forecasting/kpi_forecasting/inputs.py b/jobs/kpi-forecasting/kpi_forecasting/inputs.py index 034af27a..14da5545 100644 --- a/jobs/kpi-forecasting/kpi_forecasting/inputs.py +++ b/jobs/kpi-forecasting/kpi_forecasting/inputs.py @@ -2,7 +2,6 @@ import yaml from dataclasses import dataclass -from dotmap import DotMap @dataclass @@ -20,18 +19,10 @@ def __post_init__(self) -> None: self.args = self.parser.parse_args() -@dataclass -class YAML: +def load_yaml(filepath: str) -> dict: """ - Create a data structure from a YAML config filepath. Instead of loading the - YAML as a dictionary, which requires verbose code to access nested dictionary - values, this class loads YAML as a dot map. Nested values can be accessed using - dot notation, like `YAML().data.section.subsection.value`. + Create a data structure from a YAML config filepath. 
""" - - filepath: str - - def __post_init__(self) -> None: - with open(self.filepath, "r") as f: - data = yaml.safe_load(f) - self.data = DotMap(data) + with open(filepath, "r") as f: + data = yaml.safe_load(f) + return data diff --git a/jobs/kpi-forecasting/kpi_forecasting/metric_hub.py b/jobs/kpi-forecasting/kpi_forecasting/metric_hub.py index 64cf9d42..e0a86c83 100644 --- a/jobs/kpi-forecasting/kpi_forecasting/metric_hub.py +++ b/jobs/kpi-forecasting/kpi_forecasting/metric_hub.py @@ -1,7 +1,6 @@ import pandas as pd from dataclasses import dataclass -from dotmap import DotMap from google.cloud import bigquery from mozanalysis.config import ConfigLoader from textwrap import dedent @@ -36,7 +35,7 @@ class MetricHub: app_name: str slug: str start_date: str - segments: DotMap = None + segments: dict = None where: str = None end_date: str = None alias: str = None diff --git a/jobs/kpi-forecasting/kpi_forecasting/models/base_forecast.py b/jobs/kpi-forecasting/kpi_forecasting/models/base_forecast.py index dcf64b91..896051f8 100644 --- a/jobs/kpi-forecasting/kpi_forecasting/models/base_forecast.py +++ b/jobs/kpi-forecasting/kpi_forecasting/models/base_forecast.py @@ -20,7 +20,7 @@ class BaseForecast(abc.ABC): Args: model_type (str): The name of the forecasting model that's being used. parameters (Dict): Parameters that should be passed to the forecasting model. - use_holidays (bool): Whether or not the forecasting model should use holidays. + use_all_us_holidays (bool): Whether or not the forecasting model should use holidays. The base model does not apply holiday logic; that logic needs to be built in the child class. start_date (str): A 'YYYY-MM-DD' formatted-string that specifies the first @@ -29,18 +29,18 @@ class BaseForecast(abc.ABC): date the metric should be queried. metric_hub (MetricHub): A MetricHub object that provides details about the metric to be forecasted. 
- number_of_simulations (int): The number of simulated timeseries that the forecast - should generate. Since many forecast models are probablistic, this enables the - measurement of variation across a range of possible outcomes. + predict_historical_dates (bool): If True, forecast starts at the first + date in the observed data. If False, it uses the value of start_date it set + and the first day after the observed data ends otherwise """ model_type: str parameters: Dict - use_holidays: bool + use_all_us_holidays: bool start_date: str end_date: str metric_hub: MetricHub - number_of_simulations: int = 1000 + predict_historical_dates: bool = False def _get_observed_data(self): if self.metric_hub: @@ -55,6 +55,11 @@ def __post_init__(self) -> None: self.collected_at = datetime.now(timezone.utc).replace(tzinfo=None) self._get_observed_data() + # raise an error is predict_historical_dates is True and start_date is set + if self.start_date and self.predict_historical_dates: + raise ValueError( + "forecast start_date set while predict_historical_dates is True" + ) # use default start/end dates if the user doesn't specify them self.start_date = pd.to_datetime(self.start_date or self._default_start_date) self.end_date = pd.to_datetime(self.end_date or self._default_end_date) @@ -71,8 +76,8 @@ def __post_init__(self) -> None: self.metadata_params = json.dumps( { "model_type": self.model_type.lower(), - "model_params": self.parameters.toDict(), - "use_holidays": self.use_holidays, + "model_params": self.parameters, + "use_all_us_holidays": self.use_all_us_holidays, } ) @@ -138,7 +143,10 @@ def _summarize( @property def _default_start_date(self) -> str: """The first day after the last date in the observed dataset.""" - return self.observed_df["submission_date"].max() + timedelta(days=1) + if self.predict_historical_dates: + return self.observed_df["submission_date"].min() + else: + return self.observed_df["submission_date"].max() + timedelta(days=1) @property def 
_default_end_date(self) -> str: diff --git a/jobs/kpi-forecasting/kpi_forecasting/models/funnel_forecast.py b/jobs/kpi-forecasting/kpi_forecasting/models/funnel_forecast.py index 487f5510..3c06863c 100644 --- a/jobs/kpi-forecasting/kpi_forecasting/models/funnel_forecast.py +++ b/jobs/kpi-forecasting/kpi_forecasting/models/funnel_forecast.py @@ -67,15 +67,6 @@ def __post_init__(self) -> None: # this is used to avoid the code below for testing purposes return - # Overwrite dates_to_predict to provide historical date forecasts - self.dates_to_predict = pd.DataFrame( - { - "submission_date": pd.date_range( - self.metric_hub.start_date, self.end_date - ).date - } - ) - self._set_segment_models(self.observed_df, self.metric_hub.segments.keys()) # initialize unset attributes @@ -85,10 +76,10 @@ def _set_segment_models( self, observed_df: pd.DataFrame, segment_column_list: list ) -> None: """Creates a SegmentSettings object for each segment specified in the - metric_hub.segments section of the config. These objects are stored in a list - in the segment_models attribute - Parameters can be specified independently for at most one dimension column - set using model_setting_split_dim in self.parameters + metric_hub.segments section of the config. It is populated from the list of + parameters in the forecast_model.parameters section of the configuration file. + The segements section of each element of the list specifies which values within which + segments the parameters are associated with. 
Args: observed_df (pd.DataFrame): dataframe containing observed data used to model @@ -100,51 +91,64 @@ def _set_segment_models( combination_df = observed_df[segment_column_list].drop_duplicates() # Construct dictionaries from those combinations + # this will be used to check that the config actually partitions the data segment_combinations = combination_df.to_dict("records") - # initialize a list to hold models for each segment - ## populate the list with segments and parameters for the segment - split_dim = self.parameters["model_setting_split_dim"] - - # check to make sure split_dim is one of the columns set in segment_column_list - if split_dim not in segment_column_list: - columns_str = ",".join(segment_column_list) + # get subset of segment that is used in partitioning + split_dims = None + for partition in self.parameters: + partition_dim = set(partition["segment"].keys()) + if split_dims and partition_dim != split_dims: + raise ValueError( + "Segment keys are not the same across different elements of parameters in the config file" + ) + elif split_dims is None: + split_dims = partition_dim + else: + # this is case where split_dim is set and matches paritition_dim + continue + if not split_dims <= set(combination_df.keys()): + missing_dims = split_dims - set(combination_df.keys()) + missing_dims_str = ",".join(missing_dims) raise ValueError( - f"model_setting_split_dim set to {split_dim} which is not among segment columns: {columns_str}" + f"Segment keys missing from metric hub segments: {missing_dims_str}" ) # For each segment combinination, get the model parameters from the config ## file. Parse the holidays and regressors specified in the config file. 
segment_models = [] for segment in segment_combinations: - model_params = getattr( - self.parameters["segment_settings"], segment[split_dim] - ) - + # find the correct configuration + for partition in self.parameters: + partition_segment = partition["segment"] + # get subset of segment that is used to partition + subset_segment = { + key: val for key, val in segment.items() if key in split_dims + } + if partition_segment == subset_segment: + # parition is set to the desired value + # break out of loop + break holiday_list = [] regressor_list = [] - if model_params["holidays"]: - holiday_list = [ - getattr(holiday_collection.data, h) - for h in model_params["holidays"] - ] - if model_params["regressors"]: + if "holidays" in partition: + holiday_list = [holiday_collection[h] for h in partition["holidays"]] + if "regressors" in partition: regressor_list = [ - getattr(regressor_collection.data, r) - for r in model_params["regressors"] + regressor_collection[r] for r in partition["regressors"] ] # Create a SegmentModelSettings object for each segment combination segment_models.append( SegmentModelSettings( segment=segment, - start_date=model_params["start_date"], + start_date=partition["start_date"], end_date=self.end_date, holidays=[ProphetHoliday(**h) for h in holiday_list], regressors=[ProphetRegressor(**r) for r in regressor_list], - grid_parameters=dict(model_params["grid_parameters"]), - cv_settings=dict(model_params["cv_settings"]), + grid_parameters=dict(partition["grid_parameters"]), + cv_settings=dict(partition["cv_settings"]), ) ) self.segment_models = segment_models @@ -489,9 +493,7 @@ def _predict( segment_settings.components_df = components_df.copy() - return df.loc[ - pd.to_datetime(df["submission_date"]) >= pd.to_datetime(self.start_date) - ] + return df def _validate_forecast_df(self, df: pd.DataFrame) -> None: """ @@ -561,6 +563,12 @@ def _combine_forecast_observed( Returns: pd.DataFrame: combined dataframe containing aggregated values from observed 
and forecast """ + # filter the forecast data to just the data in the future + last_historic_date = observed_df["submission_date"].max() + forecast_df = forecast_df.loc[ + forecast_df["submission_date"] > last_historic_date + ] + forecast_summarized, observed_summarized = self._aggregate_forecast_observed( forecast_df, observed_df, period, numpy_aggregations, percentiles ) diff --git a/jobs/kpi-forecasting/kpi_forecasting/models/prophet_forecast.py b/jobs/kpi-forecasting/kpi_forecasting/models/prophet_forecast.py index 19f57e1d..3dc2b920 100644 --- a/jobs/kpi-forecasting/kpi_forecasting/models/prophet_forecast.py +++ b/jobs/kpi-forecasting/kpi_forecasting/models/prophet_forecast.py @@ -16,6 +16,16 @@ @dataclass class ProphetForecast(BaseForecast): + """Forecast object specifically for prophet forecast models + + Additional attributes: + number_of_simulations (int): The number of simulated timeseries that the forecast + should generate. Since many forecast models are probablistic, this enables the + measurement of variation across a range of possible outcomes. 
+ """ + + number_of_simulations: int = 1000 + @property def column_names_map(self) -> Dict[str, str]: return {"submission_date": "ds", "value": "y"} @@ -27,7 +37,7 @@ def _build_model(self, parameter_dict): mcmc_samples=0, ) - if self.use_holidays: + if self.use_all_us_holidays: model.add_country_holidays(country_name="US") return model @@ -96,7 +106,7 @@ def _predict_legacy(self) -> pd.DataFrame: datetime.now(timezone.utc).replace(tzinfo=None).date() ) df["forecast_parameters"] = str( - json.dumps({**self.parameters, "holidays": self.use_holidays}) + json.dumps({**self.parameters, "holidays": self.use_all_us_holidays}) ) alias = self.metric_hub.alias.lower() @@ -352,8 +362,8 @@ def write_results( project_legacy: str, dataset_legacy: str, write_disposition: str = "WRITE_APPEND", - forecast_table_legacy: str = "kpi_automated_forecast_v1", - confidences_table_legacy: str = "kpi_automated_forecast_confidences_v1", + forecast_table_legacy: str = "kpi_automated_forecast_v1_branch", + confidences_table_legacy: str = "kpi_automated_forecast_confidences_v1_branch", ) -> None: """ Write `self.summary_df` to Big Query. diff --git a/jobs/kpi-forecasting/kpi_forecasting/results_processing.py b/jobs/kpi-forecasting/kpi_forecasting/results_processing.py index f7e8ab88..e2f199e5 100644 --- a/jobs/kpi-forecasting/kpi_forecasting/results_processing.py +++ b/jobs/kpi-forecasting/kpi_forecasting/results_processing.py @@ -4,7 +4,7 @@ from google.cloud import bigquery from google.cloud.bigquery.enums import SqlTypeNames as bq_types -from kpi_forecasting.inputs import YAML +from kpi_forecasting.inputs import load_yaml import pandas as pd import numpy as np @@ -74,12 +74,12 @@ def _set_intra_forecast_agg_functions(self): def _load_config_data(self): """Extracts data from the list of config files passed to the class and stores it in the - config_data attribute. The filename is the key, and the contents (represnted as a DotMap) + config_data attribute. 
The filename is the key, and the contents are the values""" self.config_data = {} for config_file in self.input_config_list: full_path = f"{self.input_config_path}/{config_file}" - config_data = YAML(full_path).data + config_data = load_yaml(full_path) self.config_data[config_file] = config_data def _extract_config_data(self): @@ -99,7 +99,7 @@ def _extract_config_data(self): config_file_list = list(self.config_data.keys()) for config_data in self.config_data.values(): # get segment data - metric_hub_data = config_data.metric_hub.toDict() + metric_hub_data = config_data["metric_hub"] if "segments" in metric_hub_data: segment_data = metric_hub_data["segments"] segment_data_list.append(segment_data) @@ -107,7 +107,7 @@ def _extract_config_data(self): segment_data_list.append(None) # get input table info - input_table_list.append(config_data.write_results.toDict()) + input_table_list.append(config_data["write_results"]) input_table_data = input_table_list.pop(0) input_table_matches_first = [input_table_data == el for el in input_table_list] diff --git a/jobs/kpi-forecasting/kpi_forecasting/tests/test_base_forecast.py b/jobs/kpi-forecasting/kpi_forecasting/tests/test_base_forecast.py index 19a2db9d..bfea0e5a 100644 --- a/jobs/kpi-forecasting/kpi_forecasting/tests/test_base_forecast.py +++ b/jobs/kpi-forecasting/kpi_forecasting/tests/test_base_forecast.py @@ -5,7 +5,6 @@ import pytest import pandas as pd -from dotmap import DotMap import numpy as np from datetime import timedelta, timezone @@ -92,8 +91,8 @@ def test_post_init(good_class): end_date = TEST_PREDICT_END_STR good_class = good_class( model_type="test", - parameters=DotMap(), - use_holidays=None, + parameters={}, + use_all_us_holidays=None, start_date=start_date, end_date=end_date, metric_hub=None, @@ -108,12 +107,30 @@ def test_post_init(good_class): assert good_class.dates_to_predict.equals(dates_to_predict_expected) +def test_post_init_exception(good_class): + start_date = TEST_DATE_STR + end_date = 
TEST_PREDICT_END_STR + with pytest.raises( + ValueError, + match="forecast start_date set while predict_historical_dates is True", + ): + _ = good_class( + model_type="test", + parameters={}, + use_all_us_holidays=None, + start_date=start_date, + end_date=end_date, + metric_hub=None, + predict_historical_dates=True, + ) + + def test_post_init_default_dates(good_class): # check default start and end time good_class = good_class( model_type="test", - parameters=DotMap(), - use_holidays=None, + parameters={}, + use_all_us_holidays=None, start_date="", end_date="", metric_hub=None, @@ -130,11 +147,33 @@ def test_post_init_default_dates(good_class): assert good_class.dates_to_predict.equals(dates_to_predict_expected) +def test_post_init_default_dates_historical(good_class): + # check default start and end time + good_class = good_class( + model_type="test", + parameters={}, + use_all_us_holidays=None, + start_date="", + end_date="", + metric_hub=None, + predict_historical_dates=True, + ) + # this is the min date of the observed data + start_date = TEST_DATE - relativedelta(years=1) + end_date = ( + datetime.now(timezone.utc).replace(tzinfo=None) + timedelta(weeks=78) + ).date() + dates_to_predict_expected = pd.DataFrame( + {"submission_date": pd.date_range(start_date, end_date).date} + ) + assert good_class.dates_to_predict.equals(dates_to_predict_expected) + + def test_fit(good_class): good_class = good_class( model_type="test", - parameters=DotMap(), - use_holidays=None, + parameters={}, + use_all_us_holidays=None, start_date=TEST_DATE_STR, end_date=TEST_PREDICT_END_STR, metric_hub=None, @@ -149,8 +188,8 @@ def test_fit(good_class): def test_predict_and_validate(good_class): good_class = good_class( model_type="test", - parameters=DotMap(), - use_holidays=None, + parameters={}, + use_all_us_holidays=None, start_date=TEST_DATE_STR, end_date=TEST_PREDICT_END_STR, metric_hub=None, @@ -164,8 +203,8 @@ def test_predict_and_validate(good_class): def 
test_summarize(good_class): good_class = good_class( model_type="test", - parameters=DotMap(), - use_holidays=None, + parameters={}, + use_all_us_holidays=None, start_date=TEST_DATE_STR, end_date=TEST_PREDICT_END_STR, metric_hub=None, diff --git a/jobs/kpi-forecasting/kpi_forecasting/tests/test_data/test_funnel_config.yaml b/jobs/kpi-forecasting/kpi_forecasting/tests/test_data/test_funnel_config.yaml index 2aebbeff..17943134 100644 --- a/jobs/kpi-forecasting/kpi_forecasting/tests/test_data/test_funnel_config.yaml +++ b/jobs/kpi-forecasting/kpi_forecasting/tests/test_data/test_funnel_config.yaml @@ -15,7 +15,7 @@ forecast_model: model_type: "funnel" start_date: NULL end_date: NULL - use_holidays: False + use_all_us_holidays: False parameters: model_setting_split_dim: "device" segment_settings: diff --git a/jobs/kpi-forecasting/kpi_forecasting/tests/test_funnel_forecast.py b/jobs/kpi-forecasting/kpi_forecasting/tests/test_funnel_forecast.py index 52121b7c..6e43e409 100644 --- a/jobs/kpi-forecasting/kpi_forecasting/tests/test_funnel_forecast.py +++ b/jobs/kpi-forecasting/kpi_forecasting/tests/test_funnel_forecast.py @@ -5,7 +5,6 @@ from dateutil.relativedelta import relativedelta import pandas as pd -from dotmap import DotMap import pytest import numpy as np @@ -31,8 +30,8 @@ def forecast(): forecast = FunnelForecast( model_type="test", - parameters=DotMap(), - use_holidays=None, + parameters={}, + use_all_us_holidays=None, start_date=predict_start_date, end_date=predict_end_date, metric_hub=None, @@ -70,35 +69,34 @@ def funnel_forecast_for_fit_tests(segment_info_fit_tests, mocker): """This method creates a forecast object from the segment dict created in the segment_info_fit_tests fixture. 
It also mocks some of the object methods to enable easier testing""" - parameter_dict = { - "model_setting_split_dim": "a", - "segment_settings": { - "A1": { - "start_date": segment_info_fit_tests["A1"]["start_date"], - "end_date": None, - "holidays": [], - "regressors": [], - "grid_parameters": segment_info_fit_tests["A1"]["grid_parameters"], - "cv_settings": {}, - }, - "A2": { - "start_date": segment_info_fit_tests["A2"]["start_date"], - "end_date": None, - "holidays": [], - "regressors": [], - "grid_parameters": segment_info_fit_tests["A2"]["grid_parameters"], - "cv_settings": {}, - }, + parameter_list = [ + { + "segment": {"a": "A1"}, + "start_date": segment_info_fit_tests["A1"]["start_date"], + "end_date": None, + "holidays": [], + "regressors": [], + "grid_parameters": segment_info_fit_tests["A1"]["grid_parameters"], + "cv_settings": {}, }, - } + { + "segment": {"a": "A2"}, + "start_date": segment_info_fit_tests["A2"]["start_date"], + "end_date": None, + "holidays": [], + "regressors": [], + "grid_parameters": segment_info_fit_tests["A2"]["grid_parameters"], + "cv_settings": {}, + }, + ] - parameter_dotmap = DotMap(parameter_dict) predict_start_date = TEST_DATE_STR predict_end_date = TEST_DATE_NEXT_DAY_STR + forecast = FunnelForecast( model_type="test", - parameters=parameter_dotmap, - use_holidays=None, + parameters=parameter_list, + use_all_us_holidays=None, start_date=predict_start_date, end_date=predict_end_date, metric_hub=None, @@ -196,8 +194,8 @@ def test_combine_forecast_observed(mocker, forecast): observed_df = pd.DataFrame( { "submission_date": [ - TEST_DATE, - TEST_DATE_NEXT_DAY, + TEST_DATE - relativedelta(days=2), + TEST_DATE - relativedelta(days=1), ], "a": ["A1", "A1"], "value": [5, 6], @@ -259,10 +257,10 @@ def test_under_summarize(mocker, forecast): { "submission_date": [ TEST_DATE - relativedelta(months=1), - TEST_DATE, - TEST_DATE_NEXT_DAY, - TEST_DATE, - TEST_DATE_NEXT_DAY, + TEST_DATE - relativedelta(days=2), + TEST_DATE - 
relativedelta(days=1), + TEST_DATE - relativedelta(days=2), + TEST_DATE - relativedelta(days=1), ], "a": ["A1", "A1", "A1", "A2", "A2"], "value": [10, 20, 30, 40, 50], @@ -274,7 +272,7 @@ def test_under_summarize(mocker, forecast): ["start_date", "forecast_df", "segment", "trained_parameters"], ) dummy_segment_settings = SegmentSettings( - start_date=TEST_DATE_STR, + start_date=(TEST_DATE - relativedelta(days=2)).strftime("%Y-%m-%d"), forecast_df=forecast_df.copy(), segment={"a": "A1"}, trained_parameters={"trained_parameters": "yes"}, @@ -297,8 +295,8 @@ def test_under_summarize(mocker, forecast): observed_expected_df = pd.DataFrame( { "submission_date": [ - TEST_DATE, - TEST_DATE_NEXT_DAY, + TEST_DATE - relativedelta(days=2), + TEST_DATE - relativedelta(days=1), ], "a": ["A1", "A1"], "value": [20, 30], @@ -363,10 +361,10 @@ def test_summarize(mocker, forecast): { "submission_date": [ TEST_DATE - relativedelta(months=1), - TEST_DATE, - TEST_DATE_NEXT_DAY, - TEST_DATE, - TEST_DATE_NEXT_DAY, + TEST_DATE - relativedelta(days=2), + TEST_DATE - relativedelta(days=1), + TEST_DATE - relativedelta(days=2), + TEST_DATE - relativedelta(days=1), ], "a": ["A1", "A1", "A1", "A2", "A2"], "value": [10, 20, 30, 40, 50], @@ -382,7 +380,7 @@ def test_summarize(mocker, forecast): # we're only testing that it is concatenated properly # with the segment data added dummy_segment_settings_A1 = SegmentSettings( - start_date=TEST_DATE_STR, + start_date=(TEST_DATE - relativedelta(days=2)).strftime("%Y-%m-%d"), forecast_df=forecast_df.copy(), segment={"a": "A1"}, trained_parameters={"trained_parameters": "yes"}, @@ -390,7 +388,7 @@ def test_summarize(mocker, forecast): ) dummy_segment_settings_A2 = SegmentSettings( - start_date=TEST_DATE_STR, + start_date=(TEST_DATE - relativedelta(days=2)).strftime("%Y-%m-%d"), forecast_df=forecast_df.copy(), segment={"a": "A2"}, trained_parameters={"trained_parameters": "yes"}, @@ -427,10 +425,10 @@ def test_summarize(mocker, forecast): 
observed_expected_df = pd.DataFrame( { "submission_date": [ - TEST_DATE, - TEST_DATE_NEXT_DAY, - TEST_DATE, - TEST_DATE_NEXT_DAY, + TEST_DATE - relativedelta(days=2), + TEST_DATE - relativedelta(days=1), + TEST_DATE - relativedelta(days=2), + TEST_DATE - relativedelta(days=1), ], "a": ["A1", "A1", "A2", "A2"], "value": [20, 30, 40, 50], @@ -502,28 +500,25 @@ def test_under_predict(mocker): # set segment models A1_start_date = TEST_DATE_STR - parameter_dict = { - "model_setting_split_dim": "a", - "segment_settings": { - "A1": { - "start_date": A1_start_date, - "end_date": None, - "holidays": [], - "regressors": [], - "grid_parameters": {"param1": [1, 2], "param2": [20, 10]}, - "cv_settings": {}, - }, - }, - } + parameter_list = [ + { + "segment": {"a": "A1"}, + "start_date": A1_start_date, + "end_date": None, + "holidays": [], + "regressors": [], + "grid_parameters": {"param1": [1, 2], "param2": [20, 10]}, + "cv_settings": {}, + } + ] - parameter_dotmap = DotMap(parameter_dict) predict_start_date = TEST_DATE_NEXT_DAY_STR predict_end_date = TEST_PREDICT_END_STR forecast = FunnelForecast( model_type="test", - parameters=parameter_dotmap, - use_holidays=None, + parameters=parameter_list, + use_all_us_holidays=None, start_date=predict_start_date, end_date=predict_end_date, metric_hub=None, @@ -589,13 +584,6 @@ def test_under_predict(mocker): } ) - # time filter corresponds to the start time of the object - # as opposed to the segment - expected_time_filter = ( - expected["submission_date"] >= pd.to_datetime(forecast.start_date).date() - ) - expected = expected[expected_time_filter].reset_index(drop=True) - pd.testing.assert_frame_equal(out, expected) # check the components @@ -858,36 +846,34 @@ def test_set_segment_models(): """test the set_segment_models method""" A1_start_date = "2018-01-01" A2_start_date = "2020-02-02" - parameter_dict = { - "model_setting_split_dim": "a", - "segment_settings": { - "A1": { - "start_date": A1_start_date, - "end_date": None, - 
"holidays": [], - "regressors": [], - "grid_parameters": {}, - "cv_settings": {}, - }, - "A2": { - "start_date": A2_start_date, - "end_date": None, - "holidays": [], - "regressors": [], - "grid_parameters": {}, - "cv_settings": {}, - }, + parameter_list = [ + { + "segment": {"a": "A1"}, + "start_date": A1_start_date, + "end_date": None, + "holidays": [], + "regressors": [], + "grid_parameters": {}, + "cv_settings": {}, }, - } + { + "segment": {"a": "A2"}, + "start_date": A2_start_date, + "end_date": None, + "holidays": [], + "regressors": [], + "grid_parameters": {}, + "cv_settings": {}, + }, + ] - parameter_dotmap = DotMap(parameter_dict) predict_start_date = TEST_DATE_STR predict_end_date = TEST_PREDICT_END_STR forecast = FunnelForecast( model_type="test", - parameters=parameter_dotmap, - use_holidays=None, + parameters=parameter_list, + use_all_us_holidays=None, start_date=predict_start_date, end_date=predict_end_date, metric_hub=None, @@ -931,42 +917,138 @@ def test_set_segment_models(): assert checkval == expectedval +def test_set_segment_models_multiple(): + """test the set_segment_models method + with segments on multiple columns""" + # set arbitrary dates + # they're only used to make sure segments are set correctly + A1B1_start_date = "2018-01-01" + A1B2_start_date = "2019-01-01" + A2B1_start_date = "2020-02-02" + A2B2_start_date = "2021-02-02" + parameter_list = [ + { + "segment": {"a": "A1", "b": "B1"}, + "start_date": A1B1_start_date, + "end_date": None, + "holidays": [], + "regressors": [], + "grid_parameters": {}, + "cv_settings": {}, + }, + { + "segment": {"a": "A1", "b": "B2"}, + "start_date": A1B2_start_date, + "end_date": None, + "holidays": [], + "regressors": [], + "grid_parameters": {}, + "cv_settings": {}, + }, + { + "segment": {"a": "A2", "b": "B1"}, + "start_date": A2B1_start_date, + "end_date": None, + "holidays": [], + "regressors": [], + "grid_parameters": {}, + "cv_settings": {}, + }, + { + "segment": {"a": "A2", "b": "B2"}, + 
"start_date": A2B2_start_date, + "end_date": None, + "holidays": [], + "regressors": [], + "grid_parameters": {}, + "cv_settings": {}, + }, + ] + + predict_start_date = TEST_DATE_STR + predict_end_date = TEST_PREDICT_END_STR + + forecast = FunnelForecast( + model_type="test", + parameters=parameter_list, + use_all_us_holidays=None, + start_date=predict_start_date, + end_date=predict_end_date, + metric_hub=None, + ) + + observed_data = pd.DataFrame( + {"a": ["A1", "A1", "A2", "A2", "A2"], "b": ["B1", "B2", "B1", "B2", "B2"]} + ) + + segment_list = ["a", "b"] + + forecast._set_segment_models( + observed_df=observed_data, segment_column_list=segment_list + ) + + # put the segments and the start date in the same dictionary to make + # comparison easier + # the important thing to check is that all possible combinations + # of segments are present and that each has the parameters set properly + # start_date is a stand-in for these parameters and + # is determined by the value of a as specified in parameter_list + check_segment_models = [ + dict(**el.segment, **{"start_date": el.start_date}) + for el in forecast.segment_models + ] + expected = [ + {"a": "A1", "b": "B1", "start_date": A1B1_start_date}, + {"a": "A1", "b": "B2", "start_date": A1B2_start_date}, + {"a": "A2", "b": "B1", "start_date": A2B1_start_date}, + {"a": "A2", "b": "B2", "start_date": A2B2_start_date}, + ] + + # can't make a set of dicts for comparison + # so sort the lists and compare each element + compare_sorted = zip( + sorted(check_segment_models, key=lambda x: (x["a"], x["b"])), + sorted(expected, key=lambda x: (x["a"], x["b"])), + ) + + for checkval, expectedval in compare_sorted: + assert checkval == expectedval + + def test_set_segment_models_exception(): """test the exception for segment_models where and exception is raised if a model_setting_split_dim is specified that isn't in the data""" A1_start_date = "2018-01-01" A2_start_date = "2020-02-02" - parameter_dict = { - 
"model_setting_split_dim": "c", # not in data - "segment_settings": { - "A1": { - "start_date": A1_start_date, - "end_date": None, - "holidays": [], - "regressors": [], - "grid_parameters": {}, - "cv_settings": {}, - }, - "A2": { - "start_date": A2_start_date, - "end_date": None, - "holidays": [], - "regressors": [], - "grid_parameters": {}, - "cv_settings": {}, - }, + parameter_list = [ + { + "segment": {"c": "A1"}, + "start_date": A1_start_date, + "end_date": None, + "holidays": [], + "regressors": [], + "grid_parameters": {}, + "cv_settings": {}, }, - } + { + "segment": {"c": "A2"}, + "start_date": A2_start_date, + "end_date": None, + "holidays": [], + "regressors": [], + "grid_parameters": {}, + "cv_settings": {}, + }, + ] - parameter_dotmap = DotMap(parameter_dict) predict_start_date = TEST_DATE_STR predict_end_date = TEST_PREDICT_END_STR forecast = FunnelForecast( model_type="test", - parameters=parameter_dotmap, - use_holidays=None, + parameters=parameter_list, + use_all_us_holidays=None, start_date=predict_start_date, end_date=predict_end_date, metric_hub=None, @@ -980,7 +1062,7 @@ def test_set_segment_models_exception(): with pytest.raises( ValueError, - match="model_setting_split_dim set to c which is not among segment columns: a,b", + match="Segment keys missing from metric hub segments: c", ): forecast._set_segment_models( observed_df=observed_data, segment_column_list=segment_list diff --git a/jobs/kpi-forecasting/kpi_forecasting/tests/test_performance_analysis.py b/jobs/kpi-forecasting/kpi_forecasting/tests/test_performance_analysis.py index edbc2cbb..3e4f0120 100644 --- a/jobs/kpi-forecasting/kpi_forecasting/tests/test_performance_analysis.py +++ b/jobs/kpi-forecasting/kpi_forecasting/tests/test_performance_analysis.py @@ -58,6 +58,7 @@ def directory_of_configs(tmp_path_factory): "dataset": "y", "table": "z", }, + "metric_hub": {}, } f4 = tmpdir / "config_nosegments1_1.yaml" f5 = tmpdir / "config_nosegments1_2.yaml" @@ -73,6 +74,7 @@ def 
directory_of_configs(tmp_path_factory): "dataset": "q", "table": "z", }, + "metric_hub": {}, } f6 = tmpdir / "config_nosegments2_1.yaml" @@ -91,6 +93,7 @@ def get_forecast_performance_config(tmp_path_factory): "dataset": "", "table": "", }, + "metric_hub": {}, } f1 = tmpdir / "config.yaml" with open(f1, "w") as outfile: diff --git a/jobs/kpi-forecasting/kpi_forecasting/tests/test_prophet_forecast.py b/jobs/kpi-forecasting/kpi_forecasting/tests/test_prophet_forecast.py index 1e211375..adc9c4ba 100644 --- a/jobs/kpi-forecasting/kpi_forecasting/tests/test_prophet_forecast.py +++ b/jobs/kpi-forecasting/kpi_forecasting/tests/test_prophet_forecast.py @@ -2,7 +2,6 @@ from dateutil.relativedelta import relativedelta import pandas as pd -from dotmap import DotMap import numpy as np import pytest import collections @@ -34,14 +33,13 @@ def forecast(): }, } - parameter_dotmap = DotMap(parameter_dict) predict_start_date = TEST_DATE_NEXT_DAY_STR # arbitarily set it a couple months in the future predict_end_date = (TEST_DATE + relativedelta(months=2)).strftime("%Y-%m-%d") return ProphetForecast( model_type="test", - parameters=parameter_dotmap, - use_holidays=None, + parameters=parameter_dict, + use_all_us_holidays=None, start_date=predict_start_date, end_date=predict_end_date, metric_hub=None, @@ -550,8 +548,8 @@ def test_summarize_non_overlapping_day(): forecast = ProphetForecast( model_type="test", - parameters=DotMap(), - use_holidays=None, + parameters={}, + use_all_us_holidays=None, start_date=predict_start_date, end_date=predict_end_date, metric_hub=None, @@ -668,8 +666,8 @@ def test_summarize_non_overlapping_month(): forecast = ProphetForecast( model_type="test", - parameters=DotMap(), - use_holidays=None, + parameters={}, + use_all_us_holidays=None, start_date=predict_start_date, end_date=predict_end_date, metric_hub=None, @@ -787,8 +785,8 @@ def test_summarize_overlapping_day(): forecast = ProphetForecast( model_type="test", - parameters=DotMap(), - use_holidays=None, + 
parameters={}, + use_all_us_holidays=None, start_date=predict_start_date, end_date=predict_end_date, metric_hub=None, @@ -901,8 +899,8 @@ def test_summarize_overlapping_month(): forecast = ProphetForecast( model_type="test", - parameters=DotMap(), - use_holidays=None, + parameters={}, + use_all_us_holidays=None, start_date=predict_start_date, end_date=predict_end_date, metric_hub=None, diff --git a/jobs/kpi-forecasting/requirements.txt b/jobs/kpi-forecasting/requirements.txt index 218d688a..cae076e6 100644 --- a/jobs/kpi-forecasting/requirements.txt +++ b/jobs/kpi-forecasting/requirements.txt @@ -10,7 +10,6 @@ contourpy==1.1.0 convertdate==2.4.0 cycler==0.11.0 db-dtypes==1.1.1 -dotmap==1.3.30 ephem==4.1.4 exceptiongroup==1.1.1 fonttools==4.40.0