Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
2a60eef
refactored base_forecast and prophet_forecast to enable easier testing
jaredsnyder Jul 9, 2024
340fabf
Apply suggestions from code review
jaredsnyder Jul 10, 2024
6c7d3f2
add test for fit
jaredsnyder Jul 10, 2024
38e721d
revert signatures
jaredsnyder Jul 10, 2024
9b17337
made timezone-aware stamps naive
jaredsnyder Jul 10, 2024
90a822e
finished base_forecast tests
jaredsnyder Jul 10, 2024
72fabef
added tests for prophet class
jaredsnyder Jul 11, 2024
1ece1dd
linting
jaredsnyder Jul 11, 2024
606e2e4
fixed divide by zero
jaredsnyder Jul 11, 2024
585f2ca
linting again
jaredsnyder Jul 11, 2024
97bd46c
adding tests to funnel_forecast
jaredsnyder Jul 23, 2024
0e0ea91
Merge branch 'main' into kpi_forecasting_funnel_unit_tests
jaredsnyder Jul 23, 2024
c35247d
added tests for funnel_forecast
jaredsnyder Jul 29, 2024
e54d2c3
Merge branch 'main' into kpi_forecasting_funnel_unit_tests
jaredsnyder Jul 29, 2024
6ab0527
feat(workday):remove unwanted fields (#249)
JCMOSCON1976 Jul 29, 2024
07e5388
fix(exit):Added sys.exit() call (#250)
JCMOSCON1976 Jul 30, 2024
b102a7a
fix issue with call to _get_crossvalidation_metric
jaredsnyder Jul 30, 2024
0726287
fixed type check
jaredsnyder Aug 5, 2024
65f8e27
Merge branch 'main' into kpi_forecasting_funnel_unit_tests
jaredsnyder Aug 5, 2024
d8db825
added string case to aggregate_to_period and added tests
jaredsnyder Aug 6, 2024
6b6dac6
merge main
jaredsnyder Aug 7, 2024
2358ee3
update
jaredsnyder Aug 7, 2024
83aa229
revert file
jaredsnyder Aug 7, 2024
d5a0e63
added more tests to prophet_forecast
jaredsnyder Aug 8, 2024
f551f4c
Update jobs/kpi-forecasting/kpi_forecasting/models/base_forecast.py
jaredsnyder Aug 9, 2024
1a63912
Brad easy fixes
jaredsnyder Aug 9, 2024
6a8c90c
remove magic year
jaredsnyder Aug 12, 2024
325fd1d
feat(code):increasing the max_limit from 10 to 40. (#259)
JCMOSCON1976 Aug 7, 2024
cc4dc69
typo
m-d-bowerman Aug 13, 2024
9d8ed62
revert bugfix in _add_regressors
jaredsnyder Aug 14, 2024
3b5d8d1
update tests to reflect reversion
jaredsnyder Aug 14, 2024
3e7d056
Merge branch 'main' into kpi_forecast_add_more_prophet_tests
jaredsnyder Aug 14, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 18 additions & 1 deletion jobs/kpi-forecasting/kpi_forecasting/models/base_forecast.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ def summarize(
Returns:
pd.DataFrame: metric dataframe for all metrics and aggregations
"""
self.summary_df = pd.concat(
summary_df = pd.concat(
[
self._summarize(
self.forecast_df,
Expand All @@ -202,4 +202,21 @@ def summarize(
]
)

# add Metric Hub metadata columns
summary_df["metric_alias"] = self.metric_hub.alias.lower()
summary_df["metric_hub_app_name"] = self.metric_hub.app_name.lower()
summary_df["metric_hub_slug"] = self.metric_hub.slug.lower()
summary_df["metric_start_date"] = pd.to_datetime(self.metric_hub.min_date)
summary_df["metric_end_date"] = pd.to_datetime(self.metric_hub.max_date)
summary_df["metric_collected_at"] = self.collected_at

# add forecast model metadata columns
summary_df["forecast_start_date"] = self.start_date
summary_df["forecast_end_date"] = self.end_date
summary_df["forecast_trained_at"] = self.trained_at
summary_df["forecast_predicted_at"] = self.predicted_at
summary_df["forecast_parameters"] = self.metadata_params

self.summary_df = summary_df

return self.summary_df
31 changes: 9 additions & 22 deletions jobs/kpi-forecasting/kpi_forecasting/models/prophet_forecast.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,20 @@ class ProphetForecast(BaseForecast):
def column_names_map(self) -> Dict[str, str]:
    """Return the column renames: ``submission_date`` -> ``ds``, ``value`` -> ``y``."""
    renames = {"submission_date": "ds", "value": "y"}
    return renames

def _fit(self, observed_df) -> None:
self.model = prophet.Prophet(
**self.parameters,
def _build_model(self, parameter_dict):
    """Construct an unfitted Prophet model from a set of parameters.

    Args:
        parameter_dict: keyword arguments unpacked into the
            ``prophet.Prophet`` constructor.

    Returns:
        The configured ``prophet.Prophet`` instance. It is not fit here;
        fitting is left to the caller.
    """
    model = prophet.Prophet(
        **parameter_dict,
        uncertainty_samples=self.number_of_simulations,
        mcmc_samples=0,
    )

    # Holidays are registered on the local model being built, never on
    # self.model, which the caller assigns only after this method returns.
    if self.use_holidays:
        model.add_country_holidays(country_name="US")

    return model

def _fit(self, observed_df) -> None:
self.model = self._build_model(self.parameters)

# Modify observed data to have column names that Prophet expects, and fit
# the model
Expand Down Expand Up @@ -235,24 +240,6 @@ def _summarize(
# add summary metadata columns
df["aggregation_period"] = period.lower()

# reorder columns to make interpretation easier
df = df[["submission_date", "aggregation_period", "source", "measure", "value"]]

# add Metric Hub metadata columns
df["metric_alias"] = self.metric_hub.alias.lower()
df["metric_hub_app_name"] = self.metric_hub.app_name.lower()
df["metric_hub_slug"] = self.metric_hub.slug.lower()
df["metric_start_date"] = pd.to_datetime(self.metric_hub.min_date)
df["metric_end_date"] = pd.to_datetime(self.metric_hub.max_date)
df["metric_collected_at"] = self.collected_at

# add forecast model metadata columns
df["forecast_start_date"] = self.start_date
df["forecast_end_date"] = self.end_date
df["forecast_trained_at"] = self.trained_at
df["forecast_predicted_at"] = self.predicted_at
df["forecast_parameters"] = self.metadata_params

return df

def _summarize_legacy(self) -> pd.DataFrame:
Expand Down
78 changes: 62 additions & 16 deletions jobs/kpi-forecasting/kpi_forecasting/tests/test_base_forecast.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,25 @@
from typing import List
import collections
from datetime import date, datetime
from dateutil.relativedelta import relativedelta

import pytest
import pandas as pd
from dotmap import DotMap
import numpy as np
from datetime import datetime, timedelta, timezone
from datetime import timedelta, timezone


from kpi_forecasting.models.base_forecast import BaseForecast

# Arbitrary reference date used by the tests. The constants below are all
# derived from it, so changing TEST_DATE keeps them consistent.
TEST_DATE = date(2024, 1, 1)
TEST_DATE_STR = TEST_DATE.strftime("%Y-%m-%d")
# The day after TEST_DATE, computed rather than hard-coded.
TEST_DATE_NEXT_DAY = TEST_DATE + timedelta(days=1)
TEST_DATE_NEXT_DAY_STR = TEST_DATE_NEXT_DAY.strftime("%Y-%m-%d")
# Two calendar months after TEST_DATE; passed as end_date in the tests.
TEST_PREDICT_END = TEST_DATE + relativedelta(months=2)
TEST_PREDICT_END_STR = TEST_PREDICT_END.strftime("%Y-%m-%d")


class BadClass(BaseForecast):
    """Subclass of BaseForecast that adds no attributes or methods.

    NOTE(review): presumably the subject of test_not_implemented -- confirm.
    """
Expand All @@ -29,8 +40,9 @@ def _get_observed_data(self):
self.observed_df = pd.DataFrame(
{
"submission_date": [
pd.to_datetime("2020-01-01"),
pd.to_datetime("1990-01-01"),
TEST_DATE,
TEST_DATE
- relativedelta(years=1), # just an arbitrary date in the past
]
}
)
Expand Down Expand Up @@ -76,8 +88,8 @@ def test_not_implemented():


def test_post_init(good_class):
start_date = "2124-01-01"
end_date = "2124-02-02"
start_date = TEST_DATE_STR
end_date = TEST_PREDICT_END_STR
good_class = good_class(
model_type="test",
parameters=DotMap(),
Expand Down Expand Up @@ -108,7 +120,7 @@ def test_post_init_default_dates(good_class):
)
# this is the max date of self.observed_df['submission_date'] plus one day,
# from the object definition
start_date = pd.to_datetime("2020-01-02")
start_date = TEST_DATE_NEXT_DAY
end_date = (
datetime.now(timezone.utc).replace(tzinfo=None) + timedelta(weeks=78)
).date()
Expand All @@ -123,24 +135,24 @@ def test_fit(good_class):
model_type="test",
parameters=DotMap(),
use_holidays=None,
start_date="2124-01-01",
end_date="2124-02-02",
start_date=TEST_DATE_STR,
end_date=TEST_PREDICT_END_STR,
metric_hub=None,
)
good_class.fit()
assert good_class.model

#
assert good_class.model.is_fit == pd.to_datetime("2020-01-01")
# model sets is_fit to the largest day in the observed data
assert good_class.model.is_fit == TEST_DATE


def test_predict_and_validate(good_class):
good_class = good_class(
model_type="test",
parameters=DotMap(),
use_holidays=None,
start_date="2124-01-01",
end_date="2124-02-02",
start_date=TEST_DATE_STR,
end_date=TEST_PREDICT_END_STR,
metric_hub=None,
)
# overwrite date range set in __post_init__
Expand All @@ -154,12 +166,24 @@ def test_summarize(good_class):
model_type="test",
parameters=DotMap(),
use_holidays=None,
start_date="2124-01-01",
end_date="2124-02-02",
start_date=TEST_DATE_STR,
end_date=TEST_PREDICT_END_STR,
metric_hub=None,
)
good_class.forecast_df = np.array([1, 2])
good_class.observed_df = np.array([3, 4])
MetricHub = collections.namedtuple(
"MetricHub",
["alias", "app_name", "slug", "min_date", "max_date"],
)

dummy_metric_hub = MetricHub("", "", "", TEST_DATE_STR, TEST_DATE_STR)

# add it here rather than in __init__ so it doesn't try to load data
good_class.metric_hub = dummy_metric_hub
good_class.trained_at = ""
good_class.predicted_at = ""

number_val = 10
output = good_class.summarize(
periods=["a", "b", "c"], numpy_aggregations=["sum"], percentiles=["percentiles"]
Expand All @@ -170,5 +194,27 @@ def test_summarize(good_class):
for el in ["a", "b", "c"]
]
)
assert output.reset_index(drop=True).equals(expected_output)
assert good_class.summary_df.reset_index(drop=True).equals(expected_output)
# the metadata column values are not compared with assert_frame_equal;
# just make sure the columns are present
metadata_columns = {
"metric_alias",
"metric_hub_app_name",
"metric_hub_slug",
"metric_start_date",
"metric_end_date",
"metric_collected_at",
"forecast_start_date",
"forecast_end_date",
"forecast_trained_at",
"forecast_predicted_at",
"forecast_parameters",
}
assert set(expected_output.columns) | metadata_columns == set(output.columns)

pd.testing.assert_frame_equal(
output[expected_output.columns].reset_index(drop=True), expected_output
)
pd.testing.assert_frame_equal(
good_class.summary_df[expected_output.columns].reset_index(drop=True),
expected_output,
)
Loading