Hierarchical timeseries¶
In [1]:
Copied!
# Disable warnings
import warnings
warnings.simplefilter(action="ignore")
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
from sktime.transformations.series.fourier import FourierFeatures
from sktime.forecasting.compose import ForecastingPipeline
from numpyro import distributions as dist
# Disable warnings
import warnings
warnings.simplefilter(action="ignore")
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
from sktime.transformations.series.fourier import FourierFeatures
from sktime.forecasting.compose import ForecastingPipeline
from numpyro import distributions as dist
Import dataset¶
Here we use the tourism dataset with purpose-level aggregation.
In [2]:
Copied!
from sktime.transformations.hierarchical.aggregate import Aggregator
data = pd.read_csv("tourism.csv")
data["Quarter"] = pd.PeriodIndex(data["Quarter"], freq="Q")
data = data.set_index(["Region", "Purpose", "State", "Quarter"])[["Trips"]]
data = data.sort_index()
y = (
Aggregator(flatten_single_levels=False)
.fit_transform(data)
.groupby(level=[1, -1])
.sum()
)
y
from sktime.transformations.hierarchical.aggregate import Aggregator
data = pd.read_csv("tourism.csv")
data["Quarter"] = pd.PeriodIndex(data["Quarter"], freq="Q")
data = data.set_index(["Region", "Purpose", "State", "Quarter"])[["Trips"]]
data = data.sort_index()
y = (
Aggregator(flatten_single_levels=False)
.fit_transform(data)
.groupby(level=[1, -1])
.sum()
)
y
Out[2]:
Trips | ||
---|---|---|
Purpose | Quarter | |
Business | 1998Q1 | 7391.962068 |
1998Q2 | 7701.153191 | |
1998Q3 | 8911.852065 | |
1998Q4 | 7777.766525 | |
1999Q1 | 6917.257864 | |
... | ... | ... |
__total | 2015Q4 | 51518.858354 |
2016Q1 | 54984.720748 | |
2016Q2 | 49583.595515 | |
2016Q3 | 49392.159616 | |
2016Q4 | 54034.155613 |
380 rows × 1 columns
In [3]:
Copied!
LEVELS = y.index.get_level_values(0).unique()
def plot_preds(y=None, preds={}, axs=None):
if axs is None:
fig, axs = plt.subplots(figsize=(12, 8), nrows=int(np.ceil(len(LEVELS)/2)), ncols=2)
ax_generator = iter(axs.flatten())
for level in LEVELS:
ax = next(ax_generator)
if y is not None:
y.loc[level].iloc[:, 0].rename("Observation").plot(ax=ax, label="truth", color="black")
for name, _preds in preds.items():
_preds.loc[level].iloc[:, 0].rename(name).plot(ax=ax, legend=True)
ax.set_title(level)
# Tight layout
plt.tight_layout()
return ax
LEVELS = y.index.get_level_values(0).unique()
def plot_preds(y=None, preds={}, axs=None):
if axs is None:
fig, axs = plt.subplots(figsize=(12, 8), nrows=int(np.ceil(len(LEVELS)/2)), ncols=2)
ax_generator = iter(axs.flatten())
for level in LEVELS:
ax = next(ax_generator)
if y is not None:
y.loc[level].iloc[:, 0].rename("Observation").plot(ax=ax, label="truth", color="black")
for name, _preds in preds.items():
_preds.loc[level].iloc[:, 0].rename(name).plot(ax=ax, legend=True)
ax.set_title(level)
# Tight layout
plt.tight_layout()
return ax
Fit univariate model¶
Because of sktime's amazing interface, we can use the univariate Prophet seamlessly with hierarchical data. We do not reconcile it here, but it could be achieved with the Reconciler
class.
In [4]:
Copied!
from prophetverse.sktime.univariate import Prophet
from prophetverse.effects import LinearFourierSeasonality
from prophetverse.utils import no_input_columns
model = Prophet(
trend="logistic",
capacity_prior_loc=1,
capacity_prior_scale=0.5,
changepoint_interval=8,
changepoint_range=-8,
changepoint_prior_scale=0.01,
exogenous_effects=[
(
"seasonality",
LinearFourierSeasonality(
sp_list=["Y"],
fourier_terms_list=[1],
freq="Q",
prior_scale=0.1,
effect_mode="multiplicative",
),
no_input_columns,
)
],
noise_scale=0.05,
optimizer_name="Adam",
optimizer_kwargs={"step_size": 0.0001},
inference_method="map",
optimizer_steps=50000,
)
model.fit(y=y)
from prophetverse.sktime.univariate import Prophet
from prophetverse.effects import LinearFourierSeasonality
from prophetverse.utils import no_input_columns
model = Prophet(
trend="logistic",
capacity_prior_loc=1,
capacity_prior_scale=0.5,
changepoint_interval=8,
changepoint_range=-8,
changepoint_prior_scale=0.01,
exogenous_effects=[
(
"seasonality",
LinearFourierSeasonality(
sp_list=["Y"],
fourier_terms_list=[1],
freq="Q",
prior_scale=0.1,
effect_mode="multiplicative",
),
no_input_columns,
)
],
noise_scale=0.05,
optimizer_name="Adam",
optimizer_kwargs={"step_size": 0.0001},
inference_method="map",
optimizer_steps=50000,
)
model.fit(y=y)
100%|██████████| 50000/50000 [00:04<00:00, 11490.37it/s, init loss: 28927.5215, avg. loss [47501-50000]: -144.4374] 100%|██████████| 50000/50000 [00:04<00:00, 10610.97it/s, init loss: 42360.3320, avg. loss [47501-50000]: -155.7188] 100%|██████████| 50000/50000 [00:04<00:00, 11723.89it/s, init loss: 11874.3643, avg. loss [47501-50000]: -130.7118] 100%|██████████| 50000/50000 [00:04<00:00, 12386.00it/s, init loss: 20033.0938, avg. loss [47501-50000]: -166.1868] 100%|██████████| 50000/50000 [00:04<00:00, 11643.06it/s, init loss: 21640.5449, avg. loss [47501-50000]: -185.3383]
Out[4]:
Prophet(capacity_prior_loc=1, capacity_prior_scale=0.5, changepoint_interval=8, changepoint_prior_scale=0.01, changepoint_range=-8, exogenous_effects=[('seasonality', LinearFourierSeasonality(effect_mode='multiplicative', fourier_terms_list=[1], freq='Q', prior_scale=0.1, sp_list=['Y']), '^$')], optimizer_kwargs={'step_size': 0.0001}, optimizer_steps=50000)Please rerun this cell to show the HTML repr or trust the notebook.
Prophet(capacity_prior_loc=1, capacity_prior_scale=0.5, changepoint_interval=8, changepoint_prior_scale=0.01, changepoint_range=-8, exogenous_effects=[('seasonality', LinearFourierSeasonality(effect_mode='multiplicative', fourier_terms_list=[1], freq='Q', prior_scale=0.1, sp_list=['Y']), '^$')], optimizer_kwargs={'step_size': 0.0001}, optimizer_steps=50000)
LinearFourierSeasonality(effect_mode='multiplicative', fourier_terms_list=[1], freq='Q', prior_scale=0.1, sp_list=['Y'])
Forecasting¶
In [5]:
Copied!
y
y
Out[5]:
Trips | ||
---|---|---|
Purpose | Quarter | |
Business | 1998Q1 | 7391.962068 |
1998Q2 | 7701.153191 | |
1998Q3 | 8911.852065 | |
1998Q4 | 7777.766525 | |
1999Q1 | 6917.257864 | |
... | ... | ... |
__total | 2015Q4 | 51518.858354 |
2016Q1 | 54984.720748 | |
2016Q2 | 49583.595515 | |
2016Q3 | 49392.159616 | |
2016Q4 | 54034.155613 |
380 rows × 1 columns
In [6]:
Copied!
forecast_horizon = pd.period_range("1997Q1", "2020Q4", freq="Q")
preds = model.predict(fh=forecast_horizon)
plot_preds(y, {"Prophet": preds})
forecast_horizon = pd.period_range("1997Q1", "2020Q4", freq="Q")
preds = model.predict(fh=forecast_horizon)
plot_preds(y, {"Prophet": preds})
Out[6]:
<Axes: title={'center': '__total'}, xlabel='Quarter'>
Hierarchical Prophet¶
Now, let's use the hierarchical prophet to forecast all of the series at once. In addition, we will use the logistic growth model.
In [7]:
Copied!
from prophetverse.sktime.multivariate import HierarchicalProphet
from prophetverse.effects.linear import LinearEffect
model_hier = HierarchicalProphet(
trend="logistic",
changepoint_prior_scale=0.01,
capacity_prior_loc=1,
capacity_prior_scale=0.5,
changepoint_interval=8,
changepoint_range=-8,
noise_scale=0.05,
exogenous_effects=[
(
"seasonality",
LinearFourierSeasonality(
sp_list=["Y"],
fourier_terms_list=[1],
freq="Q",
prior_scale=0.1,
effect_mode="multiplicative",
),
no_input_columns,
)
],
optimizer_name="Adam",
optimizer_kwargs={"step_size": 0.0001},
inference_method="map",
optimizer_steps=300_000,
correlation_matrix_concentration=2,
)
model_hier.fit(y=y)
from prophetverse.sktime.multivariate import HierarchicalProphet
from prophetverse.effects.linear import LinearEffect
model_hier = HierarchicalProphet(
trend="logistic",
changepoint_prior_scale=0.01,
capacity_prior_loc=1,
capacity_prior_scale=0.5,
changepoint_interval=8,
changepoint_range=-8,
noise_scale=0.05,
exogenous_effects=[
(
"seasonality",
LinearFourierSeasonality(
sp_list=["Y"],
fourier_terms_list=[1],
freq="Q",
prior_scale=0.1,
effect_mode="multiplicative",
),
no_input_columns,
)
],
optimizer_name="Adam",
optimizer_kwargs={"step_size": 0.0001},
inference_method="map",
optimizer_steps=300_000,
correlation_matrix_concentration=2,
)
model_hier.fit(y=y)
100%|██████████| 300000/300000 [00:32<00:00, 9247.09it/s, init loss: 8760975360.0000, avg. loss [285001-300000]: -580.5894]
Out[7]:
HierarchicalProphet(capacity_prior_loc=1, capacity_prior_scale=0.5, changepoint_interval=8, changepoint_prior_scale=0.01, changepoint_range=-8, correlation_matrix_concentration=2, exogenous_effects=[('seasonality', LinearFourierSeasonality(effect_mode='multiplicative', fourier_terms_list=[1], freq='Q', prior_scale=0.1, sp_list=['Y']), '^$')], optimizer_kwargs={'step_size': 0.0001}, optimizer_steps=300000, trend='logistic')Please rerun this cell to show the HTML repr or trust the notebook.
HierarchicalProphet(capacity_prior_loc=1, capacity_prior_scale=0.5, changepoint_interval=8, changepoint_prior_scale=0.01, changepoint_range=-8, correlation_matrix_concentration=2, exogenous_effects=[('seasonality', LinearFourierSeasonality(effect_mode='multiplicative', fourier_terms_list=[1], freq='Q', prior_scale=0.1, sp_list=['Y']), '^$')], optimizer_kwargs={'step_size': 0.0001}, optimizer_steps=300000, trend='logistic')
LinearFourierSeasonality(effect_mode='multiplicative', fourier_terms_list=[1], freq='Q', prior_scale=0.1, sp_list=['Y'])
Forecasting with hierarchical prophet¶
In [8]:
Copied!
preds_hier = model_hier.predict(fh=forecast_horizon)
plot_preds(
y,
preds={
"Prophet": preds,
"HierarchicalProphet": preds_hier,
},
)
preds_hier = model_hier.predict(fh=forecast_horizon)
plot_preds(
y,
preds={
"Prophet": preds,
"HierarchicalProphet": preds_hier,
},
)
Out[8]:
<Axes: title={'center': '__total'}, xlabel='Quarter'>
In [9]:
Copied!
from sktime.transformations.hierarchical.aggregate import Aggregator
sites = model_hier.predict_all_sites(fh=forecast_horizon)
sites = Aggregator(flatten_single_levels=True).fit_transform(sites)
for column in sites.columns.difference(["obs"]):
fig, axs = plt.subplots(
figsize=(12, 8), nrows=int(np.ceil(len(LEVELS) / 2)), ncols=2
)
plot_preds(preds={column: sites[[column]]}, axs=axs)
# Set figure title
fig.suptitle(column)
fig.tight_layout()
from sktime.transformations.hierarchical.aggregate import Aggregator
sites = model_hier.predict_all_sites(fh=forecast_horizon)
sites = Aggregator(flatten_single_levels=True).fit_transform(sites)
for column in sites.columns.difference(["obs"]):
fig, axs = plt.subplots(
figsize=(12, 8), nrows=int(np.ceil(len(LEVELS) / 2)), ncols=2
)
plot_preds(preds={column: sites[[column]]}, axs=axs)
# Set figure title
fig.suptitle(column)
fig.tight_layout()