# Disable warnings
import warnings
="ignore")
warnings.simplefilter(actionimport matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from numpyro import distributions as dist
from sktime.forecasting.compose import ForecastingPipeline
from sktime.transformations.series.fourier import FourierFeatures
from prophetverse.datasets.loaders import load_pedestrian_count
Forecasting count data
This tutorial shows how to use the prophetverse
library to model count data, with a prophet-like model that uses Negative Binomial likelihood. Many timeseries are composed of counts, which are non-negative integers. For example, the number of cars that pass through a toll booth in a given hour, the number of people who visit a website in a given day, or the number of sales of a product. The original Prophet model struggles to handle this type of data, as it assumes that the data is continuous and normally distributed.
import numpyro
numpyro.enable_x64()
Import dataset
We use a dataset Melbourne Pedestrian Counts
from forecastingdata, which contains the hourly pedestrian counts in Melbourne, Australia, from a set of sensors located in different parts of the city.
= load_pedestrian_count()
y
# We take only one time series for simplicity
= y.loc["T2"]
y
= 24 * 365
split_index = y.iloc[:split_index], y.iloc[split_index + 1 : split_index * 2 + 1] y_train, y_test
Letโs plot a section of the time series to see how it looks like:
display(y_train.head())24 * 21].plot(figsize=(10, 3), marker="o", color="black", legend=True)
y_train.iloc[: plt.show()
pedestrian_count | |
---|---|
timestamp | |
2009-05-01 00:00 | 52.0 |
2009-05-01 01:00 | 34.0 |
2009-05-01 02:00 | 19.0 |
2009-05-01 03:00 | 14.0 |
2009-05-01 04:00 | 15.0 |
The full dataset is actually large, and plotting it all at once does not help a lot. Either way, letโs plot the full dataset to see how it looks like:
= y_train["pedestrian_count"].rename("Train").plot(figsize=(20, 7))
ax "pedestrian_count"].rename("Test").plot(ax=ax)
y_test[
ax.legend() plt.show()
Fitting models
The series has some clear patterns: a daily seasonality, a weekly seasonality, and a yearly seasonality. It also has many zeros, and a model assuming normal distributed observations would not be able to capture this.
First, letโs fit and forecast with the standard prophet, then see how the negative binomial model performs.
Prophet with normal likelihood
In this case, we will see how the model will output non-sensical negative values. The probabilistic intervals, mainly, will output values much lower than the support of the timeseries.
from prophetverse.effects.fourier import LinearFourierSeasonality
from prophetverse.effects.trend import FlatTrend
from prophetverse.engine import MAPInferenceEngine
from prophetverse.engine.optimizer import CosineScheduleAdamOptimizer, LBFGSSolver
from prophetverse.sktime import Prophetverse
from prophetverse.utils.regex import no_input_columns
# Here we set the prior for the seasonality effect
# And the coefficients for it
= [
exogenous_effects
("seasonality",
LinearFourierSeasonality(=[24, 24 * 7, 24 * 365.5],
sp_list=[2, 2, 10],
fourier_terms_list="H",
freq=0.1,
prior_scale="multiplicative",
effect_mode
),
no_input_columns,
),
]
= Prophetverse(
model =FlatTrend(),
trend=exogenous_effects,
exogenous_effects=MAPInferenceEngine(),
inference_engine
)=y_train) model.fit(y
Prophetverse(exogenous_effects=[('seasonality', LinearFourierSeasonality(effect_mode='multiplicative', fourier_terms_list=[2, 2, 10], freq='H', prior_scale=0.1, sp_list=[24, 168, 8772.0]), '^$')], inference_engine=MAPInferenceEngine(), trend=FlatTrend())Please rerun this cell to show the HTML repr or trust the notebook.
Prophetverse(exogenous_effects=[('seasonality', LinearFourierSeasonality(effect_mode='multiplicative', fourier_terms_list=[2, 2, 10], freq='H', prior_scale=0.1, sp_list=[24, 168, 8772.0]), '^$')], inference_engine=MAPInferenceEngine(), trend=FlatTrend())
FlatTrend()
LinearFourierSeasonality(effect_mode='multiplicative', fourier_terms_list=[2, 2, 10], freq='H', prior_scale=0.1, sp_list=[24, 168, 8772.0])
MAPInferenceEngine()
Forecasting with the normal model
Below we see the negative predictions, which is clear a limitation of this gaussian likelihood for this kind of data.
= y_train.index[-100:].union(y_test.index[:300])
forecast_horizon = plt.subplots(figsize=(10, 3))
fig, ax = model.predict(fh=forecast_horizon)
preds_normal "pedestrian_count"].rename("Normal model").plot.line(
preds_normal[=ax, legend=False, color="tab:blue"
ax
)="o", color="k", s=2, alpha=0.5, label="Train")
ax.scatter(y_train.index, y_train, marker
ax.scatter(="o", color="green", s=2, alpha=0.5, label="Test"
y_test.index, y_test, marker
)"Prophet with normal likelihood")
ax.set_title(
ax.legend() fig.show()
= model.predict_quantiles(fh=forecast_horizon, alpha=[0.1, 0.9])
quantiles = plt.subplots(figsize=(10, 3))
fig, ax # Plot area between quantiles
ax.fill_between(
quantiles.index.to_timestamp(),0],
quantiles.iloc[:, -1],
quantiles.iloc[:, =0.5,
alpha
)
ax.scatter(
forecast_horizon.to_timestamp(),
y.loc[forecast_horizon],="o",
marker="k",
color=2,
s=1,
alpha
)-1].to_timestamp(), color="r", linestyle="--")
ax.axvline(y_train.index[ fig.show()
Prophet with negative binomial likelihood
The negative binomial likehood has support on the non-negative integers, which makes it perfect for count data. We change the likelihood of the model, and fit it again.
="negbinomial")
model.set_params(likelihood=y_train) model.fit(y
Prophetverse(exogenous_effects=[('seasonality', LinearFourierSeasonality(effect_mode='multiplicative', fourier_terms_list=[2, 2, 10], freq='H', prior_scale=0.1, sp_list=[24, 168, 8772.0]), '^$')], inference_engine=MAPInferenceEngine(), likelihood='negbinomial', trend=FlatTrend())Please rerun this cell to show the HTML repr or trust the notebook.
Prophetverse(exogenous_effects=[('seasonality', LinearFourierSeasonality(effect_mode='multiplicative', fourier_terms_list=[2, 2, 10], freq='H', prior_scale=0.1, sp_list=[24, 168, 8772.0]), '^$')], inference_engine=MAPInferenceEngine(), likelihood='negbinomial', trend=FlatTrend())
FlatTrend()
LinearFourierSeasonality(effect_mode='multiplicative', fourier_terms_list=[2, 2, 10], freq='H', prior_scale=0.1, sp_list=[24, 168, 8772.0])
MAPInferenceEngine()
Forecasting with the negative binomial model
= plt.subplots(figsize=(10, 3))
fig, ax = model.predict(fh=forecast_horizon)
preds_negbin "pedestrian_count"].rename("Neg. Binomial model").plot.line(
preds_negbin[=ax, legend=False, color="tab:purple"
ax
)="o", color="k", s=2, alpha=0.5, label="Train")
ax.scatter(y_train.index, y_train, marker
ax.scatter(="o", color="green", s=2, alpha=0.5, label="Test"
y_test.index, y_test, marker
)"Prophet with Negative Binomial likelihood")
ax.set_title(
ax.legend() fig.show()
= model.predict_quantiles(fh=forecast_horizon, alpha=[0.1, 0.9])
quantiles = plt.subplots(figsize=(10, 3))
fig, ax # Plot area between quantiles
ax.fill_between(
quantiles.index.to_timestamp(),0],
quantiles.iloc[:, -1],
quantiles.iloc[:, =0.5,
alpha
)
ax.scatter(
forecast_horizon.to_timestamp(),
y.loc[forecast_horizon],="o",
marker="k",
color=2,
s=1,
alpha
)-1].to_timestamp(), color="r", linestyle="--")
ax.axvline(y_train.index[ fig.show()
Comparing both forecasts side by side
To make our point clear, we plot both forecasts side by side. Isnโt it nice to have forecasts that make sense? :smile:
= plt.subplots(figsize=(9, 5))
fig, ax "pedestrian_count"].rename("Neg. Binomial model").plot.line(
preds_negbin[=ax, legend=False, color="tab:purple"
ax
)"pedestrian_count"].rename("Normal model").plot.line(
preds_normal[=ax, legend=False, color="tab:blue"
ax
)="o", color="k", s=6, alpha=0.5, label="Train")
ax.scatter(y_train.index, y_train, marker
ax.scatter(="o", color="green", s=6, alpha=0.5, label="Test"
y_test.index, y_test, marker
)"Forecasting pedestrian counts")
ax.set_title(# Remove xlabel
"")
ax.set_xlabel(
ax.axvline(-1].to_timestamp(),
y_train.index[="black",
color="--",
linestyle=0.3,
alpha=-1,
zorder
)="center", ncol=4, bbox_to_anchor=(0.5, 0.8))
fig.legend(loc
fig.tight_layout() fig.show()