point metrics

%23%20%2F%2F%2F%20script%0A%23%20requires-python%20%3D%20%22%3E%3D3.11%22%0A%23%20dependencies%20%3D%20%5B%0A%23%20%20%20%20%20%22scikit-learn%22%2C%0A%23%20%20%20%20%20%22yohou%5Bplotting%5D%22%2C%0A%23%20%5D%0A%23%20%2F%2F%2F%0A%0Aimport%20marimo%0A%0A__generated_with%20%3D%20%220.23.14%22%0Aapp%20%3D%20marimo.App(width%3D%22medium%22)%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_()%3A%0A%20%20%20%20import%20marimo%20as%20mo%0A%0A%20%20%20%20return%20(mo%2C)%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%20Point%20Metrics%20for%20Forecast%20Evaluation%0A%0A%20%20%20%20Yohou%20provides%20a%20comprehensive%20set%20of%20point%20forecast%20metrics%2C%20all%20following%0A%20%20%20%20sklearn's%20scorer%20API%20with%20%60fit%60%20%2F%20%60score%60.%20Each%20metric%20supports%20flexible%0A%20%20%20%20aggregation%20across%20time%2C%20components%2C%20and%20panel%20groups.%0A%0A%20%20%20%20**Prerequisites%3A**%20Basic%20understanding%20of%20forecast%20error%20metrics.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_()%3A%0A%20%20%20%20from%20copy%20import%20deepcopy%0A%0A%20%20%20%20from%20sklearn.linear_model%20import%20Ridge%0A%0A%20%20%20%20from%20yohou.datasets%20import%20fetch_tourism_monthly%0A%20%20%20%20from%20yohou.metrics%20import%20(%0A%20%20%20%20%20%20%20%20MeanAbsoluteError%2C%0A%20%20%20%20%20%20%20%20MeanAbsolutePercentageError%2C%0A%20%20%20%20%20%20%20%20MeanAbsoluteScaledError%2C%0A%20%20%20%20%20%20%20%20MeanSquaredError%2C%0A%20%20%20%20%20%20%20%20MedianAbsoluteError%2C%0A%20%20%20%20%20%20%20%20RootMeanSquaredError%2C%0A%20%20%20%20%20%20%20%20RootMeanSquaredScaledError%2C%0A%20%20%20%20%20%20%20%20SymmetricMeanAbsolutePercentageError%2C%0A%20%20%20%20)%0A%20%20%20%20from%20yohou.model_selection%20import%20train_test_split%0A%20%20%20%20from%20yohou.plotting%20import%20(%0A%20%20%20%20%20%20%20%20plot_forecast%2C%0A%20%20%20%20%20%20%20%20plot_score_per_vintage%2C%0A%20%20%20%20%20%20%20%20plot_score_summary%2C%0A%20%20%20%20%20%20%20%20plot_score_time_series%2C%0A%20%20%20%20%20%20%20%20plot_time_series%2C%0A%20%20%20%20)%0A%20%20%20%20from%20yohou.point%20import%20PointReductionForecaster%2C%20SeasonalNaive%0A%20%20%20%20from%20yohou.preprocessing%20import%20LagTransformer%0A%0A%20%20%20%20return%20(%0A%20%20%20%20%20%20%20%20LagTransformer%2C%0A%20%20%20%20%20%20%20%20MeanAbsoluteError%2C%0A%20%20%20%20%20%20%20%20MeanAbsolutePercentageError%2C%0A%20%20%20%20%20%20%20%20MeanAbsoluteScaledError%2C%0A%20%20%20%20%20%20%20%20MeanSquaredError%2C%0A%20%20%20%20%20%20%20%20MedianAbsoluteError%2C%0A%20%20%20%20%20%20%20%20PointReductionForecaster%2C%0A%20%20%20%20%20%20%20%20Ridge%2C%0A%20%20%20%20%20%20%20%20RootMeanSquaredError%2C%0A%20%20%20%20%20%20%20%20RootMeanSquaredScaledError%2C%0A%20%20%20%20%20%20%20%20SeasonalNaive%2C%0A%20%20%20%20%20%20%20%20SymmetricMeanAbsolutePercentageError%2C%0A%20%20%20%20%20%20%20%20deepcopy%2C%0A%20%20%20%20%20%20%20%20fetch_tourism_monthly%2C%0A%20%20%20%20%20%20%20%20plot_forecast%2C%0A%20%20%20%20%20%20%20%20plot_score_per_vintage%2C%0A%20%20%20%20%20%20%20%20plot_score_summary%2C%0A%20%20%20%20%20%20%20%20plot_score_time_series%2C%0A%20%20%20%20%20%20%20%20plot_time_series%2C%0A%20%20%20%20%20%20%20%20train_test_split%2C%0A%20%20%20%20)%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%23%201.%20Generate%20Forecasts%20for%20Evaluation%0A%0A%20%20%20%20We%20fit%20two%20forecasters%20on%20the%20Tourism%20Monthly%20dataset%3A%20a%20%5B%60SeasonalNaive%60%5D(%2Fpages%2Fapi%2Fgenerated%2Fyohou.point.naive.SeasonalNaive%2F)%0A%20%20%20%20baseline%20and%20a%20%5B%60PointReductionForecaster%60%5D(%2Fpages%2Fapi%2Fgenerated%2Fyohou.point.reduction.PointReductionForecaster%2F)%20with%20a%20%5B%60Ridge%60%5D(https%3A%2F%2Fscikit-learn.org%2Fstable%2Fmodules%2Fgenerated%2Fsklearn.linear_model.Ridge.html)%20regressor.%20Both%0A%20%20%20%20produce%20predictions%20over%20the%20same%20test%20horizon%2C%20giving%20us%20two%20sets%20of%0A%20%20%20%20forecasts%20to%20compare%20across%20all%20metrics.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(%0A%20%20%20%20LagTransformer%2C%0A%20%20%20%20PointReductionForecaster%2C%0A%20%20%20%20Ridge%2C%0A%20%20%20%20SeasonalNaive%2C%0A%20%20%20%20fetch_tourism_monthly%2C%0A%20%20%20%20train_test_split%2C%0A)%3A%0A%20%20%20%20y%20%3D%20fetch_tourism_monthly().frame.select(%22time%22%2C%20%22T1__tourists%22).drop_nulls().rename(%7B%22T1__tourists%22%3A%20%22tourists%22%7D)%0A%0A%20%20%20%20y_train%2C%20y_test%20%3D%20train_test_split(y%2C%20test_size%3D0.2)%0A%20%20%20%20fh%20%3D%20len(y_test)%0A%0A%20%20%20%20naive%20%3D%20SeasonalNaive(seasonality%3D12)%0A%20%20%20%20naive.fit(y_train%2C%20forecasting_horizon%3Dfh)%0A%20%20%20%20y_pred_naive%20%3D%20naive.predict(forecasting_horizon%3Dfh)%0A%0A%20%20%20%20ridge_fc%20%3D%20PointReductionForecaster(%0A%20%20%20%20%20%20%20%20estimator%3DRidge()%2C%0A%20%20%20%20%20%20%20%20actual_transformer%3DLagTransformer(lag%3Dlist(range(1%2C%2013)))%2C%0A%20%20%20%20)%0A%20%20%20%20ridge_fc.fit(y_train%2C%20forecasting_horizon%3Dfh)%0A%20%20%20%20y_pred_ridge%20%3D%20ridge_fc.predict(forecasting_horizon%3Dfh)%0A%0A%20%20%20%20print(f%22Train%3A%20%7Blen(y_train)%7D%2C%20Test%3A%20%7Blen(y_test)%7D%22)%0A%20%20%20%20return%20fh%2C%20ridge_fc%2C%20y%2C%20y_pred_naive%2C%20y_pred_ridge%2C%20y_test%2C%20y_train%0A%0A%0A%40app.cell%0Adef%20_(plot_time_series%2C%20y)%3A%0A%20%20%20%20plot_time_series(y%2C%20title%3D%22Tourism%20Monthly%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(plot_forecast%2C%20y_pred_naive%2C%20y_pred_ridge%2C%20y_test%2C%20y_train)%3A%0A%20%20%20%20plot_forecast(%0A%20%20%20%20%20%20%20%20y_test%2C%0A%20%20%20%20%20%20%20%20%7B%22Naive%22%3A%20y_pred_naive%2C%20%22Ridge%22%3A%20y_pred_ridge%7D%2C%0A%20%20%20%20%20%20%20%20y_train%3Dy_train%2C%0A%20%20%20%20%20%20%20%20title%3D%22Forecasts%20for%20Evaluation%22%2C%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%23%202.%20MeanAbsoluteError%20(MAE)%0A%0A%20%20%20%20The%20average%20absolute%20difference%20between%20prediction%20and%20actual%20value.%0A%20%20%20%20Easy%20to%20interpret%3A%20measured%20in%20the%20same%20units%20as%20the%20target.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(MeanAbsoluteError%2C%20y_pred_naive%2C%20y_pred_ridge%2C%20y_test%2C%20y_train)%3A%0A%20%20%20%20mae%20%3D%20MeanAbsoluteError()%0A%20%20%20%20mae.fit(y_train)%0A%20%20%20%20print(f%22MAE%20%20Naive%3A%20%7Bmae.score(y_test%2C%20y_pred_naive)%3A.2f%7D%22)%0A%20%20%20%20print(f%22MAE%20%20Ridge%3A%20%7Bmae.score(y_test%2C%20y_pred_ridge)%3A.2f%7D%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(%0A%20%20%20%20MeanAbsoluteError%2C%0A%20%20%20%20plot_score_time_series%2C%0A%20%20%20%20y_pred_naive%2C%0A%20%20%20%20y_pred_ridge%2C%0A%20%20%20%20y_test%2C%0A)%3A%0A%20%20%20%20plot_score_time_series(%0A%20%20%20%20%20%20%20%20MeanAbsoluteError()%2C%0A%20%20%20%20%20%20%20%20y_test%2C%0A%20%20%20%20%20%20%20%20%7B%22Naive%22%3A%20y_pred_naive%2C%20%22Ridge%22%3A%20y_pred_ridge%7D%2C%0A%20%20%20%20%20%20%20%20title%3D%22MAE%20per%20Timestep%22%2C%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%23%203.%20MeanSquaredError%20(MSE)%20and%20RootMeanSquaredError%20(RMSE)%0A%0A%20%20%20%20MSE%20penalizes%20large%20errors%20more%20heavily%20(squared).%20RMSE%20is%20its%20square%0A%20%20%20%20root%2C%20bringing%20units%20back%20to%20the%20original%20scale.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(%0A%20%20%20%20MeanSquaredError%2C%0A%20%20%20%20RootMeanSquaredError%2C%0A%20%20%20%20y_pred_naive%2C%0A%20%20%20%20y_pred_ridge%2C%0A%20%20%20%20y_test%2C%0A%20%20%20%20y_train%2C%0A)%3A%0A%20%20%20%20mse%20%3D%20MeanSquaredError()%0A%20%20%20%20mse.fit(y_train)%0A%20%20%20%20print(f%22MSE%20%20%20Naive%3A%20%7Bmse.score(y_test%2C%20y_pred_naive)%3A.2f%7D%22)%0A%20%20%20%20print(f%22MSE%20%20%20Ridge%3A%20%7Bmse.score(y_test%2C%20y_pred_ridge)%3A.2f%7D%22)%0A%0A%20%20%20%20rmse%20%3D%20RootMeanSquaredError()%0A%20%20%20%20rmse.fit(y_train)%0A%20%20%20%20print(f%22RMSE%20%20Naive%3A%20%7Brmse.score(y_test%2C%20y_pred_naive)%3A.2f%7D%22)%0A%20%20%20%20print(f%22RMSE%20%20Ridge%3A%20%7Brmse.score(y_test%2C%20y_pred_ridge)%3A.2f%7D%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(%0A%20%20%20%20RootMeanSquaredError%2C%0A%20%20%20%20plot_score_time_series%2C%0A%20%20%20%20y_pred_naive%2C%0A%20%20%20%20y_pred_ridge%2C%0A%20%20%20%20y_test%2C%0A)%3A%0A%20%20%20%20plot_score_time_series(%0A%20%20%20%20%20%20%20%20RootMeanSquaredError()%2C%0A%20%20%20%20%20%20%20%20y_test%2C%0A%20%20%20%20%20%20%20%20%7B%22Naive%22%3A%20y_pred_naive%2C%20%22Ridge%22%3A%20y_pred_ridge%7D%2C%0A%20%20%20%20%20%20%20%20title%3D%22RMSE%20per%20Timestep%22%2C%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%23%204.%20MedianAbsoluteError%20(MedianAE)%0A%0A%20%20%20%20The%20median%20of%20absolute%20errors%20which%20is%20more%20robust%20to%20outliers%20than%20MAE.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(MedianAbsoluteError%2C%20y_pred_naive%2C%20y_pred_ridge%2C%20y_test%2C%20y_train)%3A%0A%20%20%20%20medae%20%3D%20MedianAbsoluteError()%0A%20%20%20%20medae.fit(y_train)%0A%20%20%20%20print(f%22MedianAE%20%20Naive%3A%20%7Bmedae.score(y_test%2C%20y_pred_naive)%3A.2f%7D%22)%0A%20%20%20%20print(f%22MedianAE%20%20Ridge%3A%20%7Bmedae.score(y_test%2C%20y_pred_ridge)%3A.2f%7D%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(%0A%20%20%20%20MedianAbsoluteError%2C%0A%20%20%20%20plot_score_time_series%2C%0A%20%20%20%20y_pred_naive%2C%0A%20%20%20%20y_pred_ridge%2C%0A%20%20%20%20y_test%2C%0A)%3A%0A%20%20%20%20plot_score_time_series(%0A%20%20%20%20%20%20%20%20MedianAbsoluteError()%2C%0A%20%20%20%20%20%20%20%20y_test%2C%0A%20%20%20%20%20%20%20%20%7B%22Naive%22%3A%20y_pred_naive%2C%20%22Ridge%22%3A%20y_pred_ridge%7D%2C%0A%20%20%20%20%20%20%20%20title%3D%22MedianAE%20per%20Timestep%22%2C%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%23%205.%20MeanAbsolutePercentageError%20(MAPE)%20and%20sMAPE%0A%0A%20%20%20%20MAPE%20expresses%20error%20as%20a%20percentage%20of%20the%20actual%20value.%0A%20%20%20%20sMAPE%20(symmetric%20MAPE)%20avoids%20the%20asymmetry%20of%20standard%20MAPE%20by%0A%20%20%20%20normalizing%20by%20the%20average%20of%20actual%20and%20predicted%20values.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(%0A%20%20%20%20MeanAbsolutePercentageError%2C%0A%20%20%20%20SymmetricMeanAbsolutePercentageError%2C%0A%20%20%20%20y_pred_naive%2C%0A%20%20%20%20y_pred_ridge%2C%0A%20%20%20%20y_test%2C%0A%20%20%20%20y_train%2C%0A)%3A%0A%20%20%20%20mape%20%3D%20MeanAbsolutePercentageError()%0A%20%20%20%20mape.fit(y_train)%0A%20%20%20%20print(f%22MAPE%20%20%20Naive%3A%20%7Bmape.score(y_test%2C%20y_pred_naive)%3A.4f%7D%22)%0A%20%20%20%20print(f%22MAPE%20%20%20Ridge%3A%20%7Bmape.score(y_test%2C%20y_pred_ridge)%3A.4f%7D%22)%0A%0A%20%20%20%20smape%20%3D%20SymmetricMeanAbsolutePercentageError()%0A%20%20%20%20smape.fit(y_train)%0A%20%20%20%20print(f%22sMAPE%20%20Naive%3A%20%7Bsmape.score(y_test%2C%20y_pred_naive)%3A.4f%7D%22)%0A%20%20%20%20print(f%22sMAPE%20%20Ridge%3A%20%7Bsmape.score(y_test%2C%20y_pred_ridge)%3A.4f%7D%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(%0A%20%20%20%20MeanAbsolutePercentageError%2C%0A%20%20%20%20plot_score_time_series%2C%0A%20%20%20%20y_pred_naive%2C%0A%20%20%20%20y_pred_ridge%2C%0A%20%20%20%20y_test%2C%0A)%3A%0A%20%20%20%20plot_score_time_series(%0A%20%20%20%20%20%20%20%20MeanAbsolutePercentageError()%2C%0A%20%20%20%20%20%20%20%20y_test%2C%0A%20%20%20%20%20%20%20%20%7B%22Naive%22%3A%20y_pred_naive%2C%20%22Ridge%22%3A%20y_pred_ridge%7D%2C%0A%20%20%20%20%20%20%20%20title%3D%22MAPE%20per%20Timestep%22%2C%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%23%206.%20Scaled%20Metrics%20(MASE%2C%20RMSSE)%0A%0A%20%20%20%20Scaled%20metrics%20normalize%20errors%20by%20the%20in-sample%20naive%20forecast%20error.%0A%20%20%20%20They%20require%20%60fit(y_train)%60%20to%20compute%20the%20scaling%20factor.%0A%20%20%20%20A%20score%20%3C%201%20means%20the%20model%20outperforms%20the%20naive%20baseline.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(%0A%20%20%20%20MeanAbsoluteScaledError%2C%0A%20%20%20%20RootMeanSquaredScaledError%2C%0A%20%20%20%20y_pred_naive%2C%0A%20%20%20%20y_pred_ridge%2C%0A%20%20%20%20y_test%2C%0A%20%20%20%20y_train%2C%0A)%3A%0A%20%20%20%20mase%20%3D%20MeanAbsoluteScaledError(seasonality%3D12)%0A%20%20%20%20mase.fit(y_train)%0A%20%20%20%20print(f%22MASE%20%20%20Naive%3A%20%7Bmase.score(y_test%2C%20y_pred_naive)%3A.3f%7D%22)%0A%20%20%20%20print(f%22MASE%20%20%20Ridge%3A%20%7Bmase.score(y_test%2C%20y_pred_ridge)%3A.3f%7D%22)%0A%0A%20%20%20%20rmsse%20%3D%20RootMeanSquaredScaledError(seasonality%3D12)%0A%20%20%20%20rmsse.fit(y_train)%0A%20%20%20%20print(f%22RMSSE%20%20Naive%3A%20%7Brmsse.score(y_test%2C%20y_pred_naive)%3A.3f%7D%22)%0A%20%20%20%20print(f%22RMSSE%20%20Ridge%3A%20%7Brmsse.score(y_test%2C%20y_pred_ridge)%3A.3f%7D%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(%0A%20%20%20%20MeanAbsoluteScaledError%2C%0A%20%20%20%20plot_score_time_series%2C%0A%20%20%20%20y_pred_naive%2C%0A%20%20%20%20y_pred_ridge%2C%0A%20%20%20%20y_test%2C%0A)%3A%0A%20%20%20%20plot_score_time_series(%0A%20%20%20%20%20%20%20%20MeanAbsoluteScaledError(seasonality%3D12)%2C%0A%20%20%20%20%20%20%20%20y_test%2C%0A%20%20%20%20%20%20%20%20%7B%22Naive%22%3A%20y_pred_naive%2C%20%22Ridge%22%3A%20y_pred_ridge%7D%2C%0A%20%20%20%20%20%20%20%20title%3D%22MASE%20per%20Timestep%22%2C%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%23%207.%20Aggregation%20Methods%0A%0A%20%20%20%20By%20default%20%60aggregation_method%3D%22all%22%60%20returns%20a%20single%20scalar.%0A%20%20%20%20Choose%20%60%5B%22stepwise%22%2C%20%22vintagewise%22%5D%60%20or%20%60%22componentwise%22%60%20for%20more%20granular%20results.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(MeanAbsoluteError%2C%20y_pred_ridge%2C%20y_test%2C%20y_train)%3A%0A%20%20%20%20mae_sv%20%3D%20MeanAbsoluteError(aggregation_method%3D%5B%22stepwise%22%2C%20%22vintagewise%22%5D)%0A%20%20%20%20mae_sv.fit(y_train)%0A%20%20%20%20scores_tw%20%3D%20mae_sv.score(y_test%2C%20y_pred_ridge)%0A%20%20%20%20print(%22Stepwise%2Bvintagewise%20MAE%20(first%205%20steps)%3A%22)%0A%20%20%20%20print(scores_tw.head())%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(MeanAbsoluteError%2C%20y_pred_ridge%2C%20y_test%2C%20y_train)%3A%0A%20%20%20%20mae_cw%20%3D%20MeanAbsoluteError(aggregation_method%3D%22componentwise%22)%0A%20%20%20%20mae_cw.fit(y_train)%0A%20%20%20%20scores_cw%20%3D%20mae_cw.score(y_test%2C%20y_pred_ridge)%0A%20%20%20%20print(f%22Componentwise%20MAE%3A%20%7Bscores_cw%7D%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%23%208.%20Model%20Comparison%20Summary%0A%0A%20%20%20%20%5B%60plot_score_summary%60%5D(%2Fpages%2Fapi%2Fgenerated%2Fyohou.plotting.evaluation.plot_score_summary%2F)%20takes%0A%20%20%20%20scorer(s)%2C%20ground%20truth%2C%20and%20a%20dict%20of%20predictions%2C%20then%20renders%20a%20grouped%20bar%20chart%0A%20%20%20%20making%20it%20easy%20to%20spot%20which%20model%20performs%20best%20on%20each%20metric.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(%0A%20%20%20%20MeanAbsoluteError%2C%0A%20%20%20%20MeanAbsolutePercentageError%2C%0A%20%20%20%20RootMeanSquaredError%2C%0A%20%20%20%20plot_score_summary%2C%0A%20%20%20%20y_pred_naive%2C%0A%20%20%20%20y_pred_ridge%2C%0A%20%20%20%20y_test%2C%0A)%3A%0A%20%20%20%20plot_score_summary(%0A%20%20%20%20%20%20%20%20%7B%22MAE%22%3A%20MeanAbsoluteError()%2C%20%22RMSE%22%3A%20RootMeanSquaredError()%2C%20%22MAPE%22%3A%20MeanAbsolutePercentageError()%7D%2C%0A%20%20%20%20%20%20%20%20y_test%2C%0A%20%20%20%20%20%20%20%20%7B%22Naive%22%3A%20y_pred_naive%2C%20%22Ridge%22%3A%20y_pred_ridge%7D%2C%0A%20%20%20%20%20%20%20%20title%3D%22Model%20Comparison%22%2C%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%23%209.%20Classification%20Accuracy%20(Hard%20Classification)%0A%0A%20%20%20%20When%20forecasting%20**categorical%20outcomes**%20(e.g.%2C%20weather%2C%20state%20transitions)%2C%0A%20%20%20%20%5B%60Accuracy%60%5D(%2Fpages%2Fapi%2Fgenerated%2Fyohou.metrics.class_proba.Accuracy%2F)%20measures%0A%20%20%20%20the%20fraction%20of%20time%20steps%20where%20the%20predicted%20class%20matches%20the%20true%20class.%0A%0A%20%20%20%20%3E%20**Caution**%3A%20Accuracy%20can%20be%20misleading%20on%20**imbalanced**%20datasets.%20%20A%20model%0A%20%20%20%20%3E%20that%20always%20predicts%20the%20majority%20class%20can%20score%20high%20without%20learning%0A%20%20%20%20%3E%20anything%20useful.%20For%20imbalanced%20problems%2C%20prefer%20*soft*%20metrics%20like%0A%20%20%20%20%3E%20%5B%60LogLoss%60%5D(%2Fpages%2Fapi%2Fgenerated%2Fyohou.metrics.class_proba.LogLoss%2F)%20or%0A%20%20%20%20%3E%20%5B%60BrierScore%60%5D(%2Fpages%2Fapi%2Fgenerated%2Fyohou.metrics.class_proba.BrierScore%2F)%2C%0A%20%20%20%20%3E%20which%20evaluate%20the%20full%20probability%20distribution%20and%20penalize%20overconfident%0A%20%20%20%20%3E%20wrong%20predictions.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_()%3A%0A%20%20%20%20from%20sklearn.tree%20import%20DecisionTreeClassifier%0A%0A%20%20%20%20from%20yohou.class_proba%20import%20ClassProbaReductionForecaster%0A%20%20%20%20from%20yohou.datasets%20import%20fetch_air_quality_classification%0A%20%20%20%20from%20yohou.metrics%20import%20Accuracy%0A%0A%20%20%20%20return%20(%0A%20%20%20%20%20%20%20%20Accuracy%2C%0A%20%20%20%20%20%20%20%20ClassProbaReductionForecaster%2C%0A%20%20%20%20%20%20%20%20DecisionTreeClassifier%2C%0A%20%20%20%20%20%20%20%20fetch_air_quality_classification%2C%0A%20%20%20%20)%0A%0A%0A%40app.cell%0Adef%20_(%0A%20%20%20%20ClassProbaReductionForecaster%2C%0A%20%20%20%20DecisionTreeClassifier%2C%0A%20%20%20%20LagTransformer%2C%0A%20%20%20%20fetch_air_quality_classification%2C%0A%20%20%20%20train_test_split%2C%0A)%3A%0A%20%20%20%20cls_data%20%3D%20fetch_air_quality_classification()%0A%20%20%20%20cls_y%2C%20cls_X%20%3D%20cls_data.y%2C%20cls_data.X_actual%0A%20%20%20%20cls_y_train%2C%20cls_y_test%2C%20cls_X_train%2C%20cls_X_test%20%3D%20train_test_split(%0A%20%20%20%20%20%20%20%20cls_y%2C%0A%20%20%20%20%20%20%20%20cls_X%2C%0A%20%20%20%20%20%20%20%20test_size%3D200%2C%0A%20%20%20%20)%0A%20%20%20%20cls_fh%20%3D%2024%0A%0A%20%20%20%20cls_forecaster%20%3D%20ClassProbaReductionForecaster(%0A%20%20%20%20%20%20%20%20estimator%3DDecisionTreeClassifier(random_state%3D42)%2C%0A%20%20%20%20%20%20%20%20actual_transformer%3DLagTransformer(lag%3D%5B1%2C%202%2C%203%2C%206%2C%2012%2C%2024%5D)%2C%0A%20%20%20%20)%0A%20%20%20%20cls_forecaster.fit(cls_y_train%2C%20cls_X_train%2C%20forecasting_horizon%3Dcls_fh)%0A%0A%20%20%20%20%23%20predict()%20returns%20hard%20class%20labels%20(argmax%20of%20probabilities)%0A%20%20%20%20cls_y_pred_labels%20%3D%20cls_forecaster.predict(%0A%20%20%20%20%20%20%20%20forecasting_horizon%3Dcls_fh%2C%0A%20%20%20%20)%0A%20%20%20%20%23%20predict_class_proba()%20returns%20the%20full%20probability%20distribution%0A%20%20%20%20cls_y_proba%20%3D%20cls_forecaster.predict_class_proba(%0A%20%20%20%20%20%20%20%20forecasting_horizon%3Dcls_fh%2C%0A%20%20%20%20)%0A%0A%20%20%20%20print(f%22Classes%3A%20%7Bcls_data.classes%7D%22)%0A%20%20%20%20print(%22%5CnHard%20predictions%20(predict)%3A%22)%0A%20%20%20%20print(cls_y_pred_labels)%0A%20%20%20%20print(%22%5CnSoft%20predictions%20(predict_class_proba)%3A%22)%0A%20%20%20%20print(cls_y_proba)%0A%20%20%20%20return%20cls_y_pred_labels%2C%20cls_y_proba%2C%20cls_y_test%2C%20cls_y_train%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%23%23%20Categorical%20Target%20Over%20Time%0A%0A%20%20%20%20%5B%60plot_forecast%60%5D(%2Fpages%2Fapi%2Fgenerated%2Fyohou.plotting.forecasting.plot_forecast%2F)%20auto-detects%20categorical%20columns%20and%20renders%20a%20step%20chart.%0A%20%20%20%20Here%20we%20visualize%20the%20training%20and%20test%20target%20side%20by%20side.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(cls_y_test%2C%20cls_y_train%2C%20plot_forecast)%3A%0A%20%20%20%20plot_forecast(%0A%20%20%20%20%20%20%20%20cls_y_test%2C%0A%20%20%20%20%20%20%20%20cls_y_test%2C%0A%20%20%20%20%20%20%20%20y_train%3Dcls_y_train%2C%0A%20%20%20%20%20%20%20%20n_history%3D100%2C%0A%20%20%20%20%20%20%20%20title%3D%22Air%20Quality%20Target%20(Categorical%20Time%20Series)%22%2C%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%23%23%20Categorical%20Forecast%20vs%20Actual%0A%0A%20%20%20%20The%20hard-label%20predictions%20from%20%60predict()%60%20are%20compared%20against%20the%0A%20%20%20%20true%20classes.%20Dashed%20lines%20show%20the%20forecast%2C%20solid%20lines%20the%20actual.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(cls_y_pred_labels%2C%20cls_y_test%2C%20plot_forecast)%3A%0A%20%20%20%20plot_forecast(%0A%20%20%20%20%20%20%20%20cls_y_test%2C%0A%20%20%20%20%20%20%20%20cls_y_pred_labels%2C%0A%20%20%20%20%20%20%20%20title%3D%22Categorical%20Forecast%20vs%20Actual%22%2C%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%23%23%20Probability%20Forecast%0A%0A%20%20%20%20The%20full%20probability%20distribution%20from%20%60predict_class_proba()%60%20is%20shown%0A%20%20%20%20as%20a%20stacked%20area%20chart.%20Diamond%20markers%20indicate%20the%20true%20class.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(cls_y_proba%2C%20cls_y_test%2C%20plot_forecast)%3A%0A%20%20%20%20plot_forecast(%0A%20%20%20%20%20%20%20%20cls_y_test%2C%0A%20%20%20%20%20%20%20%20cls_y_proba%2C%0A%20%20%20%20%20%20%20%20title%3D%22Class%20Probability%20Forecast%22%2C%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(Accuracy%2C%20cls_y_proba%2C%20cls_y_test)%3A%0A%20%20%20%20cls_y_truth%20%3D%20cls_y_test.head(len(cls_y_proba))%0A%0A%20%20%20%20acc_all%20%3D%20Accuracy()%0A%20%20%20%20acc_all.fit(cls_y_truth)%0A%20%20%20%20print(f%22Accuracy%20(scalar)%3A%20%7Bacc_all.score(cls_y_truth%2C%20cls_y_proba)%3A.4f%7D%22)%0A%0A%20%20%20%20%23%20Stepwise%2Bvintagewise%3A%20aggregate%20across%20time%20dimensions%0A%20%20%20%20acc_sv%20%3D%20Accuracy(aggregation_method%3D%5B%22stepwise%22%2C%20%22vintagewise%22%5D)%0A%20%20%20%20acc_sv.fit(cls_y_truth)%0A%20%20%20%20print(%22%5CnPer-timestep%20accuracy%3A%22)%0A%20%20%20%20print(acc_sv.score(cls_y_truth%2C%20cls_y_proba))%0A%20%20%20%20return%20(cls_y_truth%2C)%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%23%23%20Accuracy%20Over%20Time%0A%0A%20%20%20%20Per-timestep%20accuracy%20for%20the%2024-hour%20forecast%20window.%20A%20score%20of%201.0%0A%20%20%20%20means%20the%20argmax%20prediction%20matched%20the%20true%20class%20at%20that%20step.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(Accuracy%2C%20cls_y_proba%2C%20cls_y_truth%2C%20plot_score_time_series)%3A%0A%20%20%20%20plot_score_time_series(%0A%20%20%20%20%20%20%20%20Accuracy()%2C%0A%20%20%20%20%20%20%20%20cls_y_truth%2C%0A%20%20%20%20%20%20%20%20cls_y_proba%2C%0A%20%20%20%20%20%20%20%20title%3D%22Accuracy%20per%20Timestep%22%2C%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%3E%20**Hard%20vs%20Soft**%3A%20%5B%60Accuracy%60%5D(%2Fpages%2Fapi%2Fgenerated%2Fyohou.metrics.classification.Accuracy%2F)%20scores%201.0%20for%20a%20correct%20prediction%20and%200.0%0A%20%20%20%20%3E%20otherwise%2C%20regardless%20of%20confidence.%20A%20model%20that%20predicts%20the%20right%20class%0A%20%20%20%20%3E%20with%2051%25%20probability%20gets%20the%20same%20Accuracy%20as%20one%20that%20predicts%20with%2099%25.%0A%20%20%20%20%3E%20For%20calibration-aware%20evaluation%2C%20see%20the%20%5Bsoft%20classification%20metrics%5D(%2Fexamples%2Fclass_proba_metrics%2F)%0A%20%20%20%20%3E%20(%5B%60LogLoss%60%5D(%2Fpages%2Fapi%2Fgenerated%2Fyohou.metrics.class_proba.LogLoss%2F)%2C%20%5B%60BrierScore%60%5D(%2Fpages%2Fapi%2Fgenerated%2Fyohou.metrics.class_proba.BrierScore%2F)).%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%23%20Multi-vintage%20Scoring%0A%0A%20%20%20%20The%20%60observe_predict%60%20method%20with%20%60stride%3D1%60%20produces%20one%20forecast%20per%0A%20%20%20%20observation%20point%2C%20creating%20multiple%20*vintages*.%20Each%20vintage%20represents%0A%20%20%20%20a%20different%20forecast%20origin%2C%20so%20you%20can%20analyse%20how%20accuracy%20evolves%20as%0A%20%20%20%20the%20model%20absorbs%20more%20data.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(deepcopy%2C%20fh%2C%20ridge_fc%2C%20y_test)%3A%0A%20%20%20%20_vintage_model%20%3D%20deepcopy(ridge_fc)%0A%20%20%20%20y_pred_vintages%20%3D%20_vintage_model.observe_predict(%0A%20%20%20%20%20%20%20%20y%3Dy_test%2C%0A%20%20%20%20%20%20%20%20stride%3D1%2C%0A%20%20%20%20%20%20%20%20forecasting_horizon%3Dfh%2C%0A%20%20%20%20)%0A%20%20%20%20print(f%22Vintages%3A%20%7By_pred_vintages%5B'vintage_time'%5D.n_unique()%7D%22)%0A%20%20%20%20y_pred_vintages.head(10)%0A%20%20%20%20return%20(y_pred_vintages%2C)%0A%0A%0A%40app.cell%0Adef%20_(MeanAbsoluteError%2C%20y_train)%3A%0A%20%20%20%20vintage_scorer%20%3D%20MeanAbsoluteError()%0A%20%20%20%20vintage_scorer.fit(y_train)%0A%20%20%20%20return%20(vintage_scorer%2C)%0A%0A%0A%40app.cell%0Adef%20_(plot_score_per_vintage%2C%20vintage_scorer%2C%20y_pred_vintages%2C%20y_test)%3A%0A%20%20%20%20plot_score_per_vintage(%0A%20%20%20%20%20%20%20%20vintage_scorer%2C%0A%20%20%20%20%20%20%20%20y_test%2C%0A%20%20%20%20%20%20%20%20y_pred_vintages%2C%0A%20%20%20%20%20%20%20%20title%3D%22MAE%20per%20Forecast%20Vintage%22%2C%0A%20%20%20%20%20%20%20%20y_label%3D%22MAE%22%2C%0A%20%20%20%20%20%20%20%20height%3D380%2C%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(plot_score_time_series%2C%20vintage_scorer%2C%20y_pred_vintages%2C%20y_test)%3A%0A%20%20%20%20plot_score_time_series(%0A%20%20%20%20%20%20%20%20vintage_scorer%2C%0A%20%20%20%20%20%20%20%20y_test%2C%0A%20%20%20%20%20%20%20%20y_pred_vintages%2C%0A%20%20%20%20%20%20%20%20title%3D%22Per-timestep%20MAE%20across%20Vintages%22%2C%0A%20%20%20%20%20%20%20%20y_label%3D%22MAE%22%2C%0A%20%20%20%20%20%20%20%20height%3D500%2C%0A%20%20%20%20%20%20%20%20facet_by%3D%22member%22%2C%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%23%20Next%20Steps%0A%0A%20%20%20%20-%20**Interval%20metrics**%3A%20See%20%5B%60interval_metrics.py%60%5D(%2Fexamples%2Finterval_metrics%2F)%20for%20interval%20scoring%0A%20%20%20%20-%20**Cross-validation**%3A%20See%20%5BEvaluation%20%26%20Search%5D(%2Fpages%2Fexamples%2F%23evaluation-search)%20for%20temporal%20CV%20with%20scoring%0A%20%20%20%20-%20**Time%20weighting**%3A%20See%20%5B%60time_weighted_scoring.py%60%5D(%2Fexamples%2Ftime_weighted_scoring%2F)%0A%20%20%20%20-%20**Classification%20metrics**%3A%20See%20%5B%60class_proba_metrics.py%60%5D(%2Fexamples%2Fclass_proba_metrics%2F)%20for%20soft%20classification%20metrics%20(LogLoss%2C%20BrierScore)%20and%20reliability%20diagrams%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0Aif%20__name__%20%3D%3D%20%22__main__%22%3A%0A%20%20%20%20app.run()%0A