catboost forecasting

%23%20%2F%2F%2F%20script%0A%23%20requires-python%20%3D%20%22%3E%3D3.11%22%0A%23%20dependencies%20%3D%20%5B%0A%23%20%20%20%20%20%22catboost%22%2C%0A%23%20%20%20%20%20%22scikit-learn%22%2C%0A%23%20%20%20%20%20%22yohou%5Bplotting%5D%22%2C%0A%23%20%5D%0A%23%20%2F%2F%2F%0A%0Aimport%20marimo%0A%0A__generated_with%20%3D%20%220.23.14%22%0Aapp%20%3D%20marimo.App(width%3D%22medium%22)%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_()%3A%0A%20%20%20%20import%20marimo%20as%20mo%0A%0A%20%20%20%20return%20(mo%2C)%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%20Point%20Forecasting%20with%20CatBoost%0A%0A%20%20%20%20%5BCatBoost%5D(https%3A%2F%2Fcatboost.ai%2F)%20is%20a%20gradient-boosting%20library%20that%20works%0A%20%20%20%20seamlessly%20as%20a%20drop-in%20sklearn%20estimator%20inside%20%5B%60PointReductionForecaster%60%5D(%2Fpages%2Fapi%2Fgenerated%2Fyohou.point.reduction.PointReductionForecaster%2F).%0A%0A%20%20%20%20%23%23%201.%20Prepare%20Data%0A%0A%20%20%20%20We%20load%20the%20Monthly%20Tourism%20dataset%20via%20%5B%60fetch_tourism_monthly%60%5D(%2Fpages%2Fapi%2Fgenerated%2Fyohou.datasets._fetchers.fetch_tourism_monthly%2F)%2C%20extract%20a%0A%20%20%20%20single%20univariate%20series%2C%20and%20split%20it%2080%2F20%20into%20training%20and%20test%20sets%0A%20%20%20%20while%20preserving%20temporal%20order.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_()%3A%0A%20%20%20%20from%20copy%20import%20deepcopy%0A%0A%20%20%20%20from%20catboost%20import%20CatBoostRegressor%0A%20%20%20%20from%20sklearn.linear_model%20import%20Ridge%0A%20%20%20%20from%20sklearn.multioutput%20import%20MultiOutputRegressor%0A%0A%20%20%20%20from%20yohou.datasets%20import%20fetch_tourism_monthly%0A%20%20%20%20from%20yohou.metrics%20import%20MeanAbsoluteError%0A%20%20%20%20from%20yohou.model_selection%20import%20train_test_split%0A%20%20%20%20from%20yohou.plotting%20import%20(%0A%20%20%20%20%20%20%20%20plot_forecast%2C%0A%20%20%20%20%20%20%20%20plot_score_per_step%2C%0A%20%20%20%20%20%20%20%20plot_score_time_series%2C%0A%20%20%20%20%20%20%20%20plot_time_series%2C%0A%20%20%20%20)%0A%20%20%20%20from%20yohou.point%20import%20PointReductionForecaster%0A%20%20%20%20from%20yohou.preprocessing%20import%20LagTransformer%0A%0A%20%20%20%20return%20(%0A%20%20%20%20%20%20%20%20CatBoostRegressor%2C%0A%20%20%20%20%20%20%20%20LagTransformer%2C%0A%20%20%20%20%20%20%20%20MeanAbsoluteError%2C%0A%20%20%20%20%20%20%20%20MultiOutputRegressor%2C%0A%20%20%20%20%20%20%20%20PointReductionForecaster%2C%0A%20%20%20%20%20%20%20%20Ridge%2C%0A%20%20%20%20%20%20%20%20deepcopy%2C%0A%20%20%20%20%20%20%20%20fetch_tourism_monthly%2C%0A%20%20%20%20%20%20%20%20plot_forecast%2C%0A%20%20%20%20%20%20%20%20plot_score_per_step%2C%0A%20%20%20%20%20%20%20%20plot_score_time_series%2C%0A%20%20%20%20%20%20%20%20plot_time_series%2C%0A%20%20%20%20%20%20%20%20train_test_split%2C%0A%20%20%20%20)%0A%0A%0A%40app.cell%0Adef%20_(fetch_tourism_monthly%2C%20plot_time_series%2C%20train_test_split)%3A%0A%0A%20%20%20%20y%20%3D%20fetch_tourism_monthly().frame.select(%22time%22%2C%20%22T1__tourists%22).drop_nulls().rename(%7B%22T1__tourists%22%3A%20%22tourists%22%7D)%0A%0A%20%20%20%20y_train%2C%20y_test%20%3D%20train_test_split(y%2C%20test_size%3D0.2)%0A%20%20%20%20forecasting_horizon%20%3D%20len(y_test)%0A%0A%20%20%20%20plot_time_series(y%2C%20title%3D%22Monthly%20Tourism%20(T1)%22)%0A%20%20%20%20return%20forecasting_horizon%2C%20y_test%2C%20y_train%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%23%202.%20CatBoost%20Forecaster%0A%0A%20%20%20%20%5B%60CatBoostRegressor%60%5D(https%3A%2F%2Fcatboost.ai%2Fen%2Fdocs%2Fconcepts%2Fpython-reference_catboostregressor)%20implements%20the%20sklearn%20%60fit%60%2F%60predict%60%20API%2C%20so%20it%0A%20%20%20%20plugs%20directly%20into%20%5B%60PointReductionForecaster%60%5D(%2Fpages%2Fapi%2Fgenerated%2Fyohou.point.reduction.PointReductionForecaster%2F).%20%20Set%20%60verbose%3D0%60%20to%0A%20%20%20%20suppress%20per-iteration%20training%20logs.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(%0A%20%20%20%20CatBoostRegressor%2C%0A%20%20%20%20LagTransformer%2C%0A%20%20%20%20MultiOutputRegressor%2C%0A%20%20%20%20PointReductionForecaster%2C%0A%20%20%20%20forecasting_horizon%2C%0A%20%20%20%20y_train%2C%0A)%3A%0A%20%20%20%20catboost_fc%20%3D%20PointReductionForecaster(%0A%20%20%20%20%20%20%20%20estimator%3DMultiOutputRegressor(%0A%20%20%20%20%20%20%20%20%20%20%20%20CatBoostRegressor(%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20iterations%3D200%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20depth%3D4%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20learning_rate%3D0.1%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20verbose%3D0%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20)%0A%20%20%20%20%20%20%20%20)%2C%0A%20%20%20%20%20%20%20%20actual_transformer%3DLagTransformer(lag%3Dlist(range(1%2C%2013)))%2C%0A%20%20%20%20)%0A%0A%20%20%20%20catboost_fc.fit(y_train%2C%20forecasting_horizon%3Dforecasting_horizon)%0A%20%20%20%20y_pred_cb%20%3D%20catboost_fc.predict(forecasting_horizon%3Dforecasting_horizon)%0A%0A%20%20%20%20y_pred_cb.head()%0A%20%20%20%20return%20catboost_fc%2C%20y_pred_cb%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%5B%60plot_forecast%60%5D(%2Fpages%2Fapi%2Fgenerated%2Fyohou.plotting.forecasting.plot_forecast%2F)%20overlays%20the%20predicted%20values%20against%20the%20test%20actuals%2C%0A%20%20%20%20optionally%20showing%20the%20training%20history%20for%20context.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(plot_forecast%2C%20y_pred_cb%2C%20y_test%2C%20y_train)%3A%0A%20%20%20%20plot_forecast(%0A%20%20%20%20%20%20%20%20y_test%2C%0A%20%20%20%20%20%20%20%20y_pred_cb%2C%0A%20%20%20%20%20%20%20%20y_train%3Dy_train%2C%0A%20%20%20%20%20%20%20%20title%3D%22CatBoost%20Point%20Forecast%22%2C%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%23%203.%20Compare%20with%20Linear%20Baseline%0A%0A%20%20%20%20We%20fit%20a%20%5B%60PointReductionForecaster%60%5D(%2Fpages%2Fapi%2Fgenerated%2Fyohou.point.reduction.PointReductionForecaster%2F)%20backed%20by%20%5B%60Ridge%60%5D(https%3A%2F%2Fscikit-learn.org%2Fstable%2Fmodules%2Fgenerated%2Fsklearn.linear_model.Ridge.html)%20regression%20using%20the%0A%20%20%20%20same%20lag%20features%20and%20compare%20its%20MAE%20against%20CatBoost%20to%20quantify%20the%0A%20%20%20%20benefit%20of%20gradient%20boosting%20on%20this%20dataset.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(%0A%20%20%20%20LagTransformer%2C%0A%20%20%20%20MeanAbsoluteError%2C%0A%20%20%20%20PointReductionForecaster%2C%0A%20%20%20%20Ridge%2C%0A%20%20%20%20forecasting_horizon%2C%0A%20%20%20%20y_pred_cb%2C%0A%20%20%20%20y_test%2C%0A%20%20%20%20y_train%2C%0A)%3A%0A%20%20%20%20ridge_fc%20%3D%20PointReductionForecaster(%0A%20%20%20%20%20%20%20%20estimator%3DRidge()%2C%0A%20%20%20%20%20%20%20%20actual_transformer%3DLagTransformer(lag%3Dlist(range(1%2C%2013)))%2C%0A%20%20%20%20)%0A%20%20%20%20ridge_fc.fit(y_train%2C%20forecasting_horizon%3Dforecasting_horizon)%0A%20%20%20%20y_pred_ridge%20%3D%20ridge_fc.predict(forecasting_horizon%3Dforecasting_horizon)%0A%0A%20%20%20%20scorer%20%3D%20MeanAbsoluteError()%0A%20%20%20%20scorer.fit(y_train)%0A%0A%20%20%20%20mae_cb%20%3D%20scorer.score(y_test%2C%20y_pred_cb)%0A%20%20%20%20mae_ridge%20%3D%20scorer.score(y_test%2C%20y_pred_ridge)%0A%20%20%20%20print(f%22CatBoost%20MAE%3A%20%7Bmae_cb%3A.2f%7D%22)%0A%20%20%20%20print(f%22Ridge%20%20%20%20MAE%3A%20%7Bmae_ridge%3A.2f%7D%22)%0A%20%20%20%20return%20(y_pred_ridge%2C)%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%23%204.%20CatBoost%20with%20Direct%20Strategy%0A%0A%20%20%20%20The%20default%20%60%22multi-output%22%60%20strategy%20trains%20a%20single%20model%20that%20predicts%0A%20%20%20%20all%20H%20horizon%20steps%20simultaneously.%20CatBoost%20handles%20this%20natively.%0A%0A%20%20%20%20For%20a%20**univariate**%20target%2C%20%60reduction_strategy%3D%22direct%22%60%20trains%20**H%0A%20%20%20%20independent%20CatBoost%20models**%2C%20each%20predicting%20a%20single%20scalar.%20Every%0A%20%20%20%20model's%20splits%20are%20optimised%20exclusively%20for%20its%20own%20horizon%20step%2C%0A%20%20%20%20giving%20it%20full%20capacity%20to%20capture%20step-specific%20patterns.%0A%0A%20%20%20%20With%20a%20**multivariate**%20target%20(multiple%20columns%20in%20%60y%60)%2C%20each%20per-step%0A%20%20%20%20model%20still%20faces%20a%20multi-output%20problem%2C%20so%20a%20%60MultiOutputRegressor%60%0A%20%20%20%20wrapper%20may%20be%20necessary%20for%20estimators%20that%20do%20not%20support%20multi-output%0A%20%20%20%20natively.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(%0A%20%20%20%20CatBoostRegressor%2C%0A%20%20%20%20LagTransformer%2C%0A%20%20%20%20MeanAbsoluteError%2C%0A%20%20%20%20PointReductionForecaster%2C%0A%20%20%20%20forecasting_horizon%2C%0A%20%20%20%20plot_forecast%2C%0A%20%20%20%20y_pred_cb%2C%0A%20%20%20%20y_pred_ridge%2C%0A%20%20%20%20y_test%2C%0A%20%20%20%20y_train%2C%0A)%3A%0A%20%20%20%20catboost_direct_fc%20%3D%20PointReductionForecaster(%0A%20%20%20%20%20%20%20%20estimator%3DCatBoostRegressor(%0A%20%20%20%20%20%20%20%20%20%20%20%20iterations%3D200%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20depth%3D4%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20learning_rate%3D0.1%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20verbose%3D0%2C%0A%20%20%20%20%20%20%20%20)%2C%0A%20%20%20%20%20%20%20%20reduction_strategy%3D%22direct%22%2C%0A%20%20%20%20%20%20%20%20actual_transformer%3DLagTransformer(lag%3Dlist(range(1%2C%2013)))%2C%0A%20%20%20%20)%0A%20%20%20%20catboost_direct_fc.fit(y_train%2C%20forecasting_horizon%3Dforecasting_horizon)%0A%20%20%20%20y_pred_cb_direct%20%3D%20catboost_direct_fc.predict(forecasting_horizon%3Dforecasting_horizon)%0A%0A%20%20%20%20_scorer%20%3D%20MeanAbsoluteError()%0A%20%20%20%20_scorer.fit(y_train)%0A%20%20%20%20print(f%22CatBoost%20Direct%20MAE%3A%20%7B_scorer.score(y_test%2C%20y_pred_cb_direct)%3A.2f%7D%22)%0A%0A%20%20%20%20plot_forecast(%0A%20%20%20%20%20%20%20%20y_test%2C%0A%20%20%20%20%20%20%20%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20%22CatBoost%20(multi-output)%22%3A%20y_pred_cb%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%22CatBoost%20(direct)%22%3A%20y_pred_cb_direct%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%22Ridge%22%3A%20y_pred_ridge%2C%0A%20%20%20%20%20%20%20%20%7D%2C%0A%20%20%20%20%20%20%20%20y_train%3Dy_train%2C%0A%20%20%20%20%20%20%20%20title%3D%22CatBoost%20Multi-Output%20vs%20Direct%20vs%20Ridge%22%2C%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%23%20Multi-vintage%20Scoring%0A%0A%20%20%20%20The%20%60observe_predict%60%20method%20with%20%60stride%3D1%60%20produces%20one%20forecast%20per%0A%20%20%20%20observation%20point%2C%20creating%20multiple%20*vintages*.%20Each%20vintage%20represents%0A%20%20%20%20a%20different%20forecast%20origin%2C%20so%20you%20can%20analyse%20how%20accuracy%20evolves%20as%0A%20%20%20%20the%20model%20absorbs%20more%20data.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(catboost_fc%2C%20deepcopy%2C%20forecasting_horizon%2C%20y_test)%3A%0A%20%20%20%20_vintage_model%20%3D%20deepcopy(catboost_fc)%0A%20%20%20%20y_pred_vintages%20%3D%20_vintage_model.observe_predict(%0A%20%20%20%20%20%20%20%20y%3Dy_test%2C%0A%20%20%20%20%20%20%20%20stride%3D1%2C%0A%20%20%20%20%20%20%20%20forecasting_horizon%3Dforecasting_horizon%2C%0A%20%20%20%20)%0A%20%20%20%20print(f%22Vintages%3A%20%7By_pred_vintages%5B'vintage_time'%5D.n_unique()%7D%22)%0A%20%20%20%20y_pred_vintages.head(10)%0A%20%20%20%20return%20(y_pred_vintages%2C)%0A%0A%0A%40app.cell%0Adef%20_(MeanAbsoluteError%2C%20y_train)%3A%0A%20%20%20%20vintage_scorer%20%3D%20MeanAbsoluteError()%0A%20%20%20%20vintage_scorer.fit(y_train)%0A%20%20%20%20return%20(vintage_scorer%2C)%0A%0A%0A%40app.cell%0Adef%20_(plot_score_per_step%2C%20vintage_scorer%2C%20y_pred_vintages%2C%20y_test)%3A%0A%20%20%20%20plot_score_per_step(%0A%20%20%20%20%20%20%20%20vintage_scorer%2C%0A%20%20%20%20%20%20%20%20y_test%2C%0A%20%20%20%20%20%20%20%20y_pred_vintages%2C%0A%20%20%20%20%20%20%20%20title%3D%22MAE%20per%20Forecast%20Step%22%2C%0A%20%20%20%20%20%20%20%20y_label%3D%22MAE%22%2C%0A%20%20%20%20%20%20%20%20height%3D380%2C%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(plot_score_time_series%2C%20vintage_scorer%2C%20y_pred_vintages%2C%20y_test)%3A%0A%20%20%20%20plot_score_time_series(%0A%20%20%20%20%20%20%20%20vintage_scorer%2C%0A%20%20%20%20%20%20%20%20y_test%2C%0A%20%20%20%20%20%20%20%20y_pred_vintages%2C%0A%20%20%20%20%20%20%20%20title%3D%22MAE%20over%20Time%22%2C%0A%20%20%20%20%20%20%20%20y_label%3D%22MAE%22%2C%0A%20%20%20%20%20%20%20%20height%3D380%2C%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%23%20Next%20Steps%0A%0A%20%20%20%20-%20%5BForecast%20with%20CatBoost%5D(%2Fpages%2Fhow-to%2Fforecast-with-catboost%2F)%20for%20the%20full%20guide%0A%20%20%20%20-%20%5BBuild%20Reduction%20Forecasters%5D(%2Fpages%2Fhow-to%2Fbuild-reduction-forecasters%2F)%20for%20related%20techniques%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0Aif%20__name__%20%3D%3D%20%22__main__%22%3A%0A%20%20%20%20app.run()%0A