catboost multiquantile

%23%20%2F%2F%2F%20script%0A%23%20requires-python%20%3D%20%22%3E%3D3.11%22%0A%23%20dependencies%20%3D%20%5B%0A%23%20%20%20%20%20%22catboost%22%2C%0A%23%20%20%20%20%20%22scikit-learn%22%2C%0A%23%20%20%20%20%20%22yohou%5Bplotting%5D%22%2C%0A%23%20%5D%0A%23%20%2F%2F%2F%0A%0Aimport%20marimo%0A%0A__generated_with%20%3D%20%220.23.14%22%0Aapp%20%3D%20marimo.App(width%3D%22medium%22)%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_()%3A%0A%20%20%20%20import%20marimo%20as%20mo%0A%0A%20%20%20%20return%20(mo%2C)%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%20Interval%20Forecasting%20with%20CatBoost%20MultiQuantile%0A%0A%20%20%20%20CatBoost%20supports%20a%20%60MultiQuantile%60%20loss%20function%20that%20predicts%20**all%0A%20%20%20%20quantiles%20in%20a%20single%20model**%2C%20avoiding%20the%202N-model%20overhead%20of%0A%20%20%20%20standard%20quantile%20regression%20(where%20N%20%3D%20number%20of%20coverage%20rates).%0A%0A%20%20%20%20%5B%60IntervalReductionForecaster%60%5D(%2Fpages%2Fapi%2Fgenerated%2Fyohou.interval.reduction.IntervalReductionForecaster%2F)%20automatically%20detects%20this%20loss%20and%0A%20%20%20%20activates%20the%20optimised%20code%20path.%0A%0A%20%20%20%20**Prerequisites%3A**%20Familiarity%20with%20%5B%60IntervalReductionForecaster%60%5D(%2Fpages%2Fapi%2Fgenerated%2Fyohou.interval.reduction.IntervalReductionForecaster%2F).%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_()%3A%0A%20%20%20%20import%20time%0A%20%20%20%20from%20copy%20import%20deepcopy%0A%0A%20%20%20%20from%20catboost%20import%20CatBoostRegressor%0A%0A%20%20%20%20from%20yohou.datasets%20import%20fetch_tourism_monthly%0A%20%20%20%20from%20yohou.interval%20import%20IntervalReductionForecaster%0A%20%20%20%20from%20yohou.metrics%20import%20EmpiricalCoverage%2C%20IntervalScore%2C%20MeanIntervalWidth%0A%20%20%20%20from%20yohou.model_selection%20import%20train_test_split%0A%20%20%20%20from%20yohou.plotting%20import%20(%0A%20%20%20%20%20%20%20%20plot_forecast%2C%0A%20%20%20%20%20%20%20%20plot_score_per_step%2C%0A%20%20%20%20%20%20%20%20plot_score_per_vintage%2C%0A%20%20%20%20)%0A%20%20%20%20from%20yohou.preprocessing%20import%20LagTransformer%0A%0A%20%20%20%20return%20(%0A%20%20%20%20%20%20%20%20CatBoostRegressor%2C%0A%20%20%20%20%20%20%20%20EmpiricalCoverage%2C%0A%20%20%20%20%20%20%20%20IntervalReductionForecaster%2C%0A%20%20%20%20%20%20%20%20IntervalScore%2C%0A%20%20%20%20%20%20%20%20LagTransformer%2C%0A%20%20%20%20%20%20%20%20MeanIntervalWidth%2C%0A%20%20%20%20%20%20%20%20deepcopy%2C%0A%20%20%20%20%20%20%20%20fetch_tourism_monthly%2C%0A%20%20%20%20%20%20%20%20plot_forecast%2C%0A%20%20%20%20%20%20%20%20plot_score_per_step%2C%0A%20%20%20%20%20%20%20%20plot_score_per_vintage%2C%0A%20%20%20%20%20%20%20%20time%2C%0A%20%20%20%20%20%20%20%20train_test_split%2C%0A%20%20%20%20)%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%23%201.%20Prepare%20Data%0A%0A%20%20%20%20We%20load%20the%20Monthly%20Tourism%20dataset%20(series%20T1)%20and%20split%20it%20into%20training%0A%20%20%20%20and%20test%20sets%20for%20interval%20forecasting.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(fetch_tourism_monthly%2C%20train_test_split)%3A%0A%20%20%20%20y%20%3D%20fetch_tourism_monthly().frame.select(%22time%22%2C%20%22T1__tourists%22).drop_nulls().rename(%7B%22T1__tourists%22%3A%20%22tourists%22%7D)%0A%0A%20%20%20%20y_train%2C%20y_test%20%3D%20train_test_split(y%2C%20test_size%3D0.2)%0A%20%20%20%20forecasting_horizon%20%3D%20len(y_test)%0A%0A%20%20%20%20print(f%22Train%3A%20%7Blen(y_train)%7D%2C%20Test%3A%20%7Blen(y_test)%7D%22)%0A%20%20%20%20return%20forecasting_horizon%2C%20y_test%2C%20y_train%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%23%202.%20CatBoost%20MultiQuantile%20Forecaster%0A%0A%20%20%20%20Pass%20%60CatBoostRegressor(loss_function%3D'MultiQuantile%3Aalpha%3D...')%60%20to%0A%20%20%20%20%5B%60IntervalReductionForecaster%60%5D(%2Fpages%2Fapi%2Fgenerated%2Fyohou.interval.reduction.IntervalReductionForecaster%2F).%20%20The%20%60alpha%60%20values%20in%20the%20loss%20function%0A%20%20%20%20are%20**ignored**%2C%20the%20forecaster%20rewrites%20them%20at%20fit%20time%20to%20match%20the%0A%20%20%20%20requested%20%60coverage_rates%60.%20%20Any%20placeholder%20value%20is%20fine.%0A%0A%20%20%20%20Because%20CatBoost's%20%60MultiQuantile%60%20loss%20produces%20a%202D%20output%20(one%0A%20%20%20%20column%20per%20quantile)%2C%20it%20cannot%20be%20wrapped%20in%20%60MultiOutputRegressor%60%0A%20%20%20%20and%20requires%20%60forecasting_horizon%3D1%60%20with%20recursive%20prediction.%0A%0A%20%20%20%20%3E%20**Note%20on%20%60reduction_strategy%60**%3A%20Because%20the%20multi-quantile%20loss%0A%20%20%20%20%3E%20already%20produces%20a%20vector%20of%20quantiles%20as%20its%20native%20output%2C%0A%20%20%20%20%3E%20it%20is%20always%20fitted%20with%20%60forecasting_horizon%3D1%60%20and%20predicts%0A%20%20%20%20%3E%20recursively.%20This%20means%20%60reduction_strategy%60%20does%20not%20change%20the%0A%20%20%20%20%3E%20single-model%20advantage%20here%20-%20you%20still%20get%20one%20model%20for%20all%20quantiles.%0A%20%20%20%20%3E%20For%20per-step%20specialisation%20with%20CatBoost%20intervals%2C%20train%20separate%0A%20%20%20%20%3E%20%60CatBoostRegressor(loss_function%3D'Quantile%3Aalpha%3D...')%60%20models%20using%0A%20%20%20%20%3E%20%60reduction_strategy%3D%22direct%22%60%20instead.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(%0A%20%20%20%20CatBoostRegressor%2C%0A%20%20%20%20IntervalReductionForecaster%2C%0A%20%20%20%20LagTransformer%2C%0A%20%20%20%20forecasting_horizon%2C%0A%20%20%20%20time%2C%0A%20%20%20%20y_train%2C%0A)%3A%0A%20%20%20%20coverage_rates%20%3D%20%5B0.5%2C%200.9%5D%0A%0A%20%20%20%20catboost_fc%20%3D%20IntervalReductionForecaster(%0A%20%20%20%20%20%20%20%20estimator%3DCatBoostRegressor(%0A%20%20%20%20%20%20%20%20%20%20%20%20iterations%3D200%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20depth%3D4%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20learning_rate%3D0.1%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20loss_function%3D%22MultiQuantile%3Aalpha%3D0.5%22%2C%20%20%23%20placeholder%0A%20%20%20%20%20%20%20%20%20%20%20%20verbose%3D0%2C%0A%20%20%20%20%20%20%20%20)%2C%0A%20%20%20%20%20%20%20%20actual_transformer%3DLagTransformer(lag%3Dlist(range(1%2C%2013)))%2C%0A%20%20%20%20)%0A%0A%20%20%20%20t0%20%3D%20time.perf_counter()%0A%20%20%20%20catboost_fc.fit(%0A%20%20%20%20%20%20%20%20y_train%2C%0A%20%20%20%20%20%20%20%20forecasting_horizon%3D1%2C%0A%20%20%20%20%20%20%20%20coverage_rates%3Dcoverage_rates%2C%0A%20%20%20%20)%0A%20%20%20%20elapsed_mq%20%3D%20time.perf_counter()%20-%20t0%0A%0A%20%20%20%20y_pred_mq%20%3D%20catboost_fc.predict_interval(%0A%20%20%20%20%20%20%20%20forecasting_horizon%3Dforecasting_horizon%2C%0A%20%20%20%20%20%20%20%20coverage_rates%3Dcoverage_rates%2C%0A%20%20%20%20)%0A%0A%20%20%20%20print(f%22MultiQuantile%20fit%20time%3A%20%7Belapsed_mq%3A.2f%7Ds%20(single%20model)%22)%0A%20%20%20%20print(f%22Prediction%20columns%3A%20%7By_pred_mq.columns%7D%22)%0A%20%20%20%20return%20catboost_fc%2C%20coverage_rates%2C%20y_pred_mq%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%5B%60plot_forecast%60%5D(%2Fpages%2Fapi%2Fgenerated%2Fyohou.plotting.forecasting.plot_forecast%2F)%20visualises%20the%20prediction%20intervals%20alongside%20the%20training%0A%20%20%20%20history%20and%20test%20data.%20The%20%60coverage_rates%60%20parameter%20controls%20which%0A%20%20%20%20interval%20bands%20are%20shown.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(coverage_rates%2C%20plot_forecast%2C%20y_pred_mq%2C%20y_test%2C%20y_train)%3A%0A%20%20%20%20plot_forecast(%0A%20%20%20%20%20%20%20%20y_test%2C%0A%20%20%20%20%20%20%20%20y_pred_mq%2C%0A%20%20%20%20%20%20%20%20y_train%3Dy_train%2C%0A%20%20%20%20%20%20%20%20coverage_rates%3Dcoverage_rates%2C%0A%20%20%20%20%20%20%20%20title%3D%22CatBoost%20MultiQuantile%20Intervals%22%2C%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%23%203.%20Compare%20with%20Standard%20Quantile%20Regression%0A%0A%20%20%20%20The%20default%20estimator%20is%20%60MultiOutputRegressor(QuantileRegressor())%60%2C%20let's%20compare%20it%20with%20our%20%60MultiOutputRegressor(CatBoostRegressor())%60.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(%0A%20%20%20%20IntervalReductionForecaster%2C%0A%20%20%20%20LagTransformer%2C%0A%20%20%20%20coverage_rates%2C%0A%20%20%20%20forecasting_horizon%2C%0A%20%20%20%20plot_forecast%2C%0A%20%20%20%20time%2C%0A%20%20%20%20y_pred_mq%2C%0A%20%20%20%20y_test%2C%0A%20%20%20%20y_train%2C%0A)%3A%0A%20%20%20%20standard_fc%20%3D%20IntervalReductionForecaster(%0A%20%20%20%20%20%20%20%20actual_transformer%3DLagTransformer(lag%3Dlist(range(1%2C%2013)))%2C%0A%20%20%20%20)%0A%0A%20%20%20%20t0_std%20%3D%20time.perf_counter()%0A%20%20%20%20standard_fc.fit(%0A%20%20%20%20%20%20%20%20y_train%2C%0A%20%20%20%20%20%20%20%20forecasting_horizon%3Dforecasting_horizon%2C%0A%20%20%20%20%20%20%20%20coverage_rates%3Dcoverage_rates%2C%0A%20%20%20%20)%0A%0A%20%20%20%20y_pred_std%20%3D%20standard_fc.predict_interval(%0A%20%20%20%20%20%20%20%20forecasting_horizon%3Dforecasting_horizon%2C%0A%20%20%20%20%20%20%20%20coverage_rates%3Dcoverage_rates%2C%0A%20%20%20%20)%0A%0A%20%20%20%20plot_forecast(%0A%20%20%20%20%20%20%20%20y_test%2C%0A%20%20%20%20%20%20%20%20%7B%22CatBoost%20with%20MultiQuantile%20loss%22%3A%20y_pred_mq%2C%20%22Standard%20quantile%20regression%22%3A%20y_pred_std%7D%2C%0A%20%20%20%20%20%20%20%20y_train%3Dy_train%2C%0A%20%20%20%20%20%20%20%20coverage_rates%3Dcoverage_rates%2C%0A%20%20%20%20%20%20%20%20title%3D%22CatBoost%20MultiQuantile%20Intervals%22%2C%0A%20%20%20%20)%0A%20%20%20%20return%20(y_pred_std%2C)%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%23%204.%20Evaluate%20Interval%20Quality%0A%0A%20%20%20%20We%20evaluate%20both%20approaches%20using%20%5B%60EmpiricalCoverage%60%5D(%2Fpages%2Fapi%2Fgenerated%2Fyohou.metrics.interval.EmpiricalCoverage%2F)%20(does%20the%20interval%0A%20%20%20%20contain%20the%20true%20value%20at%20the%20target%20rate%3F)%2C%20%5B%60IntervalScore%60%5D(%2Fpages%2Fapi%2Fgenerated%2Fyohou.metrics.interval.IntervalScore%2F)%20(penalises%0A%20%20%20%20width%20and%20miscoverage)%2C%20and%20%5B%60MeanIntervalWidth%60%5D(%2Fpages%2Fapi%2Fgenerated%2Fyohou.metrics.interval.MeanIntervalWidth%2F)%20(average%20band%20width).%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(%0A%20%20%20%20EmpiricalCoverage%2C%0A%20%20%20%20IntervalScore%2C%0A%20%20%20%20MeanIntervalWidth%2C%0A%20%20%20%20coverage_rates%2C%0A%20%20%20%20mo%2C%0A%20%20%20%20y_pred_mq%2C%0A%20%20%20%20y_pred_std%2C%0A%20%20%20%20y_test%2C%0A%20%20%20%20y_train%2C%0A)%3A%0A%20%20%20%20rows%20%3D%20%5B%5D%0A%20%20%20%20for%20label%2C%20y_pred%20in%20%5B(%22MultiQuantile%22%2C%20y_pred_mq)%2C%20(%22Standard%22%2C%20y_pred_std)%5D%3A%0A%20%20%20%20%20%20%20%20for%20scorer_cls%20in%20%5BEmpiricalCoverage%2C%20IntervalScore%2C%20MeanIntervalWidth%5D%3A%0A%20%20%20%20%20%20%20%20%20%20%20%20scorer%20%3D%20scorer_cls(coverage_rates%3Dcoverage_rates)%0A%20%20%20%20%20%20%20%20%20%20%20%20scorer.fit(y_train)%0A%20%20%20%20%20%20%20%20%20%20%20%20score%20%3D%20scorer.score(y_test%2C%20y_pred)%0A%20%20%20%20%20%20%20%20%20%20%20%20rows.append(f%22%7C%20%7Blabel%7D%20%7C%20%7Bscorer_cls.__name__%7D%20%7C%20%7Bscore%3A.4f%7D%20%7C%22)%0A%0A%20%20%20%20table%20%3D%20%22%7C%20Approach%20%7C%20Metric%20%7C%20Score%20%7C%5Cn%7C---%7C---%7C---%7C%5Cn%22%20%2B%20%22%5Cn%22.join(rows)%0A%20%20%20%20mo.md(table)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%23%20Multi-vintage%20Scoring%0A%0A%20%20%20%20The%20%60observe_predict_interval%60%20method%20with%20%60stride%3D1%60%20produces%20one%0A%20%20%20%20interval%20forecast%20per%20observation%20point%2C%20creating%20multiple%20*vintages*.%0A%20%20%20%20Each%20vintage%20represents%20a%20different%20forecast%20origin%2C%20so%20you%20can%20analyse%0A%20%20%20%20how%20interval%20quality%20evolves%20as%20the%20model%20absorbs%20more%20data.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(catboost_fc%2C%20coverage_rates%2C%20deepcopy%2C%20forecasting_horizon%2C%20y_test)%3A%0A%20%20%20%20_vintage_model%20%3D%20deepcopy(catboost_fc)%0A%20%20%20%20y_pred_vintages%20%3D%20_vintage_model.observe_predict_interval(%0A%20%20%20%20%20%20%20%20y%3Dy_test%2C%0A%20%20%20%20%20%20%20%20stride%3D1%2C%0A%20%20%20%20%20%20%20%20forecasting_horizon%3Dforecasting_horizon%2C%0A%20%20%20%20%20%20%20%20coverage_rates%3Dcoverage_rates%2C%0A%20%20%20%20)%0A%20%20%20%20print(f%22Vintages%3A%20%7By_pred_vintages%5B'vintage_time'%5D.n_unique()%7D%22)%0A%20%20%20%20y_pred_vintages.head(10)%0A%20%20%20%20return%20(y_pred_vintages%2C)%0A%0A%0A%40app.cell%0Adef%20_(IntervalScore%2C%20y_train)%3A%0A%20%20%20%20vintage_scorer%20%3D%20IntervalScore()%0A%20%20%20%20vintage_scorer.fit(y_train)%0A%20%20%20%20return%20(vintage_scorer%2C)%0A%0A%0A%40app.cell%0Adef%20_(plot_score_per_step%2C%20vintage_scorer%2C%20y_pred_vintages%2C%20y_test)%3A%0A%20%20%20%20plot_score_per_step(%0A%20%20%20%20%20%20%20%20vintage_scorer%2C%0A%20%20%20%20%20%20%20%20y_test%2C%0A%20%20%20%20%20%20%20%20y_pred_vintages%2C%0A%20%20%20%20%20%20%20%20title%3D%22Interval%20Score%20per%20Step%22%2C%0A%20%20%20%20%20%20%20%20y_label%3D%22Interval%20Score%22%2C%0A%20%20%20%20%20%20%20%20height%3D380%2C%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(plot_score_per_vintage%2C%20vintage_scorer%2C%20y_pred_vintages%2C%20y_test)%3A%0A%20%20%20%20plot_score_per_vintage(%0A%20%20%20%20%20%20%20%20vintage_scorer%2C%0A%20%20%20%20%20%20%20%20y_test%2C%0A%20%20%20%20%20%20%20%20y_pred_vintages%2C%0A%20%20%20%20%20%20%20%20title%3D%22Interval%20Score%20per%20Vintage%22%2C%0A%20%20%20%20%20%20%20%20y_label%3D%22Interval%20Score%22%2C%0A%20%20%20%20%20%20%20%20height%3D380%2C%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%23%20Next%20Steps%0A%0A%20%20%20%20-%20**Standard%20interval%20reduction**%3A%20See%20%5B%60interval_reduction.py%60%5D(%2Fexamples%2Finterval_reduction%2F)%20for%20%60reduction_strategy%60%20comparison%0A%20%20%20%20-%20**Point%20CatBoost**%3A%20See%20%5B%60catboost_forecasting.py%60%5D(%2Fexamples%2Fcatboost_forecasting%2F)%20for%20point%20forecasting%20with%20CatBoost%0A%20%20%20%20-%20**Reduction%20strategies**%3A%20See%20%5B%60reduction_strategies.py%60%5D(%2Fexamples%2Freduction_strategies%2F)%20for%20multi-output%20vs%20direct%20vs%20dir-rec%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0Aif%20__name__%20%3D%3D%20%22__main__%22%3A%0A%20%20%20%20app.run()%0A