panel intervals

%23%20%2F%2F%2F%20script%0A%23%20requires-python%20%3D%20%22%3E%3D3.11%22%0A%23%20dependencies%20%3D%20%5B%0A%23%20%20%20%20%20%22scikit-learn%22%2C%0A%23%20%20%20%20%20%22yohou%5Bplotting%5D%22%2C%0A%23%20%5D%0A%23%20%2F%2F%2F%0A%0Aimport%20marimo%0A%0A__generated_with%20%3D%20%220.23.14%22%0Aapp%20%3D%20marimo.App(width%3D%22medium%22)%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_()%3A%0A%20%20%20%20import%20marimo%20as%20mo%0A%0A%20%20%20%20return%20(mo%2C)%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%20Panel%20Prediction%20Intervals%0A%0A%20%20%20%20Interval%20forecasters%20automatically%20produce%20prediction%20intervals%20for%0A%20%20%20%20each%20panel%20group.%20This%20notebook%20demonstrates%20conformal%20and%20quantile%0A%20%20%20%20regression%20approaches%20on%20panel%20data.%0A%0A%20%20%20%20%23%23%201.%20Prepare%20Panel%20Data%0A%0A%20%20%20%20We%20load%20the%20KDD%20Cup%202018%20air%20quality%20dataset%20with%203%20Beijing%20stations%2C%0A%20%20%20%20each%20monitoring%206%20pollutants.%20Each%20station%20is%20a%20panel%20group%20with%0A%20%20%20%206%20measurement%20members.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_()%3A%0A%20%20%20%20from%20copy%20import%20deepcopy%0A%0A%20%20%20%20import%20polars%20as%20pl%0A%20%20%20%20from%20sklearn.linear_model%20import%20Ridge%0A%0A%20%20%20%20from%20yohou.datasets%20import%20fetch_kdd_cup%0A%20%20%20%20from%20yohou.interval%20import%20IntervalReductionForecaster%2C%20SplitConformalForecaster%0A%20%20%20%20from%20yohou.metrics%20import%20EmpiricalCoverage%2C%20IntervalScore%2C%20MeanIntervalWidth%0A%20%20%20%20from%20yohou.model_selection%20import%20train_test_split%0A%20%20%20%20from%20yohou.plotting%20import%20plot_forecast%2C%20plot_score_per_vintage%0A%20%20%20%20from%20yohou.point%20import%20PointReductionForecaster%0A%20%20%20%20from%20yohou.preprocessing%20import%20LagTransformer%0A%20%20%20%20from%20yohou.utils.panel%20import%20inspect_panel%0A%0A%20%20%20%20return%20(%0A%20%20%20%20%20%20%20%20EmpiricalCoverage%2C%0A%20%20%20%20%20%20%20%20IntervalReductionForecaster%2C%0A%20%20%20%20%20%20%20%20IntervalScore%2C%0A%20%20%20%20%20%20%20%20LagTransformer%2C%0A%20%20%20%20%20%20%20%20MeanIntervalWidth%2C%0A%20%20%20%20%20%20%20%20PointReductionForecaster%2C%0A%20%20%20%20%20%20%20%20Ridge%2C%0A%20%20%20%20%20%20%20%20SplitConformalForecaster%2C%0A%20%20%20%20%20%20%20%20deepcopy%2C%0A%20%20%20%20%20%20%20%20fetch_kdd_cup%2C%0A%20%20%20%20%20%20%20%20inspect_panel%2C%0A%20%20%20%20%20%20%20%20pl%2C%0A%20%20%20%20%20%20%20%20plot_forecast%2C%0A%20%20%20%20%20%20%20%20plot_score_per_vintage%2C%0A%20%20%20%20%20%20%20%20train_test_split%2C%0A%20%20%20%20)%0A%0A%0A%40app.cell%0Adef%20_(fetch_kdd_cup%2C%20inspect_panel%2C%20mo%2C%20train_test_split)%3A%0A%20%20%20%20_bunch%20%3D%20fetch_kdd_cup(n_groups%3D3)%0A%20%20%20%20aq%20%3D%20_bunch.frame.drop_nulls().tail(300)%0A%20%20%20%20_globals%2C%20groups%20%3D%20inspect_panel(aq)%0A%20%20%20%20y_train%2C%20y_test%20%3D%20train_test_split(aq%2C%20test_size%3D0.15)%0A%20%20%20%20horizon%20%3D%20len(y_test)%0A%20%20%20%20coverage_rates%20%3D%20%5B0.9%5D%0A%0A%20%20%20%20mo.md(%0A%20%20%20%20%20%20%20%20f%22**Groups**%3A%20%7Blist(groups.keys())%7D%5Cn%5Cn%22%0A%20%20%20%20%20%20%20%20f%22**Train**%3A%20%7Blen(y_train)%7D%20hours%2C%20**Test**%3A%20%7Blen(y_test)%7D%20hours%5Cn%5Cn%22%0A%20%20%20%20%20%20%20%20f%22**Coverage%20target**%3A%20%7Bcoverage_rates%7D%22%0A%20%20%20%20)%0A%20%20%20%20return%20coverage_rates%2C%20groups%2C%20horizon%2C%20y_test%2C%20y_train%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%23%202.%20Split%20Conformal%20Forecaster%20on%20Panel%20Data%0A%0A%20%20%20%20%5B%60SplitConformalForecaster%60%5D(%2Fpages%2Fapi%2Fgenerated%2Fyohou.interval.split_conformal.SplitConformalForecaster%2F)%20calibrates%20per-group%3A%20each%20panel%20group%0A%20%20%20%20gets%20its%20own%20conformal%20quantile%20based%20on%20its%20residual%20distribution.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(%0A%20%20%20%20LagTransformer%2C%0A%20%20%20%20PointReductionForecaster%2C%0A%20%20%20%20Ridge%2C%0A%20%20%20%20SplitConformalForecaster%2C%0A%20%20%20%20coverage_rates%2C%0A%20%20%20%20horizon%2C%0A%20%20%20%20y_train%2C%0A)%3A%0A%20%20%20%20fc_conformal%20%3D%20SplitConformalForecaster(%0A%20%20%20%20%20%20%20%20point_forecaster%3DPointReductionForecaster(%0A%20%20%20%20%20%20%20%20%20%20%20%20estimator%3DRidge(alpha%3D1.0)%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20actual_transformer%3DLagTransformer(lag%3D%5B1%2C%204%5D)%2C%0A%20%20%20%20%20%20%20%20)%2C%0A%20%20%20%20%20%20%20%20calibration_size%3Dhorizon%20%2B%205%2C%0A%20%20%20%20)%0A%20%20%20%20fc_conformal.fit(y_train%2C%20forecasting_horizon%3Dhorizon%2C%20coverage_rates%3Dcoverage_rates)%0A%20%20%20%20y_pred_conf%20%3D%20fc_conformal.predict_interval(forecasting_horizon%3Dhorizon%2C%20coverage_rates%3Dcoverage_rates)%0A%20%20%20%20_y_point%20%3D%20fc_conformal.predict(forecasting_horizon%3Dhorizon)%0A%20%20%20%20y_pred_conf%20%3D%20y_pred_conf.hstack(_y_point.drop(%22time%22%2C%20%22vintage_time%22))%0A%20%20%20%20return%20fc_conformal%2C%20y_pred_conf%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%5B%60plot_forecast%60%5D(%2Fpages%2Fapi%2Fgenerated%2Fyohou.plotting.forecasting.plot_forecast%2F)%20renders%20per-group%20prediction%20intervals.%20Use%0A%20%20%20%20%60groups%60%20to%20select%20which%20groups%20to%20display.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(coverage_rates%2C%20plot_forecast%2C%20y_pred_conf%2C%20y_test%2C%20y_train)%3A%0A%20%20%20%20_groups%20%3D%20sorted(%7Bc.split(%22__%22)%5B0%5D%20for%20c%20in%20y_train.columns%20if%20%22__%22%20in%20c%7D)%0A%20%20%20%20plot_forecast(%0A%20%20%20%20%20%20%20%20y_test%2C%0A%20%20%20%20%20%20%20%20y_pred_conf%2C%0A%20%20%20%20%20%20%20%20y_train%3Dy_train%2C%0A%20%20%20%20%20%20%20%20n_history%3D48%2C%0A%20%20%20%20%20%20%20%20coverage_rates%3Dcoverage_rates%2C%0A%20%20%20%20%20%20%20%20groups%3D_groups%5B%3A2%5D%2C%0A%20%20%20%20%20%20%20%20title%3D%22Split%20Conformal%3A%20Panel%20(90%25%20Interval)%22%2C%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%23%203.%20Interval%20Reduction%20Forecaster%20on%20Panel%20Data%0A%0A%20%20%20%20%5B%60IntervalReductionForecaster%60%5D(%2Fpages%2Fapi%2Fgenerated%2Fyohou.interval.reduction.IntervalReductionForecaster%2F)%20uses%20quantile%20regression%20to%20produce%0A%20%20%20%20prediction%20intervals.%20Each%20panel%20group%20gets%20independent%20quantile%0A%20%20%20%20estimates.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(IntervalReductionForecaster%2C%20coverage_rates%2C%20horizon%2C%20y_train)%3A%0A%20%20%20%20fc_interval%20%3D%20IntervalReductionForecaster()%0A%20%20%20%20fc_interval.fit(y_train%2C%20forecasting_horizon%3Dhorizon%2C%20coverage_rates%3Dcoverage_rates)%0A%20%20%20%20y_pred_interval%20%3D%20fc_interval.predict_interval(forecasting_horizon%3Dhorizon%2C%20coverage_rates%3Dcoverage_rates)%0A%20%20%20%20return%20(y_pred_interval%2C)%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%5B%60plot_forecast%60%5D(%2Fpages%2Fapi%2Fgenerated%2Fyohou.plotting.forecasting.plot_forecast%2F)%20for%20the%20quantile%20reduction%20approach%20shows%20the%20same%0A%20%20%20%20groups%2C%20allowing%20visual%20comparison%20of%20interval%20width%20and%20shape.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(coverage_rates%2C%20plot_forecast%2C%20y_pred_interval%2C%20y_test%2C%20y_train)%3A%0A%20%20%20%20_groups%20%3D%20sorted(%7Bc.split(%22__%22)%5B0%5D%20for%20c%20in%20y_train.columns%20if%20%22__%22%20in%20c%7D)%0A%20%20%20%20plot_forecast(%0A%20%20%20%20%20%20%20%20y_test%2C%0A%20%20%20%20%20%20%20%20y_pred_interval%2C%0A%20%20%20%20%20%20%20%20y_train%3Dy_train%2C%0A%20%20%20%20%20%20%20%20n_history%3D48%2C%0A%20%20%20%20%20%20%20%20coverage_rates%3Dcoverage_rates%2C%0A%20%20%20%20%20%20%20%20groups%3D_groups%5B%3A2%5D%2C%0A%20%20%20%20%20%20%20%20title%3D%22Interval%20Reduction%3A%20Panel%20(90%25%20Interval)%22%2C%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%23%204.%20Per-Group%20Coverage%20Analysis%0A%0A%20%20%20%20Check%20whether%20each%20group%20achieves%20the%20target%20coverage%20rate%20and%0A%20%20%20%20compare%20interval%20widths.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(%0A%20%20%20%20EmpiricalCoverage%2C%0A%20%20%20%20MeanIntervalWidth%2C%0A%20%20%20%20groups%2C%0A%20%20%20%20mo%2C%0A%20%20%20%20pl%2C%0A%20%20%20%20y_pred_conf%2C%0A%20%20%20%20y_pred_interval%2C%0A%20%20%20%20y_test%2C%0A%20%20%20%20y_train%2C%0A)%3A%0A%20%20%20%20_cov_scorer%20%3D%20EmpiricalCoverage()%0A%20%20%20%20_width_scorer%20%3D%20MeanIntervalWidth()%0A%0A%20%20%20%20_cov_scorer.fit(y_train)%0A%20%20%20%20_width_scorer.fit(y_train)%0A%0A%20%20%20%20_rows%20%3D%20%5B%5D%0A%20%20%20%20for%20_state%20in%20sorted(groups.keys())%3A%0A%20%20%20%20%20%20%20%20_cov_c%20%3D%20float(_cov_scorer.score(y_test%2C%20y_pred_conf%2C%20groups%3D%5B_state%5D))%0A%20%20%20%20%20%20%20%20_cov_i%20%3D%20float(_cov_scorer.score(y_test%2C%20y_pred_interval%2C%20groups%3D%5B_state%5D))%0A%20%20%20%20%20%20%20%20_w_c%20%3D%20float(_width_scorer.score(y_test%2C%20y_pred_conf%2C%20groups%3D%5B_state%5D))%0A%20%20%20%20%20%20%20%20_w_i%20%3D%20float(_width_scorer.score(y_test%2C%20y_pred_interval%2C%20groups%3D%5B_state%5D))%0A%0A%20%20%20%20%20%20%20%20_rows.append(%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20%22Station%22%3A%20_state%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%22Conformal%20Coverage%22%3A%20round(_cov_c%2C%203)%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%22Reduction%20Coverage%22%3A%20round(_cov_i%2C%203)%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%22Conformal%20Width%22%3A%20round(_w_c%2C%201)%2C%0A%20%20%20%20%20%20%20%20%20%20%20%20%22Reduction%20Width%22%3A%20round(_w_i%2C%201)%2C%0A%20%20%20%20%20%20%20%20%7D)%0A%0A%20%20%20%20_results%20%3D%20pl.DataFrame(_rows)%0A%20%20%20%20mo.ui.table(_results)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%23%20Multi-vintage%20Scoring%0A%0A%20%20%20%20The%20%60observe_predict_interval%60%20method%20with%20%60stride%3D1%60%20produces%20one%0A%20%20%20%20interval%20forecast%20per%20observation%20point%2C%20creating%20multiple%20*vintages*.%0A%20%20%20%20Each%20vintage%20represents%20a%20different%20forecast%20origin%2C%20so%20you%20can%20analyse%0A%20%20%20%20how%20interval%20quality%20evolves%20as%20the%20model%20absorbs%20more%20data.%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0A%40app.cell%0Adef%20_(coverage_rates%2C%20deepcopy%2C%20fc_conformal%2C%20horizon%2C%20y_test)%3A%0A%20%20%20%20_vintage_model%20%3D%20deepcopy(fc_conformal)%0A%20%20%20%20y_pred_vintages%20%3D%20_vintage_model.observe_predict_interval(%0A%20%20%20%20%20%20%20%20y%3Dy_test%2C%0A%20%20%20%20%20%20%20%20stride%3D1%2C%0A%20%20%20%20%20%20%20%20forecasting_horizon%3Dhorizon%2C%0A%20%20%20%20%20%20%20%20coverage_rates%3Dcoverage_rates%2C%0A%20%20%20%20)%0A%20%20%20%20print(f%22Vintages%3A%20%7By_pred_vintages%5B'vintage_time'%5D.n_unique()%7D%22)%0A%20%20%20%20y_pred_vintages.head(10)%0A%20%20%20%20return%20(y_pred_vintages%2C)%0A%0A%0A%40app.cell%0Adef%20_(IntervalScore%2C%20y_train)%3A%0A%20%20%20%20vintage_scorer%20%3D%20IntervalScore()%0A%20%20%20%20vintage_scorer.fit(y_train)%0A%20%20%20%20return%20(vintage_scorer%2C)%0A%0A%0A%40app.cell%0Adef%20_(plot_score_per_vintage%2C%20vintage_scorer%2C%20y_pred_vintages%2C%20y_test)%3A%0A%20%20%20%20plot_score_per_vintage(%0A%20%20%20%20%20%20%20%20vintage_scorer%2C%0A%20%20%20%20%20%20%20%20y_test%2C%0A%20%20%20%20%20%20%20%20y_pred_vintages%2C%0A%20%20%20%20%20%20%20%20title%3D%22Interval%20Score%20per%20Vintage%22%2C%0A%20%20%20%20%20%20%20%20y_label%3D%22Interval%20Score%22%2C%0A%20%20%20%20%20%20%20%20height%3D380%2C%0A%20%20%20%20)%0A%20%20%20%20return%0A%0A%0A%40app.cell(hide_code%3DTrue)%0Adef%20_(mo)%3A%0A%20%20%20%20mo.md(r%22%22%22%0A%20%20%20%20%23%23%20Next%20Steps%0A%0A%20%20%20%20-%20**Aggregation%20modes**%3A%20See%20%5B%60examples%2Fevaluation-search%2Faggregation_modes.py%60%5D(%2Fexamples%2Faggregation_modes%2F)%20for%20coveragewise%20scoring%0A%20%20%20%20-%20**Conformity%20scorers**%3A%20See%20%5B%60examples%2Fevaluation-search%2Fconformity_scorers.py%60%5D(%2Fexamples%2Fconformity_scorers%2F)%0A%20%20%20%20%22%22%22)%0A%20%20%20%20return%0A%0A%0Aif%20__name__%20%3D%3D%20%22__main__%22%3A%0A%20%20%20%20app.run()%0A