Skip to content

check_randomized_search_reproducibility

yohou.testing.search.check_randomized_search_reproducibility(search_cv, y, X_actual=None, forecasting_horizon=3, X_future=None, X_forecast=None)

Check that fitting twice with the same random_state produces the same sampled parameters and the same mean test scores.

Parameters

Name Type Description Default
search_cv RandomizedSearchCV

Unfitted RandomizedSearchCV instance with random_state set

required
y DataFrame

Training target data

required
X_actual DataFrame

Training features

None
forecasting_horizon int

Number of steps ahead to forecast

3

Raises

Type Description
AssertionError

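If two fits with the same random_state do not produce identical sampled parameters and mean test scores
ValueError

If search_cv is not a RandomizedSearchCV instance, or its random_state is None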

Source Code

Show/Hide source
def check_randomized_search_reproducibility(
    search_cv,
    y: pl.DataFrame,
    X_actual: pl.DataFrame | None = None,
    forecasting_horizon: int = 3,
    X_future: pl.DataFrame | None = None,
    X_forecast: pl.DataFrame | None = None,
) -> None:
    """Check that a fixed random_state gives reproducible search results.

    Two independent clones of ``search_cv`` are fitted on the same inputs.
    The sampled parameter candidates and the mean test scores recorded in
    ``cv_results_`` must be identical between the two fits.

    Parameters
    ----------
    search_cv : RandomizedSearchCV
        Unfitted RandomizedSearchCV instance with random_state set
    y : pl.DataFrame
        Training target data
    X_actual : pl.DataFrame, optional
        Training features
    forecasting_horizon : int, default=3
        Number of steps ahead to forecast
    X_future : pl.DataFrame, optional
        Forwarded unchanged to ``search_cv.fit`` as ``X_future``
    X_forecast : pl.DataFrame, optional
        Forwarded unchanged to ``search_cv.fit`` as ``X_forecast``

    Raises
    ------
    ValueError
        If ``search_cv`` is not a RandomizedSearchCV instance, or if its
        ``random_state`` is None
    AssertionError
        If random_state doesn't produce reproducible results

    """
    if not isinstance(search_cv, RandomizedSearchCV):
        raise ValueError("This check requires RandomizedSearchCV instance")

    if search_cv.random_state is None:
        raise ValueError("This check requires random_state to be set")

    # Fit two fresh clones so neither run can see state left by the other.
    fitted_searches = []
    for _ in range(2):
        candidate = clone(search_cv)
        candidate.fit(
            y,
            X_actual,
            forecasting_horizon=forecasting_horizon,
            X_future=X_future,
            X_forecast=X_forecast,
        )
        fitted_searches.append(candidate)
    first_search, second_search = fitted_searches

    first_results = first_search.cv_results_
    second_results = second_search.cv_results_

    # Check that same parameters were sampled
    assert first_results["params"] == second_results["params"], (
        "RandomizedSearchCV should produce same parameters with same random_state"
    )

    # Map each score column to the failure message used for it.
    if not first_search.multimetric_:
        score_messages = {
            "mean_test_score": "RandomizedSearchCV should produce same scores with same random_state",
        }
    else:
        # Read the metric names from the result columns instead of
        # ``scorer_.keys()``: when ``scorer_`` is not a mapping the old lookup
        # produced an empty list and the score comparison was silently skipped.
        prefix = "mean_test_"
        score_messages = {
            column: (
                f"RandomizedSearchCV should produce same {column[len(prefix):]} scores with same random_state"
            )
            for column in first_results
            if column.startswith(prefix)
        }
        # Guard against a vacuous pass when no metric column is present.
        assert score_messages, "RandomizedSearchCV multimetric results contain no mean_test_* scores to compare"

    for column, message in score_messages.items():
        np.testing.assert_array_equal(
            first_results[column],
            second_results[column],
            err_msg=message,
        )