Skip to content

validate_forecaster_data

yohou.utils.validate_data.validate_forecaster_data(forecaster, y=None, X_actual=None, *, reset=True, groups=None, X_future=None, X_forecast=None)

validate_forecaster_data(
    forecaster: BaseForecaster,
    y: pl.DataFrame,
    X_actual: pl.DataFrame | None = None,
    *,
    reset: Literal[True] = True,
    groups: list[str] | None = None,
    X_future: pl.DataFrame | None = None,
    X_forecast: pl.DataFrame | None = None,
) -> tuple[pl.DataFrame, pl.DataFrame | None, None]
validate_forecaster_data(
    forecaster: BaseForecaster,
    y: None,
    X_actual: pl.DataFrame | None = None,
    *,
    reset: Literal[True] = True,
    groups: list[str] | None = None,
    X_future: pl.DataFrame | None = None,
    X_forecast: pl.DataFrame | None = None,
) -> tuple[None, pl.DataFrame | None, None]
validate_forecaster_data(
    forecaster: BaseForecaster,
    y: pl.DataFrame,
    X_actual: pl.DataFrame | None = None,
    *,
    reset: Literal[False],
    groups: list[str] | None = None,
    X_future: pl.DataFrame | None = None,
    X_forecast: pl.DataFrame | None = None,
) -> tuple[
    pl.DataFrame, pl.DataFrame | None, list[str] | None
]
validate_forecaster_data(
    forecaster: BaseForecaster,
    y: None,
    X_actual: pl.DataFrame | None = None,
    *,
    reset: Literal[False],
    groups: list[str] | None = None,
    X_future: pl.DataFrame | None = None,
    X_forecast: pl.DataFrame | None = None,
) -> tuple[None, pl.DataFrame | None, list[str] | None]

Validate and prepare input data for forecasters.

Handles two contexts: fit (reset=True), where the time interval is inferred and stored on the forecaster, and predict/update (reset=False), where schemas and panel groups are validated against the fitted state.

Parameters

Name Type Description Default
forecaster BaseForecaster

The forecaster instance. interval_ is set on it in the fit context; its fitted schema and group attributes are read in the predict/update context.

required
y DataFrame or None

Target time series with "time" column.

None
X_actual DataFrame or None

Exogenous features with "time" column.

None
reset bool

If True, validate in fit context (infer the interval and store it on the forecaster). If False, validate in predict/update context (check schemas and panel groups against the fitted state).

True
groups list of str or None

Panel groups to validate. Normalized against the fitted groups when reset=False.

None
X_future DataFrame or None

Known future features with a "time" column. Validated for structure and, when fitted, schema consistency.

None
X_forecast DataFrame or None

External forecasts with "vintage_time" and "time" columns. Validated for structure and, when fitted, schema consistency.

None

Returns

Type Description
tuple of (pl.DataFrame or None, pl.DataFrame or None, list of str or None)

Validated (y, X_actual, groups). groups is None in fit context.

Raises

Type Description
ValueError

If time columns are missing, schema does not match fitted state, or panel groups are inconsistent.

See Also

Source Code

Show/Hide source
def validate_forecaster_data(
    forecaster: BaseForecaster,
    y: pl.DataFrame | None = None,
    X_actual: pl.DataFrame | None = None,
    *,
    reset: bool = True,
    groups: list[str] | None = None,
    X_future: pl.DataFrame | None = None,
    X_forecast: pl.DataFrame | None = None,
) -> tuple[pl.DataFrame | None, pl.DataFrame | None, list[str] | None]:
    """Check forecaster inputs and return them ready for use.

    The function runs in one of two modes selected by ``reset``:

    * **fit** (``reset=True``): when ``y`` is given, the time interval
      returned by ``check_inputs(y, X_actual)`` is stored on the forecaster
      as ``interval_``; ``y`` and ``X_actual`` are returned unchanged.
    * **predict/update** (``reset=False``): time columns, panel groups and
      column schemas are checked against the attributes already present
      on the fitted forecaster.

    ``X_future`` and ``X_forecast`` are structurally validated in both
    modes; their schemas are compared with the fitted ones only when
    ``reset=False`` and the forecaster carries the corresponding schema.

    Parameters
    ----------
    forecaster : BaseForecaster
        The forecaster instance.  ``interval_`` is written in fit context;
        ``groups_``, ``local_y_schema_`` and the optional
        ``local_X_actual_schema_`` / ``shared_X_actual_schema_`` /
        ``_X_future_schema_`` / ``_X_forecast_schema_`` attributes are read
        in predict/update context.
    y : pl.DataFrame or None
        Target time series with a ``"time"`` column.
    X_actual : pl.DataFrame or None, default=None
        Exogenous features with a ``"time"`` column.
    reset : bool, default=True
        ``True`` selects the fit context, ``False`` the predict/update
        context.
    groups : list of str or None, default=None
        Requested panel groups.  Only used when ``reset=False``, where
        they are normalized against ``forecaster.groups_``.
    X_future : pl.DataFrame or None, default=None
        Known future features with a ``"time"`` column.
    X_forecast : pl.DataFrame or None, default=None
        External forecasts with ``"vintage_time"`` and ``"time"`` columns.

    Returns
    -------
    tuple of (pl.DataFrame or None, pl.DataFrame or None, list of str or None)
        ``(y, X_actual, groups)``.  In fit context the third item is always
        ``None``; in predict/update context ``y`` and ``X_actual`` are the
        results of the schema checks and ``groups`` is the normalized list.

    Raises
    ------
    ValueError
        Propagated from the underlying validation helpers when time columns
        are missing, a schema does not match the fitted state, or panel
        groups are inconsistent.

    See Also
    --------
    - [`BaseForecaster`][yohou.base.forecaster.BaseForecaster] : Base class for all forecasters.
    - [`validate_time_weight`][yohou.utils.validate_data.validate_time_weight] : Validate time weighting parameters.
    - [`check_inputs`][yohou.utils.validation.check_inputs] : Low-level input validation helper.

    """
    # Structural checks apply regardless of context.
    if X_future is not None:
        _validate_X_future_structure(X_future)
    if X_forecast is not None:
        _validate_X_forecast_structure(X_forecast)

    # ---- Fit context -----------------------------------------------------
    if reset:
        # check_inputs needs a real y, so the interval is only inferred then.
        if y is not None:
            forecaster.interval_ = check_inputs(y, X_actual)
        return y, X_actual, None

    # ---- Predict/update context ------------------------------------------
    for frame in (y, X_actual):
        if frame is not None:
            check_time_column(frame)

    groups = check_groups(
        fitted_panel_groups=forecaster.groups_,
        requested_panel_groups=groups,
    )

    # Schema check also enforces the fitted column order.
    if y is not None:
        y = check_schema(y, forecaster.local_y_schema_, groups=groups)

    if X_actual is not None:
        fitted_groups = forecaster.groups_
        if fitted_groups is None:
            # Non-panel data: a single optional schema to check against.
            plain_schema = getattr(forecaster, "local_X_actual_schema_", None)
            if plain_schema:
                X_actual = check_schema(X_actual, plain_schema)
        else:
            # Panel data: group-prefixed (local) columns and un-prefixed
            # (shared) columns are validated separately, local first.
            local_schema = getattr(forecaster, "local_X_actual_schema_", None)
            local_part = None
            if local_schema:
                local_part = check_schema(X_actual, local_schema, groups=fitted_groups)

            shared_schema = getattr(forecaster, "shared_X_actual_schema_", None)
            shared_part = None
            if shared_schema:
                shared_part = check_schema(X_actual, shared_schema)

            if local_schema and shared_schema:
                assert shared_part is not None
                # Drop the duplicate "time" column before stitching the
                # shared columns next to the local ones.
                shared_without_time = shared_part.select(~cs.by_name("time"))
                X_actual = pl.concat([local_part, shared_without_time], how="horizontal")
            elif local_schema:
                X_actual = local_part
            elif shared_schema:
                X_actual = shared_part
            # With neither schema fitted, X_actual passes through untouched.

    # Compare step-source frames with the schemas recorded at fit time, if any.
    step_sources = (
        (X_future, "_X_future_schema_", "X_future", {"time"}),
        (X_forecast, "_X_forecast_schema_", "X_forecast", {"time", "vintage_time"}),
    )
    for frame, schema_attr, label, ignored_cols in step_sources:
        if frame is None:
            continue
        fitted_schema = getattr(forecaster, schema_attr, None)
        if fitted_schema is not None:
            _validate_step_source_schema(frame, fitted_schema, label, exclude_cols=ignored_cols)

    return y, X_actual, groups