Skip to content

validate_transformer_data

yohou.utils.validate_data.validate_transformer_data(transformer, X=None, *, reset=True, inverse=False, X_t=None, X_p=None, observation_horizon=None, stateful=False, **check_params)

validate_transformer_data(
    transformer: BaseTransformer,
    X: pl.DataFrame | None = None,
    *,
    reset: Literal[True],
    inverse: bool = False,
    X_t: pl.DataFrame | None = None,
    X_p: pl.DataFrame | None = None,
    observation_horizon: int | None = None,
    stateful: bool = False,
    **check_params,
) -> pl.DataFrame
validate_transformer_data(
    transformer: BaseTransformer,
    X: pl.DataFrame | None = None,
    *,
    reset: Literal[False],
    inverse: Literal[True],
    X_t: pl.DataFrame | None = None,
    X_p: pl.DataFrame | None = None,
    observation_horizon: int | None = None,
    stateful: Literal[True],
    **check_params,
) -> tuple[pl.DataFrame, pl.DataFrame]
validate_transformer_data(
    transformer: BaseTransformer,
    X: pl.DataFrame | None = None,
    *,
    reset: Literal[False],
    inverse: Literal[True],
    X_t: pl.DataFrame | None = None,
    X_p: pl.DataFrame | None = None,
    observation_horizon: int | None = None,
    stateful: Literal[False] = ...,
    **check_params,
) -> tuple[pl.DataFrame, None]
validate_transformer_data(
    transformer: BaseTransformer,
    X: pl.DataFrame | None = None,
    *,
    reset: Literal[False],
    inverse: Literal[False] = ...,
    X_t: pl.DataFrame | None = None,
    X_p: pl.DataFrame | None = None,
    observation_horizon: int | None = None,
    stateful: bool = False,
    **check_params,
) -> pl.DataFrame

Validate data for transformers.

Parameters

Name Type Description Default
transformer BaseTransformer

The transformer instance.

required
X DataFrame or None

Input data.

None
reset bool

Whether this is a fit context.

True
inverse bool

Whether this is an inverse transform context.

False
X_t DataFrame or None

Transformed data for inverse transform.

None
X_p DataFrame or None

Previous untransformed data for stateful inverse transform.

None
observation_horizon int or None

Required observation horizon for inverse transform.

None
stateful bool

If True (and inverse=True), X_p is required and is guaranteed to be non-None in the returned tuple. Pass a literal True at the call site so that type checkers can narrow the return type.

False
**check_params dict

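Additional validation options. In the transform context, `check_intervals` and `check_continuity` (both treated as True when omitted) toggle the interval-consistency and continuity checks.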

{}

Returns

Type Description
DataFrame or tuple

Validated data: `X` in the fit and transform contexts, or the tuple `(X_t, X_p)` in the inverse transform context.

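See Also

BaseTransformer (yohou.base.transformer.BaseTransformer) : Base class for all transformers.

check_inputs (yohou.utils.validation.check_inputs) : Low-level input validation helper.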

Source Code

Show/Hide source
def validate_transformer_data(
    transformer: BaseTransformer,
    X: pl.DataFrame | None = None,
    *,
    reset: bool = True,
    inverse: bool = False,
    X_t: pl.DataFrame | None = None,
    X_p: pl.DataFrame | None = None,
    observation_horizon: int | None = None,
    stateful: bool = False,
    **check_params,
) -> pl.DataFrame | tuple[pl.DataFrame, pl.DataFrame | None] | tuple[pl.DataFrame, pl.DataFrame]:
    """Validate data for transformers.

    The function has three modes, selected by ``reset`` and ``inverse``:

    * ``reset=True`` (fit): validates ``X`` and records ``interval_``,
      ``feature_names_in_``, ``n_features_in_`` and ``X_schema_`` on
      ``transformer``.
    * ``reset=False, inverse=True`` (inverse transform): validates the
      transformed data and, when given, the previous untransformed data.
    * ``reset=False, inverse=False`` (transform): validates ``X`` against the
      schema recorded at fit time and, optionally, its interval consistency
      and continuity with ``transformer._X_observed``.

    Parameters
    ----------
    transformer : BaseTransformer
        The transformer instance.
    X : pl.DataFrame or None, default=None
        Input data. In the inverse context it is used as the transformed data
        when ``X_t`` is not given.
    reset : bool, default=True
        Whether this is a fit context.
    inverse : bool, default=False
        Whether this is an inverse transform context. Ignored when
        ``reset=True``.
    X_t : pl.DataFrame or None, default=None
        Transformed data for inverse transform.
    X_p : pl.DataFrame or None, default=None
        Previous untransformed data for stateful inverse transform.
    observation_horizon : int or None, default=None
        Required observation horizon for inverse transform.
    stateful : bool, default=False
        If True (and inverse=True), X_p is required and is guaranteed to be
        non-None in the returned tuple. Pass a literal True at the call site
        so that type checkers can narrow the return type.
    **check_params : dict
        Additional validation options. Only read in the transform context:
        ``check_intervals`` and ``check_continuity`` (both treated as True
        when omitted) toggle the interval-consistency and continuity checks.

    Returns
    -------
    pl.DataFrame or tuple
        ``X`` in the fit context, the schema-checked ``X`` in the transform
        context, or the tuple ``(X_t, X_p)`` in the inverse transform context.

    Raises
    ------
    ValueError
        If the data required by the selected context is missing (``X`` for
        fit/transform, ``X_t`` or ``X`` for inverse, ``X_p`` when
        ``stateful=True`` or ``observation_horizon > 0``), if ``X_p`` has
        fewer rows than ``observation_horizon``, or if the intervals of
        ``X_p`` and ``X_t`` differ. The validation helpers called here may
        raise additional errors of their own.

    See Also
    --------
    - [`BaseTransformer`][yohou.base.transformer.BaseTransformer] : Base class for all transformers.
    - [`check_inputs`][yohou.utils.validation.check_inputs] : Low-level input validation helper.

    """
    if reset:
        # Fit context
        if X is None:
            raise ValueError("`X` cannot be None in fit context.")
        transformer.interval_ = check_inputs(X, None)
        # Select the non-"time" columns once; both the feature names and the
        # schema are derived from the same selection.
        features = X.select(~cs.by_name("time"))
        transformer.feature_names_in_ = features.columns
        transformer.n_features_in_ = len(transformer.feature_names_in_)
        transformer.X_schema_ = dict(features.schema)
        return X

    # Transform/Inverse context (reset=False)
    if inverse:
        # Use X_t if provided, otherwise treat X as X_t (transformed data)
        if X_t is None:
            if X is None:
                raise ValueError("Either `X_t` or `X` must be provided for inverse transform.")
            X_t = X

        # Validate time columns
        check_time_column(X_t)
        if X_p is not None:
            check_time_column(X_p)

        if stateful and X_p is None:
            raise ValueError(
                "X_p cannot be None for stateful inverse transform. Provide the necessary previous untransformed data."
            )

        if observation_horizon is not None and observation_horizon > 0 and X_p is None:
            raise ValueError(
                "X_p cannot be None to invert a transform that has observation_horizon > 0. "
                "Provide the necessary previous untransformed data."
            )

        # A single-row X_t has no interval to measure.
        X_t_interval = None
        if len(X_t) >= 2:
            X_t_interval = check_interval_consistency(X_t)

        # NOTE(review): an empty X_p, or observation_horizon=None, skips both
        # the row-count check and the interval comparison below - confirm
        # that this is intended.
        if X_p is not None and len(X_p) > 0 and observation_horizon is not None:
            if len(X_p) < observation_horizon:
                raise ValueError(
                    f"X_p must have at least {observation_horizon} rows (observation_horizon), "
                    f"but has only {len(X_p)} rows."
                )

            if len(X_p) > 1:
                X_p_interval = check_interval_consistency(X_p)
                if X_t_interval is not None and X_p_interval != X_t_interval:
                    raise ValueError(
                        f"Time intervals do not match: X_p has interval {X_p_interval}, "
                        f"but X_t has interval {X_t_interval}."
                    )

        return X_t, X_p

    # transform context
    if X is None:
        raise ValueError("`X` cannot be None for transform (when inverse=False).")
    check_time_column(X)
    X = check_schema(X, transformer.X_schema_)

    # The interval can only be measured with at least two rows.
    has_multiple_rows = len(X) >= 2
    interval = None
    interval_computed = False

    if check_params.get("check_intervals", True) and has_multiple_rows:
        interval = check_interval_consistency(X)
        interval_computed = True

    if (
        check_params.get("check_continuity", True)
        and hasattr(transformer, "_X_observed")
        and len(transformer._X_observed) > 0
    ):
        # Reuse the interval measured above instead of scanning X a second
        # time; it is only computed here when check_intervals was disabled.
        if has_multiple_rows and not interval_computed:
            interval = check_interval_consistency(X)
        check_continuity(
            transformer._X_observed,
            X,
            expected_interval=interval,
            check_intervals=(interval is not None),
        )

    return X