Skip to content

validate_time_weight

yohou.utils.validate_data.validate_time_weight(time_weight, y, groups=None)

Validate time_weight parameter for forecasters and scorers.

Parameters

Name Type Description Default
time_weight callable, pl.DataFrame, or None

Time weighting specification to validate.

required
y DataFrame

Target time series with "time" column.

required
groups list of str or None

Panel group names if panel data.

None

Raises

Type Description
ValueError

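If time_weight is not a callable, a pl.DataFrame, or None; if the DataFrame lacks a "time" column or the expected weight column(s); or if a weight column contains missing, negative, or infinite values or sums to zero.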

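See Also

validate_forecaster_data (yohou.utils.validate_data.validate_forecaster_data): Validate forecaster input data.

validate_scorer_data (yohou.utils.validate_data.validate_scorer_data): Validate scorer input data.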

Source Code

Show/Hide source
def validate_time_weight(
    time_weight: Callable | pl.DataFrame | None,
    y: pl.DataFrame,
    groups: list[str] | None = None,
) -> None:
    """Validate time_weight parameter for forecasters and scorers.

    ``None`` and callables are accepted without further inspection. A
    ``pl.DataFrame`` must contain a ``"time"`` column plus at least one weight
    column, and every checked weight column must be free of null/NaN values,
    non-negative, finite, and must not sum to zero.

    Parameters
    ----------
    time_weight : callable, pl.DataFrame, or None
        Time weighting specification to validate.
    y : pl.DataFrame
        Target time series with "time" column. Not inspected by this
        function; it is part of the signature only.
    groups : list of str or None
        Panel group names if panel data. When ``None`` the data is treated as
        global and a ``"weight"`` column is required. Otherwise at least one
        ``"{group}_weight"`` column or a global ``"weight"`` column is
        required, and every column named ``"weight"`` or ending in
        ``"_weight"`` is value-checked.

    Raises
    ------
    ValueError
        If time_weight validation fails: wrong type, missing ``"time"``
        column, missing weight column(s), or a weight column containing
        null, NaN, negative, or infinite values, or summing to zero.

    See Also
    --------
    - [`validate_forecaster_data`][yohou.utils.validate_data.validate_forecaster_data] : Validate forecaster input data.
    - [`validate_scorer_data`][yohou.utils.validate_data.validate_scorer_data] : Validate scorer input data.

    """
    if time_weight is None:
        return

    if callable(time_weight):
        # Callable validation is done via validate_callable_signature
        # in the actual processing methods
        return

    # DataFrame validation
    if not isinstance(time_weight, pl.DataFrame):
        raise ValueError(f"time_weight must be callable, pl.DataFrame, or None, got {type(time_weight).__name__}")

    columns = time_weight.columns

    # Must have time column
    if "time" not in columns:
        raise ValueError("time_weight DataFrame must have 'time' column")

    # Check for weight columns
    weight_cols = [c for c in columns if c != "time"]
    if not weight_cols:
        raise ValueError(
            "time_weight DataFrame must have at least one weight column "
            "('weight' for global data or '{group}_weight' for panel data)"
        )

    # Validate weight column naming
    if groups is None:
        # Global data: must have "weight" column
        if "weight" not in columns:
            raise ValueError("time_weight DataFrame for global data must have 'weight' column")
        weight_cols_to_check = ["weight"]
    else:
        # Panel data: check for group-specific or global weight columns
        expected_group_cols = {f"{group}_weight" for group in groups}
        has_group_specific = any(col in columns for col in expected_group_cols)
        has_global = "weight" in columns

        if not has_group_specific and not has_global:
            raise ValueError(
                f"time_weight DataFrame for panel data must have either "
                f"group-specific columns {sorted(expected_group_cols)} "
                f"or global 'weight' column"
            )

        # Collect all weight columns to validate
        weight_cols_to_check = [c for c in weight_cols if c.endswith("_weight") or c == "weight"]

    # Validate weight values (no null/NaN, non-negative, finite, non-zero sum).
    # Every name in weight_cols_to_check is known to be a column at this point.
    for col in weight_cols_to_check:
        weights = time_weight[col]

        # Check for missing (null) values
        if weights.is_null().any():
            raise ValueError(f"Weight column '{col}' contains NaN values")

        # polars treats floating-point NaN as a regular value, distinct from
        # null, so is_null() does not catch it. NaN would also slip past the
        # negative / infinite / zero-sum checks below because every comparison
        # with NaN is false. is_nan() is only defined for float dtypes.
        if weights.dtype.is_float() and weights.is_nan().any():
            raise ValueError(f"Weight column '{col}' contains NaN values")

        # Check for negative values
        if (weights < 0).any():
            raise ValueError(f"Weight column '{col}' contains negative values")

        # Check for infinite values
        if weights.is_infinite().any():
            raise ValueError(f"Weight column '{col}' contains infinite values")

        # Check for all-zero weights
        if weights.sum() == 0:
            raise ValueError(f"Weight column '{col}' sums to zero")