Skip to content

check_splitter_produces_valid_indices

yohou.testing.splitter.check_splitter_produces_valid_indices(splitter, y, X_actual=None)

Check all train/test indices are valid row positions.

Indices should be non-negative integers within [0, len(y)).

Parameters

Name Type Description Default
splitter BaseSplitter

Splitter instance

required
y DataFrame

Target time series with "time" column

required
X_actual DataFrame | None

Exogenous features

None

Raises

Type Description
AssertionError

If indices are out of bounds or invalid

Source Code

Show/Hide source
def check_splitter_produces_valid_indices(splitter, y: pl.DataFrame, X_actual: pl.DataFrame | None = None) -> None:
    """Verify that every split yields usable, in-range row positions.

    For each ``(train, test)`` pair produced by ``splitter.split(y, X_actual)``
    both index arrays must be non-empty ``np.ndarray`` objects of dtype
    ``np.intp`` whose values all lie in ``[0, len(y))``. On top of that, the
    largest train index has to be strictly smaller than the smallest test
    index.

    Parameters
    ----------
    splitter : BaseSplitter
        Splitter instance
    y : pl.DataFrame
        Target time series with "time" column
    X_actual : pl.DataFrame, optional
        Exogenous features

    Raises
    ------
    AssertionError
        If any check fails; the message names the split number and the
        offending set.

    """
    upper_bound = len(y)

    for i, fold in enumerate(splitter.split(y, X_actual)):
        train_idx, test_idx = fold

        # Same battery of checks for both sets; train is fully validated
        # before test so the first reported failure is deterministic.
        for label, idx in (("train", train_idx), ("test", test_idx)):
            assert isinstance(idx, np.ndarray), f"Split {i}: {label} indices should be ndarray"
            assert idx.dtype == np.intp, f"Split {i}: {label} indices should have dtype intp"
            assert len(idx) > 0, f"Split {i}: {label} set cannot be empty"
            assert not np.any(idx < 0), f"Split {i}: {label} indices contain negative values"
            assert not np.any(idx >= upper_bound), (
                f"Split {i}: {label} indices out of bounds (>= {upper_bound})"
            )

        # Both arrays are known to be non-empty here, so max/min are defined.
        last_train = train_idx.max()
        first_test = test_idx.min()
        assert last_train < first_test, (
            f"Split {i}: train indices must be before test indices (time series ordering)"
        )