Skip to content

BaseSimilarity

yohou.interval.base.BaseSimilarity

Bases: BaseEstimator

Base class for similarity measures used in interval forecasting.

Similarity measures assign weights to calibration residuals based on how similar past prediction contexts are to the current one.

Notes

Used by SplitConformalForecaster to produce adaptive (locally weighted) prediction intervals. When similarity=None, uniform weights are used.

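See Also

DistanceSimilarity : Distance-based similarity measure.

SplitConformalForecaster : Conformal forecaster that uses similarities.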

Source Code

Show/Hide source
class BaseSimilarity(BaseEstimator, metaclass=abc.ABCMeta):
    """Abstract foundation for similarity measures in interval forecasting.

    A similarity measure weights calibration residuals according to how
    closely each past prediction context resembles the current one.

    Notes
    -----
    ``SplitConformalForecaster`` relies on these measures to build
    adaptive (locally weighted) prediction intervals.  Uniform weights
    are used when ``similarity=None``.

    See Also
    --------
    - [`DistanceSimilarity`][yohou.interval.similarity.DistanceSimilarity] : Distance-based similarity measure.
    - [`SplitConformalForecaster`][yohou.interval.split_conformal.SplitConformalForecaster] : Conformal forecaster that uses similarities.

    """

    _parameter_constraints: dict = {}

    @staticmethod
    def _validate_no_nulls(df: pl.DataFrame, method_name: str) -> None:
        """Reject a frame that holds null or NaN entries.

        Nulls are looked for in every column; NaNs only in the columns
        picked by the numeric selector.

        Parameters
        ----------
        df : pl.DataFrame
            Frame to inspect.
        method_name : str
            Caller's method name, interpolated into the error message.

        Raises
        ------
        ValueError
            When at least one column has a null or NaN value.  The
            message lists the offending column names in sorted order.

        """
        with_nulls = {name for name in df.columns if df[name].is_null().any()}
        numeric_names = df.select(cs.numeric()).columns
        with_nans = {name for name in numeric_names if df[name].is_nan().any()}

        # A column may fail both checks; the union reports it only once.
        bad_cols = sorted(with_nulls | with_nans)
        if not bad_cols:
            return
        raise ValueError(f"{method_name}() received data with null or NaN values in columns: {bad_cols}")

    def __sklearn_tags__(self) -> Tags:
        """Build the estimator tags for a similarity measure.

        Returns
        -------
        Tags
            Tags of estimator type ``"similarity"`` that require fitting,
            with the similarity-specific flags switched on.

        """
        tags = Tags(estimator_type="similarity", requires_fit=True)

        similarity_tags = tags.similarity_tags
        assert similarity_tags is not None

        # Defaults at the base level: symmetric, prediction-driven,
        # weight-producing.
        similarity_tags.symmetric = True
        similarity_tags.requires_predictions = True
        similarity_tags.produces_weights = True

        return tags

    @property
    def discarded_time_stamps(self) -> None:
        """Discarded timestamps; a placeholder that is always ``None``.

        Returns
        -------
        None

        """
        return None

    @abc.abstractmethod
    def fit(
        self,
        y: pl.DataFrame,
        y_pred: pl.DataFrame,
        X_actual: pl.DataFrame | None = None,
    ) -> "BaseSimilarity":
        """Fit the similarity measure.

        Abstract: every concrete subclass has to implement it.

        Parameters
        ----------
        y : pl.DataFrame
            Target time series.

        y_pred : pl.DataFrame
            Point predictions.

        X_actual : pl.DataFrame or None, default=None
            Exogenous features.

        Returns
        -------
        self

        """

    @abc.abstractmethod
    def observe(
        self,
        y: pl.DataFrame,
        y_pred: pl.DataFrame,
        X_actual: pl.DataFrame | None = None,
    ) -> "BaseSimilarity":
        """Update the similarity measure with newly observed data.

        Abstract: every concrete subclass has to implement it.

        Parameters
        ----------
        y : pl.DataFrame
            New target observations.

        y_pred : pl.DataFrame
            New predictions.

        X_actual : pl.DataFrame or None, default=None
            New exogenous features.

        Returns
        -------
        self

        """

    @abc.abstractmethod
    def predict(
        self,
        y_pred: pl.DataFrame,
        X_actual: pl.DataFrame | None = None,
    ) -> np.ndarray[tuple[int, int], np.dtype[np.floating[Any]]]:
        """Compute similarity weights for a set of predictions.

        Abstract: every concrete subclass has to implement it.

        Parameters
        ----------
        y_pred : pl.DataFrame
            Predictions for which similarities are computed.

        X_actual : pl.DataFrame or None, default=None
            Exogenous features.

        Returns
        -------
        np.ndarray
            Similarity weights, annotated as a two-dimensional
            floating-point array.

        """

    def rewind(
        self,
        y: pl.DataFrame,
        y_pred: pl.DataFrame,
        X_actual: pl.DataFrame | None = None,
    ) -> "BaseSimilarity":
        """Remove previously observed data from the similarity measure.

        The base implementation does nothing and hands back ``self``.
        Concrete subclasses that keep track of observed data should
        override it so the most recently observed rows are dropped.

        Parameters
        ----------
        y : pl.DataFrame
            Target observations to rewind.

        y_pred : pl.DataFrame
            Predictions to rewind.

        X_actual : pl.DataFrame or None, default=None
            Exogenous features to rewind.

        Returns
        -------
        self

        """
        return self

Methods

discarded_time_stamps property

Get discarded timestamps (placeholder property).

Returns
Type Description
None

__sklearn_tags__()

Get estimator tags.

Returns
Type Description
Tags

Estimator tags with similarity-specific attributes.

Source Code
Show/Hide source
def __sklearn_tags__(self) -> Tags:
    """Build the estimator tags for a similarity measure.

    Returns
    -------
    Tags
        Tags of estimator type ``"similarity"`` that require fitting,
        with the similarity-specific flags switched on.

    """
    tags = Tags(estimator_type="similarity", requires_fit=True)

    similarity_tags = tags.similarity_tags
    assert similarity_tags is not None

    # Defaults at the base level: symmetric, prediction-driven,
    # weight-producing.
    similarity_tags.symmetric = True
    similarity_tags.requires_predictions = True
    similarity_tags.produces_weights = True

    return tags

fit(y, y_pred, X_actual=None) abstractmethod

Fit the similarity measure.

Parameters
Name Type Description Default
y DataFrame

Target time series.

required
y_pred DataFrame

Point predictions.

required
X_actual DataFrame or None

Exogenous features.

None
Returns
Type Description
self
Source Code
Show/Hide source
@abc.abstractmethod
def fit(
    self,
    y: pl.DataFrame,
    y_pred: pl.DataFrame,
    X_actual: pl.DataFrame | None = None,
) -> "BaseSimilarity":
    """Fit the similarity measure.

    Abstract: every concrete subclass has to implement it.

    Parameters
    ----------
    y : pl.DataFrame
        Target time series.

    y_pred : pl.DataFrame
        Point predictions.

    X_actual : pl.DataFrame or None, default=None
        Exogenous features.

    Returns
    -------
    self

    """

observe(y, y_pred, X_actual=None) abstractmethod

Observe new data and update the similarity measure.

Parameters
Name Type Description Default
y DataFrame

New target observations.

required
y_pred DataFrame

New predictions.

required
X_actual DataFrame or None

New exogenous features.

None
Returns
Type Description
self
Source Code
Show/Hide source
@abc.abstractmethod
def observe(
    self,
    y: pl.DataFrame,
    y_pred: pl.DataFrame,
    X_actual: pl.DataFrame | None = None,
) -> "BaseSimilarity":
    """Update the similarity measure with newly observed data.

    Abstract: every concrete subclass has to implement it.

    Parameters
    ----------
    y : pl.DataFrame
        New target observations.

    y_pred : pl.DataFrame
        New predictions.

    X_actual : pl.DataFrame or None, default=None
        New exogenous features.

    Returns
    -------
    self

    """

predict(y_pred, X_actual=None) abstractmethod

Compute similarity weights for predictions.

Parameters
Name Type Description Default
y_pred DataFrame

Predictions to compute similarities for.

required
X_actual DataFrame or None

Exogenous features.

None
Returns
Type Description
ndarray

Similarity weights.

Source Code
Show/Hide source
@abc.abstractmethod
def predict(
    self,
    y_pred: pl.DataFrame,
    X_actual: pl.DataFrame | None = None,
) -> np.ndarray[tuple[int, int], np.dtype[np.floating[Any]]]:
    """Compute similarity weights for a set of predictions.

    Abstract: every concrete subclass has to implement it.

    Parameters
    ----------
    y_pred : pl.DataFrame
        Predictions for which similarities are computed.

    X_actual : pl.DataFrame or None, default=None
        Exogenous features.

    Returns
    -------
    np.ndarray
        Similarity weights, annotated as a two-dimensional
        floating-point array.

    """

rewind(y, y_pred, X_actual=None)

Rewind observed data from the similarity measure.

Default implementation is a no-op. Concrete subclasses that track observed data should override this to remove the most recently observed rows.

Parameters
Name Type Description Default
y DataFrame

Target observations to rewind.

required
y_pred DataFrame

Predictions to rewind.

required
X_actual DataFrame or None

Exogenous features to rewind.

None
Returns
Type Description
self
Source Code
Show/Hide source
def rewind(
    self,
    y: pl.DataFrame,
    y_pred: pl.DataFrame,
    X_actual: pl.DataFrame | None = None,
) -> "BaseSimilarity":
    """Remove previously observed data from the similarity measure.

    The base implementation does nothing and hands back ``self``.
    Concrete subclasses that keep track of observed data should
    override it so the most recently observed rows are dropped.

    Parameters
    ----------
    y : pl.DataFrame
        Target observations to rewind.

    y_pred : pl.DataFrame
        Predictions to rewind.

    X_actual : pl.DataFrame or None, default=None
        Exogenous features to rewind.

    Returns
    -------
    self

    """
    return self