Skip to content

CompositeSimilarity

yohou.interval.similarity.CompositeSimilarity

Bases: BaseSimilarity

Combine multiple similarity measures into a single weight vector.

Delegates fit, observe, rewind, and predict to each sub-similarity and then combines their weight matrices using either element-wise multiplication or weighted averaging.

Parameters

Name Type Description Default
similarities list of BaseSimilarity

At least two similarity instances to combine.

None
combination (multiply, mean)

How to combine the individual weight matrices.

"multiply": element-wise product with optional exponents, w_combined = prod(w_i ** alpha_i), then re-normalised row by row so that the weights lie in [0, 1). "mean": weighted average, w_combined = sum(alpha_i * w_i) / sum(alpha_i); the division is skipped when the coefficients sum to zero.

"multiply"
weights list of float or None

Per-similarity exponents (multiply) or mixing coefficients (mean). If None, all similarities contribute equally (exponents/coefficients of 1.0).

None

Attributes

Name Type Description
similarities_ list of BaseSimilarity

Fitted copies of the sub-similarities (set after fit).

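See Also

- DistanceSimilarity: Value-based distance similarity.
- TemporalSimilarity: Temporal Fourier feature similarity.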

Examples

>>> from datetime import datetime, timedelta
>>> import polars as pl
>>> import numpy as np
>>> from yohou.interval.similarity import (
...     CompositeSimilarity,
...     DistanceSimilarity,
...     TemporalSimilarity,
... )
>>>
>>> dates = [datetime(2021, 1, 1) + timedelta(days=i) for i in range(28)]
>>> y = pl.DataFrame({"time": dates, "value": np.random.randn(28)})
>>> y_pred = pl.DataFrame({"time": dates, "value": np.random.randn(28)})
>>>
>>> comp = CompositeSimilarity(
...     similarities=[
...         DistanceSimilarity(metric="euclidean"),
...         TemporalSimilarity(seasonalities=[7.0]),
...     ],
...     combination="multiply",
... )
>>> _ = comp.fit(y, y_pred)
>>> new_date = [datetime(2021, 1, 29)]
>>> y_pred_new = pl.DataFrame({"time": new_date, "value": [0.5]})
>>> weights = comp.predict(y_pred_new)
>>> weights.shape
(1, 28)

Source Code

Show/Hide source
class CompositeSimilarity(BaseSimilarity):
    r"""Blend several similarity measures into one weight matrix.

    Every call to ``fit``, ``observe``, ``rewind`` and ``predict`` is
    forwarded to each sub-similarity. The weight matrices returned by
    the sub-similarities are then merged, either through an element-wise
    product or through a weighted average.

    Parameters
    ----------
    similarities : list of BaseSimilarity
        The similarity instances to combine; at least two are required.
    combination : {"multiply", "mean"}, default="multiply"
        Rule used to merge the individual weight matrices.

        ``"multiply"``
            Element-wise product with optional exponents,
            ``w_combined = prod(w_i ** alpha_i)``. Each row is then
            rescaled to sum to ``n / (n + 1)``, with ``n`` the number of
            columns, which keeps non-negative weights inside ``[0, 1)``.
            A row whose product sums to zero is left at zero.
        ``"mean"``
            Weighted average,
            ``w_combined = sum(alpha_i * w_i) / sum(alpha_i)``. The
            division is skipped when the coefficients sum to zero.

    weights : list of float or None, default=None
        One value per sub-similarity, used as exponent (multiply) or as
        mixing coefficient (mean). With ``None`` every sub-similarity
        gets the value 1.0.

    Attributes
    ----------
    similarities_ : list of BaseSimilarity
        Fitted clones of the sub-similarities, available once ``fit``
        has run.

    See Also
    --------
    - [`DistanceSimilarity`][yohou.interval.similarity.DistanceSimilarity] : Value-based distance similarity.
    - [`TemporalSimilarity`][yohou.interval.similarity.TemporalSimilarity] : Temporal Fourier feature similarity.

    Examples
    --------
    >>> from datetime import datetime, timedelta
    >>> import polars as pl
    >>> import numpy as np
    >>> from yohou.interval.similarity import (
    ...     CompositeSimilarity,
    ...     DistanceSimilarity,
    ...     TemporalSimilarity,
    ... )
    >>>
    >>> dates = [datetime(2021, 1, 1) + timedelta(days=i) for i in range(28)]
    >>> y = pl.DataFrame({"time": dates, "value": np.random.randn(28)})
    >>> y_pred = pl.DataFrame({"time": dates, "value": np.random.randn(28)})
    >>>
    >>> comp = CompositeSimilarity(
    ...     similarities=[
    ...         DistanceSimilarity(metric="euclidean"),
    ...         TemporalSimilarity(seasonalities=[7.0]),
    ...     ],
    ...     combination="multiply",
    ... )
    >>> _ = comp.fit(y, y_pred)
    >>> new_date = [datetime(2021, 1, 29)]
    >>> y_pred_new = pl.DataFrame({"time": new_date, "value": [0.5]})
    >>> weights = comp.predict(y_pred_new)
    >>> weights.shape
    (1, 28)

    """

    _parameter_constraints: dict = {
        "similarities": [list],
        "combination": [str],
        "weights": [list, None],
    }

    def __init__(
        self,
        similarities: list[BaseSimilarity] | None = None,
        combination: Literal["multiply", "mean"] = "multiply",
        weights: list[float] | None = None,
    ) -> None:
        # Parameters are stored untouched; they are only checked in ``fit``.
        self.similarities = similarities
        self.combination = combination
        self.weights = weights

    def _validate_params(self) -> None:
        """Check the constructor parameters.

        Raises
        ------
        ValueError
            If fewer than two sub-similarities are given, if
            ``combination`` is not a supported value, or if ``weights``
            is given with a length different from ``similarities``.

        """
        members = self.similarities
        n_members = 0 if members is None else len(members)
        if n_members < 2:
            raise ValueError(f"CompositeSimilarity requires at least 2 sub-similarities, got {n_members}")

        allowed = ("multiply", "mean")
        if self.combination not in allowed:
            raise ValueError(f"combination must be 'multiply' or 'mean', got {self.combination!r}")

        # Without explicit weights there is no length to compare.
        if self.weights is None:
            return
        n_weights = len(self.weights)
        if n_weights != n_members:
            raise ValueError(f"weights length ({n_weights}) must match similarities length ({n_members})")

    def _resolved_weights(self) -> list[float]:
        """Return one weight per sub-similarity, using 1.0 when none were given."""
        if self.weights is None:
            return [1.0] * len(self.similarities)  # ty: ignore[invalid-argument-type]
        return self.weights

    def fit(
        self,
        y: pl.DataFrame,
        y_pred: pl.DataFrame,
        X_actual: pl.DataFrame | None = None,
    ) -> "CompositeSimilarity":
        """Validate the parameters and fit a clone of every sub-similarity.

        Parameters
        ----------
        y : pl.DataFrame
            Target time series.
        y_pred : pl.DataFrame
            Point forecast time series.
        X_actual : pl.DataFrame or None, default=None
            Exogenous features.

        Returns
        -------
        self

        """
        self._validate_params()
        fitted = []
        for template in self.similarities:  # ty: ignore[not-iterable]
            fitted.append(clone(template).fit(y=y, y_pred=y_pred, X_actual=X_actual))
        # Stored only once every clone has been fitted successfully.
        self.similarities_ = fitted
        return self

    def observe(
        self,
        y: pl.DataFrame,
        y_pred: pl.DataFrame,
        X_actual: pl.DataFrame | None = None,
    ) -> "CompositeSimilarity":
        """Pass new observations on to every fitted sub-similarity.

        Parameters
        ----------
        y : pl.DataFrame
            New target observations.
        y_pred : pl.DataFrame
            New predictions.
        X_actual : pl.DataFrame or None, default=None
            New exogenous features.

        Returns
        -------
        self

        """
        payload = {"y": y, "y_pred": y_pred, "X_actual": X_actual}
        for member in self.similarities_:
            member.observe(**payload)
        return self

    def rewind(
        self,
        y: pl.DataFrame,
        y_pred: pl.DataFrame,
        X_actual: pl.DataFrame | None = None,
    ) -> "CompositeSimilarity":
        """Pass a rewind request on to every fitted sub-similarity.

        Parameters
        ----------
        y : pl.DataFrame
            Target observations to rewind.
        y_pred : pl.DataFrame
            Predictions to rewind.
        X_actual : pl.DataFrame or None, default=None
            Exogenous features to rewind.

        Returns
        -------
        self

        """
        payload = {"y": y, "y_pred": y_pred, "X_actual": X_actual}
        for member in self.similarities_:
            member.rewind(**payload)
        return self

    def predict(
        self,
        y_pred: pl.DataFrame,
        X_actual: pl.DataFrame | None = None,
    ) -> np.ndarray[tuple[int, int], np.dtype[np.floating[Any]]]:
        """Merge the sub-similarity weight matrices into a single one.

        Parameters
        ----------
        y_pred : pl.DataFrame
            Predictions to compute similarities for.
        X_actual : pl.DataFrame or None, default=None
            Exogenous features.

        Returns
        -------
        np.ndarray
            Combined weight matrix of shape
            ``(n_predictions, n_calibration)``.

        """
        coefficients = self._resolved_weights()
        matrices = [member.predict(y_pred=y_pred, X_actual=X_actual) for member in self.similarities_]
        template = matrices[0]

        if self.combination != "multiply":
            # "mean": weighted sum, divided by the total weight when non-zero.
            blended = np.zeros_like(template)
            for matrix, coefficient in zip(matrices, coefficients, strict=True):
                blended += coefficient * matrix
            weight_total = sum(coefficients)
            if weight_total != 0:
                blended /= weight_total
            return blended

        # "multiply": element-wise product of the exponentiated matrices.
        blended = np.ones_like(template)
        for matrix, exponent in zip(matrices, coefficients, strict=True):
            blended *= np.power(matrix, exponent)

        # Rows summing to zero are divided by 1 instead, so they stay at zero.
        totals = blended.sum(axis=1, keepdims=True)
        safe_totals = np.where(totals == 0, 1.0, totals)
        n_columns = blended.shape[1]
        # First scale each row to sum to n_columns, then shrink by (1 + row sum).
        scaled = blended / safe_totals * n_columns
        return scaled / (1 + scaled.sum(axis=1, keepdims=True))

Methods

fit(y, y_pred, X_actual=None)

Fit all sub-similarities on the calibration data.

Parameters
Name Type Description Default
y DataFrame

Target time series.

required
y_pred DataFrame

Point forecast time series.

required
X_actual DataFrame or None

Exogenous features.

None
Returns
Type Description
self
Source Code
Show/Hide source
def fit(
    self,
    y: pl.DataFrame,
    y_pred: pl.DataFrame,
    X_actual: pl.DataFrame | None = None,
) -> "CompositeSimilarity":
    """Validate the parameters and fit a clone of every sub-similarity.

    Parameters
    ----------
    y : pl.DataFrame
        Target time series.
    y_pred : pl.DataFrame
        Point forecast time series.
    X_actual : pl.DataFrame or None, default=None
        Exogenous features.

    Returns
    -------
    self

    """
    self._validate_params()
    fitted = []
    for template in self.similarities:  # ty: ignore[not-iterable]
        fitted.append(clone(template).fit(y=y, y_pred=y_pred, X_actual=X_actual))
    # Stored only once every clone has been fitted successfully.
    self.similarities_ = fitted
    return self

observe(y, y_pred, X_actual=None)

Forward observation to all sub-similarities.

Parameters
Name Type Description Default
y DataFrame

New target observations.

required
y_pred DataFrame

New predictions.

required
X_actual DataFrame or None

New exogenous features.

None
Returns
Type Description
self
Source Code
Show/Hide source
def observe(
    self,
    y: pl.DataFrame,
    y_pred: pl.DataFrame,
    X_actual: pl.DataFrame | None = None,
) -> "CompositeSimilarity":
    """Pass new observations on to every fitted sub-similarity.

    Parameters
    ----------
    y : pl.DataFrame
        New target observations.
    y_pred : pl.DataFrame
        New predictions.
    X_actual : pl.DataFrame or None, default=None
        New exogenous features.

    Returns
    -------
    self

    """
    # Each member receives the same three keyword arguments.
    payload = {"y": y, "y_pred": y_pred, "X_actual": X_actual}
    for member in self.similarities_:
        member.observe(**payload)
    return self

rewind(y, y_pred, X_actual=None)

Forward rewind to all sub-similarities.

Parameters
Name Type Description Default
y DataFrame

Target observations to rewind.

required
y_pred DataFrame

Predictions to rewind.

required
X_actual DataFrame or None

Exogenous features to rewind.

None
Returns
Type Description
self
Source Code
Show/Hide source
def rewind(
    self,
    y: pl.DataFrame,
    y_pred: pl.DataFrame,
    X_actual: pl.DataFrame | None = None,
) -> "CompositeSimilarity":
    """Pass a rewind request on to every fitted sub-similarity.

    Parameters
    ----------
    y : pl.DataFrame
        Target observations to rewind.
    y_pred : pl.DataFrame
        Predictions to rewind.
    X_actual : pl.DataFrame or None, default=None
        Exogenous features to rewind.

    Returns
    -------
    self

    """
    # Each member receives the same three keyword arguments.
    payload = {"y": y, "y_pred": y_pred, "X_actual": X_actual}
    for member in self.similarities_:
        member.rewind(**payload)
    return self

predict(y_pred, X_actual=None)

Combine sub-similarity weights into a single weight matrix.

Parameters
Name Type Description Default
y_pred DataFrame

Predictions to compute similarities for.

required
X_actual DataFrame or None

Exogenous features.

None
Returns
Type Description
ndarray

Combined weight matrix of shape (n_predictions, n_calibration).

Source Code
Show/Hide source
def predict(
    self,
    y_pred: pl.DataFrame,
    X_actual: pl.DataFrame | None = None,
) -> np.ndarray[tuple[int, int], np.dtype[np.floating[Any]]]:
    """Combine sub-similarity weights into a single weight matrix.

    Parameters
    ----------
    y_pred : pl.DataFrame
        Predictions to compute similarities for.
    X_actual : pl.DataFrame or None, default=None
        Exogenous features.

    Returns
    -------
    np.ndarray
        Combined weight matrix of shape
        ``(n_predictions, n_calibration)``.

    """
    alphas = self._resolved_weights()
    weight_matrices = [sim.predict(y_pred=y_pred, X_actual=X_actual) for sim in self.similarities_]

    if self.combination == "multiply":
        combined = np.ones_like(weight_matrices[0])
        for w, alpha in zip(weight_matrices, alphas, strict=True):
            combined *= np.power(w, alpha)
        # Re-normalise so rows lie in (0, 1)
        row_sums = combined.sum(axis=1, keepdims=True)
        row_sums = np.where(row_sums == 0, 1.0, row_sums)
        n_features = combined.shape[1]
        combined = combined / row_sums * n_features
        combined = combined / (1 + combined.sum(axis=1, keepdims=True))
    else:  # mean
        combined = np.zeros_like(weight_matrices[0])
        for w, alpha in zip(weight_matrices, alphas, strict=True):
            combined += alpha * w
        total_alpha = sum(alphas)
        if total_alpha != 0:
            combined /= total_alpha

    return combined