Skip to content

SeasonalImputer

yohou.preprocessing.imputation.SeasonalImputer

Bases: BaseTransformer

Seasonal decomposition-based imputation for missing values.

Imputes missing values by leveraging seasonal patterns in the data. Each missing value is replaced with the mean or median (see fill_method) of the non-missing observations that occupy the same position in the seasonal cycle, as estimated from the data seen during fit.

Parameters

Name Type Description Default
period int

Seasonal period, expressed as a number of observations (e.g., 7 for daily data with weekly seasonality, 12 for monthly data with annual seasonality). Must be >= 2.

required
fill_method (seasonal_mean, seasonal_median)

Method used to compute the seasonal values: "seasonal_mean" uses the mean of same-season observations; "seasonal_median" uses the median of same-season observations.

"seasonal_mean"

Attributes

Name Type Description
seasonal_values_ dict

Dictionary mapping column names to seasonal value arrays of shape (period,).

Examples

>>> import polars as pl
>>> from datetime import datetime
>>> import numpy as np
>>> from yohou.preprocessing import SeasonalImputer
>>> # Weekly data with missing values
>>> X = pl.DataFrame({
...     "time": [datetime(2020, 1, i) for i in range(1, 15)],
...     "value": [10.0, 20.0, 30.0, 25.0, 15.0, 5.0, 8.0, np.nan, 21.0, 31.0, np.nan, 16.0, 6.0, 9.0],
... })
>>> imputer = SeasonalImputer(period=7)
>>> imputer.fit(X)
SeasonalImputer(period=7)
>>> X_imputed = imputer.transform(X)
>>> X_imputed["value"].null_count()
0

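See Also

SimpleTimeImputer : Interpolation-based imputation.
SimpleImputer : Simple constant-strategy imputation.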

Source Code

Show/Hide source
class SeasonalImputer(BaseTransformer):
    """Seasonal decomposition-based imputation for missing values.

    Imputes missing values by leveraging seasonal patterns in the data.
    Each missing value (null or NaN) is replaced with the mean or median of
    the non-missing observations that occupy the same position in the
    seasonal cycle, as estimated from the data passed to ``fit``.

    Parameters
    ----------
    period : int
        Seasonal period, expressed as a number of observations (e.g., 7 for
        daily data with weekly seasonality, 12 for monthly data with annual
        seasonality). Must be >= 2.
    fill_method : {"seasonal_mean", "seasonal_median"}, default="seasonal_mean"
        Method to compute seasonal values:
        - "seasonal_mean": Use mean of same-season observations
        - "seasonal_median": Use median of same-season observations

    Attributes
    ----------
    seasonal_values_ : dict
        Dictionary mapping column names to seasonal value arrays of shape (period,).
        An entry is NaN when a season had no non-missing observation during
        ``fit``.

    Notes
    -----
    The seasonal position of a row is its positional index modulo ``period``;
    the ``"time"`` column is not consulted.  Data passed to ``transform`` is
    therefore expected to start at the same phase of the seasonal cycle as
    the data passed to ``fit``.

    Examples
    --------
    >>> import polars as pl
    >>> from datetime import datetime
    >>> import numpy as np
    >>> from yohou.preprocessing import SeasonalImputer

    >>> # Weekly data with missing values
    >>> X = pl.DataFrame({
    ...     "time": [datetime(2020, 1, i) for i in range(1, 15)],
    ...     "value": [10.0, 20.0, 30.0, 25.0, 15.0, 5.0, 8.0, np.nan, 21.0, 31.0, np.nan, 16.0, 6.0, 9.0],
    ... })
    >>> imputer = SeasonalImputer(period=7)
    >>> imputer.fit(X)
    SeasonalImputer(period=7)
    >>> X_imputed = imputer.transform(X)
    >>> X_imputed["value"].null_count()
    0
    >>> X_imputed["value"].is_nan().any()
    False

    See Also
    --------
    - [`SimpleTimeImputer`][yohou.preprocessing.imputation.SimpleTimeImputer] : Interpolation-based imputation.
    - [`SimpleImputer`][yohou.preprocessing.imputation.SimpleImputer] : Simple constant-strategy imputation.

    """

    _valid_fill_methods = {"seasonal_mean", "seasonal_median"}

    _parameter_constraints: dict = {
        "period": [Interval(numbers.Integral, 2, None, closed="left")],
        "fill_method": [StrOptions(_valid_fill_methods)],
    }

    _tags = {"stateful": False, "invertible": False}

    def __init__(
        self,
        period: int,
        fill_method: str = "seasonal_mean",
    ):
        self.period = period
        self.fill_method = fill_method

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X: pl.DataFrame, y: pl.DataFrame | None = None, **params) -> "SeasonalImputer":
        """Fit the imputer by computing seasonal values.

        For every non-``"time"`` column, the mean or median (depending on
        ``fill_method``) of the non-missing observations is computed for each
        of the ``period`` seasonal positions and stored in
        ``seasonal_values_``.

        Parameters
        ----------
        X : pl.DataFrame
            Input time series with a ``"time"`` column (datetime) and one or
            more numeric columns.
        y : pl.DataFrame or None, default=None
            Ignored.  Present for API compatibility.
        **params : dict
            Metadata to route to nested estimators.

        Returns
        -------
        self
            The fitted transformer instance.

        """
        X = validate_transformer_data(self, X=X, reset=True)
        BaseTransformer.fit(self, X, y, **params)

        aggregate = np.mean if self.fill_method == "seasonal_mean" else np.median
        data_cols = [c for c in X.columns if c != "time"]
        self.seasonal_values_: dict[str, np.ndarray] = {}

        for col_name in data_cols:
            column = X[col_name]
            values = column.to_numpy()
            # Same missing-value definition as ``_transform``: NaN or null.
            missing = np.isnan(values) | column.is_null().to_numpy()

            # Seasons without any observation keep NaN.
            seasonal_vals = np.full(self.period, np.nan)

            for season_idx in range(self.period):
                # Rows whose positional index is congruent to ``season_idx``
                # modulo ``period``.  Slicing avoids adding a temporary column
                # to ``X`` (which could shadow a user column) and avoids
                # re-filtering the whole frame for every season.
                same_season = slice(season_idx, None, self.period)
                observed = values[same_season][~missing[same_season]]
                if observed.size:
                    seasonal_vals[season_idx] = aggregate(observed)

            self.seasonal_values_[col_name] = seasonal_vals

        return self

    def _transform(self, X: pl.DataFrame) -> pl.DataFrame:
        """Impute missing values using seasonal patterns.

        Null and NaN entries are replaced by the fitted seasonal value whose
        index is the row position modulo ``period``.  Columns without fitted
        seasonal values are passed through unchanged.

        Parameters
        ----------
        X : pl.DataFrame
            Validated input time series.

        Returns
        -------
        pl.DataFrame
            Imputed time series with ``"time"`` as the first column.

        """
        data_cols = [c for c in X.columns if c != "time"]
        result_cols = {"time": X["time"]}

        for col_name in data_cols:
            # Copy so the buffer is writable and the input is left untouched.
            values = X[col_name].to_numpy().copy()
            seasonal_vals = self.seasonal_values_.get(col_name)

            if seasonal_vals is not None:
                null_mask = np.isnan(values) | X[col_name].is_null().to_numpy()
                missing_idx = np.flatnonzero(null_mask)
                if missing_idx.size:
                    # Fill every gap at once with the value of its season.
                    values[missing_idx] = seasonal_vals[missing_idx % self.period]

            result_cols[col_name] = pl.Series(values)

        return pl.DataFrame(result_cols)

    def get_feature_names_out(self, input_features: list[str] | None = None) -> list[str]:
        """Get output feature names for transformation.

        Imputation does not add, drop or rename columns, so the validated
        input feature names are returned as-is.

        Parameters
        ----------
        input_features : list of str or None, default=None
            Column names of the input features.  If ``None``, uses the
            feature names seen during ``fit``.

        Returns
        -------
        list of str
            Output feature names after transformation.

        """
        check_is_fitted(self, ["feature_names_in_"])
        input_features = _check_feature_names_in(self, input_features)
        return list(input_features)

Methods

fit(X, y=None, **params)

Fit the imputer by computing seasonal values.

Parameters
Name Type Description Default
X DataFrame

Input time series with a "time" column (datetime) and one or more numeric columns.

required
y DataFrame or None

Ignored. Present for API compatibility.

None
**params dict

Metadata to route to nested estimators.

{}
Returns
Type Description
self

The fitted transformer instance.

Source Code
Show/Hide source
@_fit_context(prefer_skip_nested_validation=True)
def fit(self, X: pl.DataFrame, y: pl.DataFrame | None = None, **params) -> "SeasonalImputer":
    """Fit the imputer by computing seasonal values.

    For every non-``"time"`` column, the mean or median (depending on
    ``fill_method``) of the non-missing observations is computed for each
    of the ``period`` seasonal positions and stored in ``seasonal_values_``.
    The seasonal position of a row is its positional index modulo
    ``period``.

    Parameters
    ----------
    X : pl.DataFrame
        Input time series with a ``"time"`` column (datetime) and one or
        more numeric columns.
    y : pl.DataFrame or None, default=None
        Ignored.  Present for API compatibility.
    **params : dict
        Metadata to route to nested estimators.

    Returns
    -------
    self
        The fitted transformer instance.

    """
    X = validate_transformer_data(self, X=X, reset=True)
    BaseTransformer.fit(self, X, y, **params)

    aggregate = np.mean if self.fill_method == "seasonal_mean" else np.median
    data_cols = [c for c in X.columns if c != "time"]
    self.seasonal_values_: dict[str, np.ndarray] = {}

    for col_name in data_cols:
        column = X[col_name]
        values = column.to_numpy()
        # A value is missing when it is NaN or null.
        missing = np.isnan(values) | column.is_null().to_numpy()

        # Seasons without any observation keep NaN.
        seasonal_vals = np.full(self.period, np.nan)

        for season_idx in range(self.period):
            # Rows whose positional index is congruent to ``season_idx``
            # modulo ``period``.  Slicing avoids adding a temporary column
            # to ``X`` (which could shadow a user column) and avoids
            # re-filtering the whole frame for every season.
            same_season = slice(season_idx, None, self.period)
            observed = values[same_season][~missing[same_season]]
            if observed.size:
                seasonal_vals[season_idx] = aggregate(observed)

        self.seasonal_values_[col_name] = seasonal_vals

    return self

get_feature_names_out(input_features=None)

Get output feature names for transformation.

Parameters
Name Type Description Default
input_features list of str or None

Column names of the input features. If None, uses the feature names seen during fit.

None
Returns
Type Description
list of str

Output feature names after transformation.

Source Code
Show/Hide source
def get_feature_names_out(self, input_features: list[str] | None = None) -> list[str]:
    """Return the names of the columns produced by the transformation.

    The validated input feature names are returned unchanged, as a new list.

    Parameters
    ----------
    input_features : list of str or None, default=None
        Column names of the input features.  If ``None``, uses the
        feature names seen during ``fit``.

    Returns
    -------
    list of str
        Output feature names after transformation.

    """
    # Only a fitted transformer knows ``feature_names_in_``.
    check_is_fitted(self, ["feature_names_in_"])
    validated_names = _check_feature_names_in(self, input_features)
    return [*validated_names]

Tutorials

The following example notebooks use this component:

  • How to Handle Missing Data


    Data-Features

    Compare SimpleTimeImputer, SeasonalImputer, SimpleImputer, and TransformedSpaceKNNImputer on synthetic block and scattered gaps in monthly tourism data.

    View · Open in marimo

  • How to Clean Time Series Data


    Data-Features

    End-to-end data cleaning pipeline combining SimpleTimeImputer and SeasonalImputer for missing values with OutlierThresholdHandler for anomaly clipping.

    View · Open in marimo