Skip to content

plot_boxplot

yohou.plotting.exploration.plot_boxplot(df, *, columns=None, period='1mo', groups=None, facet_by='member', facet_n_cols=2, color_palette=None, show_legend=True, title=None, x_label=None, y_label=None, width=None, height=None, bar_opacity=0.7, show_points='outliers', marker_size=4.0)

Plot boxplots grouped by time periods.

Parameters

Name Type Description Default
df DataFrame

Input DataFrame with 'time' column and numeric columns to plot.

required
columns str | list[str] | None

Column(s) to create boxplots for. If None, uses all numeric columns except 'time'.

None
period str

Time period for grouping. Polars duration string. Options: "1d" (daily), "1w" (weekly), "1mo" (monthly), "1q" (quarterly), "1y" (yearly).

"1mo"
groups list[str] | None

Panel group prefixes to plot.

None
facet_by Literal['group', 'member'] | None

Faceting axis for panel data. "group" creates one subplot per group, "member" one per member. None is currently treated the same as "member" (the panel code path falls back to "member" when no value is given). Ignored for non-panel data.

"member"
facet_n_cols int

Number of columns in facet grid.

2
color_palette list[str] | None

Custom color palette for multi-column plots.

None
show_legend bool

Whether to show the legend.

True
title str | None

Plot title.

None
x_label str | None

X-axis label.

None
y_label str | None

Y-axis label.

None
width int | None

Plot width in pixels.

None
height int | None

Plot height in pixels.

None
bar_opacity float

Opacity of the box shapes (0.0 to 1.0).

0.7
show_points bool | str

Which data points to show. One of "outliers", "all", or False to hide all points.

"outliers"
marker_size float

Size of the point markers in pixels.

4.0

Returns

Type Description
Figure

Plotly figure object.

Examples

>>> import polars as pl
>>> from yohou.plotting import plot_boxplot
>>> # Create sample data
>>> df = pl.DataFrame({
...     "time": pl.date_range(pl.date(2020, 1, 1), pl.date(2020, 12, 31), "1w", eager=True),
...     "y": [100 + i * 2 + (i % 4) * 10 for i in range(53)],
... })
>>> # Monthly boxplots
>>> fig = plot_boxplot(df, period="1mo")
>>> len(fig.data) > 0
True

See Also

plot_time_series : Plot basic time series.

Source Code

Show/Hide source
def plot_boxplot(
    df: pl.DataFrame,
    *,
    columns: str | list[str] | None = None,
    period: str = "1mo",
    groups: list[str] | None = None,
    facet_by: Literal["group", "member"] | None = "member",
    facet_n_cols: int = 2,
    color_palette: list[str] | None = None,
    show_legend: bool = True,
    title: str | None = None,
    x_label: str | None = None,
    y_label: str | None = None,
    width: int | None = None,
    height: int | None = None,
    bar_opacity: float = 0.7,
    show_points: bool | str = "outliers",
    marker_size: float = 4.0,
) -> go.Figure:
    """
    Plot boxplots grouped by time periods.

    The ``'time'`` column is truncated to ``period`` and one box is drawn per
    resulting period value. Two code paths exist:

    * **Panel path** - taken when ``groups`` is given, or when neither
      ``groups`` nor ``columns`` is given and the frame is auto-detected as
      panel data. Colors come from a panel color manager and one legend entry
      is emitted per display name.
    * **Column path** - taken otherwise. One subplot is rendered per selected
      column; per-trace legend entries are disabled.

    Parameters
    ----------
    df : pl.DataFrame
        Input DataFrame with 'time' column and numeric columns to plot.
    columns : str | list[str] | None, default=None
        Column(s) to create boxplots for. If None, uses all numeric columns except 'time'.
    period : str, default="1mo"
        Time period for grouping. Polars duration string.
        Options: "1d" (daily), "1w" (weekly), "1mo" (monthly), "1q" (quarterly), "1y" (yearly).
    groups : list[str] | None, default=None
        Panel group prefixes to plot.
    facet_by : Literal["group", "member"] | None, default="member"
        Faceting axis for panel data.  ``"group"`` creates one subplot per
        group, ``"member"`` one per member.  ``None`` is treated as
        ``"member"`` by this function.  Not used on the column path.
    facet_n_cols : int, default=2
        Number of columns in facet grid.
    color_palette : list[str] | None, default=None
        Custom color palette for multi-column plots.
    show_legend : bool, default=True
        Whether to show the legend.
    title : str | None, default=None
        Plot title. Falls back to ``"Boxplots"`` when not given.
    x_label : str | None, default=None
        X-axis label. Falls back to ``"Period"`` when not given.
    y_label : str | None, default=None
        Y-axis label.
    width : int | None, default=None
        Plot width in pixels.
    height : int | None, default=None
        Plot height in pixels.
    bar_opacity : float, default=0.7
        Opacity of the box shapes (0.0 to 1.0).
    show_points : bool | str, default="outliers"
        Which data points to show. One of ``"outliers"``, ``"all"``, or
        ``False`` to hide all points. Any other value (including ``True``)
        also hides the points.
    marker_size : float, default=4.0
        Size of the point markers in pixels. Only applied when points are
        shown.

    Returns
    -------
    go.Figure
        Plotly figure object.

    Examples
    --------
    >>> import polars as pl
    >>> from yohou.plotting import plot_boxplot

    >>> # Create sample data
    >>> df = pl.DataFrame({
    ...     "time": pl.date_range(pl.date(2020, 1, 1), pl.date(2020, 12, 31), "1w", eager=True),
    ...     "y": [100 + i * 2 + (i % 4) * 10 for i in range(53)],
    ... })

    >>> # Monthly boxplots
    >>> fig = plot_boxplot(df, period="1mo")
    >>> len(fig.data) > 0
    True

    See Also
    --------
    [`plot_time_series`][yohou.plotting.plot_time_series] : Plot basic time series.
    """
    # Validate inputs
    validate_plotting_data(df)
    validate_plotting_params(width=width, height=height)

    # Resolve the point-display mode once; it is invariant across every trace
    # on both code paths. Anything other than "all"/"outliers" hides points.
    if show_points == "all":
        boxpoints: str | bool = "all"
    elif show_points == "outliers":
        boxpoints = "outliers"
    else:
        boxpoints = False
    # A marker size is only passed along when points are actually drawn.
    point_size = marker_size if boxpoints else None

    # With no explicit selection, auto-detected panel data is routed to the
    # panel path by switching ``groups`` from None to an empty list.
    if groups is None and columns is None and _auto_detect_panel(df):
        groups = []

    if groups is not None:
        color_manager = PanelColorManager(color_palette)
        legend_tracker = LegendTracker(show_legend=show_legend)

        def _render_panel_boxplot(ctx: RenderContext) -> None:
            """Render period-grouped box plots for a single column."""
            # The first non-time column of the sub-frame holds the values.
            value_col = [c for c in ctx.sub_df.columns if c != "time"][0]
            color = color_manager.get_color(ctx.display_name)
            bucketed = ctx.sub_df.with_columns(pl.col("time").dt.truncate(period).alias("period"))
            period_values = bucketed.select("period").unique().sort("period")["period"].to_list()
            show_entry = legend_tracker.should_show(ctx.display_name)
            for p_idx, period_val in enumerate(period_values):
                values = bucketed.filter(pl.col("period") == period_val)[value_col]
                ctx.fig.add_trace(
                    go.Box(
                        y=values,
                        x=[str(period_val)] * len(values),
                        name=ctx.display_name,
                        marker={"color": color},
                        opacity=bar_opacity,
                        boxpoints=boxpoints,
                        marker_size=point_size,
                        legendgroup=ctx.display_name,
                        # Only the first box of a series contributes a legend
                        # entry; the rest share it via ``legendgroup``.
                        showlegend=show_entry and p_idx == 0,
                    ),
                    row=ctx.row,
                    col=ctx.col,
                )

        # ``None`` falls back to per-member facets on the panel path.
        effective_facet_by = facet_by or "member"
        fig = facet_figure(
            df,
            _render_panel_boxplot,
            groups=groups,
            columns=columns,
            facet_by=effective_facet_by,
            facet_n_cols=facet_n_cols,
            title=title or "Boxplots",
            x_label=x_label or "Period",
            y_label=y_label,
            width=width,
            height=height,
            shared_xaxes=False,
        )
        fig.update_layout(showlegend=show_legend)
        return fig

    # Non-panel case: column-mode facet_figure
    plot_columns = validate_plotting_data(df, columns=columns, exclude=["time"])
    palette = resolve_color_palette(color_palette, len(plot_columns))
    color_by_column = dict(zip(plot_columns, palette, strict=False))

    # Periods are computed once on the full frame and shared by all subplots.
    df_grouped = df.with_columns([pl.col("time").dt.truncate(period).alias("period")])
    periods = df_grouped.select("period").unique().sort("period")["period"].to_list()
    period_labels = [str(p) for p in periods]

    def _render_column_boxplot(ctx: RenderContext) -> None:
        """Render boxplots for one column into a subplot."""
        column = ctx.display_name
        column_color = color_by_column[column]
        hover = _make_hovertemplate(column, "Period", "Value")
        for period_val, label in zip(periods, period_labels):
            values = df_grouped.filter(pl.col("period") == period_val)[column]
            ctx.fig.add_trace(
                go.Box(
                    y=values,
                    x=[label] * len(values),
                    name=label,
                    marker={"color": column_color},
                    opacity=bar_opacity,
                    boxpoints=boxpoints,
                    marker_size=point_size,
                    showlegend=False,
                    hovertemplate=hover,
                ),
                row=ctx.row,
                col=ctx.col,
            )

    fig = facet_figure(
        df,
        _render_column_boxplot,
        columns=plot_columns,
        facet_n_cols=facet_n_cols,
        title=title or "Boxplots",
        x_label=x_label or "Period",
        y_label=y_label,
        width=width,
        height=height,
        shared_xaxes=False,
    )
    fig.update_layout(showlegend=show_legend)

    return fig

Tutorials

The following example notebooks use this component:

  • Exploratory Visualization


    Visualization

    Exploratory time series visualisation with raw series plots, rolling statistics overlays, seasonal overlays, subseries diagnostics, distribution boxplots, missing data pattern auditing, outlier detection, and resampling comparison.

    View · Open in marimo