Source code for arviz_stats.visualization

"""Top level functions related to visualization of distributions."""

from arviz_stats.utils import _apply_multi_input_function
from arviz_stats.validate import validate_ci_prob


def hdi(
    data,
    prob=None,
    dim=None,
    group="posterior",
    var_names=None,
    filter_vars=None,
    coords=None,
    method="nearest",
    circular=False,
    max_modes=10,
    skipna=False,
    **kwargs,
):
    r"""Compute the highest density interval (HDI) given a probability.

    The HDI is the shortest interval that contains the specified probability mass.

    Parameters
    ----------
    data : array-like, DataArray, Dataset, DataTree, DataArrayGroupBy, DatasetGroupBy, or idata-like
        Input data. It will have different pre-processing applied to it depending on its type:

        - array-like: call array layer within ``arviz-stats``.
        - xarray object: apply dimension aware function to all relevant subsets.
        - others: passed to :func:`arviz_base.convert_to_dataset` then treated as
          :class:`xarray.Dataset`. This option is discouraged: the conversion is fully
          automated, but it will have to be repeated on every execution of this or
          similar functions. It is recommended to perform the conversion manually first
          and then call ``arviz_stats.hdi``, which allows controlling the conversion
          step and inspecting its results.

    prob : float, optional
        Probability for the credible interval. Defaults to ``rcParams["stats.ci_prob"]``.
    dim : sequence of hashable, optional
        Dimensions to be reduced when computing the HDI.
        Default ``rcParams["data.sample_dims"]``.
    group : hashable, default "posterior"
        Group on which to compute the HDI.
    var_names : str or list of str, optional
        Names of the variables for which the HDI should be computed.
    filter_vars : {None, "like", "regex"}, default None
        If None, interpret `var_names` as the real variable names. If "like", interpret
        `var_names` as substrings of the real variable names. If "regex", interpret
        `var_names` as regular expressions to match against the real variable names.
    coords : dict, optional
        Dictionary of dimension/index names to coordinate values defining a subset
        of the data for which to perform the computation.
    method : str, default "nearest"
        Valid options are "nearest", "multimodal" or "multimodal_sample".
    circular : bool, default False
        Whether to compute the HDI taking into account that `data` represents circular
        variables (in the range [-np.pi, np.pi]) or not. Defaults to False
        (i.e. non-circular variables).
    max_modes : int, default 10
        Maximum number of modes to consider when computing the HDI using the
        multimodal method.
    skipna : bool, default False
        If True, ignore NaN values when computing the HDI.
    **kwargs : any, optional
        Forwarded to the array or dataarray interface for HDI.

    Returns
    -------
    ndarray, DataArray, Dataset, DataTree
        Requested HDI of the provided input. It will have a ``ci_bound`` dimension with
        coordinate values "lower" and "upper" indicating the two extremes of the
        credible interval. In addition, when using a multimodal `method` a ``mode``
        dimension is also added.

    See Also
    --------
    arviz_stats.eti : Calculate the equal tail interval (ETI).
    arviz_stats.summary : Calculate summary statistics and diagnostics.

    Examples
    --------
    Calculate the HDI of a Normal random variable:

    .. ipython::

        In [1]: import arviz_stats as azs
           ...: import numpy as np
           ...: data = np.random.default_rng().normal(size=2000)
           ...: azs.hdi(data, 0.68)

    Calculate the HDI for specific variables:

    .. ipython::

        In [1]: import arviz_base as azb
           ...: dt = azb.load_arviz_data("centered_eight")
           ...: azs.hdi(dt, var_names=["mu", "theta"])

    Calculate the HDI also over the school dimension (for variables where present):

    .. ipython::

        In [1]: azs.hdi(dt, dim=["chain", "draw", "school"])

    """
    prob = validate_ci_prob(prob)
    return _apply_multi_input_function(
        "hdi",
        data,
        dim,
        "dim",
        group=group,
        var_names=var_names,
        filter_vars=filter_vars,
        coords=coords,
        prob=prob,
        method=method,
        circular=circular,
        max_modes=max_modes,
        skipna=skipna,
        **kwargs,
    )
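# Illustrative sketch (not part of the public module): a minimal NumPy-only
# version of a unimodal 1-D HDI, assuming the standard shortest-window-over-
# sorted-draws construction. The helper name is hypothetical; the actual
# "nearest" implementation in ``arviz-stats`` may differ in its details.
def _hdi_sketch_1d(samples, prob=0.94):
    """Return (lower, upper) of the shortest interval holding `prob` mass."""
    import numpy as np

    sorted_samples = np.sort(np.asarray(samples))
    n = len(sorted_samples)
    # Number of draws the interval must span.
    window = int(np.floor(prob * n))
    # Width of every contiguous window of that size; the HDI is the narrowest.
    widths = sorted_samples[window:] - sorted_samples[: n - window]
    start = int(np.argmin(widths))
    return sorted_samples[start], sorted_samples[start + window]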
def eti(
    data,
    prob=None,
    dim=None,
    group="posterior",
    var_names=None,
    filter_vars=None,
    coords=None,
    method="linear",
    skipna=False,
    **kwargs,
):
    r"""Compute the equal tail interval (ETI) given a probability.

    The ETI is constructed by dividing the remaining probability (e.g., 6% for a 94%
    interval) equally between the two tails of a distribution. Other names for the ETI
    are central interval and quantile-based interval.

    Parameters
    ----------
    data : array-like, DataArray, Dataset, DataTree, DataArrayGroupBy, DatasetGroupBy, or idata-like
        Input data. It will have different pre-processing applied to it depending on its type:

        - array-like: call array layer within ``arviz-stats``.
        - xarray object: apply dimension aware function to all relevant subsets.
        - others: passed to :func:`arviz_base.convert_to_dataset` then treated as
          :class:`xarray.Dataset`. This option is discouraged: the conversion is fully
          automated, but it will have to be repeated on every execution of this or
          similar functions. It is recommended to perform the conversion manually first
          and then call ``arviz_stats.eti``, which allows controlling the conversion
          step and inspecting its results.

    prob : float, optional
        Probability for the credible interval. Defaults to ``rcParams["stats.ci_prob"]``.
    dim : sequence of hashable, optional
        Dimensions to be reduced when computing the ETI.
        Default ``rcParams["data.sample_dims"]``.
    group : hashable, default "posterior"
        Group on which to compute the ETI.
    var_names : str or list of str, optional
        Names of the variables for which the ETI should be computed.
    filter_vars : {None, "like", "regex"}, default None
        If None, interpret `var_names` as the real variable names. If "like", interpret
        `var_names` as substrings of the real variable names. If "regex", interpret
        `var_names` as regular expressions to match against the real variable names.
    coords : dict, optional
        Dictionary of dimension/index names to coordinate values defining a subset
        of the data for which to perform the computation.
    method : str, default "linear"
        Interpolation method for the quantile computation. For valid options see
        :func:`numpy.quantile`.
    skipna : bool, default False
        If True, ignore NaN values when computing the ETI.
    **kwargs : any, optional
        Forwarded to the array or dataarray interface for ETI.

    Returns
    -------
    ndarray, DataArray, Dataset, DataTree
        Requested ETI of the provided input. It will have a ``ci_bound`` dimension with
        coordinate values "lower" and "upper" indicating the two extremes of the
        credible interval.

    See Also
    --------
    arviz_stats.hdi : Calculate the highest density interval (HDI).
    arviz_stats.summary : Calculate summary statistics and diagnostics.

    Examples
    --------
    Calculate the ETI of a Normal random variable:

    .. ipython::

        In [1]: import arviz_stats as azs
           ...: import numpy as np
           ...: data = np.random.default_rng().normal(size=2000)
           ...: azs.eti(data, 0.68)

    Calculate the ETI for specific variables:

    .. ipython::

        In [1]: import arviz_base as azb
           ...: dt = azb.load_arviz_data("centered_eight")
           ...: azs.eti(dt, var_names=["mu", "theta"])

    Calculate the ETI also over the school dimension (for variables where present):

    .. ipython::

        In [1]: azs.eti(dt, dim=["chain", "draw", "school"])

    """
    prob = validate_ci_prob(prob)
    return _apply_multi_input_function(
        "eti",
        data,
        dim,
        "dim",
        group=group,
        var_names=var_names,
        filter_vars=filter_vars,
        coords=coords,
        prob=prob,
        method=method,
        skipna=skipna,
        **kwargs,
    )
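# Illustrative sketch (not part of the public module): by definition the ETI
# is just two quantiles, with the leftover probability (1 - prob) split equally
# between the tails. The helper name is hypothetical.
def _eti_sketch_1d(samples, prob=0.94):
    """Return (lower, upper) quantiles of the equal tail interval."""
    import numpy as np

    tail = (1 - prob) / 2
    lower, upper = np.quantile(np.asarray(samples), [tail, 1 - tail])
    return lower, upper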
def ecdf(
    data,
    dim=None,
    group="posterior",
    var_names=None,
    filter_vars=None,
    coords=None,
    pit=False,
    **kwargs,
):
    r"""Compute the marginal empirical cumulative distribution functions (ECDF).

    See the EABM chapter on `Visualization of Random Variables with ArviZ
    <https://arviz-devs.github.io/EABM/Chapters/Distributions.html#distributions-in-arviz>`_
    for more details.

    Parameters
    ----------
    data : array-like, DataArray, Dataset, DataTree, DataArrayGroupBy, DatasetGroupBy, or idata-like
        Input data. It will have different pre-processing applied to it depending on its type:

        - array-like: call array layer within ``arviz-stats``.
        - xarray object: apply dimension aware function to all relevant subsets.
        - others: passed to :func:`arviz_base.convert_to_dataset` then treated as
          :class:`xarray.Dataset`. This option is discouraged: the conversion is fully
          automated, but it will have to be repeated on every execution of this or
          similar functions. It is recommended to perform the conversion manually first
          and then call ``arviz_stats.ecdf``, which allows controlling the conversion
          step and inspecting its results.

    dim : sequence of hashable, optional
        Dimensions to be reduced when computing the ECDF.
        Default ``rcParams["data.sample_dims"]``.
    group : hashable, default "posterior"
        Group on which to compute the ECDF.
    var_names : str or list of str, optional
        Names of the variables for which the ECDF should be computed.
    filter_vars : {None, "like", "regex"}, default None
        If None, interpret `var_names` as the real variable names. If "like", interpret
        `var_names` as substrings of the real variable names. If "regex", interpret
        `var_names` as regular expressions to match against the real variable names.
    coords : dict, optional
        Dictionary of dimension/index names to coordinate values defining a subset
        of the data for which to perform the computation.
    pit : bool, default False
        If True, compute the ECDF of the probability integral transform (PIT) of the data.
    **kwargs : any, optional
        Forwarded to the array or dataarray interface for ECDF.

    Returns
    -------
    ndarray, DataArray, Dataset, DataTree
        Requested ECDF of the provided input. It will have a ``quantile`` dimension and
        a ``plot_axis`` dimension with coordinate values "x" and "y".

    See Also
    --------
    arviz_stats.histogram, arviz_stats.kde, arviz_stats.qds : Alternative visual summaries
        for marginal distributions
    arviz_plots.plot_dist

    Examples
    --------
    Calculate the ECDF of a Normal random variable:

    .. ipython::

        In [1]: import arviz_stats as azs
           ...: import numpy as np
           ...: data = np.random.default_rng().normal(size=2000)
           ...: # not available yet in array interface
           ...: azs.ecdf(data)

    Calculate the ECDF for specific variables:

    .. ipython::

        In [1]: import arviz_base as azb
           ...: dt = azb.load_arviz_data("centered_eight")
           ...: azs.ecdf(dt.posterior.dataset, var_names=["mu", "theta"])

    Calculate the ECDF also over the school dimension (for variables where present):

    .. ipython::

        In [1]: azs.ecdf(dt.posterior.dataset, dim=["chain", "draw", "school"])

    """
    return _apply_multi_input_function(
        "ecdf",
        data,
        dim,
        "dim",
        group=group,
        var_names=var_names,
        filter_vars=filter_vars,
        coords=coords,
        pit=pit,
        **kwargs,
    )
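# Illustrative sketch (not part of the public module): the ECDF of a 1-D sample
# is the proportion of observations at or below each sorted value. The helper
# name is hypothetical; ``arviz-stats`` additionally batches this over
# non-reduced dimensions and packs the result into a ``plot_axis`` dimension.
def _ecdf_sketch_1d(samples):
    """Return (x, y) arrays where y is the ECDF evaluated at x."""
    import numpy as np

    x = np.sort(np.asarray(samples))
    y = np.arange(1, len(x) + 1) / len(x)
    return x, y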
def histogram(
    data,
    dim=None,
    group="posterior",
    var_names=None,
    filter_vars=None,
    coords=None,
    bins=None,
    range=None,  # pylint: disable=redefined-builtin
    weights=None,
    density=True,
):
    r"""Compute the batched histogram.

    See the EABM chapter on `Visualization of Random Variables with ArviZ
    <https://arviz-devs.github.io/EABM/Chapters/Distributions.html#distributions-in-arviz>`_
    for more details.

    Parameters
    ----------
    data : array-like, DataArray, Dataset, DataTree, DataArrayGroupBy, DatasetGroupBy, or idata-like
        Input data. It will have different pre-processing applied to it depending on its type:

        - array-like: call array layer within ``arviz-stats``.
        - xarray object: apply dimension aware function to all relevant subsets.
        - others: passed to :func:`arviz_base.convert_to_dataset` then treated as
          :class:`xarray.Dataset`. This option is discouraged: the conversion is fully
          automated, but it will have to be repeated on every execution of this or
          similar functions. It is recommended to perform the conversion manually first
          and then call ``arviz_stats.histogram``, which allows controlling the
          conversion step and inspecting its results.

    dim : sequence of hashable, optional
        Dimensions to be reduced when computing the histogram.
        Default ``rcParams["data.sample_dims"]``.
    group : hashable, default "posterior"
        Group on which to compute the histogram.
    var_names : str or list of str, optional
        Names of the variables for which the histogram should be computed.
    filter_vars : {None, "like", "regex"}, default None
        If None, interpret `var_names` as the real variable names. If "like", interpret
        `var_names` as substrings of the real variable names. If "regex", interpret
        `var_names` as regular expressions to match against the real variable names.
    coords : dict, optional
        Dictionary of dimension/index names to coordinate values defining a subset
        of the data for which to perform the computation.
    bins : array-like, optional
        Binning specification: bin edges, number of bins, or a bin estimation method.
    range : array-like, optional
        Lower and upper range of the bins.
    weights : array-like, optional
        Weights associated with each data point.
    density : bool, default True
        If True, return a normalized density rather than raw counts.

    Returns
    -------
    ndarray, DataArray, Dataset, DataTree
        Requested histogram of the provided input. It will have a
        ``hist_dim_{var_name}`` dimension and a ``plot_axis`` dimension with
        coordinates "histogram", "left_edges" and "right_edges".

    See Also
    --------
    arviz_stats.ecdf, arviz_stats.kde, arviz_stats.qds : Alternative visual summaries
        for marginal distributions
    arviz_plots.plot_dist

    Examples
    --------
    Calculate the histogram of a Normal random variable:

    .. ipython::

        In [1]: import arviz_stats as azs
           ...: import numpy as np
           ...: data = np.random.default_rng().normal(size=2000)
           ...: azs.histogram(data)

    Calculate the histogram for specific variables:

    .. ipython::

        In [1]: import arviz_base as azb
           ...: dt = azb.load_arviz_data("centered_eight")
           ...: azs.histogram(dt, var_names=["mu", "theta"])

    Calculate the histogram also over the school dimension (for variables where present):

    .. ipython::

        In [1]: azs.histogram(dt, dim=["chain", "draw", "school"])

    """
    return _apply_multi_input_function(
        "histogram",
        data,
        dim,
        "dim",
        group=group,
        var_names=var_names,
        filter_vars=filter_vars,
        coords=coords,
        bins=bins,
        range=range,
        weights=weights,
        density=density,
    )
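# Illustrative sketch (not part of the public module): assuming the
# per-variable computation behaves like :func:`numpy.histogram`, the returned
# edges split into the "left_edges"/"right_edges" coordinates mentioned above.
# The helper name is hypothetical.
def _histogram_sketch_1d(samples, bins=None, density=True):
    """Return (heights, left_edges, right_edges) for a 1-D sample."""
    import numpy as np

    heights, edges = np.histogram(
        np.asarray(samples), bins="auto" if bins is None else bins, density=density
    )
    return heights, edges[:-1], edges[1:]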
def kde(
    data,
    dim=None,
    group="posterior",
    var_names=None,
    filter_vars=None,
    coords=None,
    circular=False,
    **kwargs,
):
    r"""Compute the marginal kernel density estimates (KDE).

    See the EABM chapter on `Visualization of Random Variables with ArviZ
    <https://arviz-devs.github.io/EABM/Chapters/Distributions.html#distributions-in-arviz>`_
    for more details.

    Parameters
    ----------
    data : array-like, DataArray, Dataset, DataTree, DataArrayGroupBy, DatasetGroupBy, or idata-like
        Input data. It will have different pre-processing applied to it depending on its type:

        - array-like: call array layer within ``arviz-stats``.
        - xarray object: apply dimension aware function to all relevant subsets.
        - others: passed to :func:`arviz_base.convert_to_dataset` then treated as
          :class:`xarray.Dataset`. This option is discouraged: the conversion is fully
          automated, but it will have to be repeated on every execution of this or
          similar functions. It is recommended to perform the conversion manually first
          and then call ``arviz_stats.kde``, which allows controlling the conversion
          step and inspecting its results.

    dim : sequence of hashable, optional
        Dimensions to be reduced when computing the KDE.
        Default ``rcParams["data.sample_dims"]``.
    group : hashable, default "posterior"
        Group on which to compute the KDE.
    var_names : str or list of str, optional
        Names of the variables for which the KDE should be computed.
    filter_vars : {None, "like", "regex"}, default None
        If None, interpret `var_names` as the real variable names. If "like", interpret
        `var_names` as substrings of the real variable names. If "regex", interpret
        `var_names` as regular expressions to match against the real variable names.
    coords : dict, optional
        Dictionary of dimension/index names to coordinate values defining a subset
        of the data for which to perform the computation.
    circular : bool, default False
        Whether to compute the KDE taking into account that `data` represents circular
        variables (in the range [-np.pi, np.pi]) or not.
    **kwargs : any, optional
        Forwarded to the array or dataarray interface for KDE.

    Returns
    -------
    ndarray, DataArray, Dataset, DataTree
        Requested KDE of the provided input. The xarray objects will have a ``kde_dim``
        dimension and a ``plot_axis`` dimension with coordinates "x" and "y".

    See Also
    --------
    arviz_stats.ecdf, arviz_stats.histogram, arviz_stats.qds : Alternative visual summaries
        for marginal distributions
    arviz_plots.plot_dist

    Examples
    --------
    Calculate the KDE of a Normal random variable:

    .. ipython::

        In [1]: import arviz_stats as azs
           ...: import numpy as np
           ...: data = np.random.default_rng().normal(size=2000)
           ...: azs.kde(data)

    Calculate the KDE for specific variables:

    .. ipython::

        In [1]: import arviz_base as azb
           ...: dt = azb.load_arviz_data("centered_eight")
           ...: azs.kde(dt, var_names=["mu", "theta"])

    Calculate the KDE also over the school dimension (for variables where present):

    .. ipython::

        In [1]: azs.kde(dt, dim=["chain", "draw", "school"])

    """
    return _apply_multi_input_function(
        "kde",
        data,
        dim,
        "dim",
        group=group,
        var_names=var_names,
        filter_vars=filter_vars,
        coords=coords,
        circular=circular,
        **kwargs,
    )
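# Illustrative sketch (not part of the public module): a basic Gaussian KDE
# evaluated on a fixed grid via :class:`scipy.stats.gaussian_kde`. The helper
# name is hypothetical, and the bandwidth selection, grid construction and
# boundary handling used by ``arviz-stats`` may differ.
def _kde_sketch_1d(samples, grid_len=512):
    """Return (x, y) arrays with the estimated density y on grid x."""
    import numpy as np
    from scipy.stats import gaussian_kde

    samples = np.asarray(samples)
    x = np.linspace(samples.min(), samples.max(), grid_len)
    return x, gaussian_kde(samples)(x)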
def qds(
    data,
    dim=None,
    group="posterior",
    var_names=None,
    filter_vars=None,
    coords=None,
    nquantiles=100,
    binwidth=None,
    dotsize=1,
    stackratio=1,
    **kwargs,
):
    r"""Compute the marginal quantile dots.

    For details see [1]_ and check the EABM chapter on `Visualization of Random
    Variables with ArviZ
    <https://arviz-devs.github.io/EABM/Chapters/Distributions.html#distributions-in-arviz>`_.

    Parameters
    ----------
    data : array-like, DataArray, Dataset, DataTree, DataArrayGroupBy, DatasetGroupBy, or idata-like
        Input data. It will have different pre-processing applied to it depending on its type:

        - array-like: call array layer within ``arviz-stats``.
        - xarray object: apply dimension aware function to all relevant subsets.
        - others: passed to :func:`arviz_base.convert_to_dataset` then treated as
          :class:`xarray.Dataset`. This option is discouraged: the conversion is fully
          automated, but it will have to be repeated on every execution of this or
          similar functions. It is recommended to perform the conversion manually first
          and then call ``arviz_stats.qds``, which allows controlling the conversion
          step and inspecting its results.

    dim : sequence of hashable, optional
        Dimensions to be reduced when computing the quantile dots.
        Default ``rcParams["data.sample_dims"]``.
    group : hashable, default "posterior"
        Group on which to compute the quantile dots.
    var_names : str or list of str, optional
        Names of the variables for which the quantile dots should be computed.
    filter_vars : {None, "like", "regex"}, default None
        If None, interpret `var_names` as the real variable names. If "like", interpret
        `var_names` as substrings of the real variable names. If "regex", interpret
        `var_names` as regular expressions to match against the real variable names.
    coords : dict, optional
        Dictionary of dimension/index names to coordinate values defining a subset
        of the data for which to perform the computation.
    nquantiles : int, default 100
        Number of quantiles to compute; each quantile is represented by one dot.
    binwidth : float, optional
        Width of the bin for the dots.
    dotsize : float, default 1
        The size of the dots relative to the bin width. The default makes dots be
        just about as wide as the bin width.
    stackratio : float, default 1
        The distance between the centers of the dots in the same stack relative to the
        bin height. The default makes dots in the same stack just touch each other.
    **kwargs : any, optional
        Forwarded to the array or dataarray interface for quantile dots.

    Returns
    -------
    ndarray, DataArray, Dataset, DataTree
        Requested QDs of the provided input. The xarray objects will have a ``qds_dim``
        dimension and a ``plot_axis`` dimension with coordinates "x" and "y".

    See Also
    --------
    arviz_stats.ecdf, arviz_stats.histogram, arviz_stats.kde : Alternative visual summaries
        for marginal distributions
    arviz_plots.plot_dist

    References
    ----------
    .. [1] Kay M, Kola T, Hullman JR, and Munson SA. *When (ish) is My Bus?:
        User-centered Visualizations of Uncertainty in Everyday, Mobile Predictive
        Systems.* In Proceedings of the 2016 CHI Conference. Association for Computing
        Machinery. 2016. https://doi.org/10.1145/2858036.2858558

    Examples
    --------
    Calculate the QDs of a Normal random variable:

    .. ipython::

        In [1]: import arviz_stats as azs
           ...: import numpy as np
           ...: data = np.random.default_rng().normal(size=2000)
           ...: azs.qds(data)

    Calculate the QDs for specific variables:

    .. ipython::

        In [1]: import arviz_base as azb
           ...: dt = azb.load_arviz_data("centered_eight")
           ...: azs.qds(dt, var_names=["mu", "theta"])

    Calculate the QDs also over the school dimension (for variables where present):

    .. ipython::

        In [1]: azs.qds(dt, dim=["chain", "draw", "school"])

    """
    return _apply_multi_input_function(
        "qds",
        data,
        dim,
        "dim",
        group=group,
        var_names=var_names,
        filter_vars=filter_vars,
        coords=coords,
        nquantiles=nquantiles,
        binwidth=binwidth,
        dotsize=dotsize,
        stackratio=stackratio,
        **kwargs,
    )
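# Illustrative sketch (not part of the public module): quantile dots take
# `nquantiles` evenly spaced quantiles and stack those falling in the same bin,
# in the spirit of Kay et al. [1]_. This is a simplified NumPy version; the
# helper name, default binwidth rule and binning details are assumptions and
# may differ from what ``arviz-stats`` implements.
def _qds_sketch_1d(samples, nquantiles=100, binwidth=None):
    """Return (x, y) dot centers for a quantile dot plot of a 1-D sample."""
    import numpy as np

    # Evenly spaced quantile probabilities, one per dot.
    probs = (np.arange(nquantiles) + 0.5) / nquantiles
    quantiles = np.quantile(np.asarray(samples), probs)
    if binwidth is None:
        binwidth = (quantiles.max() - quantiles.min()) / np.sqrt(nquantiles)
    # Assign each quantile to a bin, then stack dots within each bin.
    bin_idx = np.floor((quantiles - quantiles.min()) / binwidth).astype(int)
    x, y = [], []
    for idx in np.unique(bin_idx):
        stack = quantiles[bin_idx == idx]
        x.extend(np.full(len(stack), stack.mean()))
        y.extend(np.arange(1, len(stack) + 1))
    return np.asarray(x), np.asarray(y)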