Source code for KDEpy.NaiveKDE

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Module for the NaiveKDE.
"""

import itertools
import numbers
from typing import Callable, Optional, Union

import numpy as np

from KDEpy.BaseKDE import BaseKDE


[docs]class NaiveKDE(BaseKDE):
    """
    This class implements a naive computation of a kernel density estimate. The
    advantages are that choices of bandwidth, norms, weights and grids are
    straightforward -- the user can do almost anything. The disadvantage is
    that computations are slow on more than a couple of thousand data points.

    Parameters
    ----------
    kernel : str or callable
        Kernel function, or string matching available options. See cls._available_kernels.keys() for choices.
    bw : float, str or array-like
        Bandwidth or bandwidth selection method. If a float is passed, it
        is the standard deviation of the kernel. If a string it passed, it
        is the bandwidth selection method, see cls._bw_methods.keys() for
        choices. If an array-like it passed, it is the bandwidth of each
        point.
    norm : float
        The p-norm used to compute the distances in higher dimensions.

    Examples
    --------
    >>> data = np.random.randn(2**10)
    >>> # (1) Automatic bw selection using Improved Sheather Jones
    >>> x, y = NaiveKDE(bw='ISJ').fit(data).evaluate()
    >>> # (2) Explicit choice of kernel and bw (standard deviation of kernel)
    >>> x, y = NaiveKDE(kernel='triweight', bw=0.5).fit(data).evaluate()
    >>> weights = data + 10
    >>> # (3) Using a grid and weights for the data
    >>> y = NaiveKDE(kernel='epa', bw=0.5).fit(data, weights).evaluate(x)

    References
    ----------
    - Silverman, B. W. Density Estimation for Statistics and Data Analysis.
      Boca Raton: Chapman and Hall, 1986.
    - Wand, M. P., and M. C. Jones. Kernel Smoothing.
      London ; New York: Chapman and Hall/CRC, 1995.
    - Scipy implementation, at ``scipy.stats.gaussian_kde``.
    """

    def __init__(
        self, kernel: Union[str, Callable] = "gaussian", bw: Union[float, str, np.ndarray] = 1, norm: float = 2
    ):
        super().__init__(kernel, bw)
        self.norm = norm

[docs]    def fit(self, data: np.ndarray, weights: Optional[np.ndarray] = None) -> "NaiveKDE":
        """
        Fit the KDE to the data. This validates the data and stores it.
        Computations are performed when the KDE is evaluated on a grid.

        Parameters
        ----------
        data: array-like
            The data points. High dimensional data must have shape (obs, dims).
        weights: array-like
            One weight per data point. Must have shape (obs,). If None is
            passed, uniform weights are used.

        Returns
        -------
        self
            Returns the instance.

        Examples
        --------
        >>> data = [1, 3, 4, 7]
        >>> weights = [3, 4, 2, 1]
        >>> kde = NaiveKDE().fit(data, weights=None)
        >>> kde = NaiveKDE().fit(data, weights=weights)
        >>> x, y = kde()
        """
        # Sets self.data
        super().fit(data, weights)
        return self

[docs]    def evaluate(self, grid_points: Optional[Union[np.ndarray, int, tuple]] = None) -> Union[np.ndarray, tuple]:
        """
        Evaluate on grid points.

        Parameters
        ----------
        grid_points: array-like, int, tuple or None
            A grid (mesh) to evaluate on. High dimensional grids must have
            shape (obs, dims). If an integer is passed, it's the number of grid
            points on an equidistant grid. If a tuple is passed, it's the
            number of grid points in each dimension. If None, a grid will be
            automatically created.

        Returns
        -------
        y: array-like
            If a grid is supplied, `y` is returned. If no grid is supplied,
            a tuple (`x`, `y`) is returned.

        Examples
        --------
        >>> kde = NaiveKDE().fit([1, 3, 4, 7])
        >>> # Two ways to evaluate, either with a grid or without
        >>> x, y = kde.evaluate()
        >>> x, y = kde.evaluate(256)
        >>> y = kde.evaluate(x)
        """
        # This method sets self.grid points and verifies it
        # NaiveKDE does not convert the bw to a scalar, since a vector is
        # allowed too.
        super().evaluate(grid_points, bw_to_scalar=False)

        # Create zeros on the grid points
        evaluated = np.zeros(self.grid_points.shape[0])

        # For every data point, compute the kernel and add to the grid
        bw = self.bw
        if isinstance(bw, numbers.Number):
            bw = np.asarray(np.ones(self.data.shape[0]) * bw, dtype=float)

        # TODO: Implementation w.r.t grid points for faster evaluation
        # See the SciPy evaluation for how this can be done
        weights = itertools.repeat(1 / self.data.shape[0]) if self.weights is None else self.weights

        for weight, data_point, bw in zip(weights, self.data, bw):
            x = self.grid_points - data_point
            evaluated += weight * self.kernel(x, bw=bw, norm=self.norm)

        return self._evalate_return_logic(evaluated, self.grid_points)


if __name__ == "__main__":
    import pytest

    # --durations=10  <- May be used to show potentially slow tests
    pytest.main(args=[".", "--doctest-modules", "-v"])
Source code for KDEpy.NaiveKDE

KDEpy

Navigation

Related Topics