Source code for syntropy.gaussian.shannon

import math
import numpy as np
import scipy.stats as stats
from numpy.typing import NDArray
from .utils import check_cov
from ..utils import check_idxs

LN_TWO_PI_E: float = math.log(2.0 * math.pi * math.e)
TWO_PI: float = 2.0 * math.pi
SQRT_TWO_PI: float = math.sqrt(TWO_PI)



[docs]
def differential_entropy(
    cov: NDArray[np.floating], idxs: tuple[int, ...] | None = None
) -> float:
    """
    Computes the expected differential entropy of a multivariate
    distribution parameterized by a covariance matrix using a
    Gaussian estimator via direct computation.

    The differential entropy is given by:

    .. math::

        H(X) = \\int dx P(x)\\log P(x)

    And if :math:`X` is drawn from a k-dimensional Gaussian, it is equal to

    .. math::

        H(X) = \\frac{k}{2}\\log 2\\pi\\textnormal{e} + \\frac{1}{2}\\log|\\Sigma|

    Where :math:`|\\Sigma|` is the determinant of the covariance matrix.

    Parameters
    ----------
    cov : NDArray[np.floating]
        The covariance matrix that defines the distribution.
    idxs : tuple[int, ...], optional
        The specific subset of variables to compute the entropy of.
        Defaults to using the entire covariance matrix.

    Returns
    -------
    float

    """
    idxs_: tuple[int, ...] = check_idxs(idxs, cov)

    cov_subset: NDArray[np.floating] = cov[np.ix_(idxs_, idxs_)]
    k: int = len(idxs_)

    # Use slogdet for numerical stability
    sign, logdet = np.linalg.slogdet(cov_subset)

    # H(X) = (k/2) * log(2πe) + (1/2) * log|Σ|
    return 0.5 * (k * LN_TWO_PI_E + logdet)




[docs]
def local_differential_entropy(
    data: NDArray[np.floating],
    cov: NDArray[np.floating] | None = None,
    idxs: tuple[int, ...] | None = None,
) -> NDArray[np.floating]:
    """
    Computes the framewise differential entropy for a set of variables.

    .. math::
        h(x) = -\\log P(x)

    For data drawn from a k-dimensional Gaussian

    .. math::
        P(x) = (2\\pi)^{-k/2}|\\Sigma|^{-1/2}\\textnormal{e}^{\\frac{-(x - \\mu)^\\mathrm{T} \\Sigma^{-1}(x - \\mu)}{2}}

    Parameters
    ----------
    data : NDArray[np.floating]
        The data in channels x samples format.
    cov : NDArray[np.floating], optional
        The covariance matrix that defines the distribution.
        If none is provided, it is computed from the data object.
    idxs : tuple[int, ...], optional
        The specific subset of variables to compute the entropy of.
        Defaults to using all channels.

    Returns
    -------
    NDArray[np.floating]
        The series of pointwise entropies.

    """

    if data.ndim == 1:
        assert idxs is None, "Don't try an index a 1D array like that."
        return -stats.norm.logpdf(x=data, loc=data.mean(), scale=data.std())
    else:
        idxs_: tuple[int, ...] = check_idxs(idxs, data)
        cov_: NDArray[np.floating] = check_cov(cov, data)

        return -(
            stats.multivariate_normal.logpdf(
                x=data[idxs_, :].T,
                mean=data[idxs_, :].mean(axis=-1),
                cov=cov_[np.ix_(idxs_, idxs_)],
                allow_singular=True,
            ).reshape((1, data.shape[-1]))
        )




[docs]
def conditional_entropy(
    idxs_x: tuple[int, ...],
    idxs_y: tuple[int, ...],
    cov: NDArray[np.floating],
) -> float:
    """
    Computes the conditional entropy of X given Y using Gaussian estimation.

    .. math::
        H(X|Y) = H(X,Y) - H(Y)

    Parameters
    ----------
    idxs_x : tuple
        The indices of the variables to compute the conditional entropy on.
    idxs_y : tuple
        The indices of the conditioning set.
    cov : NDArray[np.floating], optional
        The covariance matrix that defines the distribution.
        If none is provided, it is computed from the data object.

    Returns
    -------
    float

    """

    joint: tuple[int, ...] = idxs_x + idxs_y

    h_joint: float = differential_entropy(cov, joint)
    h_y: float = differential_entropy(cov, idxs_y)

    return h_joint - h_y




[docs]
def local_conditional_entropy(
    idxs_x: tuple[int, ...],
    idxs_y: tuple[int, ...],
    data: NDArray[np.floating],
    cov: NDArray[np.floating] | None = None,
) -> NDArray[np.floating]:
    """
    Computes the local condition entropy for every sample in data using Gaussian estimation.

    .. math::

        h(x|y) = h(x,y) - h(y)


    Parameters
    ----------
    idxs_x : tuple
        The indices of the variables to compute the conditional entropy on.
    idxs_y : tuple
        The indices of the conditioning set.
    data : NDArray[np.floating]
        The data in channels x samples format.
    cov : NDArray[np.floating], optional
        The covariance matrix that defines the distribution.
        If none is provided, it is computed from the data object.

    Returns
    -------
    NDArray[np.floating]

    """

    cov_: NDArray[np.floating] = check_cov(cov, data)

    joint: tuple[int, ...] = idxs_x + idxs_y

    h_y = local_differential_entropy(data[idxs_y, :], cov_[np.ix_(idxs_y, idxs_y)])
    h_joint = local_differential_entropy(data[joint, :], cov_[np.ix_(joint, joint)])

    return h_joint - h_y




[docs]
def mutual_information(
    idxs_x: tuple[int, ...], idxs_y: tuple[int, ...], cov: NDArray[np.floating]
) -> float:
    r"""
    Computes the mutual information between two (potentially multivariate) sets of elements.

    .. math::
        I(X;Y) &= H(X) + H(Y) - H(X,Y) \\
               &= H(X) - H(X|Y) \\
               &= H(Y) - H(Y|X) \\
               &= H(X,Y) - H(X|Y) - H(Y|X)

    For Gaussian random variables:

    .. math::
        I(X;Y) = \frac{1}{2}\log\frac{|\Sigma_{X}||\Sigma_{Y}|}{|\Sigma_{XY}|}

    In the particular case where :math:`X` and :math:`Y` are univariate, the mutual information can be computed directly from the Pearson correlation coefficient :math:`r`:

    .. math::
        I(X;Y) = \frac{-\log(1-r^{2})}{2}
    
    If you wish to use a Gaussian copula estimator, use the correlation matrix returned by the function :func:`utils.copula_transform`. 


    Parameters
    ----------
    idxs_x : tuple
        The indices of the source variables. Can be multivariate.
    idxs_y : tuple
        The indices of the idxs_y variable. Can be multivariate.
    cov : NDArray[np.floating]
        The covariance matrix that defines the distribution.

    Returns
    -------
    float

    """
    joint: tuple[int, ...] = idxs_x + idxs_y

    h_x = differential_entropy(cov=cov, idxs=idxs_x)
    h_y = differential_entropy(cov=cov, idxs=idxs_y)
    h_joint = differential_entropy(cov=cov, idxs=joint)

    return h_x + h_y - h_joint




[docs]
def local_mutual_information(
    idxs_x: tuple[int, ...],
    idxs_y: tuple[int, ...],
    data: NDArray[np.floating],
    cov: NDArray[np.floating] | None = None,
) -> NDArray[np.floating]:
    """
    Computes the local mutual information between X and Y for every sample in data using Gaussian estimation.
    Note that the local mutual information can be negative.
    
    .. math::
        i(x;y) &= h(x) + h(y) - h(x,y) \\\\
               &= \\log\\frac{p(x|y)}{p(x)} \\\\
               &= \\log\\frac{p(y|x)}{p(y)} \\\\
               &= \\log\\frac{p(x,y)}{p(x)p(y)} \\\\
    
    If you wish to use a Gaussian copula estimator, use the transformed data and the correlation matrix returned by the function :func:`utils.copula_transform`. 

    Parameters
    ----------
    idxs_x : tuple
        The indices of the source variables. Can be multivariate.
    idxs_y : tuple
        The indices of the idxs_y variable. Can be multivariate.
    data : NDArray[np.floating]
        The data in channels x samples format.
    cov : NDArray[np.floating], optional
        The covariance matrix that defines the distribution.
        If none is provided, it is computed from the data object.

    Returns
    -------
     NDArray[np.floating]

    """

    cov_: NDArray[np.floating] = check_cov(cov, data)

    joint: tuple[int, ...] = idxs_x + idxs_y

    h_x: NDArray[np.floating] = local_differential_entropy(
        data=data[idxs_x, :], cov=cov_[np.ix_(idxs_x, idxs_x)]
    )
    h_y: NDArray[np.floating] = local_differential_entropy(
        data=data[idxs_y, :], cov=cov_[np.ix_(idxs_y, idxs_y)]
    )
    h_joint: NDArray[np.floating] = local_differential_entropy(
        data=data[joint, :], cov=cov_[np.ix_(joint, joint)]
    )

    return h_x + h_y - h_joint




[docs]
def conditional_mutual_information(
    idxs_x: tuple[int, ...],
    idxs_y: tuple[int, ...],
    idxs_z: tuple[int, ...],
    cov: NDArray[np.floating],
) -> float:
    """
    Computes the expected mutual information between a set of variables X
    and Y, conditional on a third set Z.
    
    .. math:: 
        I(X,Y|Z) &= H(X|Z) + H(Y|Z) - H(X,Y|Z) \\\\
                 &= I(X;Y,Z) - I(X;Z)
    
    If you wish to use a Gaussian copula estimator, use the correlation matrix returned by the function :func:`utils.copula_transform`. 

    Parameters
    ----------
    idxs_x : tuple
        The indices of the X variables. Can be multivariate.
    idxs_y : tuple
        The indices of the Y variable. Can be multivariate.
    idxs_z : tuple
        The indices of the conditioning set. Can be multivariate.
    cov : NDArray[np.floating]
        The covariance matrix that defines the distribution.

    Returns
    -------
    float

    """

    joint: tuple[int, ...] = idxs_x + idxs_y

    h_x_z: float = conditional_entropy(idxs_x, idxs_z, cov)
    h_y_z: float = conditional_entropy(idxs_y, idxs_z, cov)
    h_xy_z: float = conditional_entropy(joint, idxs_z, cov)

    return h_x_z + h_y_z - h_xy_z




[docs]
def local_conditional_mutual_information(
    idxs_x: tuple[int, ...],
    idxs_y: tuple[int, ...],
    idxs_z: tuple[int, ...],
    data: NDArray[np.floating],
    cov: NDArray[np.floating] | None = None,
):
    """
    Computes the local conditional mutual information between
    two sets of variables X and Y, conditional on another set Z.
    
    .. math:: 
    
        i(x,y|z) &= h(x|z) + h(y|z) - h(x,y|z) \\\\
                 &= i(x;y,z) - i(x;z)
    
    If you wish to use a Gaussian copula estimator, use the transformed data and the correlation matrix returned by the function :func:`utils.copula_transform`. 

    Parameters
    ----------
    idxs_x : tuple
        The indices of the X variables. Can be multivariate.
    idxs_y : tuple
        The indices of the Y variable. Can be multivariate.
    idxs_z : tuple
        The indices of the conditioning set. Can be multivariate.
    data : NDArray[np.floating]
         The data in channels x samples format.
    cov : NDArray[np.floating], optional
        The covariance matrix that defines the distribution.
        If none is provided, it is computed from the data object.

    Returns
    -------
    NDArray[np.floating]

    """
    cov_: NDArray[np.floating] = check_cov(cov, data)

    joint: tuple[int, ...] = idxs_x + idxs_y

    h_x_z = local_conditional_entropy(idxs_x, idxs_z, data=data, cov=cov_)
    h_y_z = local_conditional_entropy(idxs_y, idxs_z, data=data, cov=cov_)
    h_xy_z = local_conditional_entropy(joint, idxs_z, data=data, cov=cov_)

    return h_x_z + h_y_z - h_xy_z




[docs]
def kullback_leibler_divergence(
    cov_posterior: NDArray[np.floating],
    cov_prior: NDArray[np.floating],
    mu_posterior: NDArray[np.floating] | int = 0,
    mu_prior: NDArray[np.floating] | int = 0,
) -> float:
    """
    Computes the Gaussian Kullback-Leibler divergence between two :math:`k`-dimensional multivariate Gaussians parameterized by covariance matrices.

    .. math::

        D_{KL}(\\mathcal{N}_0 || \\mathcal{N}_1) = \\frac{1}{2}[ \\operatorname{tr}(\\Sigma_{1}^{-1}\\Sigma_{0}) - k + (\\mu_1 - \\mu_0)^\\mathsf{T} \\Sigma_{1}^{-1}(\\mu_1 - \\mu_0) + \\log\\frac{|\\Sigma_{1}|}{|\\Sigma_{0}|}]

    Parameters
    ----------
    cov_posterior : NDArray[np.floating]
        The covariance matrix that defines the posterior distribution.
    cov_prior : NDArray[np.floating]
        The covariance matrix that defines the prior distribution .

    Returns
    -------
    float

    """
    k = cov_posterior.shape[0]
    
    if mu_posterior == 0:
        mu_posterior = np.zeros(cov_posterior.shape[0])
    if mu_prior == 0:
        mu_prior = np.zeros(cov_prior.shape[0])
    
    # Mean difference
    d = mu_prior - mu_posterior

    # 1. Trace term: tr(cov_prior^-1 * cov_posterior)
    # Efficiently compute cov_prior^-1 * cov_posterior using solve
    trace_term = np.trace(np.linalg.solve(cov_prior, cov_posterior))

    # 2. Quadratic term: (mu_prior-mu_posterior)^T * cov_prior^-1 * (mu_prior-mu_posterior)
    # Solve cov_prior * x = d, then compute d^T * x
    quad_term = d.T @ np.linalg.solve(cov_prior, d)

    # 3. Log-determinant term: ln(|cov_prior| / |cov_posterior|)
    _, logdet0 = np.linalg.slogdet(cov_posterior)
    _, logdet1 = np.linalg.slogdet(cov_prior)
    det_term = logdet1 - logdet0

    # Final KL formula
    return 0.5 * (trace_term + quad_term - k + det_term)




[docs]
def local_kullback_leibler_divergence(
    cov_posterior: NDArray[np.floating],
    cov_prior: NDArray[np.floating],
    data: NDArray[np.floating],
) -> NDArray[np.floating]:
    """
    Computes the local Kullback-Leibler divergence between the
    posterior and the prior for every sample in the data.
    The local KL divergence is a rarely used measure.

    .. math::

        d_{kl}^{P||Q}(x) = h^{Q}(x) - h^{P}(x)

    Parameters
    ----------
    cov_posterior : NDArray[np.floating]
        The covariance matrix that defines the posterior distribution.
    cov_prior : NDArray[np.floating]
        The covariance matrix that defines the prior distribution .
    data : NDArray[np.floating]
        The data, assumed to be in channels x samples format.

    Returns
    -------
    NDArray[np.floating]
        The local Kullback-Leibler divergence.

    References
    ----------
    Varley, T. F. (2024).
    Generalized decomposition of multivariate information.
    PLOS ONE, 19(2), e0297128.
    https://doi.org/10.1371/journal.pone.0297128

    """

    h_posterior: NDArray[np.floating] = local_differential_entropy(data, cov_posterior)
    h_prior: NDArray[np.floating] = local_differential_entropy(data, cov_prior)

    return h_prior - h_posterior
Source code for syntropy.gaussian.shannon

Syntropy

Navigation

Related Topics