import math
import numpy as np
import scipy.stats as stats
from numpy.typing import NDArray
from .utils import check_cov
from ..utils import check_idxs
LN_TWO_PI_E: float = math.log(2.0 * math.pi * math.e)
TWO_PI: float = 2.0 * math.pi
SQRT_TWO_PI: float = math.sqrt(TWO_PI)
[docs]
def differential_entropy(
cov: NDArray[np.floating], idxs: tuple[int, ...] | None = None
) -> float:
"""
Computes the expected differential entropy of a multivariate
distribution parameterized by a covariance matrix using a
Gaussian estimator via direct computation.
The differential entropy is given by:
.. math::
H(X) = \\int dx P(x)\\log P(x)
And if :math:`X` is drawn from a k-dimensional Gaussian, it is equal to
.. math::
H(X) = \\frac{k}{2}\\log 2\\pi\\textnormal{e} + \\frac{1}{2}\\log|\\Sigma|
Where :math:`|\\Sigma|` is the determinant of the covariance matrix.
Parameters
----------
cov : NDArray[np.floating]
The covariance matrix that defines the distribution.
idxs : tuple[int, ...], optional
The specific subset of variables to compute the entropy of.
Defaults to using the entire covariance matrix.
Returns
-------
float
"""
idxs_: tuple[int, ...] = check_idxs(idxs, cov)
cov_subset: NDArray[np.floating] = cov[np.ix_(idxs_, idxs_)]
k: int = len(idxs_)
# Use slogdet for numerical stability
sign, logdet = np.linalg.slogdet(cov_subset)
# H(X) = (k/2) * log(2πe) + (1/2) * log|Σ|
return 0.5 * (k * LN_TWO_PI_E + logdet)
[docs]
def local_differential_entropy(
data: NDArray[np.floating],
cov: NDArray[np.floating] | None = None,
idxs: tuple[int, ...] | None = None,
) -> NDArray[np.floating]:
"""
Computes the framewise differential entropy for a set of variables.
.. math::
h(x) = -\\log P(x)
For data drawn from a k-dimensional Gaussian
.. math::
P(x) = (2\\pi)^{-k/2}|\\Sigma|^{-1/2}\\textnormal{e}^{\\frac{-(x - \\mu)^\\mathrm{T} \\Sigma^{-1}(x - \\mu)}{2}}
Parameters
----------
data : NDArray[np.floating]
The data in channels x samples format.
cov : NDArray[np.floating], optional
The covariance matrix that defines the distribution.
If none is provided, it is computed from the data object.
idxs : tuple[int, ...], optional
The specific subset of variables to compute the entropy of.
Defaults to using all channels.
Returns
-------
NDArray[np.floating]
The series of pointwise entropies.
"""
if data.ndim == 1:
assert idxs is None, "Don't try an index a 1D array like that."
return -stats.norm.logpdf(x=data, loc=data.mean(), scale=data.std())
else:
idxs_: tuple[int, ...] = check_idxs(idxs, data)
cov_: NDArray[np.floating] = check_cov(cov, data)
return -(
stats.multivariate_normal.logpdf(
x=data[idxs_, :].T,
mean=data[idxs_, :].mean(axis=-1),
cov=cov_[np.ix_(idxs_, idxs_)],
allow_singular=True,
).reshape((1, data.shape[-1]))
)
[docs]
def conditional_entropy(
idxs_x: tuple[int, ...],
idxs_y: tuple[int, ...],
cov: NDArray[np.floating],
) -> float:
"""
Computes the conditional entropy of X given Y using Gaussian estimation.
.. math::
H(X|Y) = H(X,Y) - H(Y)
Parameters
----------
idxs_x : tuple
The indices of the variables to compute the conditional entropy on.
idxs_y : tuple
The indices of the conditioning set.
cov : NDArray[np.floating], optional
The covariance matrix that defines the distribution.
If none is provided, it is computed from the data object.
Returns
-------
float
"""
joint: tuple[int, ...] = idxs_x + idxs_y
h_joint: float = differential_entropy(cov, joint)
h_y: float = differential_entropy(cov, idxs_y)
return h_joint - h_y
[docs]
def local_conditional_entropy(
idxs_x: tuple[int, ...],
idxs_y: tuple[int, ...],
data: NDArray[np.floating],
cov: NDArray[np.floating] | None = None,
) -> NDArray[np.floating]:
"""
Computes the local condition entropy for every sample in data using Gaussian estimation.
.. math::
h(x|y) = h(x,y) - h(y)
Parameters
----------
idxs_x : tuple
The indices of the variables to compute the conditional entropy on.
idxs_y : tuple
The indices of the conditioning set.
data : NDArray[np.floating]
The data in channels x samples format.
cov : NDArray[np.floating], optional
The covariance matrix that defines the distribution.
If none is provided, it is computed from the data object.
Returns
-------
NDArray[np.floating]
"""
cov_: NDArray[np.floating] = check_cov(cov, data)
joint: tuple[int, ...] = idxs_x + idxs_y
h_y = local_differential_entropy(data[idxs_y, :], cov_[np.ix_(idxs_y, idxs_y)])
h_joint = local_differential_entropy(data[joint, :], cov_[np.ix_(joint, joint)])
return h_joint - h_y
[docs]
def kullback_leibler_divergence(
cov_posterior: NDArray[np.floating],
cov_prior: NDArray[np.floating],
mu_posterior: NDArray[np.floating] | int = 0,
mu_prior: NDArray[np.floating] | int = 0,
) -> float:
"""
Computes the Gaussian Kullback-Leibler divergence between two :math:`k`-dimensional multivariate Gaussians parameterized by covariance matrices.
.. math::
D_{KL}(\\mathcal{N}_0 || \\mathcal{N}_1) = \\frac{1}{2}[ \\operatorname{tr}(\\Sigma_{1}^{-1}\\Sigma_{0}) - k + (\\mu_1 - \\mu_0)^\\mathsf{T} \\Sigma_{1}^{-1}(\\mu_1 - \\mu_0) + \\log\\frac{|\\Sigma_{1}|}{|\\Sigma_{0}|}]
Parameters
----------
cov_posterior : NDArray[np.floating]
The covariance matrix that defines the posterior distribution.
cov_prior : NDArray[np.floating]
The covariance matrix that defines the prior distribution .
Returns
-------
float
"""
k = cov_posterior.shape[0]
if mu_posterior == 0:
mu_posterior = np.zeros(cov_posterior.shape[0])
if mu_prior == 0:
mu_prior = np.zeros(cov_prior.shape[0])
# Mean difference
d = mu_prior - mu_posterior
# 1. Trace term: tr(cov_prior^-1 * cov_posterior)
# Efficiently compute cov_prior^-1 * cov_posterior using solve
trace_term = np.trace(np.linalg.solve(cov_prior, cov_posterior))
# 2. Quadratic term: (mu_prior-mu_posterior)^T * cov_prior^-1 * (mu_prior-mu_posterior)
# Solve cov_prior * x = d, then compute d^T * x
quad_term = d.T @ np.linalg.solve(cov_prior, d)
# 3. Log-determinant term: ln(|cov_prior| / |cov_posterior|)
_, logdet0 = np.linalg.slogdet(cov_posterior)
_, logdet1 = np.linalg.slogdet(cov_prior)
det_term = logdet1 - logdet0
# Final KL formula
return 0.5 * (trace_term + quad_term - k + det_term)
[docs]
def local_kullback_leibler_divergence(
cov_posterior: NDArray[np.floating],
cov_prior: NDArray[np.floating],
data: NDArray[np.floating],
) -> NDArray[np.floating]:
"""
Computes the local Kullback-Leibler divergence between the
posterior and the prior for every sample in the data.
The local KL divergence is a rarely used measure.
.. math::
d_{kl}^{P||Q}(x) = h^{Q}(x) - h^{P}(x)
Parameters
----------
cov_posterior : NDArray[np.floating]
The covariance matrix that defines the posterior distribution.
cov_prior : NDArray[np.floating]
The covariance matrix that defines the prior distribution .
data : NDArray[np.floating]
The data, assumed to be in channels x samples format.
Returns
-------
NDArray[np.floating]
The local Kullback-Leibler divergence.
References
----------
Varley, T. F. (2024).
Generalized decomposition of multivariate information.
PLOS ONE, 19(2), e0297128.
https://doi.org/10.1371/journal.pone.0297128
"""
h_posterior: NDArray[np.floating] = local_differential_entropy(data, cov_posterior)
h_prior: NDArray[np.floating] = local_differential_entropy(data, cov_prior)
return h_prior - h_posterior