Source code for syntropy.discrete.multivariate_mi

import numpy as np
import itertools as it
from scipy.special import comb
from .utils import (
    reduce_state,
    get_marginal_distribution,
    marginalize_out,
    get_all_marginal_distributions,
)
from .shannon import kullback_leibler_divergence, shannon_entropy
from .optimization import constrained_maximum_entropy_distributions

binom_lookup = {N: {k: comb(N, k, exact=True) for k in range(N)} for N in range(16)}


[docs] def total_correlation( joint_distribution: dict[tuple[int, ...], float], ) -> (tuple[dict, float], float): """ Computes the average and pointwise total correlations: .. math:: TC(X) &= D_{KL}(P(X) || \\prod_{i=1}^{N}P(X_i) \\\\ &= \\sum_{i=1}^{N}H(X_i) - H(X) Parameters ---------- joint_distribution : dict[tuple[int, ...]], float] The joint probability distribution. Keys are tuples corresponding to the state of each element. The valules are the probabilities. Returns ------- ptw : dict[tuple, float] The pointwise TC . avg : float The average TC. References ---------- Watanabe, S. (1960). Information Theoretical Analysis of Multivariate Correlation. IBM Journal of Research and Development, 4(1), Article 1. https://doi.org/10.1147/rd.41.0066 Tononi, G., Sporns, O., & Edelman, G. M. (1994). A measure for brain complexity: Relating functional segregation and integration in the nervous system. Proceedings of the National Academy of Sciences, 91(11), Article 11. https://doi.org/10.1073/pnas.91.11.5033 """ maxent_distribution = constrained_maximum_entropy_distributions( joint_distribution, order=1 ) ptw, avg = kullback_leibler_divergence(joint_distribution, maxent_distribution) return ptw, avg
def delta_k( k: int, joint_distribution: dict[tuple[int, ...], float] ) -> (tuple[dict, float], float): r""" S-information, DTC, and negative O-information can all be written in a general form: .. math:: \Delta^{k}(X) = (N-k)TC(X) - \sum_{i=1}^{N}TC(X^{-i}) Parameters ---------- k : int The scale parameter joint_distribution : dict[tuple[int, ...], float] The joint probability distribution. Keys are tuples corresponding to the state of each element. The valules are the probabilities. Returns ------- ptw : dict[tuple[int, ...], float] The pointwise :math:`\delta^{k}`. avg : float The average :math:`\Delta^{k}`. References ---------- Varley, T. F., Pope, M., Faskowitz, J., & Sporns, O. (2023). Multivariate information theory uncovers synergistic subsystems of the human cerebral cortex. Communications Biology, 6(1), Article 1. https://doi.org/10.1038/s42003-023-04843-w """ states: list[tuple[int, ...]] = list(joint_distribution.keys()) N: int = len(states[0]) ptw_tc: dict avg_tc: float ptw_tc, avg_tc = total_correlation(joint_distribution) avg_whole: float = (N - k) * avg_tc avg_sum_parts: float = 0.0 ptw_whole = {state: ptw_tc[state] * (N - k) for state in states} ptw_sum_parts = {state: 0 for state in states} for i in range(N): residuals: tuple = tuple(j for j in range(N) if j != i) reduced_distribution: dict = marginalize_out((i,), joint_distribution) ptw_r: dict avg_r: dict ptw_r, avg_r = total_correlation(reduced_distribution) avg_sum_parts += avg_r for state in ptw_r: full_states = {s for s in states if reduce_state(s, residuals) == state} for full_state in full_states: ptw_sum_parts[full_state] += ptw_r[state] ptw: dict = {state: ptw_whole[state] - ptw_sum_parts[state] for state in states} avg: float = avg_whole - avg_sum_parts return ptw, avg
[docs] def s_information( joint_distribution: dict[tuple[int, ...], float], ) -> (tuple[dict, float], float): """ Computes the local and expected S-information for the joint distribution. .. math:: \\Sigma(X) &= \\sum_{i=1}^{N}I(X_i;X^{-i}) \\\\ &= N\\times TC(X) - \\sum_{i=1}^{N}TC(X^{-i}) \\\\ &= TC(X) + DTC(X) Parameters ---------- joint_distribution : dict[tuple, float] The joint probability distribution. Keys are tuples corresponding to the state of each element. The valules are the probabilities. Returns ------- ptw : dict[tuple, float] The pointwise S-information . avg : float The average S-information. References ---------- Rosas, F., Mediano, P. A. M., Gastpar, M., & Jensen, H. J. (2019). Quantifying High-order Interdependencies via Multivariate Extensions of the Mutual Information. Physical Review E, 100(3), Article 3. https://doi.org/10.1103/PhysRevE.100.032305 Varley, T. F., Pope, M., Faskowitz, J., & Sporns, O. (2023). Multivariate information theory uncovers synergistic subsystems of the human cerebral cortex. Communications Biology, 6(1), Article 1. https://doi.org/10.1038/s42003-023-04843-w """ ptw: dict avg: float ptw, avg = delta_k(k=0, joint_distribution=joint_distribution) return ptw, avg
[docs] def dual_total_correlation( joint_distribution: dict[tuple[int, ...], float], ) -> (tuple[dict, float], float): """ Computes the local and expected dual total correlations for the joint distribution. .. math:: DTC(X) &= H(X) - \\sum_{i=1}^{N}H(X_i|X^{-i}) \\\\ &= (N-1)\\times TC(X) - \\sum_{i=1}^{N}TC(X^{-i}) Parameters ---------- joint_distribution : dict[tuple, float] The joint probability distribution. Keys are tuples corresponding to the state of each element. The valules are the probabilities. Returns ------- ptw : dict[tuple, float] The pointwise DTC . avg : float The average DTC. References ---------- Abdallah, S. A., & Plumbley, M. D. (2012). A measure of statistical complexity based on predictive information with application to finite spin systems. Physics Letters A, 376(4), 275–281. https://doi.org/10.1016/j.physleta.2011.10.066 Rosas, F., Mediano, P. A. M., Gastpar, M., & Jensen, H. J. (2019). Quantifying High-order Interdependencies via Multivariate Extensions of the Mutual Information. Physical Review E, 100(3), Article 3. https://doi.org/10.1103/PhysRevE.100.032305 """ ptw: dict avg: float ptw, avg = delta_k(k=1, joint_distribution=joint_distribution) return ptw, avg
[docs] def o_information( joint_distribution: dict[tuple[int, ...], float], ) -> (tuple[dict, float], float): """ Computes the local and expected O-informations for the joint distribution. O-information quantifies the balance between redundancy (positive values) and synergy (negative values) in multivariate information. .. math:: \\Omega(X) &= (2-N)TC(X) + \\sum_{i=1}^{N}TC(X^{-i}) \\\\ &= TC(X) - DTC(X) Parameters ---------- joint_distribution : dict[tuple, float] The joint probability distribution. Keys are tuples corresponding to the state of each element. The valules are the probabilities. Returns ------- ptw : dict[tuple, float] The pointwise O-information . avg : float The average O-information. References ---------- Rosas, F., Mediano, P. A. M., Gastpar, M., & Jensen, H. J. (2019). Quantifying High-order Interdependencies via Multivariate Extensions of the Mutual Information. Physical Review E, 100(3), Article 3. https://doi.org/10.1103/PhysRevE.100.032305 Varley, T. F., Pope, M., Faskowitz, J., & Sporns, O. (2023). Multivariate information theory uncovers synergistic subsystems of the human cerebral cortex. Communications Biology, 6(1), Article 1. https://doi.org/10.1038/s42003-023-04843-w """ ptw: dict avg: float ptw, avg = delta_k(k=2, joint_distribution=joint_distribution) return {state: -ptw[state] for state in ptw.keys()}, -avg
[docs] def co_information( joint_distribution: dict[tuple[int, ...], float], ) -> (tuple[dict, float], float): """ Computes the Co-information, the third generalization of bivariate mutual information. Unlike total correlation and dual total correlation, the cO-information can be negative and is difficult to interpret. .. math:: Co(X) = \\sum_{\\xi\\subseteq X}(-1)^{|\\xi|}H(\\xi) Parameters ---------- joint_distribution : dict[tuple, float] The joint probability distribution. Keys are tuples corresponding to the state of each element. The valules are the probabilities. Returns ------- ptw : dict[tuple, float] The pointwise cO-information. avg : float The average cO-information. References ---------- Bell, A. J. (2003, April). The Co-information lattice. 4th International Symposium on Independent Component Analysis and Blind Signal Separation, Nara, Japan. https://www.semanticscholar.org/paper/THE-CO-INFORMATION-LATTICE-Bell/25a0cd8d486d5ffd204485685226f189e6eadd4d """ # Get the lattice of marginal distributions. marginals: dict = get_all_marginal_distributions(joint_distribution) # Convert them to local entropies as a batch. h_marginals = {key: shannon_entropy(marginals[key])[0] for key in marginals} ptw: dict = {state: 0.0 for state in joint_distribution.keys()} avg: float = 0.0 for state in joint_distribution.keys(): for source in h_marginals.keys(): sign = (-1) ** len(source) ptw[state] -= sign * h_marginals[source][reduce_state(state, source)] avg += joint_distribution[state] * ptw[state] return ptw, avg
[docs] def tse_complexity( joint_distribution: dict[tuple[int, ...], float], num_samples ) -> float: """ The Tononi-Sporns-Edelman neural complexity measure, which provides a measure of the balance between integration and segregation across scales. .. math:: TSE(X) &= \\sum_{k=1}^{\\lfloor N/2\\rfloor} \\bigg\\langle I(X^{k}_j;X^{-k}_j) \\bigg\\rangle_{j} \\\\ &= \\sum_{k=2}^{N}\\bigg[\\bigg(\\frac{k}{N}\\bigg)TC(X) - \\langle TC(X^{k}_{j}) \\rangle_{j} \\bigg] Runtimes scale very badly with system size (as it requires brute-forcing) all possible bipartitions of the system. If the system is too large, a sub-sampling approach is taken: at each scale, num_samples are drawn from the space of bipartitions. Parameters ---------- joint_distribution : dict[tuple, float] The joint probability distribution. Keys are tuples corresponding to the state of each element. The valules are the probabilities. num_samples : int The number of samples to do for each subset size.. Returns ------- float The TSE complexity. No local complexity is computed. . References ---------- Tononi, G., Sporns, O., & Edelman, G. M. (1994). A measure for brain complexity: Relating functional segregation and integration in the nervous system. Proceedings of the National Academy of Sciences, 91(11), Article 11. https://doi.org/10.1073/pnas.91.11.5033 Varley, T. F., Pope, M., Faskowitz, J., & Sporns, O. (2023). Multivariate information theory uncovers synergistic subsystems of the human cerebral cortex. Communications Biology, 6(1), Article 1. https://doi.org/10.1038/s42003-023-04843-w """ N: int = len(list(joint_distribution.keys())[0]) tc_whole: float = total_correlation(joint_distribution)[1] null_tcs: np.ndarray = np.array( [(float(i) / float(N)) * tc_whole for i in range(1, N + 1)] ) exp_tcs: np.ndarray = np.zeros(null_tcs.shape[0]) exp_tcs[-1] = tc_whole for k in range(1, N): # For each layer of the TSE-leaf. binom: int = binom_lookup[N][k] if ( binom > num_samples ): # if N-choose-k is greater than the pre-specified number of samples: # Samples is a set to avoid repeats. samples: set = { tuple(sorted(tuple(np.random.choice(N, size=k, replace=False)))) for i in range(num_samples) } else: samples: set = set(it.combinations(range(N), k)) tcs: float = 0.0 sample: tuple for sample in samples: marginal: dict = get_marginal_distribution(sample, joint_distribution) tcs += total_correlation(marginal)[1] exp_tcs[k - 1] = tcs / len(samples) return (null_tcs - exp_tcs).sum()
[docs] def description_complexity( joint_distribution: dict[tuple[int, ...], float], ) -> (tuple[dict, float], float): """ The description complexity was proposed by Tononi and Sporns as a heuristic, easy-to-compute approximation of the full TSE-Complexity. Later shown by Varley et al., to be directly proportional to the dual total correlation. .. math:: C(X) = \\frac{DTC(X)}{N} Where :math:`N` is the number of elements in :math:`X`. Parameters ---------- joint_distribution : dict[tuple, float] The joint probability distribution. Keys are tuples corresponding to the state of each element. The valules are the probabilities. Returns ------- ptw : dict[tuple, float] The pointwise description complexity. avg : float The average description complexity. References ---------- Varley, T. F., Pope, M., Faskowitz, J., & Sporns, O. (2023). Multivariate information theory uncovers synergistic subsystems of the human cerebral cortex. Communications Biology, 6(1), Article 1. https://doi.org/10.1038/s42003-023-04843-w """ N = float(len(list(joint_distribution.keys())[0])) ptw: dict avg: float ptw, avg = delta_k(k=1, joint_distribution=joint_distribution) avg /= N ptw = {key: ptw[key] / N for key in ptw.keys()} return ptw, avg
[docs] def connected_information( joint_distribution: dict[tuple[int, ...], float], maximum_order: int = -1 ) -> list[float]: """ Returns the connected information profile from Schneidman et al., which decomposes the total correlation into contributing parts of different orders: .. math:: TC(X) = \\sum_{k=2}^{N}TC^{k}(X) Where the `k` superscript refers to the maximum-entropy distribution that preserves all marginals of order `k`. One of the few measures that can reliably distinguish between the JAMES_DYADIC and JAMES_TRIADIC distributions. Parameters ---------- joint_distribution : dict[tuple[int, ...], float] The joint probability distribution. Keys are tuples corresponding to the state of each element. The valules are the probabilities. maximum_order : int, optional The highest order of marginals to sweep. The default sweeps all. Returns ------- list[float] The connected information profile. References ---------- Schneidman, E., Still, S., Berry, M. J., & Bialek, W. (2003). Network Information and Connected Correlations. Physical Review Letters, 91(23), 238701. https://doi.org/10.1103/PhysRevLett.91.238701 """ N: int = len(list(joint_distribution.keys())[0]) if maximum_order == -1: maximum_order = N profile: list[float] = [] for order in range(1, maximum_order + 1): maxent: dict[tuple[int, ...], float] = ( constrained_maximum_entropy_distributions( joint_distribution=joint_distribution, order=order ) ) profile.append(shannon_entropy(maxent)[1]) profile = [profile[i-1] - profile[i] for i in range(1, len(profile))] return profile