Source code for tensorly.metrics.similarity

import tensorly as tl

# Authors: Hratch Baghdassarian <hmbaghdassarian@gmail.com>, Erick Armingol <earmingol14@gmail.com>
# similarity metrics for tensor decompositions



[docs]
def correlation_index(
    factors_1: list, factors_2: list, tol: float = 5e-16, method: str = "stacked"
) -> float:
    """CorrIndex implementation to assess tensor decomposition outputs.
    From [1] Sobhani et al 2022 (https://doi.org/10.1016/j.sigpro.2022.108457).
    Metric is scaling and column-permutation invariant, wherein each column is a factor.

    Parameters
    ----------
    factors_1 : list
        The loading/factor matrices [A_1 ... A_N] for a low-rank tensor from its factors, output from first decomposition

    factors_2 : list
        The loading/factor matrices [A_1 ... A_N] for a low-rank tensor from its factors, output from second decomposition

    tol : float, optional
        Precision threshold below which to call the CorrIndex score 0, by default 5e-16

    method : str, optional
        Method to obtain the CorrIndex by comparing the A matrices from two decompositions, by default 'stacked'.
        Possible options are:
            - 'stacked' : The original method implemented in [1]. Here all A matrices from the same decomposition are
                          vertically concatenated, building a big A matrix for each decomposition.
            - 'max_score' : This computes the CorrIndex for each pair of A matrices (i.e. between A_1 in factors_1 and
                            factors_2, between A_2 in factors_1 and factors_2, and so on). Then the max score is
                            selected (the most conservative approach). In other words, it selects the max score among the
                            CorrIndexes computed dimension-wise.
            - 'min_score' : Similar to 'max_score', but the min score is selected (the least conservative approach).
            - 'avg_score' : Similar to 'max_score', but the avg score is selected.

    Returns
    -------
    score : float
         CorrIndex metric [0,1]; lower score indicates higher similarity between matrices

    Raises
    ------
    ValueError
        If the matrices within one decomposition do not all have the same number of columns,
        if `method` is not one of the supported options,
        if a per-mode method is requested and the two decompositions do not contain the same
        number of factor matrices,
        if the matrices being compared do not have the same shape,
        or if any column of a compared matrix has zero norm.
    """
    # check input factors shape: every matrix of a decomposition must have the same
    # number of columns (an empty list also fails this check)
    for factors in (factors_1, factors_2):
        if len({tl.shape(A)[1] for A in factors}) != 1:
            raise ValueError(
                "Factors should be a list of loading matrices of the same rank"
            )

    # check method
    options = ["stacked", "max_score", "min_score", "avg_score"]
    if method not in options:
        raise ValueError(f"The `method` must be either option among {options}")

    if method == "stacked":
        # vertically stack loading matrices -- shape sum(tensor.shape)xR)
        X_1 = [tl.concatenate(factors_1, 0)]
        X_2 = [tl.concatenate(factors_2, 0)]
    else:
        # Per-mode comparison pairs the matrices positionally. Without this check,
        # zip() below would silently drop the unmatched trailing matrices.
        if len(factors_1) != len(factors_2):
            raise ValueError(
                "Both decompositions must contain the same number of factor matrices"
            )
        X_1 = factors_1
        X_2 = factors_2

    for x1, x2 in zip(X_1, X_2):
        if tl.shape(x1) != tl.shape(x2):
            raise ValueError("Factor matrices should be of the same shapes")

    # normalize columns to L2 norm - even if ran decomposition with normalize_factors=True
    col_norm_1 = [tl.norm(x1, axis=0) for x1 in X_1]
    col_norm_2 = [tl.norm(x2, axis=0) for x2 in X_2]
    for cn1, cn2 in zip(col_norm_1, col_norm_2):
        # a zero-norm column cannot be normalized (division by zero below)
        if tl.any(cn1 == 0) or tl.any(cn2 == 0):
            raise ValueError("Column norms must be non-zero")
    X_1 = [x1 / cn1 for x1, cn1 in zip(X_1, col_norm_1)]
    X_2 = [x2 / cn2 for x2, cn2 in zip(X_2, col_norm_2)]

    corr_idxs = [
        _compute_correlation_index(x1, x2, tol=tol) for x1, x2 in zip(X_1, X_2)
    ]

    # `corr_idxs` is a plain Python list of scalar scores, so aggregate it with
    # Python builtins instead of relying on backend reductions accepting a list.
    if method == "stacked":
        return corr_idxs[0]
    if method == "max_score":
        return max(corr_idxs)
    if method == "min_score":
        return min(corr_idxs)
    # only "avg_score" remains, since `method` was validated above
    return sum(corr_idxs) / len(corr_idxs)



def _compute_correlation_index(x1, x2, tol: float = 5e-16) -> float:
    """Computes the CorrIndex from two column-normalized A matrices.

    Parameters
    ----------
    x1 : tensor
        Matrix from the first tensor decomposition; each column is a factor and is
        expected to be L2-normalized by the caller (this function does not normalize).

    x2 : tensor
        Matrix from the second tensor decomposition, with the same number of rows as
        `x1`; columns are expected to be L2-normalized by the caller.

    tol : float, optional
        Precision threshold below which to call the CorrIndex score 0, by default 5e-16

    Returns
    -------
    score : float
         CorrIndex metric [0,1]; lower score indicates higher similarity between matrices
    """
    # generate the correlation index input: absolute values of x1^H x2, i.e. entry
    # (i, j) compares column i of x1 with column j of x2
    c_prod_mtx = tl.abs(tl.matmul(tl.conj(tl.transpose(x1)), x2))

    # correlation index scoring: for every row and every column, measure how far its
    # best match is from 1, then average over rows + columns
    n_rows, n_cols = tl.shape(c_prod_mtx)[0], tl.shape(c_prod_mtx)[1]
    n_elements = n_cols + n_rows
    row_gap = tl.sum(tl.abs(tl.max(c_prod_mtx, 1) - 1))
    col_gap = tl.sum(tl.abs(tl.max(c_prod_mtx, 0) - 1))
    score = (1 / (n_elements)) * (row_gap + col_gap)

    # clamp numerical noise to an exact zero (as a float, matching the return type)
    if score < tol:
        score = 0.0
    return score