
# -*- coding: utf-8 -*-

__author__ = "Marie Roald & Yngve Mardal Moe"

"""
This module contains functions used to evaluate a single tensor factorisation model
by comparing it to a data tensor.
"""
import numpy as np
import scipy.linalg as sla

from ._tl_utils import _handle_tensorly_backends_cp, _handle_tensorly_backends_dataset, to_numpy
from ._xarray_wrapper import _handle_labelled_cp, _handle_labelled_dataset
from .utils import _alias_mode_axis, cp_to_tensor

__all__ = [
    "estimate_core_tensor",
    "core_consistency",
    "sse",
    "relative_sse",
    "fit",
    "predictive_power",
]


[docs]@_handle_tensorly_backends_dataset("dataset", None) @_handle_labelled_dataset("dataset", None) def estimate_core_tensor(factors, dataset): """Efficient estimation of the Tucker core from a factor matrices and a data tensor. Parameters ---------- factors : tuple Tuple of factor matrices used to estimate the core tensor from dataset : np.ndarray The data tensor that the core tensor is estimated from Notes ----- In the original paper, :cite:t:`papalexakis2015fast` present an algorithm for 3-way tensors. However, it is straightforward to generalise it to N-way tensors by using the inverse tensor product formula in :cite:p:`buis1996efficient`. """ factors = [to_numpy(factor, cast_labelled=True) for factor in factors] svds = [sla.svd(factor, full_matrices=False) for factor in factors] for U, s, Vh in svds[::-1]: dataset = np.tensordot(U.T, dataset, (1, dataset.ndim - 1)) for U, s, Vh in svds[::-1]: s_pinv = s.copy() mask = s_pinv != 0 s_pinv[mask] = 1 / s_pinv[mask] dataset = np.tensordot(np.diag(s_pinv), dataset, (1, dataset.ndim - 1)) for U, s, Vh in svds[::-1]: dataset = np.tensordot(Vh.T, dataset, (1, dataset.ndim - 1)) return np.ascontiguousarray(dataset)
[docs]@_handle_tensorly_backends_dataset("dataset", None) @_handle_tensorly_backends_cp("cp_tensor", None) @_handle_labelled_dataset("dataset", None) @_handle_labelled_cp("cp_tensor", None) def core_consistency(cp_tensor, dataset, normalised=False): r"""Computes the core consistency :cite:p:`bro2003new` A CP model can be interpreted as a restricted Tucker model, where the core tensor is constrained to be superdiagonal. For a third order tensor, this means that the core tensor, :math:`\mathcal{G}`, satisfy :math:`g_{ijk}\neq0` only if :math:`i = j = k`. To compute the core consistency of a CP decomposition, we use this property, and calculate the optimal Tucker core tensor given the factor matrices of the CP model. The key observation is that if the data tensor follows the assumptions of the CP model, then the optimal core tensor should be similar to that of the CP model, i. e. superdiagonal. However, if the data can be better described by allowing for interactions between the components across modes, then the core tensor will have non-zero off-diagonal. The core consistency quantifies this measure and is defined as: .. math:: \text{CC} = 100 - 100 \frac{\| \mathcal{G} - \mathcal{I} \|_F^2}{N} where :math:`\mathcal{G}` is the estimated core tensor, :math:`\mathcal{I}` is a superdiagonal tensor only ones on the superdiagonal and :math:`N` is a normalising factor, either equal to the number of components or the squared frobenius norm of the estimated core tensor. A core consistency score close to 100 indicates that the CP model is likely valid. If the core consistency is low, however, then the model either has components that describe noise or the data does not follow the model's assumptions. So the core consistency can help determine if the chosen number of components is suitable. Parameters ---------- cp_tensor : CPTensor or tuple TensorLy-style CPTensor object or tuple with weights as first argument and a tuple of components as second argument dataset : np.ndarray Data tensor that the cp_tensor is fitted against normalised : Bool (default=False) If True, then the squared frobenius norm of the estimated core tensor is used to normalise the core consistency. Otherwise, the number of components is used. If ``normalised=False``, then the core consistency formula coincides with :cite:p:`bro2003new`, and if ``normalised=True``, the core consistency formula coincides with that used in the `N-Way toolbox <http://models.life.ku.dk/nwaytoolbox>`_, and is unlikely to be less than 0. For core consistencies close to 100, the formulas approximately coincide. Returns ------- float The core consistency Examples -------- We can use the core consistency diagonstic to determine the correct number of components for a CP model. Here, we only fit one model, but in practice, you should fit multiple models and select the one with the lowest SSE (to account for local minima) before computing the core consistency. >>> from tlviz.data import simulated_random_cp_tensor >>> from tensorly.decomposition import parafac >>> cp_tensor, dataset = simulated_random_cp_tensor((10,11,12), 3, seed=42) >>> # Fit many CP models with different number of components >>> for rank in range(1, 5): ... decomposition = parafac(dataset, rank=rank, random_state=42) ... cc = core_consistency(decomposition, dataset, normalised=True) ... print(f"No. components: {rank} - core consistency: {cc:.0f}") No. components: 1 - core consistency: 100 No. components: 2 - core consistency: 100 No. components: 3 - core consistency: 81 No. 
components: 4 - core consistency: 0 .. note:: This implementation uses the fast method of estimating the core tensor :cite:p:`papalexakis2015fast,buis1996efficient` """ # Distribute weights weights, factors = cp_tensor rank = factors[0].shape[1] A = factors[0].copy() if weights is not None: A *= weights.reshape(1, -1) factors = tuple((A, *factors[1:])) # Estimate core and compare G = estimate_core_tensor(factors, dataset) T = np.zeros([rank] * dataset.ndim) np.fill_diagonal(T, 1) if normalised: denom = np.sum(G**2) else: denom = rank return 100 - 100 * np.sum((G - T) ** 2) / denom
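# A minimal sketch (illustrative, not part of the library): the value returned by
# ``core_consistency`` can be reproduced directly from ``estimate_core_tensor`` and the
# formula in the docstring. ``decomposition`` and ``dataset`` are assumed to be the fitted
# model and data tensor from the docstring example, with ``weights`` given as an array
# (as returned by ``tensorly.decomposition.parafac``).
#
#     import numpy as np
#     weights, (A, B, C) = decomposition
#     G = estimate_core_tensor((A * weights, B, C), dataset)   # weights folded into mode 0
#     T = np.zeros_like(G)
#     np.fill_diagonal(T, 1)                                   # superdiagonal target tensor
#     cc = 100 - 100 * np.sum((G - T) ** 2) / np.sum(G ** 2)   # normalised=True variant

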
[docs]@_handle_tensorly_backends_dataset("dataset", None) @_handle_tensorly_backends_cp("cp_tensor", None) @_handle_labelled_dataset("dataset", None) @_handle_labelled_cp("cp_tensor", None) def sse(cp_tensor, dataset): """Compute the sum of squared error for a given cp_tensor. Parameters ---------- cp_tensor : CPTensor or tuple TensorLy-style CPTensor object or tuple with weights as first argument and a tuple of components as second argument dataset : ndarray Tensor approximated by ``cp_tensor`` Returns ------- float The sum of squared error, ``sum((X_hat - dataset)**2)``, where ``X_hat`` is the dense tensor represented by ``cp_tensor`` Examples -------- Below, we create a random CP tensor and a random tensor and compute the sum of squared error for these two tensors. >>> import tensorly as tl >>> from tensorly.random import random_cp >>> from tlviz.model_evaluation import sse >>> rng = tl.check_random_state(0) >>> cp = random_cp((4, 5, 6), 3, random_state=rng) >>> X = rng.random_sample((4, 5, 6)) >>> sse(cp, X) 18.948918157419186 """ X_hat = cp_to_tensor(cp_tensor) return np.sum((dataset - X_hat) ** 2)
[docs]@_handle_tensorly_backends_dataset("dataset", None) @_handle_tensorly_backends_cp("cp_tensor", None) @_handle_labelled_dataset("dataset", None) @_handle_labelled_cp("cp_tensor", None) def relative_sse(cp_tensor, dataset, sum_squared_dataset=None): """Compute the relative sum of squared error for a given cp_tensor. Parameters ---------- cp_tensor : CPTensor or tuple TensorLy-style CPTensor object or tuple with weights as first argument and a tuple of components as second argument dataset : ndarray Tensor approximated by ``cp_tensor`` sum_squared_dataset: float (optional) If ``sum(dataset**2)`` is already computed, you can optionally provide it using this argument to avoid unnecessary recalculation. Returns ------- float The relative sum of squared error, ``sum((X_hat - dataset)**2)/sum(dataset**2)``, where ``X_hat`` is the dense tensor represented by ``cp_tensor`` Examples -------- Below, we create a random CP tensor and a random tensor and compute the sum of squared error for these two tensors. >>> import tensorly as tl >>> from tensorly.random import random_cp >>> from tlviz.model_evaluation import relative_sse >>> rng = tl.check_random_state(0) >>> cp = random_cp((4, 5, 6), 3, random_state=rng) >>> X = rng.random_sample((4, 5, 6)) >>> relative_sse(cp, X) 0.4817407254961442 """ if sum_squared_dataset is None: sum_squared_x = np.sum(dataset**2) return sse(cp_tensor, dataset) / sum_squared_x
[docs]@_handle_tensorly_backends_dataset("dataset", None) @_handle_tensorly_backends_cp("cp_tensor", None) @_handle_labelled_dataset("dataset", None) @_handle_labelled_cp("cp_tensor", None) def fit(cp_tensor, dataset, sum_squared_dataset=None): """Compute the fit (1-relative sum squared error) for a given cp_tensor. Parameters ---------- cp_tensor : CPTensor or tuple TensorLy-style CPTensor object or tuple with weights as first argument and a tuple of components as second argument dataset : ndarray Tensor approximated by ``cp_tensor`` sum_squared_dataset: float (optional) If ``sum(dataset**2)`` is already computed, you can optionally provide it using this argument to avoid unnecessary recalculation. Returns ------- float The relative sum of squared error, ``sum((X_hat - dataset)**2)/sum(dataset**2)``, where ``X_hat`` is the dense tensor represented by ``cp_tensor`` Examples -------- Below, we create a random CP tensor and a random tensor and compute the sum of squared error for these two tensors. >>> import tensorly as tl >>> from tensorly.random import random_cp >>> from tlviz.model_evaluation import fit >>> rng = tl.check_random_state(0) >>> cp = random_cp((4, 5, 6), 3, random_state=rng) >>> X = rng.random_sample((4, 5, 6)) >>> fit(cp, X) 0.5182592745038558 We can see that it is equal to 1 - relative SSE >>> from tlviz.model_evaluation import relative_sse >>> 1 - relative_sse(cp, X) 0.5182592745038558 """ return 1 - relative_sse(cp_tensor, dataset, sum_squared_dataset=sum_squared_dataset)
[docs]@_alias_mode_axis() @_handle_tensorly_backends_cp("cp_tensor", None) def predictive_power(cp_tensor, y, sklearn_estimator, mode=0, metric=None, axis=None): """Use scikit-learn estimator to evaluate the predictive power of a factor matrix. This is useful if you evaluate the components based on their predictive power with respect to some task. Parameters ---------- factor_matrix : ndarray(ndim=2) Factor matrix from a tensor decomposition model y : ndarray(ndim=1) Prediction target for each row of the factor matrix in the given mode. ``y`` should have same length as the first dimension of this factor matrix (i.e. the length of the tensor along the given mode). sklearn_estimator : scikit learn estimator Scikit learn estimator. Must have the ``fit`` and ``predict`` methods, and if ``metric`` is ``None``, then it should also have the ``score`` method. See https://scikit-learn.org/stable/developers/develop.html. mode : int Which mode to perform the scoring along metric : Callable Callable (typically function) with the signature ``metric(y_true, y_pred)``, where ``y_true=labels`` and ``y_pred`` is the predicted values obtained from ``sklearn_estimator``. See https://scikit-learn.org/stable/developers/develop.html#specific-models. axis : int (optional) Alias for mode, if set, then mode cannot be set. Returns ------- float Score based on the estimator's performance. Examples -------- ``predictive_power`` can be useful to evaluate the predictive power of a CP decomposition. To illustrate this, we start by creating a simulated CP tensor and a variable we want to predict that is linearly related to one of the factor matrices. >>> from tlviz.data import simulated_random_cp_tensor >>> import numpy as np >>> rng = np.random.default_rng(0) >>> cp_tensor, X = simulated_random_cp_tensor((30, 10, 10), 3, noise_level=0.1, seed=rng) >>> weights, (A, B, C) = cp_tensor >>> regression_coefficients = rng.standard_normal((3, 1)) >>> Y = A @ regression_coefficients Next, we fit a PARAFAC model to this data >>> from tensorly.decomposition import parafac >>> est_cp_tensor = parafac(X, 3) Finally, we see how well the estimated decomposition can describe our target variable, ``Y``. This will use the :math:`R^2`-coefficient for scoring, as that is the default scoring method for linear models. >>> from sklearn.linear_model import LinearRegression >>> from tlviz.model_evaluation import predictive_power >>> linear_regression = LinearRegression() >>> r_squared = predictive_power(cp_tensor, Y, linear_regression) >>> print(f"The R^2 coefficient is {r_squared:.2f}") The R^2 coefficient is 1.00 We can also specify our own scoring function >>> from sklearn.metrics import max_error >>> highest_error = predictive_power(cp_tensor, Y, linear_regression, metric=max_error) >>> print(f"The maximum error is {highest_error:.2f}") The maximum error is 0.00 """ factor_matrix = cp_tensor[1][mode] sklearn_estimator.fit(factor_matrix, y) if metric is None: return sklearn_estimator.score(factor_matrix, y) return metric(y, sklearn_estimator.predict(factor_matrix))