
# -*- coding: utf-8 -*-

__author__ = "Marie Roald & Yngve Mardal Moe"

"""
This module contains functions used to evaluate a single tensor factorisation model
by comparing it to a data tensor.
"""
import numpy as np
import scipy.linalg as sla

from ._tl_utils import _handle_tensorly_backends_cp, _handle_tensorly_backends_dataset, to_numpy
from ._xarray_wrapper import _handle_labelled_cp, _handle_labelled_dataset
from .utils import _alias_mode_axis, cp_to_tensor

__all__ = [
    "estimate_core_tensor",
    "core_consistency",
    "sse",
    "relative_sse",
    "fit",
    "predictive_power",
]


[docs]@_handle_tensorly_backends_dataset("dataset", None) @_handle_labelled_dataset("dataset", None) def estimate_core_tensor(factors, dataset): """Efficient estimation of the Tucker core from a factor matrices and a data tensor. Parameters ---------- factors : tuple Tuple of factor matrices used to estimate the core tensor from dataset : np.ndarray The data tensor that the core tensor is estimated from Notes ----- In the original paper, :cite:t:`papalexakis2015fast` present an algorithm for 3-way tensors. However, it is straightforward to generalise it to N-way tensors by using the inverse tensor product formula in :cite:p:`buis1996efficient`. """ factors = [to_numpy(factor, cast_labelled=True) for factor in factors] svds = [sla.svd(factor, full_matrices=False) for factor in factors] for U, s, Vh in svds[::-1]: dataset = np.tensordot(U.T, dataset, (1, dataset.ndim - 1)) for U, s, Vh in svds[::-1]: s_pinv = s.copy() mask = s_pinv != 0 s_pinv[mask] = 1 / s_pinv[mask] dataset = np.tensordot(np.diag(s_pinv), dataset, (1, dataset.ndim - 1)) for U, s, Vh in svds[::-1]: dataset = np.tensordot(Vh.T, dataset, (1, dataset.ndim - 1)) return np.ascontiguousarray(dataset)
[docs]@_handle_tensorly_backends_dataset("dataset", None) @_handle_tensorly_backends_cp("cp_tensor", None) @_handle_labelled_dataset("dataset", None) @_handle_labelled_cp("cp_tensor", None) def core_consistency(cp_tensor, dataset, normalised=False): r"""Computes the core consistency :cite:p:`bro2003new` A CP model can be interpreted as a restricted Tucker model, where the core tensor is constrained to be superdiagonal. For a third order tensor, this means that the core tensor, :math:`\mathcal{G}`, satisfy :math:`g_{ijk}\neq0` only if :math:`i = j = k`. To compute the core consistency of a CP decomposition, we use this property, and calculate the optimal Tucker core tensor given the factor matrices of the CP model. The key observation is that if the data tensor follows the assumptions of the CP model, then the optimal core tensor should be similar to that of the CP model, i. e. superdiagonal. However, if the data can be better described by allowing for interactions between the components across modes, then the core tensor will have non-zero off-diagonal. The core consistency quantifies this measure and is defined as: .. math:: \text{CC} = 100 - 100 \frac{\| \mathcal{G} - \mathcal{I} \|_F^2}{N} where :math:`\mathcal{G}` is the estimated core tensor, :math:`\mathcal{I}` is a superdiagonal tensor only ones on the superdiagonal and :math:`N` is a normalising factor, either equal to the number of components or the squared frobenius norm of the estimated core tensor. A core consistency score close to 100 indicates that the CP model is likely valid. If the core consistency is low, however, then the model either has components that describe noise or the data does not follow the model's assumptions. So the core consistency can help determine if the chosen number of components is suitable. Parameters ---------- cp_tensor : CPTensor or tuple TensorLy-style CPTensor object or tuple with weights as first argument and a tuple of components as second argument dataset : np.ndarray Data tensor that the cp_tensor is fitted against normalised : Bool (default=False) If True, then the squared frobenius norm of the estimated core tensor is used to normalise the core consistency. Otherwise, the number of components is used. If ``normalised=False``, then the core consistency formula coincides with :cite:p:`bro2003new`, and if ``normalised=True``, the core consistency formula coincides with that used in the `N-Way toolbox <http://models.life.ku.dk/nwaytoolbox>`_, and is unlikely to be less than 0. For core consistencies close to 100, the formulas approximately coincide. Returns ------- float The core consistency Examples -------- We can use the core consistency diagonstic to determine the correct number of components for a CP model. Here, we only fit one model, but in practice, you should fit multiple models and select the one with the lowest SSE (to account for local minima) before computing the core consistency. >>> from tlviz.data import simulated_random_cp_tensor >>> from tensorly.decomposition import parafac >>> cp_tensor, dataset = simulated_random_cp_tensor((10,11,12), 3, seed=42) >>> # Fit many CP models with different number of components >>> for rank in range(1, 5): ... decomposition = parafac(dataset, rank=rank, random_state=42) ... cc = core_consistency(decomposition, dataset, normalised=True) ... print(f"No. components: {rank} - core consistency: {cc:.0f}") No. components: 1 - core consistency: 100 No. components: 2 - core consistency: 100 No. components: 3 - core consistency: 81 No. 
components: 4 - core consistency: 0 .. note:: This implementation uses the fast method of estimating the core tensor :cite:p:`papalexakis2015fast,buis1996efficient` """ # Distribute weights weights, factors = cp_tensor rank = factors[0].shape[1] A = factors[0].copy() if weights is not None: A *= weights.reshape(1, -1) factors = tuple((A, *factors[1:])) # Estimate core and compare G = estimate_core_tensor(factors, dataset) T = np.zeros([rank] * dataset.ndim) np.fill_diagonal(T, 1) if normalised: denom = np.sum(G**2) else: denom = rank return 100 - 100 * np.sum((G - T) ** 2) / denom
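# A minimal sketch (illustrative, not part of the library): the value returned by
# ``core_consistency`` can be reproduced directly from ``estimate_core_tensor`` and the
# formula in the docstring. ``decomposition`` and ``dataset`` are assumed to be the fitted
# model and data tensor from the docstring example, with ``weights`` given as an array
# (as returned by ``tensorly.decomposition.parafac``).
#
#     import numpy as np
#     weights, (A, B, C) = decomposition
#     G = estimate_core_tensor((A * weights, B, C), dataset)   # weights folded into mode 0
#     T = np.zeros_like(G)
#     np.fill_diagonal(T, 1)                                   # superdiagonal target tensor
#     cc = 100 - 100 * np.sum((G - T) ** 2) / np.sum(G ** 2)   # normalised=True variant

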
[docs]@_handle_tensorly_backends_dataset("dataset", None) @_handle_tensorly_backends_cp("cp_tensor", None) @_handle_labelled_dataset("dataset", None) @_handle_labelled_cp("cp_tensor", None) def sse(cp_tensor, dataset): """Compute the sum of squared error for a given cp_tensor. Parameters ---------- cp_tensor : CPTensor or tuple TensorLy-style CPTensor object or tuple with weights as first argument and a tuple of components as second argument dataset : ndarray Tensor approximated by ``cp_tensor`` Returns ------- float The sum of squared error, ``sum((X_hat - dataset)**2)``, where ``X_hat`` is the dense tensor represented by ``cp_tensor`` Examples -------- Below, we create a random CP tensor and a random tensor and compute the sum of squared error for these two tensors. >>> import tensorly as tl >>> from tensorly.random import random_cp >>> from tlviz.model_evaluation import sse >>> rng = tl.check_random_state(0) >>> cp = random_cp((4, 5, 6), 3, random_state=rng) >>> X = rng.random_sample((4, 5, 6)) >>> sse(cp, X) 18.948918157419186 """ X_hat = cp_to_tensor(cp_tensor) return np.sum((dataset - X_hat) ** 2)
[docs]@_handle_tensorly_backends_dataset("dataset", None) @_handle_tensorly_backends_cp("cp_tensor", None) @_handle_labelled_dataset("dataset", None) @_handle_labelled_cp("cp_tensor", None) def relative_sse(cp_tensor, dataset, sum_squared_dataset=None): """Compute the relative sum of squared error for a given cp_tensor. Parameters ---------- cp_tensor : CPTensor or tuple TensorLy-style CPTensor object or tuple with weights as first argument and a tuple of components as second argument dataset : ndarray Tensor approximated by ``cp_tensor`` sum_squared_dataset: float (optional) If ``sum(dataset**2)`` is already computed, you can optionally provide it using this argument to avoid unnecessary recalculation. Returns ------- float The relative sum of squared error, ``sum((X_hat - dataset)**2)/sum(dataset**2)``, where ``X_hat`` is the dense tensor represented by ``cp_tensor`` Examples -------- Below, we create a random CP tensor and a random tensor and compute the sum of squared error for these two tensors. >>> import tensorly as tl >>> from tensorly.random import random_cp >>> from tlviz.model_evaluation import relative_sse >>> rng = tl.check_random_state(0) >>> cp = random_cp((4, 5, 6), 3, random_state=rng) >>> X = rng.random_sample((4, 5, 6)) >>> relative_sse(cp, X) 0.4817407254961442 """ if sum_squared_dataset is None: sum_squared_x = np.sum(dataset**2) return sse(cp_tensor, dataset) / sum_squared_x
[docs]@_handle_tensorly_backends_dataset("dataset", None) @_handle_tensorly_backends_cp("cp_tensor", None) @_handle_labelled_dataset("dataset", None) @_handle_labelled_cp("cp_tensor", None) def fit(cp_tensor, dataset, sum_squared_dataset=None): """Compute the fit (1-relative sum squared error) for a given cp_tensor. Parameters ---------- cp_tensor : CPTensor or tuple TensorLy-style CPTensor object or tuple with weights as first argument and a tuple of components as second argument dataset : ndarray Tensor approximated by ``cp_tensor`` sum_squared_dataset: float (optional) If ``sum(dataset**2)`` is already computed, you can optionally provide it using this argument to avoid unnecessary recalculation. Returns ------- float The relative sum of squared error, ``sum((X_hat - dataset)**2)/sum(dataset**2)``, where ``X_hat`` is the dense tensor represented by ``cp_tensor`` Examples -------- Below, we create a random CP tensor and a random tensor and compute the sum of squared error for these two tensors. >>> import tensorly as tl >>> from tensorly.random import random_cp >>> from tlviz.model_evaluation import fit >>> rng = tl.check_random_state(0) >>> cp = random_cp((4, 5, 6), 3, random_state=rng) >>> X = rng.random_sample((4, 5, 6)) >>> fit(cp, X) 0.5182592745038558 We can see that it is equal to 1 - relative SSE >>> from tlviz.model_evaluation import relative_sse >>> 1 - relative_sse(cp, X) 0.5182592745038558 """ return 1 - relative_sse(cp_tensor, dataset, sum_squared_dataset=sum_squared_dataset)
[docs]@_alias_mode_axis() @_handle_tensorly_backends_cp("cp_tensor", None) def predictive_power(cp_tensor, y, sklearn_estimator, mode=0, metric=None, axis=None): """Use scikit-learn estimator to evaluate the predictive power of a factor matrix. This is useful if you evaluate the components based on their predictive power with respect to some task. Parameters ---------- factor_matrix : ndarray(ndim=2) Factor matrix from a tensor decomposition model y : ndarray(ndim=1) Prediction target for each row of the factor matrix in the given mode. ``y`` should have same length as the first dimension of this factor matrix (i.e. the length of the tensor along the given mode). sklearn_estimator : scikit learn estimator Scikit learn estimator. Must have the ``fit`` and ``predict`` methods, and if ``metric`` is ``None``, then it should also have the ``score`` method. See https://scikit-learn.org/stable/developers/develop.html. mode : int Which mode to perform the scoring along metric : Callable Callable (typically function) with the signature ``metric(y_true, y_pred)``, where ``y_true=labels`` and ``y_pred`` is the predicted values obtained from ``sklearn_estimator``. See https://scikit-learn.org/stable/developers/develop.html#specific-models. axis : int (optional) Alias for mode, if set, then mode cannot be set. Returns ------- float Score based on the estimator's performance. Examples -------- ``predictive_power`` can be useful to evaluate the predictive power of a CP decomposition. To illustrate this, we start by creating a simulated CP tensor and a variable we want to predict that is linearly related to one of the factor matrices. >>> from tlviz.data import simulated_random_cp_tensor >>> import numpy as np >>> rng = np.random.default_rng(0) >>> cp_tensor, X = simulated_random_cp_tensor((30, 10, 10), 3, noise_level=0.1, seed=rng) >>> weights, (A, B, C) = cp_tensor >>> regression_coefficients = rng.standard_normal((3, 1)) >>> Y = A @ regression_coefficients Next, we fit a PARAFAC model to this data >>> from tensorly.decomposition import parafac >>> est_cp_tensor = parafac(X, 3) Finally, we see how well the estimated decomposition can describe our target variable, ``Y``. This will use the :math:`R^2`-coefficient for scoring, as that is the default scoring method for linear models. >>> from sklearn.linear_model import LinearRegression >>> from tlviz.model_evaluation import predictive_power >>> linear_regression = LinearRegression() >>> r_squared = predictive_power(cp_tensor, Y, linear_regression) >>> print(f"The R^2 coefficient is {r_squared:.2f}") The R^2 coefficient is 1.00 We can also specify our own scoring function >>> from sklearn.metrics import max_error >>> highest_error = predictive_power(cp_tensor, Y, linear_regression, metric=max_error) >>> print(f"The maximum error is {highest_error:.2f}") The maximum error is 0.00 """ factor_matrix = cp_tensor[1][mode] sklearn_estimator.fit(factor_matrix, y) if metric is None: return sklearn_estimator.score(factor_matrix, y) return metric(y, sklearn_estimator.predict(factor_matrix))