# Source code for tensorly.regression.cp_plsr

from ..tenalg import khatri_rao, multi_mode_dot
from ..cp_tensor import CPTensor
from .. import backend as T
from .. import unfold, tensor_to_vec
from ..decomposition._cp import parafac

# Author: Cyrillus Tan, Jackson Chin, Aaron Meyer

# License: BSD 3 clause


class CP_PLSR:
    """CP tensor regression

    Learns a low rank CP tensor weight by performing a partial least squares
    regression of a tensor X (>= 2 modes) against a matrix Y. The first modes
    of X and Y are considered coupled, and the decomposition maximizes the
    covariance between them.

    Parameters
    ----------
    n_components : int
        rank of the CP decomposition of the regression weights
    tol : float
        convergence value
    n_iter_max : int, optional, default is 100
        maximum number of iteration
    random_state : None, int or RandomState, optional, default is None
        forwarded to the rank-1 CP decomposition (randomized SVD
        initialisation) performed while fitting each component
    verbose : bool, default is False
        whether to be verbose during fitting

    References
    ----------
    .. [1] Rasmus Bro, "Multiway calibration. Multilinear PLS", Chemometrics, 1996
    """

    def __init__(
        self, n_components, tol=1.0e-9, n_iter_max=100, random_state=None, verbose=False
    ):
        self.n_components = n_components
        self.tol = tol
        self.n_iter_max = n_iter_max
        self.random_state = random_state
        self.verbose = verbose

    def get_params(self, **kwargs):
        """Returns a dictionary of parameters

        Extra keyword arguments are accepted and ignored.
        """
        params = ["n_components", "tol", "n_iter_max", "random_state", "verbose"]
        return {param_name: getattr(self, param_name) for param_name in params}

    def set_params(self, **parameters):
        """Sets the value of the provided parameters

        Each keyword is assigned as an attribute of the same name.

        Returns
        -------
        self
        """
        for parameter, value in parameters.items():
            setattr(self, parameter, value)
        return self

    def fit(self, X, Y):
        """Fits the model to the data (X, Y)

        Parameters
        ----------
        X : ndarray
            tensor data of shape (n_samples, N1, ..., NS)
        Y : 2D-array of shape (n_samples, n_predictions)
            labels associated with each sample; a 1D vector of length
            n_samples is also accepted and reshaped to a single column

        Returns
        -------
        self

        Raises
        ------
        ValueError
            if the first modes of X and Y differ in length, if X has fewer
            than 2 modes, or if Y has neither 1 nor 2 modes
        """
        ## PREPROCESSING
        # Check that both tensors are coupled along the first mode
        if T.shape(X)[0] != T.shape(Y)[0]:
            raise ValueError(
                "The first modes of X and Y must be coupled and have the same length."
            )
        # Work on copies: centering and deflation below are done in place
        X, Y = T.copy(X), T.copy(Y)

        # Check the shape of X and Y; convert vector Y to a matrix
        if T.ndim(X) < 2:
            raise ValueError("X must be at least a 2-mode tensor.")
        if (T.ndim(Y) != 1) and (T.ndim(Y) != 2):
            raise ValueError("Only a matrix (2-mode tensor) Y is allowed.")
        if T.ndim(Y) == 1:
            Y = T.reshape(Y, (-1, 1))

        # Mean center the data, record info in the object
        self.X_shape_ = T.shape(X)
        self.Y_shape_ = T.shape(Y)
        self.X_mean_ = T.mean(X, axis=0)
        self.Y_mean_ = T.mean(Y, axis=0)
        X -= self.X_mean_
        Y -= self.Y_mean_

        # One (mode_length, n_components) factor matrix per mode of X and Y
        self.X_factors = [
            T.zeros((dim, self.n_components), **T.context(X)) for dim in T.shape(X)
        ]
        self.Y_factors = [
            T.zeros((dim, self.n_components), **T.context(X)) for dim in T.shape(Y)
        ]

        ## FITTING EACH COMPONENT
        for component in range(self.n_components):
            comp_X_factors = [ff[:, component] for ff in self.X_factors]
            comp_Y_factors_0 = Y[:, 0]
            # Start from infinity so the first convergence test cannot pass
            old_comp_Y_factors_0 = T.ones(T.shape(comp_Y_factors_0)) * T.inf

            for iteration in range(self.n_iter_max):
                Z = T.tensordot(X, comp_Y_factors_0, axes=((0,), (0,)))

                if T.ndim(Z) >= 2:
                    # random_state is forwarded so that the randomized SVD
                    # initialisation honours the estimator's parameter
                    Z_comp = parafac(
                        Z,
                        1,
                        tol=self.tol,
                        init="svd",
                        svd="randomized_svd",
                        normalize_factors=True,
                        random_state=self.random_state,
                    )[1]
                else:
                    Z_comp = [Z / T.norm(Z)]

                # Mode 0 of Z collapsed by above tensordot
                for mode in range(1, T.ndim(X)):
                    comp_X_factors[mode] = tensor_to_vec(Z_comp[mode - 1])

                comp_X_factors[0] = multi_mode_dot(
                    X, comp_X_factors[1:], range(1, T.ndim(X))
                )
                comp_Y_factors_1 = T.dot(T.transpose(Y), comp_X_factors[0])
                comp_Y_factors_1 /= T.norm(comp_Y_factors_1)
                comp_Y_factors_0 = T.dot(Y, comp_Y_factors_1)

                if T.norm(old_comp_Y_factors_0 - comp_Y_factors_0) < self.tol:
                    if self.verbose:
                        print(
                            f"Component {component}: converged after {iteration} iterations"
                        )
                    break
                old_comp_Y_factors_0 = T.copy(comp_Y_factors_0)

            # Put iteration results back to the parameter variables
            for ii, comp_factor in enumerate(comp_X_factors):
                self.X_factors[ii] = T.index_update(
                    self.X_factors[ii], T.index[:, component], comp_factor
                )
            self.Y_factors[0] = T.index_update(
                self.Y_factors[0], T.index[:, component], comp_Y_factors_0
            )
            self.Y_factors[1] = T.index_update(
                self.Y_factors[1], T.index[:, component], comp_Y_factors_1
            )

            # Deflation
            X -= CPTensor(
                (None, [T.reshape(ff, (-1, 1)) for ff in comp_X_factors])
            ).to_tensor()
            Y -= T.dot(
                T.dot(
                    self.X_factors[0],
                    T.lstsq(self.X_factors[0], T.reshape(comp_Y_factors_0, (-1, 1)))[0],
                ),
                T.reshape(comp_Y_factors_1, (1, -1)),
            )  # Y -= T pinv(T) u q'

        return self

    def predict(self, X):
        """Returns the predicted labels for a new data tensor

        The input tensor is left unmodified.

        Parameters
        ----------
        X : ndarray
            tensor data of shape (n_samples, N1, ..., NS)

        Raises
        ------
        ValueError
            if the non-sample modes of X do not match those seen during fit
        """
        if self.X_shape_[1:] != T.shape(X)[1:]:
            raise ValueError(
                f"Training X has shape {self.X_shape_}, while the new X has shape {T.shape(X)}"
            )
        # Copy before centering so the caller's tensor is not modified in place
        X = T.copy(X)
        X -= self.X_mean_
        factors_kr = khatri_rao(self.X_factors, skip_matrix=0)
        unfolded = unfold(X, 0)
        scores = T.lstsq(factors_kr, T.transpose(unfolded))[0]  # = Tnew
        estimators = T.lstsq(self.X_factors[0], self.Y_factors[0])[0]
        return (
            T.dot(
                T.dot(T.transpose(scores), estimators), T.transpose(self.Y_factors[1])
            )
            + self.Y_mean_
        )

    def transform(self, X, Y=None):
        """Apply the dimension reduction from fitting to a new tensor.

        Works on copies of X and Y; the inputs are left unmodified.

        Parameters
        ----------
        X : ndarray
            tensor data of shape (n_samples, N1, ..., NS); the non-sample
            modes must match those seen during fit
        Y : array-like of shape (n_samples, n_targets), default=None
            Target vectors. A 1D vector is reshaped to a single column.

        Returns
        -------
        X_scores, Y_scores : array-like or tuple of array-like
            Return `X_scores` if `Y` is not given, `(X_scores, Y_scores)` otherwise.

        Raises
        ------
        ValueError
            if the trailing shape of X or Y differs from the training data,
            or if Y has neither 1 nor 2 modes
        """
        if self.X_shape_[1:] != T.shape(X)[1:]:
            raise ValueError(
                f"Training X has shape {self.X_shape_}, while the new X has shape {T.shape(X)}"
            )
        X = T.copy(X)
        X -= self.X_mean_
        X_scores = T.zeros((T.shape(X)[0], self.n_components), **T.context(X))

        for component in range(self.n_components):
            X_scores = T.index_update(
                X_scores,
                T.index[:, component],
                multi_mode_dot(
                    X,
                    [ff[:, component] for ff in self.X_factors[1:]],
                    range(1, T.ndim(X)),
                ),
            )
            # Deflate X by the rank-1 term of the component just scored
            X -= CPTensor(
                (
                    None,
                    [T.reshape(X_scores[:, component], (-1, 1))]
                    + [
                        T.reshape(ff[:, component], (-1, 1))
                        for ff in self.X_factors[1:]
                    ],
                )
            ).to_tensor()

        if Y is not None:
            Y = T.copy(Y)
            # Check on the shape of Y
            if (T.ndim(Y) != 1) and (T.ndim(Y) != 2):
                raise ValueError("Only a matrix (2-mode tensor) Y is allowed.")
            if T.ndim(Y) == 1:
                Y = T.reshape(Y, (-1, 1))
            if self.Y_shape_[1:] != T.shape(Y)[1:]:
                raise ValueError(
                    f"Training Y has shape {self.Y_shape_}, while the new Y has shape {T.shape(Y)}"
                )

            Y -= self.Y_mean_
            Y_scores = T.zeros((T.shape(Y)[0], self.n_components), **T.context(X))

            for component in range(self.n_components):
                Y_scores = T.index_update(
                    Y_scores,
                    T.index[:, component],
                    T.dot(Y, self.Y_factors[1][:, component]),
                )
                Y -= T.dot(
                    T.dot(
                        T.lstsq(T.transpose(X_scores), T.transpose(X_scores))[0],
                        Y_scores[:, [component]],
                    ),
                    T.transpose(self.Y_factors[1][:, [component]]),
                )  # Y -= T pinv(T) u q'
            return X_scores, Y_scores

        return X_scores

    def fit_transform(self, X, Y):
        """Learn and apply the dimension reduction on the train data.

        Equivalent to ``self.fit(X, Y).transform(X, Y)``.

        Parameters
        ----------
        X : ndarray
            training tensor of shape (n_samples, N1, ..., NS)
        Y : array-like of shape (n_samples, n_targets)
            Target vectors, where `n_samples` is the number of samples and
            `n_targets` is the number of response variables.

        Returns
        -------
        X_scores, Y_scores : tuple of array-like
            scores of the training data, as returned by `transform(X, Y)`
        """
        return self.fit(X, Y).transform(X, Y)