Source code for tlda.second_order_cumulant

import tensorly as tl
from sklearn.decomposition import IncrementalPCA
# cuml is optional; it is only needed when the "cupy" backend is selected
try:
    import cuml
except ImportError:
    pass

class SecondOrderCumulant():
    """Class to compute the second order cumulant."""

    def __init__(self, n_eigenvec, alpha_0, batch_size):
        """Computes the second order cumulant from centered batches of data,
        returns the whitened tensor.

        Parameters
        ----------
        n_eigenvec : int
            Number of eigenvectors to retain; corresponds to the number of
            topics (n_topic) in the Tensor LDA
        alpha_0 : int
            Mixing parameter for the topic weights
        batch_size : int
            Size of the batch to use for online learning

        Attributes
        ----------
        n_docs : int
            Running count of fitted documents, used for normalization
        """
        self.n_eigenvec = n_eigenvec
        self.alpha_0 = alpha_0
        self.batch_size = batch_size
        self.n_docs = 0

        # Pick the incremental PCA implementation that matches the tensorly backend
        if tl.get_backend() == "numpy":
            self.pca = IncrementalPCA(n_components=self.n_eigenvec,
                                      batch_size=self.batch_size)
        elif tl.get_backend() == "cupy":
            self.pca = cuml.IncrementalPCA(n_components=self.n_eigenvec,
                                           batch_size=self.batch_size)
    def fit(self, X):
        """Fit the entire dataset to obtain the projection weights (singular
        vectors) and whitening weights (scaled explained variance) of a
        centered input dataset X.

        Parameters
        ----------
        X : tensor of shape (n_samples, vocabulary_size)
            Tensor containing all input documents
        """
        self.n_docs += X.shape[0]
        # Scale the centered documents by sqrt(alpha_0 + 1) before the PCA
        self.pca.fit(X * tl.sqrt(self.alpha_0 + 1))
        self.projection_weights_ = tl.transpose(self.pca.components_)
        # Rescale the explained variance from the (n - 1)-normalized estimate
        # to the n-normalized one
        self.whitening_weights_ = self.pca.explained_variance_ * (self.n_docs - 1) / self.n_docs
        del X
    def partial_fit(self, X_batch):
        """Fit a batch of data and update the projection weights (singular
        vectors) and whitening weights (scaled explained variance) accordingly,
        using a centered batch of the input dataset X.

        Parameters
        ----------
        X_batch : tensor of shape (batch_size, vocabulary_size)
            Tensor containing a batch of input documents
        """
        self.n_docs += X_batch.shape[0]
        self.pca.partial_fit(X_batch * tl.sqrt(self.alpha_0 + 1))
        self.projection_weights_ = tl.transpose(self.pca.components_)
        self.whitening_weights_ = self.pca.explained_variance_ * (self.n_docs - 1) / self.n_docs
        del X_batch
    def transform(self, X):
        """Whiten a centered tensor X using the fitted PCA model.

        Parameters
        ----------
        X : tensor of shape (batch_size, vocabulary_size)
            Batch of centered samples

        Returns
        -------
        whitened_X : tensor of shape (batch_size, self.n_eigenvec)
            Whitened samples
        """
        # Project onto the singular vectors, rescaled by the inverse square
        # root of the whitening weights
        X_whit = tl.dot(X, (self.projection_weights_ / tl.sqrt(self.whitening_weights_)[None, :]))
        del X
        return X_whit
    def reverse_transform(self, X):
        """Unwhiten some whitened tensor X using the fitted PCA model.

        Parameters
        ----------
        X : tensor of shape (batch_size, self.n_eigenvec)
            Whitened input tensor

        Returns
        -------
        unwhitened_X : tensor of shape (batch_size, vocabulary_size)
            Batch of unwhitened centered samples
        """
        X_unwhit = tl.dot(X, (self.projection_weights_ * tl.sqrt(self.whitening_weights_)).T)
        del X
        return X_unwhit
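
For context, a minimal usage sketch follows. It is not part of the module: it assumes the numpy backend, a synthetic centered document-term matrix, and illustrative values for n_eigenvec, alpha_0 and batch_size; only the methods defined above are called.

import numpy as np
import tensorly as tl

tl.set_backend("numpy")

# Hypothetical toy data: 1000 centered "documents" over a 500-word vocabulary
rng = np.random.default_rng(0)
X = rng.random((1000, 500))
X = X - X.mean(axis=0)  # the class expects centered input

cumulant = SecondOrderCumulant(n_eigenvec=20, alpha_0=1, batch_size=250)

# Online fitting, one batch at a time; fit(X) would also work for in-memory data
for start in range(0, X.shape[0], 250):
    cumulant.partial_fit(X[start:start + 250])

X_whitened = cumulant.transform(X[:250])               # shape (250, 20)
X_recovered = cumulant.reverse_transform(X_whitened)   # shape (250, 500)

Note that transform followed by reverse_transform is only an approximate inverse, since the projection keeps n_eigenvec directions out of the full vocabulary space.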