Source code for tltorch.factorized_layers.factorized_embedding

import torch
import numpy as np
from torch import nn
from tltorch.factorized_tensors import TensorizedTensor, tensor_init
from tltorch.utils import get_tensorized_shape

# Authors: Cole Hawkins 
#          Jean Kossaifi

class FactorizedEmbedding(nn.Module):
    """Tensorized Embedding Layer for Efficient Model Compression

    Tensorized drop-in replacement for `torch.nn.Embedding`.

    Parameters
    ----------
    num_embeddings : int
        number of entries in the lookup table
    embedding_dim : int
        number of dimensions per entry
    auto_tensorize : bool
        whether to use automatic reshaping for the embedding dimensions
    n_tensorized_modes : int or int tuple
        number of reshape dimensions for both embedding table dimensions
    tensorized_num_embeddings : int tuple
        tensorized shape of the first embedding table dimension
    tensorized_embedding_dim : int tuple
        tensorized shape of the second embedding table dimension
    factorization : str
        tensor type
    rank : int tuple or str
        rank of the tensor factorization
    """

    def __init__(self,
                 num_embeddings,
                 embedding_dim,
                 auto_tensorize=True,
                 n_tensorized_modes=3,
                 tensorized_num_embeddings=None,
                 tensorized_embedding_dim=None,
                 factorization='blocktt',
                 rank=8,
                 n_layers=1,
                 device=None,
                 dtype=None):
        super().__init__()

        if auto_tensorize:
            if tensorized_num_embeddings is not None and tensorized_embedding_dim is not None:
                raise ValueError(
                    "Either use auto_tensorize or specify tensorized_num_embeddings and tensorized_embedding_dim."
                )

            tensorized_num_embeddings, tensorized_embedding_dim = get_tensorized_shape(
                in_features=num_embeddings, out_features=embedding_dim,
                order=n_tensorized_modes, min_dim=2, verbose=False)
        else:
            # Check that the tensorized dimensions match the requested embedding table shape
            computed_num_embeddings = np.prod(tensorized_num_embeddings)
            computed_embedding_dim = np.prod(tensorized_embedding_dim)

            if computed_num_embeddings != num_embeddings:
                raise ValueError(
                    "Tensorized embedding number {} does not match num_embeddings argument {}".format(
                        computed_num_embeddings, num_embeddings))
            if computed_embedding_dim != embedding_dim:
                raise ValueError(
                    "Tensorized embedding dimension {} does not match embedding_dim argument {}".format(
                        computed_embedding_dim, embedding_dim))

        self.num_embeddings = num_embeddings
        self.embedding_dim = embedding_dim
        self.tensor_shape = (tensorized_num_embeddings, tensorized_embedding_dim)
        self.weight_shape = (self.num_embeddings, self.embedding_dim)

        self.n_layers = n_layers
        if n_layers > 1:
            self.tensor_shape = (n_layers, ) + self.tensor_shape
            self.weight_shape = (n_layers, ) + self.weight_shape

        self.factorization = factorization

        self.weight = TensorizedTensor.new(self.tensor_shape,
                                           rank=rank,
                                           factorization=self.factorization,
                                           device=device,
                                           dtype=dtype)
        self.reset_parameters()

        self.rank = self.weight.rank

    def reset_parameters(self):
        # Parameter initialization from Yin et al.
        # TT-Rec: Tensor Train Compression for Deep Learning Recommendation Model Embeddings
        target_stddev = 1 / np.sqrt(3 * self.num_embeddings)
        with torch.no_grad():
            tensor_init(self.weight, std=target_stddev)
    def forward(self, input, indices=0):
        # To handle the case where the input is not 1-D
        output_shape = (*input.shape, self.embedding_dim)

        flattened_input = input.reshape(-1)

        if self.n_layers == 1:
            if indices == 0:
                embeddings = self.weight[flattened_input, :]
        else:
            embeddings = self.weight[indices, flattened_input, :]

        # CPTensorized returns a CPTensorized when indexing
        if self.factorization.lower() == 'cp':
            embeddings = embeddings.to_matrix()
        # TuckerTensorized returns a tensor, not a matrix,
        # and requires reshape (not view) for contiguity
        elif self.factorization.lower() == 'tucker':
            embeddings = embeddings.reshape(input.shape[0], -1)

        return embeddings.view(output_shape)
    @classmethod
    def from_embedding(cls,
                       embedding_layer,
                       rank=8,
                       factorization='blocktt',
                       n_tensorized_modes=2,
                       decompose_weights=True,
                       auto_tensorize=True,
                       decomposition_kwargs=dict(),
                       **kwargs):
        """Create a tensorized embedding layer from a regular embedding layer.

        Parameters
        ----------
        embedding_layer : torch.nn.Embedding
        rank : int tuple or str
            rank of the tensor decomposition
        factorization : str
            tensor type
        decompose_weights : bool
            whether to decompose the weights and use them for initialization
        auto_tensorize : bool
            if True, automatically reshape dimensions for TensorizedTensor
        decomposition_kwargs : dict
            kwargs passed to the decomposition
        """
        num_embeddings, embedding_dim = embedding_layer.weight.shape

        instance = cls(num_embeddings,
                       embedding_dim,
                       auto_tensorize=auto_tensorize,
                       factorization=factorization,
                       n_tensorized_modes=n_tensorized_modes,
                       rank=rank,
                       **kwargs)

        if decompose_weights:
            with torch.no_grad():
                instance.weight.init_from_matrix(embedding_layer.weight.data,
                                                 **decomposition_kwargs)
        else:
            instance.reset_parameters()

        return instance
    @classmethod
    def from_embedding_list(cls,
                            embedding_layer_list,
                            rank=8,
                            factorization='blocktt',
                            n_tensorized_modes=2,
                            decompose_weights=True,
                            auto_tensorize=True,
                            decomposition_kwargs=dict(),
                            **kwargs):
        """Create a single tensorized embedding layer from a list of regular embedding layers.

        Parameters
        ----------
        embedding_layer_list : list of torch.nn.Embedding
        rank : int tuple or str
            tensor rank
        factorization : str
            tensor decomposition to use
        decompose_weights : bool
            decompose the weights and use them for initialization
        auto_tensorize : bool
            automatically reshape dimensions for TensorizedTensor
        decomposition_kwargs : dict
            kwargs passed to the decomposition
        """
        n_layers = len(embedding_layer_list)
        num_embeddings, embedding_dim = embedding_layer_list[0].weight.shape

        for i, layer in enumerate(embedding_layer_list[1:]):
            # Check the size of the embeddings:
            # they must all match so they can be jointly factorized
            new_num_embeddings, new_embedding_dim = layer.weight.shape
            if num_embeddings != new_num_embeddings:
                msg = 'All embedding layers must have the same num_embeddings. '
                msg += f'Yet, got embedding_layer_list[0] with num_embeddings={num_embeddings} '
                msg += f'and embedding_layer_list[{i+1}] with num_embeddings={new_num_embeddings}.'
                raise ValueError(msg)
            if embedding_dim != new_embedding_dim:
                msg = 'All embedding layers must have the same embedding_dim. '
                msg += f'Yet, got embedding_layer_list[0] with embedding_dim={embedding_dim} '
                msg += f'and embedding_layer_list[{i+1}] with embedding_dim={new_embedding_dim}.'
                raise ValueError(msg)

        instance = cls(num_embeddings,
                       embedding_dim,
                       n_tensorized_modes=n_tensorized_modes,
                       auto_tensorize=auto_tensorize,
                       factorization=factorization,
                       rank=rank,
                       n_layers=n_layers,
                       **kwargs)

        if decompose_weights:
            weight_tensor = torch.stack(
                [layer.weight.data for layer in embedding_layer_list])
            with torch.no_grad():
                instance.weight.init_from_matrix(weight_tensor,
                                                 **decomposition_kwargs)
        else:
            instance.reset_parameters()

        return instance
    def get_embedding(self, indices):
        if self.n_layers == 1:
            raise ValueError('A single embedding layer is parametrized, directly use the main class.')
        return SubFactorizedEmbedding(self, indices)
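The snippet below is a minimal usage sketch of the layer defined above: it builds a `FactorizedEmbedding` as a drop-in replacement for `torch.nn.Embedding`, runs a forward pass on a batch of token indices, and compresses an existing embedding layer with `from_embedding`. The import path follows this module's package; the vocabulary size, embedding size, and rank are arbitrary illustrative values, not recommendations.

# Minimal usage sketch (illustrative sizes; assumes tltorch is installed
# and exposes FactorizedEmbedding as defined in this module).
import torch
from tltorch.factorized_layers import FactorizedEmbedding

# Drop-in replacement for nn.Embedding, with automatic tensorization
embedding = FactorizedEmbedding(num_embeddings=1000, embedding_dim=64,
                                factorization='blocktt', rank=8)
tokens = torch.randint(0, 1000, (4, 12))   # batch of token indices
out = embedding(tokens)                    # shape: (4, 12, 64)

# Compress an existing nn.Embedding by decomposing its weight matrix
dense = torch.nn.Embedding(1000, 64)
compressed = FactorizedEmbedding.from_embedding(dense, rank=8,
                                                factorization='blocktt')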
class SubFactorizedEmbedding(nn.Module):
    """Class representing one of the embeddings from the mother joint factorized embedding layer.

    Parameters
    ----------
    main_layer : FactorizedEmbedding
        the mother jointly factorized embedding layer
    indices : int
        index of this embedding within the mother layer

    Notes
    -----
    This relies on the fact that nn.Parameters are not duplicated:
    if the same nn.Parameter is assigned to multiple modules,
    they all point to the same data, which is shared.
    """

    def __init__(self, main_layer, indices):
        super().__init__()
        self.main_layer = main_layer
        self.indices = indices

    def forward(self, x):
        return self.main_layer(x, self.indices)

    def extra_repr(self):
        return ''

    def __repr__(self):
        msg = f' {self.__class__.__name__} {self.indices} from main factorized layer.'
        msg += f'\n{self.__class__.__name__}('
        msg += self.extra_repr()
        msg += ')'
        return msg
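When several same-shaped embedding tables are jointly factorized with `from_embedding_list`, each individual table is addressed through `get_embedding`, which returns the `SubFactorizedEmbedding` wrapper defined above. A minimal sketch, again with arbitrary example sizes:

# Jointly factorize several same-shaped embedding tables (illustrative
# sizes). All layers share a single TensorizedTensor of weights.
import torch
from tltorch.factorized_layers import FactorizedEmbedding

layers = [torch.nn.Embedding(1000, 64) for _ in range(3)]
joint = FactorizedEmbedding.from_embedding_list(layers, rank=8,
                                                factorization='blocktt')

# Each SubFactorizedEmbedding forwards to the shared weights with its index
first = joint.get_embedding(0)
tokens = torch.randint(0, 1000, (4, 12))
out = first(tokens)   # equivalent to joint(tokens, indices=0)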