Source code for models.content

"""
The content filtering recommender system attempts to match users to items
based on highest predicted inner product between the predicted user profile
and predicted item profile. The predictions of user and item profiles
are generated iteratively as users interact with items.
"""
import numpy as np
from scipy.optimize import nnls
import scipy.sparse as sp
import trecs.matrix_ops as mo
from trecs.random import Generator
from trecs.validate import validate_user_item_inputs
from .recommender import BaseRecommender


class ContentFiltering(BaseRecommender):
    """
    A customizable content-filtering recommendation system.

    With content filtering, items and users are represented by a set of
    attributes A. This class assumes that the attributes used for items and
    users are the same. The recommendation system matches users to items with
    similar attributes.

    Item attributes are represented by a :math:`|A|\\times|I|` matrix, where
    :math:`|I|` is the number of items in the system. For each item, we define
    the similarity to each attribute.

    User profiles are represented by a :math:`|U|\\times|A|` matrix, where
    :math:`|U|` is the number of users in the system. For each user, we define
    the similarity to each attribute.

    Parameters
    -----------

        num_users: int, default 100
            The number of users :math:`|U|` in the system.

        num_items: int, default 1250
            The number of items :math:`|I|` in the system.

        num_attributes: int, default 1000
            The number of attributes :math:`|A|` in the system.

        user_representation: :obj:`numpy.ndarray`, optional
            A :math:`|U|\\times|A|` matrix representing the similarity
            between each user and attribute, as interpreted by the system.
            When omitted, an all-zero matrix is used.

        item_representation: :obj:`numpy.ndarray`, optional
            A :math:`|A|\\times|I|` matrix representing the similarity
            between each item and attribute. When omitted, a random binary
            matrix (Bernoulli with ``p=0.5``) is generated from ``seed``.

        actual_user_representation: :obj:`numpy.ndarray` or \
                            :class:`~components.users.Users`, optional
            Either a :math:`|U|\\times|T|` matrix representing the real user
            profiles, where :math:`T` is the number of attributes in the real
            underlying user profile, or a `Users` object that contains the
            real user profiles or real user-item scores. This matrix is
            **not** used for recommendations. This is only kept for
            measurements and the system is unaware of it.

        actual_item_representation: :obj:`numpy.ndarray`, optional
            A :math:`|T|\\times|I|` matrix representing the real item
            profiles, where :math:`T` is the number of attributes in the real
            underlying item profile. This matrix is **not** used for
            recommendations. This is only kept for measurements and the
            system is unaware of it. When omitted, a copy of
            ``item_representation`` is used.

        probabilistic_recommendations: bool, default False
            Forwarded unchanged to
            :class:`~models.recommender.BaseRecommender`; see that class for
            its exact meaning.

        num_items_per_iter: int, default 10
            Number of items presented to the user per iteration.

        seed: int, optional
            Seed for random generator.

        **kwargs
            Any additional keyword arguments are forwarded unchanged to
            :class:`~models.recommender.BaseRecommender`.

    Attributes
    -----------
        Inherited from BaseRecommender: :class:`~models.recommender.BaseRecommender`

        all_interactions: :obj:`scipy.sparse.csr_matrix`
            Cumulative user-by-item interaction counts, used by
            :meth:`train` to fit the predicted user profiles.

    Examples
    ---------
        ContentFiltering can be instantiated with no arguments -- in which
        case, it will be initialized with the default parameters and the
        item/user representations will be assigned randomly.

        >>> cf = ContentFiltering()
        >>> cf.users_hat.shape
        (100, 1000)   # <-- 100 users (default), 1000 attributes (default)
        >>> cf.items.shape
        (1000, 1250) # <-- 1000 attributes (default), 1250 items (default)

        This class can be customized either by defining the number of
        users/items/attributes in the system.

        >>> cf = ContentFiltering(num_users=1200, num_items=5000)
        >>> cf.users_hat.shape
        (1200, 1000) # <-- 1200 users, 1000 attributes

        >>> cf = ContentFiltering(num_users=1200, num_items=5000, num_attributes=2000)
        >>> cf.users_hat.shape
        (1200, 2000) # <-- 1200 users, 2000 attributes

        Or by generating representations for items and/or users. In the
        example below, items are uniformly distributed. We indirectly define
        100 attributes by defining the following ``item_representation``:

        >>> items = np.random.randint(0, 2, size=(100, 200))
        # Users are represented by a power law distribution.
        # This representation also uses 100 attributes.
        >>> power_dist = Distribution(distr_type='powerlaw')
        >>> users = power_dist.compute(a=1.16, size=(30, 100)).compute()
        >>> cf = ContentFiltering(item_representation=items, user_representation=users)
        >>> cf.items.shape
        (100, 200)
        >>> cf.users_hat.shape
        (30, 100)

        Note that all arguments passed in at initialization must be
        consistent - otherwise, an error is thrown. For example, one cannot
        pass in ``num_users=200`` but have ``user_representation.shape`` be
        ``(300, 100)``. Likewise, one cannot pass in ``num_items=1000`` but
        have ``item_representation.shape`` be ``(100, 500)``.
    """

    def __init__(  # pylint: disable-all
        self,
        num_users=None,
        num_items=None,
        num_attributes=None,
        user_representation=None,
        item_representation=None,
        actual_user_representation=None,
        actual_item_representation=None,
        probabilistic_recommendations=False,
        seed=None,
        num_items_per_iter=10,
        **kwargs
    ):
        # pylint: disable=duplicate-code
        # Reconcile the explicit sizes with the shapes of any supplied
        # matrices; 100 / 1250 / 1000 are the fallback defaults for
        # users / items / attributes.
        num_users, num_items, num_attributes = validate_user_item_inputs(
            num_users,
            num_items,
            user_representation,
            item_representation,
            actual_user_representation,
            actual_item_representation,
            100,
            1250,
            1000,
            num_attributes=num_attributes,
        )
        # generate recommender's initial "beliefs" about user profiles
        # and item attributes
        if user_representation is None:
            user_representation = np.zeros((num_users, num_attributes))
        if item_representation is None:
            item_representation = Generator(seed=seed).binomial(
                n=1, p=0.5, size=(num_attributes, num_items)
            )
        # if the actual item representation is not specified, we assume
        # that the recommender system's beliefs about the item attributes
        # are the same as the "true" item attributes
        if actual_item_representation is None:
            actual_item_representation = item_representation.copy()
        # The attribute must exist before the base initializer runs, because
        # train() reads it (and tolerates None) -- presumably the base
        # initializer may trigger a training pass; verify against
        # BaseRecommender.
        self.all_interactions = None
        # Initialize recommender system
        BaseRecommender.__init__(
            self,
            user_representation,
            item_representation,
            actual_user_representation,
            actual_item_representation,
            num_users,
            num_items,
            num_items_per_iter,
            probabilistic_recommendations=probabilistic_recommendations,
            seed=seed,
            **kwargs
        )
        # set cumulative interactions as a sparse matrix
        self.all_interactions = sp.csr_matrix((self.num_users, self.num_items), dtype=int)

    def _update_internal_state(self, interactions):
        """
        Private function that updates user profiles with data from latest
        interactions.

        Specifically, this function records the latest interactions in the
        cumulative user-by-item interaction matrix. For example, if user `u`
        has interacted with an item that has attributes `a1` and `a2`, the
        next call to :meth:`train` will update user `u`'s profile by
        increasing the similarity to attributes `a1` and `a2`.

        Parameters
        ------------
            interactions: :obj:`numpy.ndarray`
                An array of item indices that users have interacted with in
                the latest step. Namely, `interactions[u]` represents the
                index of the item that the user has interacted with.
        """
        # One entry of 1 per (user, chosen item) pair.
        # NOTE(review): assumes self.users.user_vector holds the user row
        # indices aligned with `interactions` -- confirm against Users.
        sparse_interactions = sp.csr_matrix(
            (np.ones(interactions.shape), (self.users.user_vector, interactions)),
            self.all_interactions.shape,
        )
        self.all_interactions = self.all_interactions + sparse_interactions

    def train(self):
        """
        Uses the NNLS solver to train the user representations, based on the
        user interaction & item attribute data.

        Note: this function may run slowly because it requires a manual loop
        over every user.
        """
        if self.all_interactions is not None and self.all_interactions.sum() > 0:
            # if there are interactions present:
            # The dense item-attribute matrix is the same for every user, so
            # convert it once instead of once per user (nnls needs a dense
            # input).
            item_attr = mo.to_dense(self.predicted_item_attributes.T)
            for i in range(self.num_users):
                user_interactions = self.all_interactions[i, :].toarray()[0, :]
                # solve for Content Filtering representation using nnls solver
                self.users_hat.value[i, :] = nnls(item_attr, user_interactions)[0]

        super().train()

    def process_new_items(self, new_items):
        """
        We assume the content filtering system has perfect knowledge of the
        new items; therefore, when new items are created, we simply return
        the new item attributes.

        Parameters
        ------------
            new_items: :obj:`numpy.ndarray`
                An array of items that represents new items that are being
                added into the system. Should be :math:`|A|\\times|I|`

        Returns
        ---------
            The ``new_items`` argument, unchanged.
        """
        # add columns for the new items into the all-interactions matrix
        empty_interactions = sp.csr_matrix((self.num_users, new_items.shape[1]), dtype=int)
        # Request CSR explicitly: train() slices rows with [i, :], which
        # requires a subscriptable format, and the default output format of
        # hstack is chosen by SciPy.
        self.all_interactions = sp.hstack(
            [self.all_interactions, empty_interactions], format="csr"
        )
        return new_items

    def process_new_users(self, new_users, **kwargs):
        """
        By default, the content filtering system assumes the predicted user
        profiles are zero vectors. (Note that this effectively corresponds to
        providing random recommendations to each user).

        Parameters
        ------------
            new_users: :obj:`numpy.ndarray`
                An array of users that represents new users that are being
                added into the system. Should be of dimension
                :math:`|U|\\times|A|`

        Returns
        ---------
            :obj:`numpy.ndarray`
                An all-zero matrix with one row per new user and
                ``self.users_hat.num_attrs`` columns.
        """
        # add rows for the new users into the all-interactions matrix
        num_new = new_users.shape[0]
        empty_interactions = sp.csr_matrix((num_new, self.num_items), dtype=int)
        # Keep the matrix in CSR so train() can keep slicing it row by row.
        self.all_interactions = sp.vstack(
            [self.all_interactions, empty_interactions], format="csr"
        )
        # each user is initially represented as zeros
        return np.zeros((num_new, self.users_hat.num_attrs))