Source code for models.social

"""
Social filtering recommender system, where users get shown items that were
interacted with by users in their social networks
"""
import networkx as nx
import numpy as np
import scipy.sparse as sp
import trecs.matrix_ops as mo

from trecs.components import BinarySocialGraph
from trecs.random import SocialGraphGenerator
from trecs.validate import validate_user_item_inputs
from .recommender import BaseRecommender


class SocialFiltering(BaseRecommender, BinarySocialGraph):
    """
    A customizable social filtering recommendation system.

    With social filtering, users are presented items that were previously
    liked by other users in their social networks.

    The social network is represented by a :math:`|U|\\times|U|` matrix,
    where :math:`|U|` is the number of users in the system. For each pair
    of users :math:`u` and :math:`v`, entry `[u,v]` defines whether
    :math:`u` "follows"/is connected to :math:`v`. This can be a binary
    relationship or a score that measures how likely :math:`u` is to engage
    with content that :math:`v` has previously interacted with.

    Please note that, in this class, the follow/unfollow and
    add_friends/remove_friends methods assume a binary social graph
    (see :class:`~components.socialgraph.BinarySocialGraph`).

    Item attributes are represented by a :math:`|U|\\times|I|` matrix,
    where :math:`|I|` is the number of items in the system. For each item
    :math:`i` and user :math:`u`, we define a score that determines the
    interactions :math:`u` had with :math:`i`. Again, this could just be a
    binary relationship.

    Parameters
    -----------

        num_users: int, default 100
            The number of users :math:`|U|` in the system.

        num_items: int, default 1250
            The number of items :math:`|I|` in the system.

        user_representation: :obj:`numpy.ndarray`, optional
            A :math:`|U|\\times|U|` adjacency matrix representing each
            users' social network. If this is not None, `num_users` is
            ignored. If it is None, a random graph is generated with
            edge probability 0.3.

        item_representation: :obj:`numpy.ndarray`, optional
            A :math:`|U|\\times|I|` matrix representing the past user
            interactions. If this is not None, `num_items` is ignored.
            If it is None, an all-zero integer matrix is used.

        actual_user_representation: :obj:`numpy.ndarray` or \
                            :class:`~components.users.Users`, optional
            Either a :math:`|U|\\times|T|` matrix representing the real
            user profiles, where :math:`T` is the number of attributes in
            the real underlying user profile, or a `Users` object that
            contains the real user profiles or real user-item scores.
            This matrix is **not** used for recommendations. This is only
            kept for measurements and the system is unaware of it.

        actual_item_representation: :obj:`numpy.ndarray`, optional
            A :math:`|T|\\times|I|` matrix representing the real item
            profiles, where :math:`T` is the number of attributes in the
            real underlying item profile. This matrix is **not** used for
            recommendations. This is only kept for measurements and the
            system is unaware of it. If None, a copy of the item
            representation is used.

        probabilistic_recommendations: bool, default False
            Forwarded unchanged to
            :class:`~models.recommender.BaseRecommender`.

        num_items_per_iter: int, default 10
            Number of items presented to the user per iteration.

        seed: int, optional
            Seed for random generator.

        **kwargs
            Any additional keyword arguments are forwarded unchanged to
            :class:`~models.recommender.BaseRecommender`.

    Attributes
    -----------
        Inherited from BaseRecommender: :class:`~models.recommender.BaseRecommender`

    Examples
    ----------
        SocialFiltering can be instantiated with no arguments -- in which
        case, it will be initialized with the default parameters. The item
        representation will be initialized to zero, meaning that there have
        been no previous interactions for this set of users, and the social
        network will be a randomly generated graph.

        >>> sf = SocialFiltering()
        >>> sf.users_hat.shape
        (100, 100)   # <-- 100 users (default)
        >>> sf.items.shape
        (100, 1250) # <-- 100 users (default), 1250 items (default)

        This class can be customized either by defining the number of users
        and/or items in the system:

        >>> sf = SocialFiltering(num_users=1200, num_items=5000)
        >>> sf.items.shape
        (1200, 5000) # <-- 1200 users, 5000 items

        >>> sf = SocialFiltering(num_users=50)
        >>> sf.items.shape
        (50, 1250) # <-- 50 users, 1250 items (default)

        Or by generating representations for items and/or users. In the
        example below, items are uniformly distributed. We "indirectly"
        define 100 users by defining a `100x200` item representation.

        >>> item_representation = np.random.randint(2, size=(100, 200))
        # Social networks are drawn from a binomial distribution.
        # This representation also uses 100 users.
        >>> sf = SocialFiltering(item_representation=item_representation)
        >>> sf.items.shape
        (100, 200)
        >>> sf.users_hat.shape
        (100, 100)

        Note that all arguments passed in at initialization must be
        consistent - otherwise, an error is thrown. For example, one cannot
        pass in ``num_users=200`` but have ``user_representation.shape`` be
        ``(200, 500)`` or ``(300, 300)``. Likewise, one cannot pass in
        ``num_items=1000`` but have ``item_representation.shape`` be
        ``(200, 500)``.
    """

    def __init__(  # pylint: disable-all
        self,
        num_users=None,
        num_items=None,
        user_representation=None,
        item_representation=None,
        actual_user_representation=None,
        actual_item_representation=None,
        probabilistic_recommendations=False,
        num_items_per_iter=10,
        seed=None,
        **kwargs
    ):
        num_users, num_items, num_attributes = validate_user_item_inputs(
            num_users,
            num_items,
            user_representation,
            item_representation,
            actual_user_representation,
            actual_item_representation,
            None,  # see if we can get the default number of users from the items array
            num_attributes=num_users,  # number of attributes should be equal to the number of users
            default_num_items=1250,
            default_num_attributes=None,
        )
        if num_users is None and num_attributes is None:
            # number of users could not be inferred from any of the inputs
            num_users = 100
            num_attributes = 100
        if num_users is None:
            # get user representation from items instead
            num_users = num_attributes
        if num_attributes is None:
            num_attributes = num_users
        # verify that the user representation is an adjacency matrix and that
        # the item representation aligns
        if num_users != num_attributes:
            raise ValueError("Number of users must be consistent across all inputs")
        if user_representation is None:
            user_representation = SocialGraphGenerator.generate_random_graph(
                num=num_users, p=0.3, seed=seed, graph_type=nx.fast_gnp_random_graph
            )
        if item_representation is None:
            item_representation = np.zeros((num_users, num_items), dtype=int)
        # if the actual item representation is not specified, we assume
        # that the recommender system's beliefs about the item attributes
        # are the same as the "true" item attributes
        if actual_item_representation is None:
            actual_item_representation = item_representation.copy()

        # Initialize recommender system. The base class is called explicitly
        # (rather than through super()) so that only BaseRecommender's
        # initializer runs with these arguments.
        BaseRecommender.__init__(
            self,
            user_representation,
            item_representation,
            actual_user_representation,
            actual_item_representation,
            num_users,
            num_items,
            num_items_per_iter,
            probabilistic_recommendations=probabilistic_recommendations,
            seed=seed,
            **kwargs
        )

    def _update_internal_state(self, interactions):
        """
        Private function that updates the internal item representation
        (`items_hat`) with data from the latest interactions.

        Specifically, this function converts interactions into item
        attributes: if user `u` has interacted with item `i`, entry
        `[u, i]` of `items_hat` is incremented by 1.

        Parameters
        ------------
            interactions: :obj:`numpy.ndarray`
                An array of item indices that users have interacted with
                in the latest step. Namely, `interactions_u` represents
                the index of the item that user `u` has interacted with.

        Raises
        --------
            ValueError
                If the shape of `items_hat` does not match
                `(num_users, num_items)`.
        """
        if self.num_users != self.items_hat.num_attrs or self.num_items != self.items_hat.num_items:
            error_msg = (
                "User-item interactions matrix must have same shape as internal "
                "item representation"
            )
            raise ValueError(error_msg)
        # one-hot (per user) matrix of the interactions from this step
        interactions_per_user = sp.lil_matrix((self.num_users, self.num_items), dtype=int)
        interactions_per_user[self.users.user_vector, interactions] = 1
        if mo.any_dense(self.items_hat.value):
            # only add dense to dense and sparse to sparse
            interactions_per_user = mo.to_dense(interactions_per_user)
        self.items_hat.value += interactions_per_user

    def process_new_items(self, new_items):
        """
        New items are simply represented as zeros, since they have not
        received interactions from any users yet.

        Parameters
        ------------
            new_items: :obj:`numpy.ndarray`
                An array of items that represents new items that are being
                added into the system. Should be :math:`|A|\\times|I|`

        Returns
        ---------
            :obj:`scipy.sparse.csr_matrix`
                An all-zero sparse matrix with one row per existing user
                and one column per new item.
        """
        # users have never interacted with new items
        new_representation = sp.csr_matrix((self.num_users, new_items.shape[1]))
        return new_representation

    def process_new_users(self, new_users, **kwargs):
        """
        Extend the internal representations to account for new users.

        The interaction history of any new users (their rows in
        `items_hat`) is always zero. If you want new users to start with a
        different interaction history, you should modify `items_hat`
        directly after calling `model.add_users()`. The ties between new
        and existing users are read from the `social_graph` keyword
        argument, which is mandatory.

        As a side effect, this method appends zero rows to `items_hat`
        and appends the "old users to new users" columns of `social_graph`
        to `users_hat`.

        Parameters
        ------------
            new_users: :obj:`numpy.ndarray`
                An array of users that represents new users that are being
                added into the system. Should be of dimension
                :math:`|U|\\times|A|`

            social_graph: array-like, keyword-only via ``**kwargs``
                The full social graph, of dimension :math:`|U|\\times|U|`
                where :math:`|U|` is the total number of users (including
                new users). The new users are expected to occupy the last
                rows/columns.

        Returns
        ---------
            The rows of `social_graph` that correspond to the new users
            (their ties to all users, old and new).

        Raises
        --------
            RuntimeError
                If `social_graph` is not supplied.
        """
        social_graph = kwargs.pop("social_graph", None)
        if social_graph is None:
            raise RuntimeError(
                "social_graph must be passed in as a keyword argument "
                "to add_users(). It must be dimension |U|x|U|, where "
                "|U| is the total number of users (including new users)."
            )
        num_new_users = new_users.shape[0]
        # modify item representation to reflect new users
        item_rep = sp.csr_matrix((num_new_users, self.num_items))
        self.items_hat.value = mo.vstack([self.items_hat.value, item_rep])
        # Split points are computed explicitly instead of slicing with
        # `-num_new_users`: for an empty batch that would be `-0`, which
        # selects nothing for the "old" part and everything for the "new"
        # part. Clamping at 0 keeps the behavior of negative slicing when
        # more new users are supplied than the graph has rows/columns.
        first_new_row = max(social_graph.shape[0] - num_new_users, 0)
        first_new_col = max(social_graph.shape[1] - num_new_users, 0)
        # modify user representation by adding relationships from
        # old users to new users
        old_to_new = social_graph[:first_new_row, first_new_col:]
        self.users_hat.value = mo.hstack([self.users_hat.value, old_to_new])
        new_to_all = social_graph[first_new_row:, :]
        return new_to_all