"""
John Rachlin
DS 2000: Intro to Programming with Data

Filename: recommend.py

Description: We build on our foundation from last time - developing
the features and functionality of our recommendation engine object,
Recommender. To help us code our algorithms, we add a few utility
methods (watched, liked, etc.) so we can focus on the big picture
and not worry about low-level details.

Algorithms for finding recommendations are quite rich with
possibilities, even when all you have is a single rating and the name
of a movie!

Algorithm 1: MOST SIMILAR USER ("MSU")
For a given user, find another user who is most similar and take
recommendations from that user. How is similarity defined? Ultimately,
any way you like. But we'll define it as the number of movies they
both liked (rated 6 or higher).

Algorithm 2: FREQUENTLY LIKED MOVIES ("FLM")
Find all the other users that have at least some overlap with you,
i.e., they each have at least one movie that both you and they liked.
Now tally up the other movies they liked. Compute total likes for each
movie among these overlapping users. Recommend the 5 (?) movies that
are most frequently liked among these users.
"""

import csv
from collections import Counter
from pprint import pprint


class Recommender:
    """ A recommendation engine class """

    def __init__(self):
        """ Constructor for Recommender engine """
        # user name -> set of (movie, rating) tuples
        self.ratings = {}

    def read_ratings_data(self, filename: str) -> None:
        """ Read ratings data from a CSV file into self.ratings.

        The first row is treated as a header and skipped. Every other
        non-empty row supplies the user name, the movie, and the rating
        in its first three columns.

        Raises:
            OSError: if the file cannot be opened.
            IndexError: if a non-empty row has fewer than three columns.
            ValueError: if a rating cannot be converted to float.
        """
        # newline="" lets the csv module handle line endings itself
        with open(filename, newline="") as file:
            reader = csv.reader(file, delimiter=',')
            next(reader, None)  # skip header (tolerates an empty file)
            for row in reader:
                if not row:
                    # blank line (e.g. trailing newline) - nothing to record
                    continue
                name, movie, rating = row[0], row[1], float(row[2])
                self.ratings.setdefault(name, set()).add((movie, rating))

    def num_ratings(self) -> int:
        """ Number of user ratings in the db (summed over all users) """
        return sum(len(rated) for rated in self.ratings.values())

    def print_recs(self, recs) -> None:
        """ Print the recommendations one line at a time.

        Prints "None" when recs is None.
        """
        print("\nRecommendations:")
        print("----------------")
        if recs is not None:
            for r in recs:
                print(r)
        else:
            print("None")

    def watched(self, name: str) -> set:
        """ Get the set of movies watched (i.e., rated) by user `name`.

        Raises:
            KeyError: if `name` has no ratings on record.
        """
        return {movie for movie, _ in self.ratings[name]}

    def liked(self, name: str, min_rating=6) -> set:
        """ Return the set of movies liked by user `name`.

        A movie counts as liked when its rating is >= min_rating.

        Raises:
            KeyError: if `name` has no ratings on record.
        """
        return {movie for movie, rating in self.ratings[name]
                if rating >= min_rating}

    def recommend(self, name: str, other: str) -> set:
        """ Make recommendations for `name` based on some other user -
        perhaps a friend?

        Returns the movies `other` liked that `name` has not watched.
        """
        already_seen = self.watched(name)
        other_liked = self.liked(other)
        return other_liked - already_seen

    def like_overlap(self, name1: str, name2: str) -> int:
        """ Number of movies two people both like """
        return len(self.liked(name1) & self.liked(name2))

    def recommend_msu(self, name: str):
        """ Algorithm MSU: Find the MOST SIMILAR USER (overlapping likes)
        Then recommend based on that most similar user.

        Prints the best match and the size of the overlap. Returns the
        set of recommended movies, or None when no other user shares a
        liked movie with `name`.
        """
        best_overlap = 0
        best_match = ""

        # Find most similar user. The strict '>' means the first user
        # reaching the highest overlap wins ties.
        for other in self.ratings:
            if other != name:
                # how many movies do they both like?
                overlap = self.like_overlap(name, other)
                if overlap > best_overlap:
                    best_overlap = overlap
                    best_match = other

        # make recommendations
        print(name, "best match:", best_match)
        print("Overlapping likes: ", best_overlap)

        if best_match == '':
            return None
        return self.recommend(name, best_match)

    def recommend_flm(self, name: str, top=5) -> set:
        """ Recommend based on FREQUENTLY LIKED MOVIES.

        For each other user with overlapping likes, tally which movies
        that other user liked. Total across all such users to find the
        frequently liked movies (only users that have at least one
        overlapping like with `name` are included). Movies `name` has
        already watched are never recommended.

        Returns the set of (at most) `top` most frequently liked movies.
        """
        my_likes = self.liked(name)
        already_seen = self.watched(name)

        tally = Counter()
        for other in self.ratings:
            if other == name:
                continue
            their_likes = self.liked(other)
            if not (my_likes & their_likes):
                continue  # no overlapping likes - ignore this user
            # Count only movies that name has not already watched
            tally.update(m for m in their_likes if m not in already_seen)

        # Find most frequently liked (top) movies
        return {movie for movie, _ in tally.most_common(top)}


def main():
    engine = Recommender()
    engine.read_ratings_data('movies.csv')

    # Ben and Cole (1 overlap)
    # Christy and Alba (2 overlaps)
    recs = engine.recommend_flm("Cole")
    engine.print_recs(recs)


if __name__ == "__main__":
    main()