#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Felix Muzny
10/25/2022
DS 2000
Lecture 14 - movie review program

A program to read and analyze some movie reviews.

Using the movie review data (blackadam.txt and tickettoparadise.txt),
write a larger program that will read in each review for the given movie,
line-by-line and does two things:
    1) Lets the user ask how many times a word exists in the reviews
    2) Reports the combined word counts for all reviews for that movie
"""

from collections import Counter

# we'll probably do some plotting; nothing below uses plt yet, so a missing
# matplotlib install should not stop the word-count program from running
try:
    import matplotlib.pyplot as plt
except ImportError:
    plt = None

# can also switch to "tickettoparadise.txt"
REVIEW_FILE = "blackadam.txt"


def read_data(filename):
    """
    Reads text data assuming that you have one review per line.
    Leading and trailing whitespace (including the newline) is stripped
    from each line; the words inside a review are NOT split apart here.

    Parameters
    ----------
    filename : string
        file path to be read in.

    Returns
    -------
    List of reviews/lines in the input file, one string per line.

    Raises
    ------
    OSError
        If the file cannot be opened (e.g. it does not exist).
    """
    # "with" closes the file for us, even if reading a line raises an error
    with open(filename, "r", encoding="utf-8") as file:
        return [line.strip() for line in file]


def count_words(text):
    """
    Count each word in a given string.

    Words are whatever str.split() produces, so counting is case-sensitive
    and punctuation attached to a word stays part of that word.

    Parameters
    ----------
    text : str
        words separated by whitespace.

    Returns
    -------
    dict of word counts, mapping each word to the number of times it
    occurs. Keys appear in order of first occurrence. An empty string
    gives an empty dict.
    """
    # Counter does the tallying; convert back so callers get a plain dict
    return dict(Counter(text.split()))


def add_dicts(d1, d2):
    """
    Create a new dictionary that adds values for keys that exist in
    both dictionaries together. Keys that exist in only one of the
    dictionaries are carried over with their value unchanged.

    Parameters
    ----------
    d1 : dict
        First dictionary to add. Values must support +.
    d2 : dict
        Second dictionary to add. Types should match first dictionary.

    Returns
    -------
    final_dict : dict
        Final dictionary that is the result of adding our
        first two dictionaries together. Neither d1 nor d2 is modified.
        Values for keys found in only one input are not copied; they are
        the same objects as in that input.
    """
    final_dict = d1.copy()
    for key, value in d2.items():
        if key in final_dict:
            # use "a = a + b" rather than "a += b": with mutable values
            # (e.g. lists) += would change the object still held by d1
            final_dict[key] = final_dict[key] + value
        else:
            final_dict[key] = value
    return final_dict


def main():
    """
    Load the reviews in REVIEW_FILE, count every word across all of them,
    let the user look up one word, then print all of the combined counts.
    """
    print("We'll implement this!")
    # First, let's write down a roadmap

    # load in the data
    data = read_data(REVIEW_FILE)
    # make sure we've loaded it in properly
    print(len(data))
    print(data[:2])
    print()

    # count up the words
    total_counts = {}
    for review in data:
        counts = count_words(review)
        # combine the counts into a new dictionary of overall counts
        total_counts = add_dicts(total_counts, counts)

    # 1) ask the user about a word
    target_word = input("What word do you want to search for? ")
    if target_word in total_counts:
        print(target_word, "occurred:", total_counts[target_word])
    else:
        print(target_word, "did not occur!")
    print()

    # 2) Report all the combined word counts (formatted nicely, of course!)
    for word, count in total_counts.items():
        print(word, ":", count)


# from now on, we'll always use this "guard" for our call to
# our main function!
if __name__ == "__main__":
    main()