'''
    DS2000
    Spring 2022
    Sentiment Analysis algorithm

    In this version we make a list of pos words, and a list of neg words.
    We read all the comments in from a reddit file (different from the
    reddit file on HW6; here we have just one comment per line).

    Then, we pick one comment at random and clean it up (remove
    punctuation and numbers, and turn everything lowercase) and
    apply our sentiment analysis algorithm -----
    +1 for every positive word, -1 for every negative word, divided by
    the number of words in the comment.
'''

import random

REDDIT = "reddit.txt"

# NOTE: "lol" appears in both lists, so it contributes +1 and -1 and
# nets out to zero in the score.
POSITIVE = ["good", "happy", "relieved", "relief", "glad", "finally",
            "normal", "excited", "proud", "well", "healthy", "lol",
            "great"]
NEGATIVE = ["bad", "angry", "frustrated", "stressed", "stress", "stupid",
            "scared", "scary", "scaring", "hate", "hated", "annoying",
            "annoyed", "tired", "disappointed", "lol", "afraid", "lmao"]


def sentiment_score(comment, pos, neg):
    '''
    Function: sentiment_score
    Parameters: comment (a string), list of pos words, list of neg words
    Returns: sentiment score (float between -1 and +1); 0.0 when the
             comment contains no words at all
    '''
    words = comment.split()

    # A blank comment (or one that was only punctuation/digits before
    # cleaning) has no words; avoid dividing by zero and call it neutral.
    if not words:
        return 0.0

    score = 0
    for word in words:
        # Two independent checks (not elif): a word that is in both lists
        # counts +1 and -1.
        if word in pos:
            score += 1
        if word in neg:
            score -= 1
    return score / len(words)


def read_comments(filename):
    '''
    Function: read_comments
    Parameter: filename, string
    Returns: list of strings, one per line in the file (each string
             keeps its trailing newline, if it had one)
    '''
    # Explicit encoding so the result does not depend on the platform's
    # default text encoding.
    with open(filename, "r", encoding="utf-8") as infile:
        return list(infile)


def clean_string(input_st):
    '''
    Function: clean_string
    Parameter: one string
    Returns: new version of the string, cleaned up: only letters and
             spaces are kept, and every letter is lowercased
    '''
    return "".join(letter.lower() for letter in input_st
                   if letter.isalpha() or letter == " ")


def main():
    # Step one: read data from the file, one comment per
    # element in a list
    comments = read_comments(REDDIT)

    # random.choice raises IndexError on an empty list, so stop early
    # if the file had no lines.
    if not comments:
        print("No comments found in", REDDIT)
        return

    # Step two: computation --- clean up one random comment
    # to remove punctuation/numbers and make it all lowercase
    comment = random.choice(comments)
    clean = clean_string(comment)
    score = sentiment_score(clean, POSITIVE, NEGATIVE)

    # Step three: communication --- print the cleaned
    # up comment and its score
    print(clean)
    print("Sentiment score:", score)


# Only run the program when executed as a script, so importing this
# module (e.g. to reuse sentiment_score) has no side effects.
if __name__ == "__main__":
    main()