#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Felix Muzny
11/4/2022
DS 2000
Lecture 17 - starter code

We're going to start by creating a visualization where the y-axis is
the line number in the lyrics and the x-axis is the word number within
that line. We'll start by plotting a point where each word exists.

Example:
It's me, hi
I'm the problem, it's me
It's me, hi
I'm the problem, it's me
It's me, hi
Everybody agrees, everybody agrees

Becomes:
x x x
x x x x x
x x x
x x x x x
x x x
x x x x
"""

import matplotlib.pyplot as plt
# for making custom legend later
import matplotlib.lines as mlines

# all the files that we'll use
FILENAME = "antihero.txt"

# eventually we'll use all of these
STOPWORDS = "stopwords.txt"
# UPDATED: use dashes here instead of underscores
POS_WORDS = "positive-words.txt"
NEG_WORDS = "negative-words.txt"

# matplotlib color codes used by make_plot
STOPWORD_COLOR = "k"
OTHER_COLOR = "r"


# we'll use this function to read in the lyrics
def read_data(filename):
    """
    Reads in the data in a given file and stores the values in a
    list of lists of strings. Assumes that spaces separate words in
    the given file.

    Parameters
    ----------
    filename : string
        name of the file

    Returns
    -------
    data : list of lists
        list of lists of words for all lines in the file
        (a blank line becomes an empty list)
    """
    # the with-block closes the file even if reading fails part-way
    with open(filename, "r") as file:
        # we split by whitespace instead of by comma
        return [line.strip().split() for line in file]


# we'll use this function to read in the word lists
def read_data_singlelist(filename):
    """
    Reads in the data in a given file and stores the values in a
    list of strings, one string per line of the file.

    Parameters
    ----------
    filename : string
        name of the file

    Returns
    -------
    data : list of strings
        list of strings for all lines in the file, with leading and
        trailing whitespace removed
    """
    with open(filename, "r") as file:
        return [line.strip() for line in file]


def _word_points(lyrics, stopwords):
    """
    Computes one scatter point per word in the lyrics.

    Parameters
    ----------
    lyrics : list of lists of strings
        words of each line, as returned by read_data
    stopwords : iterable of strings
        words that should be drawn in the stopword color

    Returns
    -------
    x_vals : list of ints
        position of each word within its line
    y_vals : list of ints
        line number of each word (same length as x_vals)
    colors : list of strings
        matplotlib color code for each word (same length as x_vals)
    """
    # build the set once so each membership test is O(1)
    stopword_set = set(stopwords)

    x_vals = []
    y_vals = []
    colors = []
    for line_num, words in enumerate(lyrics):
        for word_num, word in enumerate(words):
            x_vals.append(word_num)
            y_vals.append(line_num)
            # NOTE: words keep their punctuation, so a token such as
            # "me," is compared as "me," and not as "me"
            if word.lower() in stopword_set:
                colors.append(STOPWORD_COLOR)
            else:
                colors.append(OTHER_COLOR)
    return x_vals, y_vals, colors


def make_plot(lyrics, stopwords):
    """
    Shows a scatter plot with one point per word of the lyrics.
    The x-axis is the word number within the line and the y-axis is
    the line number (inverted so that the first line is at the top).
    Stopwords are drawn in black, all other words in red.

    Example of the (x, y) position of each word:

    (0, 0) (1, 0) (2, 0)
    It's   me,    hi
    (0, 1) (1, 1) (2, 1)   (3, 1) (4, 1)
    I'm    the    problem, it's   me
    It's me, hi
    I'm the problem, it's me
    It's me, hi
    Everybody agrees, everybody agrees

    Becomes:
    x x x
    x x x x x
    x x x
    x x x x x
    x x x
    x x x x

    Parameters
    ----------
    lyrics : list of lists of strings
        words of each line, as returned by read_data
    stopwords : list of strings
        words to draw in black (compared against lowercased lyrics)

    Returns
    -------
    None
    """
    # must be before we do our plotting!!!
    plt.figure(figsize=(10, 10))

    x_vals, y_vals, colors = _word_points(lyrics, stopwords)
    # a single scatter call draws every word; skip it if there are none
    if x_vals:
        plt.scatter(x_vals, y_vals, color=colors, alpha=0.7)

    # switch the y-axis so line 0 is at the top, like the lyrics
    plt.gca().invert_yaxis()
    plt.ylabel("Line Number")
    plt.xlabel("Word number in the line")
    plt.show()


def main():
    """
    Reads the lyrics and the stopwords, prints a short preview of
    each, and then shows the plot.
    """
    lyrics = read_data(FILENAME)
    print(lyrics[:4])
    print(len(lyrics))
    print()

    # we'll use this code later :)
    stopwords = read_data_singlelist(STOPWORDS)
    print(stopwords[:4])
    print(len(stopwords))
    print()

    make_plot(lyrics, stopwords)

    # We'll work with this stuff in class on Nov. 8th!
    # pos = read_data_singlelist(POS_WORDS)
    # print(pos[:4])
    # print(len(pos))

    # neg = read_data_singlelist(NEG_WORDS)
    # print(neg[:4])
    # print(len(neg))


if __name__ == "__main__":
    main()