#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Felix Muzny
11/4/2022
DS 2000
Lecture 17 - starter code

We're going to start by creating a visualization where
the y-axis is the line number in the lyrics and the x-axis
is the word number within that line.

We'll start by plotting a point where each word exists.

Example:
It's me, hi
I'm the problem, it's me
It's me, hi
I'm the problem, it's me
It's me, hi
Everybody agrees, everybody agrees

Becomes:
x x x
x x x x x
x x x
x x x x x
x x x
x x x x
"""

import matplotlib.pyplot as plt
# for making custom legend later
import matplotlib.lines as mlines

# all the files that we'll use
FILENAME = "antihero.txt"
# eventually we'll use all of these
STOPWORDS = "stopwords.txt"
POS_WORDS = "positive_words.txt"
NEG_WORDS = "negative_words.txt"


# we'll use this function to read in the lyrics
def read_data(filename):
    """
    Reads in the data in a given file and stores the values in a
    list of lists of strings. Assumes that whitespace separates
    words in the given file.

    Parameters
    ----------
    filename : string
        name of the file

    Returns
    -------
    data : list of lists
        list of lists of words for all lines in the file; a blank
        line in the file becomes an empty list
    """
    # "with" guarantees the file is closed even if reading fails part-way
    with open(filename, "r") as file:
        return [line.strip().split() for line in file]


# we'll use this function to read in the word lists
def read_data_singlelist(filename):
    """
    Reads in the data in a given file and stores the values in a
    list of strings, one string per line, with leading and trailing
    whitespace removed.

    Parameters
    ----------
    filename : string
        name of the file

    Returns
    -------
    data : list of strings
        list of strings for all lines in the file
    """
    # "with" guarantees the file is closed even if reading fails part-way
    with open(filename, "r") as file:
        return [line.strip() for line in file]


def make_plot(lyrics):
    """
    Placeholder for the lyrics visualization described in the module
    docstring. Currently it only prints a message and ignores its input.

    Parameters
    ----------
    lyrics : list of lists
        list of lists of words, as returned by read_data

    Returns
    -------
    None
    """
    print("we'll be implementing this!")


def main():
    """
    Reads the lyrics in FILENAME, prints the first four lines and the
    total number of lines, then calls make_plot on the lyrics.
    """
    lyrics = read_data(FILENAME)
    print(lyrics[:4])
    print(len(lyrics))

    make_plot(lyrics)

    # we'll use this code later :)
    # stopwords = read_data_singlelist(STOPWORDS)
    # print(stopwords[:4])
    # print(len(stopwords))

    # pos = read_data_singlelist(POS_WORDS)
    # print(pos[:4])
    # print(len(pos))

    # neg = read_data_singlelist(NEG_WORDS)
    # print(neg[:4])
    # print(len(neg))


if __name__ == "__main__":
    main()