"""
John Rachlin
DS 2000: Intro to Programming with Data

Filename: climb.py

Description: Count the frequency of each word in the poem:
"The Hill We Climb" by Amanda Gorman
"""

from collections import Counter
from operator import itemgetter

# Characters removed from every word before counting.
PUNCTUATION = "',?;:!."

# Translation table that deletes every character in PUNCTUATION in one pass.
_STRIP_PUNCTUATION = str.maketrans("", "", PUNCTUATION)

# Reporting thresholds used by main().
MIN_COUNT = 5   # only words occurring at least this often are listed
TOP_N = 10      # number of most frequent words shown in the summary


def clean_word(word: str) -> str:
    """
    Clean a word, converting to lowercase and removing punctuation

    Parameters: word - A word to be cleaned
    Return: cleaned up word (may be empty if the word was only punctuation)
    """
    return word.translate(_STRIP_PUNCTUATION).lower()


def extract_words(filename, encoding="utf-8"):
    """
    Extract a list of cleaned words from a text file.

    Each line is split on whitespace and every token is passed through
    clean_word. Tokens that are empty after cleaning (for example "...")
    are skipped so they are not later counted as words.

    Parameters: filename - the name of a file to be read
                encoding - text encoding used to decode the file
    Return: a list of words, in the order they appear in the file
    """
    wordlist = []
    # An explicit encoding keeps the result independent of the platform locale.
    with open(filename, "r", encoding=encoding) as file:
        for line in file:
            for token in line.split():
                cleaned = clean_word(token)
                if cleaned:
                    wordlist.append(cleaned)
    return wordlist


def count_words(words) -> dict:
    """
    Convert a list of words to a dictionary wordcount

    Parameters: words - an iterable of words
    Return: a dictionary mapping each distinct word to the number of times
            it occurs, with keys in order of first appearance
    """
    # Counter does the tallying; convert back so callers still get a plain dict.
    return dict(Counter(words))


def main():
    # Read file and extract a list of words
    filename = "the_hill_we_climb.txt"
    words = extract_words(filename)

    # Count occurrences of each word
    wc = count_words(words)

    # Output results of words occurring at least MIN_COUNT times
    for word, count in wc.items():
        if count >= MIN_COUNT:
            print(word, ":", count)

    # Most frequent words first; sorted() is stable, so ties keep the order
    # in which the words first appeared in the poem.
    by_count = sorted(wc.items(), key=itemgetter(1), reverse=True)
    print(by_count[:TOP_N])


if __name__ == '__main__':
    main()