"""
    Felix Muzny
    DS 2000
    Lecture 15
    November 1, 2024

    Reading in files as dictionaries
    Lists of dictionaries
"""

import csv

import matplotlib.pyplot as plt

FILENAME = "ds2000_staff.csv"
AGE_COL = "age"
NUM_BINS = 10


def read_file_dict(filename):
    """
    Reads the file in as a single dictionary, ignoring the first
    (header) row. The first column of each row becomes the key and the
    second column, converted to an int, becomes the value. Blank rows
    are skipped; if a key repeats, the last row wins.
    :param filename: string location of the file
    :return: dictionary mapping strings to ints
    :raises ValueError: if a second-column value is not a valid int
    :raises IndexError: if a non-blank row has fewer than two columns
    """
    data = {}
    # newline="" lets the csv module do its own line-ending handling
    with open(filename, "r", newline="") as file:
        reader = csv.reader(file)
        # ignore the headers (the default keeps an empty file from raising)
        next(reader, None)
        for row in reader:
            # skip empty / whitespace-only lines (e.g. a trailing blank
            # line) instead of crashing on a missing second column
            if not any(field.strip() for field in row):
                continue
            # the first item is the key, the second item is the value
            # and is converted to an int
            data[row[0]] = int(row[1])
    return data


def read_file_as_list_of_dicts(filename):
    """
    Reads the given file as a list of dictionaries, using the first
    row of headers as key names
    :param filename: string location of file
    :return: list of dicts mapping strings to strings
    """
    with open(filename, "r", newline="") as file:
        # DictReader pre-processes each row into a dictionary keyed by
        # the header row; list() reads all rows before the file closes
        return list(csv.DictReader(file))


def make_age_hist(data, age_col, bins):
    """
    Make a histogram of the ages in the data and display it
    :param data: list of dictionaries
    :param age_col: string name of column key associated with ages data
    :param bins: int number of bins for the histogram
    :return: none
    :raises KeyError: if a row has no age_col key
    :raises ValueError: if an age value is not a valid int
    """
    # get the ages as integers
    # this is like accessing the ages column from the file
    ages = [int(dict_row[age_col]) for dict_row in data]

    plt.hist(ages, bins=bins)
    plt.xlabel("ages")
    plt.ylabel("count")
    plt.show()


def main():
    """
    Demonstrates both ways of reading FILENAME (as one dictionary and
    as a list of dictionaries), printing each, then plots the ages.
    """
    print("Lecture 15 - reading files into dicts")

    # read the file into a dictionary
    data = read_file_dict(FILENAME)
    print(data)

    # access all of the keys and make into a list
    print(list(data.keys()))

    # access all of the values and make into a list
    print(list(data.values()))

    # read the file into a list of dictionaries, using the
    # column headers as keys for each individual dictionary
    list_dicts_data = read_file_as_list_of_dicts(FILENAME)
    print(list_dicts_data)

    # make our beautiful visualization!
    make_age_hist(list_dicts_data, AGE_COL, NUM_BINS)


# only run the demo when executed as a script, not when imported
if __name__ == "__main__":
    main()