#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Felix Muzny
DS 2000
Lecture 10
10/11/2022

A program to read and analyze earnings data for city of boston employees
"""

# start with our imports
import matplotlib.pyplot as plt

# define our constants
# if I want to switch between data sets
# this is the only string that I need to edit
DATAFILE = "boston_earnings.csv"


# define our non-main functions next
def read_data(filename):
    """
    Reads in the data in a given file and stores the values
    in a list of lists of strings. Assumes that commas separate
    row items in the given file. Fields are split on every comma,
    so quoted fields containing commas are not supported.

    Parameters
    ----------
    filename : string
        name of the file

    Returns
    -------
    data : list of lists
        list of lists of string values for all lines in the file.
        A blank line in the file produces the one-element row [""].

    """
    data = []
    # the with-block guarantees the file is closed, even if an
    # error occurs while we are still reading from it
    with open(filename, "r") as file:
        for line in file:
            # strip() removes the trailing newline (and any other
            # leading/trailing whitespace) before splitting
            pieces = line.strip().split(",")
            data.append(pieces)
    return data


def get_column(data, column_index):
    """
    Reads in the data from one column in a given list of lists.
    Assumes that all sub-lists have the same length and that the
    given column index is a valid column index.

    Parameters
    ----------
    data : list of lists
        full dataset as a 2d list
    column_index : int
        0-indexed number indicating which column to read

    Returns
    -------
    column : list
        list of values for all rows for the target column

    Raises
    ------
    IndexError
        if any row has no element at column_index

    """
    return [row[column_index] for row in data]


def convert_to_floats(ls):
    """
    Create a new list with all list elements converted into floats.
    Empty strings are ignored. This does not guarantee that the
    resulting list has the same number of elements as the original
    list.

    Skipping is the right choice when an empty string means
    "no data recorded". For certain kinds of data we might instead
    want to replace empty strings with a 0.

    Parameters
    ----------
    ls : list
        list of values to be converted to float

    Returns
    -------
    ls_as_flts : list
        list of the non-empty values converted to float

    Raises
    ------
    ValueError
        if a non-empty string element cannot be parsed by float()

    """
    # only the exact empty string is skipped; anything else is
    # handed to float() as-is
    return [float(element) for element in ls if element != ""]


def average(ls):
    """
    Calculate the average of a list of values.

    Parameters
    ----------
    ls : list
        list of numbers

    Returns
    -------
    float
        Average value contained in the given list.

    Raises
    ------
    ZeroDivisionError
        if the given list is empty

    """
    return sum(ls) / len(ls)


def create_hist(number_ls, x_axis, y_axis, title):
    """
    Creates a histogram of the given list of number data
    with axis labels and a title, then displays it.

    Parameters
    ----------
    number_ls : list
        values to be used in the histogram
    x_axis : str
        label for the x-axis
    y_axis : str
        label for the y-axis
    title : str
        title of the graph

    Returns
    -------
    None.

    """
    plt.hist(number_ls, bins=10)
    plt.xlabel(x_axis)
    plt.ylabel(y_axis)
    plt.title(title)
    plt.show()


# define our main function
# start here
def main():
    """
    Read the earnings data, then report the average and show a
    histogram for the salary column (index 1) and the overtime
    column (index 2).
    """
    # read in the data to a list of lists
    # and explore the content
    data = read_data(DATAFILE)
    print(len(data))
    print(data[:5])
    print()

    salaries = get_column(data, 1)
    salary_nums = convert_to_floats(salaries)
    avg_salary = average(salary_nums)
    print("This is the average salary:", round(avg_salary, 2))

    # make a histogram to display this
    create_hist(salary_nums, "Salaries ($)",
                "Number of employees with this Salary",
                "Salary counts for the City of Boston")

    # using data here instead of re-opening the file
    overtime = get_column(data, 2)
    overtime = convert_to_floats(overtime)
    avg_over = average(overtime)
    print("This is the average overtime:", round(avg_over, 2))

    # make a histogram to display this
    create_hist(overtime, "Overtimes ($)",
                "Number of employees with this Overtime",
                "Overtime counts for the City of Boston")


# call our main function, but only when this file is run as a
# script -- importing it from another file should not read the
# data or open any plots
if __name__ == "__main__":
    main()