#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Felix Muzny
DS 2000
Lecture 9
10/07/2022

A program to read and analyze earnings data for City of Boston employees.
"""

# define our constants
# if I want to switch between data sets
# this is the only string that I need to edit
DATA = "boston_earnings.csv"


# define our non-main functions next
def read_data_column(filename, column_index):
    """
    Read one column of a comma-separated file into a list.

    Values are stored from top to bottom of the file (line 1's value for
    the target column will be at index 0 of the list). Every line is
    included, and each line is split on every comma (quoted fields are
    not treated specially).

    Parameters
    ----------
    filename : string
        name of the file
    column_index : int
        0-indexed number indicating which column to read

    Returns
    -------
    column : list
        list of string values for all lines for the target column

    Raises
    ------
    IndexError
        if a line has fewer than ``column_index + 1`` comma-separated
        pieces
    """
    column = []
    # "with" guarantees the file is closed, even if a line is malformed
    with open(filename, "r") as file:
        for line in file:
            pieces = line.strip().split(",")
            column.append(pieces[column_index])
    return column


def convert_to_floats(ls):
    """
    Create a new list with all list elements converted into floats.

    Empty strings are ignored, so the resulting list is not guaranteed to
    have the same number of elements as the original list. (For certain
    kinds of data, replacing empty strings with 0 instead of skipping
    them might be the better choice.)

    Parameters
    ----------
    ls : list
        list of values to be converted to float

    Returns
    -------
    ls_as_flts : list
        list of the non-empty values, converted to float

    Raises
    ------
    ValueError
        if a non-empty element cannot be parsed as a float
    """
    return [float(element) for element in ls if element != ""]


def average(ls):
    """
    Calculate the average of a list of values.

    Parameters
    ----------
    ls : list
        list of numbers

    Returns
    -------
    float
        Average value contained in the given list.

    Raises
    ------
    ZeroDivisionError
        if the list is empty
    """
    return sum(ls) / len(ls)


def create_hist(number_ls, x_axis, y_axis, title):
    """
    Create and show a 10-bin histogram of the given list of number data
    with axis labels and a title!

    Parameters
    ----------
    number_ls : list
        values to be used in the histogram
    x_axis : str
        label for the x-axis
    y_axis : str
        label for the y-axis
    title : str
        title of the graph

    Returns
    -------
    None.
    """
    # Imported here rather than at module level so that the data helpers
    # in this file can be imported and used without loading matplotlib.
    import matplotlib.pyplot as plt

    plt.hist(number_ls, bins=10)
    plt.xlabel(x_axis)
    plt.ylabel(y_axis)
    plt.title(title)
    plt.show()


def _report_column(column_index, label, x_axis, y_axis, title):
    """
    Read one numeric column of DATA, print its average (rounded to two
    decimal places) and display a histogram of its values.

    Parameters
    ----------
    column_index : int
        0-indexed column of DATA to analyze
    label : str
        lowercase name of the quantity, used in the printed message
    x_axis : str
        label for the histogram's x-axis
    y_axis : str
        label for the histogram's y-axis
    title : str
        title of the histogram

    Returns
    -------
    None.
    """
    values = convert_to_floats(read_data_column(DATA, column_index))
    print("This is the average " + label + ":", round(average(values), 2))
    # make a histogram to display this
    create_hist(values, x_axis, y_axis, title)


# define our main function
# start here
def main():
    """
    Report the average and show a histogram for the salary column
    (column 1 of DATA), then do the same for the overtime column
    (column 2 of DATA).
    """
    _report_column(1, "salary",
                   "Salaries ($)",
                   "Number of employees with this Salary",
                   "Salary counts for the City of Boston")

    # the same tasks, this time for overtime
    _report_column(2, "overtime",
                   "Overtimes ($)",
                   "Number of employees with this Overtime",
                   "Overtime counts for the City of Boston")


# call our main function (only when run as a script, not when imported)
if __name__ == "__main__":
    main()