#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Felix Muzny
DS 2000
Lecture 10
10/11/2022

A program to read and analyze earnings data for City of Boston employees.
"""
# start with our imports
import matplotlib.pyplot as plt


# define our constants
# DATAFILE is the name of the comma-separated file that main() passes
# to read_data.
# if I want to switch between data sets
# this is the only string that I need to edit
DATAFILE = "boston_earnings.csv"


# define our non-main functions next
def read_data(filename):
    """
    Reads in the data in a given file
    and stores the values in a list of lists of strings.
    Assumes that commas separate row items in the given file.
    Each line has its leading/trailing whitespace stripped and is
    then split on every comma; quoted fields are not treated specially.

    Parameters
    ----------
    filename : string
        name of the file

    Returns
    -------
    data : list of lists
        list of lists of string values for all lines in the file

    """
    data = []
    # the with-statement guarantees the file is closed when we are done,
    # even if something goes wrong while reading
    with open(filename, "r") as file:
        for line in file:
            pieces = line.strip().split(",")
            data.append(pieces)
    return data

# updated this function
def get_column(data, column_index):
    """
    Pull a single column out of a 2d list (a list of rows).
    Assumes every row is long enough that the given column
    index is valid for it.

    Parameters
    ----------
    data : list of lists
        full dataset as a 2d list
    column_index : int
        0-indexed position of the column to extract

    Returns
    -------
    column : list
        the value found at column_index in each row, in row order
    """
    # one entry per row, taken from the requested position
    return [row[column_index] for row in data]
    

def convert_to_floats(ls):
    """
    Build a new list containing every element of the given list
    converted to a float. Empty strings are skipped, so the result
    may be shorter than the original list.

    Parameters
    ----------
    ls : list
        list of values to be converted to float

    Returns
    -------
    ls_as_flts : list
        list of the non-empty values converted to float

    """
    # Empty strings are dropped rather than replaced. For certain kinds
    # of data, substituting a 0 might be the better choice instead.
    return [float(item) for item in ls if item != ""]


def average(ls):
    """
    Compute the mean of a list of numbers.

    Parameters
    ----------
    ls : list
        list of numbers

    Returns
    -------
    float
        Sum of the values divided by how many values there are.

    Raises
    ------
    ZeroDivisionError
        If the given list is empty.

    """
    total = sum(ls)
    count = len(ls)
    return total / count

def create_hist(number_ls, x_axis, y_axis, title):
    """
    Draw and display a 10-bin histogram of the given numbers,
    with axis labels and a title.

    Parameters
    ----------
    number_ls : list
        values to be used in the histogram
    x_axis : str
        label for the x-axis
    y_axis : str
        label for the y-axis
    title : str
        title of the graph

    Returns
    -------
    None.

    """
    plt.hist(number_ls, bins=10)
    # apply the three pieces of text in the order: x label, y label, title
    text_setters = (
        (plt.xlabel, x_axis),
        (plt.ylabel, y_axis),
        (plt.title, title),
    )
    for set_text, text in text_setters:
        set_text(text)
    plt.show()

# define our main function
# start here
def main():
    """
    Read DATAFILE, print a quick look at its contents, then report the
    average of, and show a histogram for, column 1 (used as salaries)
    and column 2 (used as overtime).
    """
    # load everything into a list of lists and explore the content
    rows = read_data(DATAFILE)
    print(len(rows))
    print(rows[:5])
    print()

    # salaries: pull the column, convert it, then summarize it
    salary_nums = convert_to_floats(get_column(rows, 1))
    print("This is the average salary:", round(average(salary_nums), 2))
    # make a histogram to display this
    create_hist(
        salary_nums,
        "Salaries ($)",
        "Number of employees with this Salary",
        "Salary counts for the City of Boston",
    )

    # overtime: reuse the rows already in memory instead of
    # re-opening the file
    overtime_nums = convert_to_floats(get_column(rows, 2))
    print("This is the average overtime:", round(average(overtime_nums), 2))
    # make a histogram to display this
    create_hist(
        overtime_nums,
        "Overtimes ($)",
        "Number of employees with this Overtime",
        "Overtime counts for the City of Boston",
    )

# call our main function, but only when this file is run as a script;
# importing this file from another module should not read the data file
# or open any plot windows
if __name__ == "__main__":
    main()
