'''
    DS2000
    Spring 2022
    Practicing with pandas for prez election data
    
    Useful from Pandas...
     - import pandas as pd

     - Read in a CSV file to a dataframe: pd.read_csv()
     - See the first few rows: df.head()
     - Summarize the quick stats: df.describe()
     - Keep only the columns I like: df = df[["col1", "col2"]]
     - Group together data with same col1,col2 values: df = df.groupby(["col1", "col2"])
'''

import pandas as pd
import matplotlib.pyplot as plt

# Name of the election data file that main() loads with pd.read_csv
PREZ = "1976-2020-president.csv"

def main() -> None:
    '''
    Explore the presidential election file and chart major-party votes.

    Reads PREZ into a dataframe, prints several views of it (first rows,
    column names, summary stats, dtypes), totals candidatevotes for each
    (year, party_simplified) pair, keeps only the DEMOCRAT and REPUBLICAN
    rows, and draws one line per party of total votes over the years.
    '''
    # Step one --- read in from the file to a dataframe
    # NOTE(review): the file has a .csv extension but is parsed as
    # tab-separated --- confirm the delimiter against the actual file
    df = pd.read_csv(PREZ, sep="\t")

    # Get a peek of what's in the file
    print(df.head(20))

    # What are the columns?
    print(df.columns)

    # Summary of numeric data
    print(df.describe())

    # What are the datatypes of my data?
    print(df.dtypes)

    # Keep only the columns I like
    df = df[["year", "state", "candidate", "candidatevotes", "party_simplified"]]
    print(df.head(20))
    print(df["candidate"].head(20))

    # Group together by year and party, adding up only the vote counts.
    # Selecting candidatevotes before .sum() keeps the text columns
    # (state, candidate) out of the aggregation, which a bare .sum()
    # would otherwise try to add up when numeric_only defaults to False.
    df = (
        df.groupby(["year", "party_simplified"])["candidatevotes"]
        .sum()
        .reset_index()
    )
    print(df.head(20))
    print()

    # Sort within each year by candidatevotes
    df = df.sort_values(by=["year", "candidatevotes"])
    print(df.head(20))
    print()

    # Get rid of any row where the party is not
    # a dem or a repub
    df = df[df["party_simplified"].isin(["DEMOCRAT", "REPUBLICAN"])]
    df = df.sort_values(by=["year", "party_simplified"])
    print(df.head(20))

    # Split into one dataframe per party. Each party is plotted against
    # its own year column, so the x and y values always have the same
    # length even if a party has no row for some year.
    dems = df[df["party_simplified"] == "DEMOCRAT"]
    reps = df[df["party_simplified"] == "REPUBLICAN"]

    # Make a line chart of votes over the years
    plt.plot(dems["year"], dems["candidatevotes"], "-o",
             color="blue", label="Democrat")
    plt.plot(reps["year"], reps["candidatevotes"], "-o",
             color="red", label="Republican")
    plt.xlabel("Year")
    plt.ylabel("Total votes")
    plt.title("Presidential votes by party")
    plt.legend()

    # Without this the figure is never displayed when run as a script
    plt.show()
    
# Run the analysis only when this file is executed as a script,
# so that importing the module does not read the file or plot
if __name__ == "__main__":
    main()