#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Felix Muzny
11/1/2022
DS 2000
Lecture 16 - string functions, data, designing programs

Logistics:
    - Homework 7 is due Friday @ 9pm
        - Dictionaries are required.
    
    - Quiz 7 is available.
        
    - remote attendance (https://bit.ly/remote-ds2000-muzny)
    
    
 Three ways to participate (please do one of these!)
 1) via the PollEverywhere website: https://pollev.com/muzny
 2) via text: text "muzny" to the number 22333 to join the session
 3) via Poll Everywhere app (available for iOS or Android)

Warm-up 0: Lecture 15
---

Have you watched the asynchronous lecture from last Friday?
A. yes
B. no
C. no, but I'll do that later today


Have you read the HW 7 assignment?
A. yes
B. no
C. no, but I'll do that later today

"""

"""
Strings!
---
What do we know so far?
    - characters that you can't do math with
    (even if they are "72")
    - grouped together using quotation marks
    (have an order)
    - when we read in data, we read it as strings
    - we find them everywhere (e.g. in our csv files)
    - certain data can only be represented as strings
    
    - for functions:
        - pass them to print()
        - input() both takes (as a parameter) and returns a string
        - we can use string functions like .upper() to get
        different versions of strings
        - casting functions like int() or float()
"""

# a small example string used by the string demos below
animal = "turtle"
print(animal)
# .upper() gives back an upper-cased copy; animal itself is not changed
print(animal.upper())

# ask how long a string is (the number of characters in it)
print(len(animal))

# get the first character in a string (positions start at 0)
print(animal[0])
# empty print() just adds a blank line to separate the output
print()

"""
Iterating through strings
---
Strings are 0-indexed
 012345 
"turtle"

"""
 
# with position/index:
# range(len(animal)) produces every valid index: 0 up to len(animal) - 1
for i in range(len(animal)):
    print(i, ":", animal[i])
print()

# we can also iterate by value (by character)
for letter in animal:
    print(letter)
print()

# OOOPS I FORGOT TO CALL SPLIT
# (intentional mistake for the demo: fruits is one long string,
# not a list of words -- fruits.split() would give the words)
fruits = "mango banana apple kiwi"

# OH NO, iterating by letter, not by word!
for fruit in fruits:
    # fruit = "m"
    # fruit[0] -> m
    print(fruit)
    # fruit is a single character here, so fruit[0] is that same
    # character: this check fires for every "a" in the string,
    # not once per fruit that starts with "a"
    if fruit[0] == "a":
        print("A FRUIT")

print()

"""
Using string functions
---

Python provides so many built in string functions
For all of them, we need to remember that strings are
**IMMUTABLE**.

This is different than lists.
"""
ls = [1, 5, 8, 97]
print(ls)
# once I've created a list, I can change its contents
# this means that it's mutable
# (replace the value at index 0 in the same list)
ls[0] = -999
print(ls)
# (add a value to the end of the same list)
ls.append(-999)
print(ls)
print()

# strings are immutable
animal = "cats are the most wonderful ever"
print(animal)
# the line below stays commented out because running it raises:
# TypeError: 'str' object does not support item assignment
# animal[0] = "b"
print(animal)

# creating a NEW STRING
# animal[1:] is every character from index 1 to the end, so this
# builds a separate string that starts with "b"; animal is unchanged
animal2 = "b" + animal[1:]
print(animal)
print(animal2)

# string functions never change the original string; ones like
# .upper() return a new string instead
upper_animal = animal.upper()
print(animal)
print(upper_animal)
# .isupper() returns a boolean rather than a string
print(upper_animal.isupper())

"""
Writing a string function
---

Write a function, count, that takes in one string and one 
letter and counts how many times that letter occurs in the 
given string.
"""


"""
Data: what questions are we asking?
---

Take a look at the dataset:
    https://provost.northeastern.edu/uds/facts/common-data-set/
    
What questions do we have?
    - is the data accurate?
    (where does it come from?)
    - of the total accepted students how many enrolled?
    - is there a csv available?
    - does the dataset contain the information that I'm 
    looking for?
    

For any data set that someone gives you, what questions 
would you want the answers to before you start any analysis?
    - where does the data come from?
    - who does the data represent?
    - who/what is missing from the data?
        - (where are transgender and non-binary people 
           represented in this data?)
    - what questions could we answer using this data?
        - (we could compare admissions statistics across
           racial and ethnic categories)
"""


"""
Designing programs & Functions (warm-ups from Sec 2)
---
"""


"""
    
Given the following two functions, write one new function that 
has the same behavior and can be used for both cases.
"""

# *~*~ program 1 ~*~*
# Remember that whenever we import data_utils, Python needs to be
# able to find data_utils.py (e.g. in the same folder as this file)
import data_utils

def get_column(data, column_index):
    """
    Pulls a single column out of a 2d list (a list of rows).

    Every row is expected to have the same length, and
    column_index is expected to be a valid position in each row.

    Parameters
    ----------
    data : list of lists
        the whole dataset, one sub-list per row
    column_index : int
        0-indexed position of the column to extract

    Returns
    -------
    column : list
        the value found at column_index in each row, in row order
    """
    # one value per row, taken from the requested position
    return [row[column_index] for row in data]

# This is where we started (roughly)
# def max_rotten(data):
#     col_vals = data_utils.get_column(data, 2)
#     return max(col_vals)
    
# def max_imdb(data):
#     col_vals = data_utils.get_column(data, 3)
#     return max(col_vals)

# This is the function that we ended up with!
def max_column(data, column_index):
    """
    Finds the largest value in one column of a 2d list.

    Parameters
    ----------
    data : list of lists
        full dataset as a 2d list
    column_index : int
        0-indexed number of the column to take the maximum of

    Returns
    -------
    the result of max() on the values that
    data_utils.get_column returns for that column
    """
    return max(data_utils.get_column(data, column_index))

def main():
    """
    Runs example program 1: reads movies.csv with data_utils,
    prints the data, then prints the maximum of column 2
    (labelled rotten tomatoes) and of column 3 (labelled imdb).
    """
    print("example program 1")
    movies = data_utils.read_data("movies.csv", skip_header=True,
                                  type_casts=[str, str, int, float])
    print(movies)
    print()

    # one flexible function, called once per column we care about
    print("maximum rotten tomatoes:", max_column(movies, 2))
    print("maximum imdb:", max_column(movies, 3))
    print()
    
# only call main() when this file is run directly
# (not when it is imported by another file)
if __name__ == "__main__":
    main()

# program design is better because
# - more flexible!
# - don't have to write a new function each time we need a max
# - less code 

"""    
Given the following function, write two new functions that 
are more flexible in combination than this singular function is by
itself.
"""

# *~*~ program 2 ~*~*
import data_utils

def get_rated_movies(data, rating, rating_index=1):
    """
    Selects the rows of a 2d list whose rating column matches
    the given rating.

    Parameters
    ----------
    data : list of lists
        full dataset as a 2d list, one sub-list per movie
    rating : str
        the rating to keep (e.g. "R" or "PG-13"); rows are kept
        when their rating value == rating
    rating_index : int, optional
        0-indexed column that holds the rating. Defaults to 1,
        the column that used to be hard-coded here.

    Returns
    -------
    movies : list of lists
        the matching rows, in their original order (the rows
        themselves are not copied)
    """
    # isolating the subset of data that I care about
    return [movie for movie in data if movie[rating_index] == rating]
    
def max_column(data, column_index):
    """
    Finds the largest value in one column of a 2d list.

    Parameters
    ----------
    data : list of lists
        dataset (or a subset of its rows) as a 2d list
    column_index : int
        0-indexed number of the column to take the maximum of

    Returns
    -------
    the result of max() on the values that
    data_utils.get_column returns for that column
    """
    return max(data_utils.get_column(data, column_index))
    
def main():
    """
    Runs example program 2: reads movies.csv with data_utils,
    then for the "R" rows and for the "PG-13" rows prints the
    maximum of column 2 (labelled rotten tomatoes).
    """
    print("example program 2")
    movies = data_utils.read_data("movies.csv", skip_header=True,
                                  type_casts=[str, str, int, float])

    # filter first, then take the max on just that subset
    best_r = max_column(get_rated_movies(movies, "R"), 2)
    print("maximum rotten tomatoes for R-rated films:", best_r)
    print()

    best_pg13 = max_column(get_rated_movies(movies, "PG-13"), 2)
    print("maximum rotten tomatoes for PG-13 -rated films:", best_pg13)
    print()
    
# only call main() when this file is run directly
# (not when it is imported by another file)
if __name__ == "__main__":
    main()
    
# program design is better because
# - these two tasks aren't inherently linked
# - more flexible and powerful program later
#   (one that we can expand w/o writing new functions)


"""    
Given the following function, write two new functions that 
are better designed according to the principle that each function
should do one of the following three things:
    - read in data
    - do a calculation or a data manipulation
    - display a result
    
# Update the corresponding main() function as needed!
"""

# *~*~ program 3 ~*~*
import data_utils

def maximums(data):
    """
    Calculates the maximum value of every column in a 2d list.

    Parameters
    ----------
    data : list of lists
        full dataset as a 2d list. The number of columns is
        taken from the length of the first row.

    Returns
    -------
    col_maxes : list
        one maximum per column, in column order. Empty when
        data has no rows.
    """
    # with no rows there is no first row to count columns from,
    # so there are no maximums to calculate
    if not data:
        return []

    # do a calculation
    # find the number of columns
    column_nums = len(data[0])
    return [max(data_utils.get_column(data, col_num))
            for col_num in range(column_nums)]

def report_maxes(max_vals):
    """
    Displays each maximum value on its own line.

    Parameters
    ----------
    max_vals : list
        the values to display, printed in the order given

    Returns
    -------
    None
    """
    # now I do the display
    label = "The maximum value of this column is:"
    for value in max_vals:
        print(label, value)
    
def graph_maxes(max_vals):
    """
    Placeholder for a second way of displaying the maximums.

    Right now it only prints a fixed message and does not
    use max_vals.
    """
    print("beautiful graph code!")

def main():
    """
    Runs example program 3: reads movies.csv with data_utils,
    calculates the column maximums once, then passes them to
    each of the display functions.
    """
    movies = data_utils.read_data("movies.csv", skip_header=True,
                                  type_casts=[str, str, int, float])

    # calculate once...
    column_maxes = maximums(movies)
    # ...then display the same results in different ways
    report_maxes(column_maxes)
    graph_maxes(column_maxes)
    
# only call main() when this file is run directly
# (not when it is imported by another file)
if __name__ == "__main__":
    main()

# program design is better because
# - these two tasks aren't inherently linked
# - more flexible and powerful program later
# - easy to display results in different ways
# (or do more calculations later)


"""
plt.hist example
"""

import matplotlib.pyplot as plt


"""
Next time
    - string processing & string manipulation
    - data and data sources
"""