#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Felix Muzny
11/1/2022
DS 2000
Lecture 16 - string functions, data, designing programs

Logistics:
- Homework 7 is due Friday @ 9pm
    - Dictionaries are required.
- Quiz 7 is available.
- remote attendance (https://bit.ly/remote-ds2000-muzny)

Three ways to participate (please do one of these!)
1) via the PollEverywhere website: https://pollev.com/muzny
2) via text: text "muzny" to the number 22333 to join the session
3) via Poll Everywhere app (available for iOS or Android)
"""

"""
Warm-up 0: Lecture 15
---
Have you watched the asynchronous lecture from last Friday?

A. yes
B. no
C. no, but I'll do that later today
"""

"""
Strings!
---
What do we know so far?
"""

"""
Iterating through strings
---
"""

"""
Using string functions
---
"""

"""
Writing a string function
---
Write a function, count, that takes in one string and one letter and counts
how many times that letter occurs in the given string.
"""

"""
Data: what questions are we asking?
---
Take a look at the dataset:
https://provost.northeastern.edu/uds/facts/common-data-set/

What questions do we have?

For any data set that someone gives you, what questions would you want the
answers to before you start any analysis?
"""

"""
Designing programs & Functions (warm-ups from Sec 2)
---
"""

"""
Given the following two functions, write one new function that has the same
behavior and can be used for both cases.
"""
# *~*~ program 1 ~*~*
# import data_utils

# def max_rotten(data):
#     rottens = data_utils.get_column(data, 2)
#     return max(rottens)

# def max_imdb(data):
#     imdbs = data_utils.get_column(data, 3)
#     return max(imdbs)

# def main():
#     data = data_utils.read_data("movies.csv", skip_header = True,
#                                 type_casts = [str, str, int, float])
#     highest_rotten = max_rotten(data)
#     print("maximum rotten tomatoes:", highest_rotten)
#     highest_imdb = max_imdb(data)
#     print("maximum imdb:", highest_imdb)

# if __name__ == "__main__":
#     main()

"""
Given the following function, write two new functions that are more flexible
in combination than this singular function is by itself.
"""
# # *~*~ program 2 ~*~*
# import data_utils

# def max_rotten_R(data):
#     r_movies = []
#     for movie in data:
#         if movie[1] == "R":
#             r_movies.append(movie)
#     rottens = data_utils.get_column(r_movies, 2)
#     return max(rottens)

# def main():
#     data = data_utils.read_data("movies.csv", skip_header = True,
#                                 type_casts = [str, str, int, float])
#     highest_rotten_R = max_rotten_R(data)
#     print("maximum rotten tomatoes for R-rated films:", highest_rotten_R)

# if __name__ == "__main__":
#     main()

"""
Given the following function, write two new functions that are better
designed according to the principle that each function should do one of the
following three things:
- read in data
- do a calculation or a data manipulation
- display a result

# Update the corresponding main() function as needed!
"""
# # *~*~ program 3 ~*~*
# import data_utils

# def maximums(data):
#     column_nums = len(data[0])
#     for col_num in range(column_nums):
#         column = data_utils.get_column(data, col_num)
#         maximum_val = max(column)
#         print("The maximum value of this column is:", maximum_val)

# def main():
#     data = data_utils.read_data("movies.csv", skip_header = True,
#                                 type_casts = [str, str, int, float])
#     maximums(data)

# if __name__ == "__main__":
#     main()

"""
Next time
- string processing & string manipulation
- data and data sources
"""