#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Felix Muzny
12/2/2022
DS 2000
Lecture 23 - timing experiments

Some code to play around with time!
"""
import data_utils
import time
import pandas as pd
import matplotlib.pyplot as plt
import random


# for students in section 2
def generate_coordinate_data(rows):
    """
    Builds a two-dimensional list of random [x, y] coordinates, where every
    x and y is an integer between 0 and 100 (both ends included).

    Parameters
    ----------
    rows : int
        How many [x, y] pairs the data should contain.

    Returns
    -------
    data : list
        list of two-element lists of integers.
    """
    # one [x, y] pair per requested row; x is drawn before y
    return [
        [random.randint(0, 100), random.randint(0, 100)]
        for _ in range(rows)
    ]

# sections 3 & 4
def generate_list(num_elements):
    """
    Builds a one-dimensional list of random integers, each between
    0 and 100 (both ends included).

    Parameters
    ----------
    num_elements : int
        how many integers the list should hold.

    Returns
    -------
    data : list
        the generated list of integers.

    """
    data = []
    for _ in range(num_elements):
        data.append(random.randint(0, 100))

    # The same list can be written as a list comprehension:
    #     data = [random.randint(0, 100) for i in range(num_elements)]
    # and the loop variable may be used inside the comprehension too:
    #     data = [i for i in range(num_elements)]
    return data

def main():
    """
    Runs the lecture's timing experiments and prints/plots the results:

    1. times max() on randomly generated lists of increasing length and
       plots elapsed time against list length,
    2. times loading "trips.csv" with data_utils.read_data_dict versus
       pandas.read_csv,
    3. times fetching the "duration" column with data_utils.get_column
       versus pandas column access.

    All measurements use time.perf_counter(), which is monotonic and uses
    the highest-resolution clock available. time.time() is a wall clock
    that can be too coarse for very short operations (a quick call may
    appear to take 0.0 seconds).
    """
    # To test out:
    # - loading data (trips)
    # - getting a column

    # timing questions (in general)
    # will the amount of time increase with the number of elements
    # in the list?
    # if yes, will it increase...
    # linearly?
    # exponentially?
    # something else?

    ls = generate_list(100)
    print(ls)
    print()

    # len()
    # A. no increase <--- (python stores list length as an attribute)
    # B. linear increase
    # C. more than linear increase

    # let's time the len() function!

    # max()
    # A. no increase
    # B. linear increase  <----
    # C. more than linear increase

    xs = []
    ys = []
    for list_length in range(10000, 100000, 10000):
        # build the list *before* starting the clock so that only max()
        # itself is being timed
        ls = generate_list(list_length)

        start = time.perf_counter()
        max(ls)  # result is discarded; only the elapsed time matters
        end = time.perf_counter()
        elapsed = end - start

        xs.append(list_length)
        ys.append(elapsed)
        print("Number of elements:", list_length)
        print(elapsed)
        print()

    plt.plot(xs, ys)
    plt.show()

    # data loading,
    # homegrown version 2.5x longer than pandas (as measured in lecture)

    # data utils functions
    start = time.perf_counter()
    # load the file with our own data_utils function
    data = data_utils.read_data_dict("trips.csv", {"duration": int,
                                                   "bike_id": int})
    end = time.perf_counter()
    duration = end - start
    print("Our data loading function took:", duration)
    print()

    # pandas functions
    start = time.perf_counter()
    # load the same file with pandas
    df = pd.read_csv("trips.csv")
    end = time.perf_counter()
    duration = end - start
    print("pandas data loading function took:", duration)
    print()

    # time the get_column and pandas column functions

    # get_column
    # 0.0137

    # pandas
    # A. >= 0.0137
    # B. 0.005 - 0.0137
    # C. 0.001 - 0.005  <--- 0.003, 0.0012
    # D. < 0.001 <- 0.00095

    # data utils functions
    start = time.perf_counter()
    # fetch the column with our own data_utils function
    durations = data_utils.get_column(data, "duration")
    end = time.perf_counter()
    duration = end - start
    print("Our get_column function took:", duration)
    print()

    # pandas functions
    start = time.perf_counter()
    # fetch the same column from the pandas DataFrame
    durations = df["duration"]
    end = time.perf_counter()
    duration = end - start
    print("pandas column access function took:", duration)
    print()
    

# Run the timing experiments only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()