# Import pandas and read the csv file into a dataframe
import pandas as pd
df = pd.read_csv("huskies_hockey_stats.csv")


df.head(5)


df.dtypes

Season        int64
Date         object
Opponent     object
W/L          object
Score        object
G             int64
A             int64
PTS           int64
SH            int64
PCT         float64
NO/MIN       object
PP            int64
SH.1          int64
EN            int64
GW            int64
GTG           int64
HT            int64
UA            int64
BLK           int64
dtype: object


type(df.dtypes)

pandas.core.series.Series


# Summarize the entire dataframe
df.describe()


# Summarize just one column
df["G"].describe()

count    251.000000
mean       3.430279
std        2.013484
min        0.000000
25%        2.000000
50%        3.000000
75%        5.000000
max       12.000000
Name: G, dtype: float64


# Don't want the HT column anymore? Drop it
df_col_dropped = df.drop("HT", axis = 1)
df_col_dropped.head(5)


# Try to drop a column with axis = 0? Nope.
df_axis = df.drop("PP", axis = 0)
df_axis.head(5)

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
/var/folders/81/695611dx35n9wp1vjkd8y8h00000gn/T/ipykernel_90377/2564734204.py in <module>
      1 # Try to drop a column with axis = 0? Nope.
----> 2 df_axis = df.drop("PP", axis = 0)
      3 df_axis.head(5)

/opt/anaconda3/lib/python3.9/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
    309                     stacklevel=stacklevel,
    310                 )
--> 311             return func(*args, **kwargs)
    312 
    313         return wrapper

/opt/anaconda3/lib/python3.9/site-packages/pandas/core/frame.py in drop(self, labels, axis, index, columns, level, inplace, errors)
   4904                 weight  1.0     0.8
   4905         """
-> 4906         return super().drop(
   4907             labels=labels,
   4908             axis=axis,

/opt/anaconda3/lib/python3.9/site-packages/pandas/core/generic.py in drop(self, labels, axis, index, columns, level, inplace, errors)
   4148         for axis, labels in axes.items():
   4149             if labels is not None:
-> 4150                 obj = obj._drop_axis(labels, axis, level=level, errors=errors)
   4151 
   4152         if inplace:

/opt/anaconda3/lib/python3.9/site-packages/pandas/core/generic.py in _drop_axis(self, labels, axis, level, errors)
   4183                 new_axis = axis.drop(labels, level=level, errors=errors)
   4184             else:
-> 4185                 new_axis = axis.drop(labels, errors=errors)
   4186             result = self.reindex(**{axis_name: new_axis})
   4187 

/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py in drop(self, labels, errors)
   6015         if mask.any():
   6016             if errors != "ignore":
-> 6017                 raise KeyError(f"{labels[mask]} not found in axis")
   6018             indexer = indexer[~mask]
   6019         return self.delete(indexer)

KeyError: "['PP'] not found in axis"


# Try to drop a row with axis = 0? Now we're talking
df_row_dropped = df.drop(0, axis = 0)
df_row_dropped.head(5)


# Collect all the rows where we won (returns another dataframe)
wins = df.loc[(df["W/L"] == "W")]
wins.head(5)


# Iterate over all the rows. Let's look at Opponent in each row
for i in range(len(df)):
    print(df.loc[i, "Opponent"])


# Add a new column that's the sum of two other columns
df["new col"] = df["G"] + df["GW"]
df.head()

	Season	Date	Opponent	W/L	Score	G	A	PTS	SH	PCT	NO/MIN	SH.1	EN	GW	UA	BLK
0	2020	12/11/20	at Boston College	W	4-1	4	7	11	36	0.111	2-4	0	0	1	0	10
1	2020	12/13/20	Boston College	L	1-2	1	2	3	33	0.030	5-10	0	0	0	0	15
2	2020	01/03/21	Maine	W	3-0	3	3	6	47	0.064	1-2	0	0	1	1	6
3	2020	01/04/21	Maine	W	1-0	1	1	2	41	0.024	4-8	0	0	1	0	15
4	2020	01/08/21	New Hampshire	W	3-1	3	3	6	42	0.071	3-6	1	1	1	0	16

	Season	Date	Opponent	W/L	Score	G	A	PTS	SH	PCT	NO/MIN	SH.1	EN	GW	UA	BLK
0	2020	12/11/20	at Boston College	W	4-1	4	7	11	36	0.111	2-4	0	0	1	0	10
1	2020	12/13/20	Boston College	L	1-2	1	2	3	33	0.030	5-10	0	0	0	0	15
2	2020	01/03/21	Maine	W	3-0	3	3	6	47	0.064	1-2	0	0	1	1	6
3	2020	01/04/21	Maine	W	1-0	1	1	2	41	0.024	4-8	0	0	1	0	15
4	2020	01/08/21	New Hampshire	W	3-1	3	3	6	42	0.071	3-6	1	1	1	0	16

	Season	Date	Opponent	W/L	Score	G	A	PTS	SH	PCT	NO/MIN	PP	SH.1	EN	GW	UA	BLK
1	2020	12/13/20	Boston College	L	1-2	1	2	3	33	0.030	5-10	0	0	0	0	0	15
2	2020	01/03/21	Maine	W	3-0	3	3	6	47	0.064	1-2	0	0	0	1	1	6
3	2020	01/04/21	Maine	W	1-0	1	1	2	41	0.024	4-8	0	0	0	1	0	15
4	2020	01/08/21	New Hampshire	W	3-1	3	3	6	42	0.071	3-6	0	1	1	1	0	16
5	2020	01/09/21	at New Hampshire	T	2-2	2	3	5	32	0.063	3-6	1	0	0	0	0	6

	Season	Date	Opponent	W/L	Score	G	A	PTS	SH	PCT	NO/MIN	SH.1	EN	GW	UA	BLK	new col
0	2020	12/11/20	at Boston College	W	4-1	4	7	11	36	0.111	2-4	0	0	1	0	10	5
1	2020	12/13/20	Boston College	L	1-2	1	2	3	33	0.030	5-10	0	0	0	0	15	1
2	2020	01/03/21	Maine	W	3-0	3	3	6	47	0.064	1-2	0	0	1	1	6	4
3	2020	01/04/21	Maine	W	1-0	1	1	2	41	0.024	4-8	0	0	1	0	15	2
4	2020	01/08/21	New Hampshire	W	3-1	3	3	6	42	0.071	3-6	1	1	1	0	16	4

	Season	G	A	PTS	SH	PCT	PP	SH.1	EN	GW	GTG	HT	UA	BLK
count	251.000000	251.000000	251.000000	251.000000	251.000000	251.000000	251.000000	251.000000	251.000000	251.000000	251.000000	251.000000	251.000000	251.000000
mean	2016.872510	3.430279	5.494024	8.924303	33.924303	0.102920	0.693227	0.159363	0.139442	0.653386	0.035857	0.067729	0.286853	13.581673
std	1.922415	2.013484	3.514963	5.449243	8.411792	0.059156	0.832775	0.408047	0.380103	0.476842	0.186304	0.251782	0.519025	5.519810
min	2014.000000	0.000000	0.000000	0.000000	16.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	3.000000
25%	2015.000000	2.000000	3.000000	5.000000	28.000000	0.063000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	9.000000
50%	2017.000000	3.000000	5.000000	8.000000	33.000000	0.095000	1.000000	0.000000	0.000000	1.000000	0.000000	0.000000	0.000000	13.000000
75%	2018.500000	5.000000	7.000000	12.000000	38.000000	0.138500	1.000000	0.000000	0.000000	1.000000	0.000000	0.000000	1.000000	17.000000
max	2020.000000	12.000000	21.000000	33.000000	64.000000	0.345000	4.000000	2.000000	2.000000	1.000000	1.000000	1.000000	3.000000	31.000000