In [6]:
# Import pandas and read the csv file into a dataframe
import pandas as pd
df = pd.read_csv("huskies_hockey_stats.csv")
In [7]:
df.head(5)
Out[7]:
Season Date Opponent W/L Score G A PTS SH PCT NO/MIN PP SH.1 EN GW GTG HT UA BLK
0 2020 12/11/20 at Boston College W 4-1 4 7 11 36 0.111 2-4 0 0 0 1 0 0 0 10
1 2020 12/13/20 Boston College L 1-2 1 2 3 33 0.030 5-10 0 0 0 0 0 0 0 15
2 2020 01/03/21 Maine W 3-0 3 3 6 47 0.064 1-2 0 0 0 1 0 0 1 6
3 2020 01/04/21 Maine W 1-0 1 1 2 41 0.024 4-8 0 0 0 1 0 0 0 15
4 2020 01/08/21 New Hampshire W 3-1 3 3 6 42 0.071 3-6 0 1 1 1 0 0 0 16
In [8]:
df.dtypes
Out[8]:
Season        int64
Date         object
Opponent     object
W/L          object
Score        object
G             int64
A             int64
PTS           int64
SH            int64
PCT         float64
NO/MIN       object
PP            int64
SH.1          int64
EN            int64
GW            int64
GTG           int64
HT            int64
UA            int64
BLK           int64
dtype: object
In [9]:
type(df.dtypes)
Out[9]:
pandas.core.series.Series
In [10]:
# Summarize the entire dataframe
df.describe()
Out[10]:
Season G A PTS SH PCT PP SH.1 EN GW GTG HT UA BLK
count 251.000000 251.000000 251.000000 251.000000 251.000000 251.000000 251.000000 251.000000 251.000000 251.000000 251.000000 251.000000 251.000000 251.000000
mean 2016.872510 3.430279 5.494024 8.924303 33.924303 0.102920 0.693227 0.159363 0.139442 0.653386 0.035857 0.067729 0.286853 13.581673
std 1.922415 2.013484 3.514963 5.449243 8.411792 0.059156 0.832775 0.408047 0.380103 0.476842 0.186304 0.251782 0.519025 5.519810
min 2014.000000 0.000000 0.000000 0.000000 16.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 3.000000
25% 2015.000000 2.000000 3.000000 5.000000 28.000000 0.063000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 9.000000
50% 2017.000000 3.000000 5.000000 8.000000 33.000000 0.095000 1.000000 0.000000 0.000000 1.000000 0.000000 0.000000 0.000000 13.000000
75% 2018.500000 5.000000 7.000000 12.000000 38.000000 0.138500 1.000000 0.000000 0.000000 1.000000 0.000000 0.000000 1.000000 17.000000
max 2020.000000 12.000000 21.000000 33.000000 64.000000 0.345000 4.000000 2.000000 2.000000 1.000000 1.000000 1.000000 3.000000 31.000000
In [11]:
# Summarize just one column
df["G"].describe()
Out[11]:
count    251.000000
mean       3.430279
std        2.013484
min        0.000000
25%        2.000000
50%        3.000000
75%        5.000000
max       12.000000
Name: G, dtype: float64
In [12]:
# Don't want the HT column anymore? Drop it
df_col_dropped = df.drop("HT", axis = 1)
df_col_dropped.head(5)
Out[12]:
Season Date Opponent W/L Score G A PTS SH PCT NO/MIN PP SH.1 EN GW GTG UA BLK
0 2020 12/11/20 at Boston College W 4-1 4 7 11 36 0.111 2-4 0 0 0 1 0 0 10
1 2020 12/13/20 Boston College L 1-2 1 2 3 33 0.030 5-10 0 0 0 0 0 0 15
2 2020 01/03/21 Maine W 3-0 3 3 6 47 0.064 1-2 0 0 0 1 0 1 6
3 2020 01/04/21 Maine W 1-0 1 1 2 41 0.024 4-8 0 0 0 1 0 0 15
4 2020 01/08/21 New Hampshire W 3-1 3 3 6 42 0.071 3-6 0 1 1 1 0 0 16
In [13]:
# Try to drop a column with axis = 0? Nope.
df_axis = df.drop("PP", axis = 0)
df_axis.head(5)
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
/var/folders/81/695611dx35n9wp1vjkd8y8h00000gn/T/ipykernel_90377/2564734204.py in <module>
      1 # Try to drop a column with axis = 0? Nope.
----> 2 df_axis = df.drop("PP", axis = 0)
      3 df_axis.head(5)

/opt/anaconda3/lib/python3.9/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
    309                     stacklevel=stacklevel,
    310                 )
--> 311             return func(*args, **kwargs)
    312 
    313         return wrapper

/opt/anaconda3/lib/python3.9/site-packages/pandas/core/frame.py in drop(self, labels, axis, index, columns, level, inplace, errors)
   4904                 weight  1.0     0.8
   4905         """
-> 4906         return super().drop(
   4907             labels=labels,
   4908             axis=axis,

/opt/anaconda3/lib/python3.9/site-packages/pandas/core/generic.py in drop(self, labels, axis, index, columns, level, inplace, errors)
   4148         for axis, labels in axes.items():
   4149             if labels is not None:
-> 4150                 obj = obj._drop_axis(labels, axis, level=level, errors=errors)
   4151 
   4152         if inplace:

/opt/anaconda3/lib/python3.9/site-packages/pandas/core/generic.py in _drop_axis(self, labels, axis, level, errors)
   4183                 new_axis = axis.drop(labels, level=level, errors=errors)
   4184             else:
-> 4185                 new_axis = axis.drop(labels, errors=errors)
   4186             result = self.reindex(**{axis_name: new_axis})
   4187 

/opt/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py in drop(self, labels, errors)
   6015         if mask.any():
   6016             if errors != "ignore":
-> 6017                 raise KeyError(f"{labels[mask]} not found in axis")
   6018             indexer = indexer[~mask]
   6019         return self.delete(indexer)

KeyError: "['PP'] not found in axis"
In [14]:
# Try to drop a row with axis = 0? Now we're talking
df_row_dropped = df.drop(0, axis = 0)
df_row_dropped.head(5)
Out[14]:
Season Date Opponent W/L Score G A PTS SH PCT NO/MIN PP SH.1 EN GW GTG HT UA BLK
1 2020 12/13/20 Boston College L 1-2 1 2 3 33 0.030 5-10 0 0 0 0 0 0 0 15
2 2020 01/03/21 Maine W 3-0 3 3 6 47 0.064 1-2 0 0 0 1 0 0 1 6
3 2020 01/04/21 Maine W 1-0 1 1 2 41 0.024 4-8 0 0 0 1 0 0 0 15
4 2020 01/08/21 New Hampshire W 3-1 3 3 6 42 0.071 3-6 0 1 1 1 0 0 0 16
5 2020 01/09/21 at New Hampshire T 2-2 2 3 5 32 0.063 3-6 1 0 0 0 0 0 0 6
In [ ]:
# Collect all the rows where we won (returns another dataframe)
wins = df.loc[(df["W/L"] == "W")]
wins.head(5)
In [ ]:
# Iterate over all the rows. Let's look at Opponent in each row
for i in range(len(df)):
    print(df.loc[i, "Opponent"])
In [15]:
# Add a new column that's the sum of two other columns
df["new col"] = df["G"] + df["GW"]
df.head()
Out[15]:
Season Date Opponent W/L Score G A PTS SH PCT NO/MIN PP SH.1 EN GW GTG HT UA BLK new col
0 2020 12/11/20 at Boston College W 4-1 4 7 11 36 0.111 2-4 0 0 0 1 0 0 0 10 5
1 2020 12/13/20 Boston College L 1-2 1 2 3 33 0.030 5-10 0 0 0 0 0 0 0 15 1
2 2020 01/03/21 Maine W 3-0 3 3 6 47 0.064 1-2 0 0 0 1 0 0 1 6 4
3 2020 01/04/21 Maine W 1-0 1 1 2 41 0.024 4-8 0 0 0 1 0 0 0 15 2
4 2020 01/08/21 New Hampshire W 3-1 3 3 6 42 0.071 3-6 0 1 1 1 0 0 0 16 4
In [ ]: