import pandas as pd

# Load the entire pitching data set from the CSV file into a DataFrame.
df_pitching = pd.read_csv(filepath_or_buffer="Pitching.csv")

# Show the complete, unmodified DataFrame.
df_pitching


# Drop the columns that are not needed, so the analysis only keeps the
# statistics and identifiers that are useful for drawing conclusions that can
# help pitchers improve. The DataFrame is modified in place.
df_pitching.drop(
    columns=[
        "stint", "GS", "H", "HR", "IBB", "WP", "HBP",
        "BK", "BFP", "GF", "R", "SH", "SF", "GIDP",
    ],
    inplace=True,
)


# Data dictionary for the pitching DataFrame: maps each column name that is
# kept in df_pitching to a short description of what that column represents.
# Keys must match the DataFrame column names exactly (note "lgID" starts with a
# lower-case L, not a capital I) so descriptions can be looked up by column.
pitching_dict = {
    "playerID": "The first five letters of the pitchers last name,then the first two letters of their first name, followed by a number starting from 01 and increasing for each repeated name.",
    "yearID": "The year represented that the player pitched in.",
    "teamID": "Team that the player is on represented by a 3 letter symbol.",
    "lgID": "League that the team is a part of.",
    "W": "Number of wins.",
    "L": "Number of losses.",
    "G": "Number of Games.",
    "CG": "Number of complete games pitched.",
    "SHO": "Number of complete games that were a shutout.",
    "SV": "Number of saves.",
    # IPouts counts outs, not innings: one inning pitched equals three outs.
    "IPouts": "Number of outs pitched (innings pitched multiplied by 3).",
    "ER": "Number of Earned Runs.",
    "BB": "Number of walks.",
    "SO": "Number of strikeouts.",
    "BAOpp": "Batting average of the opponent.",
    "ERA": "Earned Run Average.",
}


# Displays the dictionary.
pitching_dict

{'playerID': 'The first five letters of the pitchers last name,then the first two letters of their first name, followed by a number starting from 01 and increasing for each repeated name.',
 'yearID': 'The year represented that the player pitched in.',
 'teamID': 'Team that the player is on represented by a 3 letter symbol.',
 'IgID': 'Conference that the team is a part of.',
 'W': 'Number of wins.',
 'L': 'Number of losses.',
 'G': 'Number of Games.',
 'CG': 'Number of complete games pitched.',
 'SHO': 'Number of complete games that were a shutout.',
 'SV': 'Number of saves.',
 'IPouts': 'Number of innins pitched with outs as a decimal.',
 'ER': 'Number of Earned Runs.',
 'BB': 'Number of walks.',
 'SO': 'Number of strikeouts.',
 'BAOpp': 'Batting average of the opponent.',
 'ERA': 'Earned Run Average.'}


# Displays the updated DataFrame after the unused columns were dropped; the
# columns that remain are the ones described in pitching_dict.
df_pitching

	playerID	yearID	stint	teamID	lgID	W	L	G	GS	CG	...	IBB	WP	HBP	BK	BFP	GF	R	SH	SF	GIDP
0	bechtge01	1871	1	PH1	NaN	1	2	3	3	2	...	NaN	NaN	NaN	0	NaN	NaN	42	NaN	NaN	NaN
1	brainas01	1871	1	WS3	NaN	12	15	30	30	30	...	NaN	NaN	NaN	0	NaN	NaN	292	NaN	NaN	NaN
2	fergubo01	1871	1	NY2	NaN	0	0	1	0	0	...	NaN	NaN	NaN	0	NaN	NaN	9	NaN	NaN	NaN
3	fishech01	1871	1	RC1	NaN	4	16	24	24	22	...	NaN	NaN	NaN	0	NaN	NaN	257	NaN	NaN	NaN
4	fleetfr01	1871	1	NY2	NaN	0	1	1	1	1	...	NaN	NaN	NaN	0	NaN	NaN	21	NaN	NaN	NaN
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
44134	youngch03	2015	1	KCA	AL	11	6	34	18	0	...	0.0	5.0	0.0	0	500.0	3.0	44	4.0	2.0	NaN
44135	zieglbr01	2015	1	ARI	NL	0	3	66	0	0	...	3.0	2.0	1.0	0	263.0	46.0	17	1.0	0.0	NaN
44136	zimmejo02	2015	1	WAS	NL	13	10	33	33	0	...	3.0	2.0	8.0	1	831.0	0.0	89	8.0	2.0	NaN
44137	zitoba01	2015	1	OAK	AL	0	0	3	2	0	...	0.0	0.0	0.0	0	37.0	1.0	8	0.0	0.0	NaN
44138	zychto01	2015	1	SEA	AL	0	0	13	1	0	...	0.0	1.0	2.0	0	76.0	4.0	6	0.0	0.0	NaN

	playerID	yearID	teamID	lgID	W	L	G	CG	SHO	SV	IPouts	ER	BB	SO	BAOpp	ERA
0	bechtge01	1871	PH1	NaN	1	2	3	2	0	0	78.0	23	11	1	NaN	7.96
1	brainas01	1871	WS3	NaN	12	15	30	30	0	0	792.0	132	37	13	NaN	4.50
2	fergubo01	1871	NY2	NaN	0	0	1	0	0	0	3.0	3	0	0	NaN	27.00
3	fishech01	1871	RC1	NaN	4	16	24	22	1	0	639.0	103	31	15	NaN	4.35
4	fleetfr01	1871	NY2	NaN	0	1	1	1	0	0	27.0	10	3	0	NaN	10.00
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
44134	youngch03	2015	KCA	AL	11	6	34	0	0	0	370.0	42	43	83	0.202	3.06
44135	zieglbr01	2015	ARI	NL	0	3	66	0	0	30	204.0	14	17	36	0.197	1.85
44136	zimmejo02	2015	WAS	NL	13	10	33	0	0	0	605.0	82	39	164	0.264	3.66
44137	zitoba01	2015	OAK	AL	0	0	3	0	0	0	21.0	8	6	2	0.387	10.29
44138	zychto01	2015	SEA	AL	0	0	13	0	0	0	55.0	5	3	24	0.239	2.45

MLB Pitching Data

MLB Pitchers and their statistics 1871-2015

Problem

Solution

Impact

Data

Potential Problems

Method