import pandas as pd


# import csv and txt files into dataframes

df_employee_benefits = pd.read_csv('employee-benefits-in-the-united-states-dataset.csv')

df_all_data = pd.read_csv ('nb.data.1.AllData.txt', sep='\t')
df_by_series_id = df_data = pd.read_csv ('nb.series.txt', sep='\t')

/var/folders/g6/xbp7f9210h5gpwlxw5mctj940000gn/T/ipykernel_14473/3926048682.py:3: DtypeWarning: Columns (11,22) have mixed types. Specify dtype option on import or set low_memory=False.
  df_employee_benefits = pd.read_csv('employee-benefits-in-the-united-states-dataset.csv')
/var/folders/g6/xbp7f9210h5gpwlxw5mctj940000gn/T/ipykernel_14473/3926048682.py:5: DtypeWarning: Columns (4) have mixed types. Specify dtype option on import or set low_memory=False.
  df_all_data = pd.read_csv ('nb.data.1.AllData.txt', sep='\t')


# contains 615,797 data points 
df_employee_benefits


df_all_data


# each row represents a unique instance of a combination of variables  
# each row is represented by a unique series id (95,118 series ids total)

df_by_series_id

	Series ID	Series title	Year	Estimate	Estimate footnote	Standard error	Standard error footnote	Survey code	Survey text	Ownership code	...	Occupation text	Subcell code	Job characteristic category	Job characteristic	Estimate code	Estimate text	Datatype code	Datatype text	Provision code	Provision text
0	NBU10000000000000028016	Percent of civilian workers with access to def...	2010.0	1	NaN	0.1	NaN	IP	Incidence and Key Provision Benefits	1.0	...	All occupations	0.0	All workers	All workers	0.0	Benefit combinations	28.0	Percent of workers with access	16	Defined benefit retirement and no medical care...
1	NBU10000000000000028016	Percent of civilian workers with access to def...	2011.0	1	NaN	0.1	NaN	IP	Incidence and Key Provision Benefits	1.0	...	All occupations	0.0	All workers	All workers	0.0	Benefit combinations	28.0	Percent of workers with access	16	Defined benefit retirement and no medical care...
2	NBU10000000000000028016	Percent of civilian workers with access to def...	2012.0	1	NaN	0.1	NaN	IP	Incidence and Key Provision Benefits	1.0	...	All occupations	0.0	All workers	All workers	0.0	Benefit combinations	28.0	Percent of workers with access	16	Defined benefit retirement and no medical care...
3	NBU10000000000000028016	Percent of civilian workers with access to def...	2013.0	1	NaN	0.1	NaN	IP	Incidence and Key Provision Benefits	1.0	...	All occupations	0.0	All workers	All workers	0.0	Benefit combinations	28.0	Percent of workers with access	16	Defined benefit retirement and no medical care...
4	NBU10000000000000028016	Percent of civilian workers with access to def...	2014.0	1	NaN	0.1	NaN	IP	Incidence and Key Provision Benefits	1.0	...	All occupations	0.0	All workers	All workers	0.0	Benefit combinations	28.0	Percent of workers with access	16	Defined benefit retirement and no medical care...
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
615792	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
615793	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
615794	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
615795	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN
615796	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN

	series_id	year	period	value	footnote_codes
0	NBU10000000000000028007	2010	A01	63	NaN
1	NBU10000000000000028007	2011	A01	62	NaN
2	NBU10000000000000028007	2012	A01	62	NaN
3	NBU10000000000000028007	2013	A01	62	NaN
4	NBU10000000000000028007	2014	A01	63	NaN
...	...	...	...	...	...
615692	NBU59700000000000033967	2021	A01	4	NaN
615693	NBU59700000000000033967	2022	A01	4	NaN
615694	NBU59800000000000033968	2020	A01	6	NaN
615695	NBU59800000000000033968	2021	A01	6	NaN
615696	NBU59800000000000033968	2022	A01	6	NaN

	series_id	seasonal	ownership_code	estimate_code	industry_code	occupation_code	subcell_code	datatype_code	provision_code	survey_code	series_title	footnote_codes	begin_year	begin_period	end_year	end_period
0	NBU10000000000000028007	U	1	0	000000	0	0	28	007	IP	Percent of civilian workers with access to med...	NaN	2010	A01	2022	A01
1	NBU10000000000000028008	U	1	0	000000	0	0	28	008	IP	Percent of civilian workers with access to med...	NaN	2010	A01	2022	A01
2	NBU10000000000000028009	U	1	0	000000	0	0	28	009	IP	Percent of civilian workers with access to ret...	NaN	2010	A01	2022	A01
3	NBU10000000000000028010	U	1	0	000000	0	0	28	010	IP	Percent of civilian workers with access to no ...	NaN	2010	A01	2022	A01
4	NBU10000000000000028011	U	1	0	000000	0	0	28	011	IP	Percent of civilian workers with access to med...	NaN	2010	A01	2022	A01
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
95113	NBU59400000000000026172	U	5	94	000000	0	0	26	172	IP	Percent of local government workers participat...	NaN	2010	A01	2022	A01
95114	NBU59400000000000032173	U	5	94	000000	0	0	32	173	IP	Take-up rate for local government workers with...	NaN	2010	A01	2022	A01
95115	NBU59500000000000028365	U	5	95	000000	0	0	28	365	IP	Percent of local government workers with acces...	NaN	2017	A01	2018	A01
95116	NBU59700000000000033967	U	5	97	000000	0	0	33	967	IP	Percent of local government workers with acces...	NaN	2020	A01	2022	A01
95117	NBU59800000000000033968	U	5	98	000000	0	0	33	968	IP	Percent of local government workers with acces...	NaN	2020	A01	2022	A01

Topic Description and Justification¶

The Datasets¶

Data dictionary¶

Using the Data¶