import pandas as pd
# Read the diabetes CSV (path is relative to the current working directory)
# into a DataFrame. NOTE(review): the file name suggests a class-balanced
# sample — confirm against the data source.
df = pd.read_csv('diabetes_balanced.csv')
# Preview the first rows (head() defaults to five) to sanity-check the load.
df.head()
| | Diabetes_binary | HighBP | HighChol | CholCheck | BMI | Smoker | Stroke | HeartDiseaseorAttack | PhysActivity | Fruits | ... | AnyHealthcare | NoDocbcCost | GenHlth | MentHlth | PhysHlth | DiffWalk | Sex | Age | Education | Income |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.0 | 1.0 | 0.0 | 1.0 | 26.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | ... | 1.0 | 0.0 | 3.0 | 5.0 | 30.0 | 0.0 | 1.0 | 4.0 | 6.0 | 8.0 |
1 | 0.0 | 1.0 | 1.0 | 1.0 | 26.0 | 1.0 | 1.0 | 0.0 | 0.0 | 1.0 | ... | 1.0 | 0.0 | 3.0 | 0.0 | 0.0 | 0.0 | 1.0 | 12.0 | 6.0 | 8.0 |
2 | 0.0 | 0.0 | 0.0 | 1.0 | 26.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | ... | 1.0 | 0.0 | 1.0 | 0.0 | 10.0 | 0.0 | 1.0 | 13.0 | 6.0 | 8.0 |
3 | 0.0 | 1.0 | 1.0 | 1.0 | 28.0 | 1.0 | 0.0 | 0.0 | 1.0 | 1.0 | ... | 1.0 | 0.0 | 3.0 | 0.0 | 3.0 | 0.0 | 1.0 | 11.0 | 6.0 | 8.0 |
4 | 0.0 | 0.0 | 0.0 | 1.0 | 29.0 | 1.0 | 0.0 | 0.0 | 1.0 | 1.0 | ... | 1.0 | 0.0 | 2.0 | 0.0 | 0.0 | 0.0 | 0.0 | 8.0 | 5.0 | 8.0 |
5 rows × 22 columns
# Show every column label; the head() preview above elides the middle columns.
df.columns
Index(['Diabetes_binary', 'HighBP', 'HighChol', 'CholCheck', 'BMI', 'Smoker', 'Stroke', 'HeartDiseaseorAttack', 'PhysActivity', 'Fruits', 'Veggies', 'HvyAlcoholConsump', 'AnyHealthcare', 'NoDocbcCost', 'GenHlth', 'MentHlth', 'PhysHlth', 'DiffWalk', 'Sex', 'Age', 'Education', 'Income'], dtype='object')
# Human-readable description of each dataset column, keyed by column name.
# Entries are kept in the same order as the columns appear in the DataFrame.
data_dict = {
    # Diagnosis
    'Diabetes_binary': 'diagnosis for prediabetes or diabetes',

    # Health conditions and risk factors
    'HighBP': 'presence of high blood pressure',
    'HighChol': 'presence of high cholesterol',
    # NOTE(review): 'cholestoral' is a misspelling of 'cholesterol'; the text
    # is left unchanged here so it still matches previously recorded output.
    'CholCheck': 'cholestoral check in the past 5 years',
    'BMI': 'body mass index',
    'Smoker': 'smoked at least 100 cigarettes (5 packs) in lifetime',
    'Stroke': 'has experienced a stroke in lifetime',
    'HeartDiseaseorAttack': 'has experienced heart disease or heart attack in lifetime',

    # Activity, diet and alcohol habits
    'PhysActivity': 'completed physical activity in past 30 days (outside of profession)',
    'Fruits': 'consumes 1+ fruits per day',
    'Veggies': 'consumes 1+ vegetables per day',
    'HvyAlcoholConsump': 'heavy alcohol consumption (adult men >=14 drinks per week, adult women>=7 drinks per week)',

    # Healthcare access
    'AnyHealthcare': 'has any form of health insurance',
    'NoDocbcCost': 'has not seen a doctor when needed in the past year because of cost',

    # Self-reported health status
    'GenHlth': 'self-analysis of health, 1 (excellent) - 5 (poor)',
    'MentHlth': 'days of poor mental health in past 30 days',
    'PhysHlth': 'days of physical illness or injury in past 30 days',
    'DiffWalk': 'has difficulty walking or climbing',

    # Demographics (ordinal codes; ranges are spelled out in each description)
    'Sex': 'sex, 0 (female) or 1 (male)',
    'Age': '13 level age range, 1 (18-24 years) - 13 (80+ years)',
    'Education': '6 level education range, 1 (never attended school) - 6 (college graduate)',
    'Income': '8 level income range, 1 (<10,000) - 8 (>75,000)',
}
# Echo the dictionary so its contents can be checked by eye.
data_dict
{'Diabetes_binary': 'diagnosis for prediabetes or diabetes', 'HighBP': 'presence of high blood pressure', 'HighChol': 'presence of high cholesterol', 'CholCheck': 'cholestoral check in the past 5 years', 'BMI': 'body mass index', 'Smoker': 'smoked at least 100 cigarettes (5 packs) in lifetime', 'Stroke': 'has experienced a stroke in lifetime', 'HeartDiseaseorAttack': 'has experienced heart disease or heart attack in lifetime', 'PhysActivity': 'completed physical activity in past 30 days (outside of profession)', 'Fruits': 'consumes 1+ fruits per day', 'Veggies': 'consumes 1+ vegetables per day', 'HvyAlcoholConsump': 'heavy alcohol consumption (adult men >=14 drinks per week, adult women>=7 drinks per week)', 'AnyHealthcare': 'has any form of health insurance', 'NoDocbcCost': 'has not seen a doctor when needed in the past year because of cost', 'GenHlth': 'self-analysis of health, 1 (excellent) - 5 (poor)', 'MentHlth': 'days of poor mental health in past 30 days', 'PhysHlth': 'days of physical illness or injury in past 30 days', 'DiffWalk': 'has difficulty walking or climbing', 'Sex': 'sex, 0 (female) or 1 (male)', 'Age': '13 level age range, 1 (18-24 years) - 13 (80+ years)', 'Education': '6 level education range, 1 (never attended school) - 6 (college graduate)', 'Income': '8 level income range, 1 (<10,000) - 8 (>75,000)'}