import pandas as pd
# Load the dataset from 'diabetes_balanced.csv' (path is relative to the
# working directory) into a DataFrame.
df = pd.read_csv('diabetes_balanced.csv')
# Preview the first rows to sanity-check that the file was parsed as expected.
df.head()
| | Diabetes_binary | HighBP | HighChol | CholCheck | BMI | Smoker | Stroke | HeartDiseaseorAttack | PhysActivity | Fruits | ... | AnyHealthcare | NoDocbcCost | GenHlth | MentHlth | PhysHlth | DiffWalk | Sex | Age | Education | Income |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.0 | 1.0 | 0.0 | 1.0 | 26.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | ... | 1.0 | 0.0 | 3.0 | 5.0 | 30.0 | 0.0 | 1.0 | 4.0 | 6.0 | 8.0 |
| 1 | 0.0 | 1.0 | 1.0 | 1.0 | 26.0 | 1.0 | 1.0 | 0.0 | 0.0 | 1.0 | ... | 1.0 | 0.0 | 3.0 | 0.0 | 0.0 | 0.0 | 1.0 | 12.0 | 6.0 | 8.0 |
| 2 | 0.0 | 0.0 | 0.0 | 1.0 | 26.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | ... | 1.0 | 0.0 | 1.0 | 0.0 | 10.0 | 0.0 | 1.0 | 13.0 | 6.0 | 8.0 |
| 3 | 0.0 | 1.0 | 1.0 | 1.0 | 28.0 | 1.0 | 0.0 | 0.0 | 1.0 | 1.0 | ... | 1.0 | 0.0 | 3.0 | 0.0 | 3.0 | 0.0 | 1.0 | 11.0 | 6.0 | 8.0 |
| 4 | 0.0 | 0.0 | 0.0 | 1.0 | 29.0 | 1.0 | 0.0 | 0.0 | 1.0 | 1.0 | ... | 1.0 | 0.0 | 2.0 | 0.0 | 0.0 | 0.0 | 0.0 | 8.0 | 5.0 | 8.0 |
5 rows × 22 columns
# Show the column labels of df; these are the names described in data_dict.
df.columns
Index(['Diabetes_binary', 'HighBP', 'HighChol', 'CholCheck', 'BMI', 'Smoker',
'Stroke', 'HeartDiseaseorAttack', 'PhysActivity', 'Fruits', 'Veggies',
'HvyAlcoholConsump', 'AnyHealthcare', 'NoDocbcCost', 'GenHlth',
'MentHlth', 'PhysHlth', 'DiffWalk', 'Sex', 'Age', 'Education',
'Income'],
dtype='object')
# Data dictionary: maps every column name of df to a short, human-readable
# description. Keys appear in the same order as df.columns. For columns that
# hold coded levels (GenHlth, Sex, Age, Education, Income) the description
# spells out what the end points of the scale mean.
data_dict = {
    'Diabetes_binary': 'diagnosis for prediabetes or diabetes',
    'HighBP': 'presence of high blood pressure',
    'HighChol': 'presence of high cholesterol',
    'CholCheck': 'cholesterol check in the past 5 years',
    'BMI': 'body mass index',
    'Smoker': 'smoked at least 100 cigarettes (5 packs) in lifetime',
    'Stroke': 'has experienced a stroke in lifetime',
    'HeartDiseaseorAttack': 'has experienced heart disease or heart attack in lifetime',
    'PhysActivity': 'completed physical activity in past 30 days (outside of profession)',
    'Fruits': 'consumes 1+ fruits per day',
    'Veggies': 'consumes 1+ vegetables per day',
    'HvyAlcoholConsump': 'heavy alcohol consumption (adult men >=14 drinks per week, adult women>=7 drinks per week)',
    'AnyHealthcare': 'has any form of health insurance',
    'NoDocbcCost': 'has not seen a doctor when needed in the past year because of cost',
    'GenHlth': 'self-analysis of health, 1 (excellent) - 5 (poor)',
    'MentHlth': 'days of poor mental health in past 30 days',
    'PhysHlth': 'days of physical illness or injury in past 30 days',
    'DiffWalk': 'has difficulty walking or climbing',
    'Sex': 'sex, 0 (female) or 1 (male)',
    'Age': '13 level age range, 1 (18-24 years) - 13 (80+ years)',
    'Education': '6 level education range, 1 (never attended school) - 6 (college graduate)',
    'Income': '8 level income range, 1 (<10,000) - 8 (>75,000)',
}
# Echo the dictionary so the notebook displays it.
data_dict
{'Diabetes_binary': 'diagnosis for prediabetes or diabetes',
'HighBP': 'presence of high blood pressure',
'HighChol': 'presence of high cholesterol',
'CholCheck': 'cholestoral check in the past 5 years',
'BMI': 'body mass index',
'Smoker': 'smoked at least 100 cigarettes (5 packs) in lifetime',
'Stroke': 'has experienced a stroke in lifetime',
'HeartDiseaseorAttack': 'has experienced heart disease or heart attack in lifetime',
'PhysActivity': 'completed physical activity in past 30 days (outside of profession)',
'Fruits': 'consumes 1+ fruits per day',
'Veggies': 'consumes 1+ vegetables per day',
'HvyAlcoholConsump': 'heavy alcohol consumption (adult men >=14 drinks per week, adult women>=7 drinks per week)',
'AnyHealthcare': 'has any form of health insurance',
'NoDocbcCost': 'has not seen a doctor when needed in the past year because of cost',
'GenHlth': 'self-analysis of health, 1 (excellent) - 5 (poor)',
'MentHlth': 'days of poor mental health in past 30 days',
'PhysHlth': 'days of physical illness or injury in past 30 days',
'DiffWalk': 'has difficulty walking or climbing',
'Sex': 'sex, 0 (female) or 1 (male)',
'Age': '13 level age range, 1 (18-24 years) - 13 (80+ years)',
'Education': '6 level education range, 1 (never attended school) - 6 (college graduate)',
'Income': '8 level income range, 1 (<10,000) - 8 (>75,000)'}