# needed to create a df 
import pandas as pd


# Read the Maddison GDP-per-capita table, then discard the annotation and
# country-code columns so only entity, year and GDP value remain.
gdp = pd.read_csv("gdp-per-capita-maddison-2020.csv")
gdp = gdp.drop(columns=["417485-annotations", "Code"])

# Keep rows from 2015 onward; the happiness files loaded in this script
# start at 2015.
gdp = gdp[gdp["Year"] >= 2015]

# Preview the first rows.
gdp.head()


# Each yearly happiness file gets the same treatment: read it, tag every row
# with its year, then lower-case the column headers. The per-year variables
# are kept as separate module-level names.
def _read_happiness_year(year):
    """Return ``archive/<year>.csv`` as a frame with a ``year`` column and lower-cased headers."""
    frame = pd.read_csv(f"archive/{year}.csv")
    # The year column is added before lower-casing, matching the order the
    # headers are processed in.
    frame["year"] = year
    frame.columns = list(map(str.lower, frame.columns))
    return frame


happy2015 = _read_happiness_year(2015)

# Rescale the 2015 score by 1000 to line it up with the other years' reporting.
# NOTE(review): only 2015 is rescaled here -- confirm the remaining files
# really use the scaled representation.
happy2015["happiness score"] = happy2015["happiness score"].mul(1000)

happy2016 = _read_happiness_year(2016)
happy2017 = _read_happiness_year(2017)
happy2018 = _read_happiness_year(2018)
happy2019 = _read_happiness_year(2019)
happy2020 = _read_happiness_year(2020)
happy2021 = _read_happiness_year(2021)
happy2022 = _read_happiness_year(2022)

# Inspect the 2022 score column (raises KeyError if the column is absent).
happy2022["happiness score"]

# Stack the per-year frames into a single table with a fresh running index.
years = [
    happy2015, happy2016, happy2017, happy2018,
    happy2019, happy2020, happy2021, happy2022,
]
happiness = pd.concat(years, ignore_index=True)

# Presumably the rank appears as "happiness rank" in some files and "rank" in
# others. Missing entries become 0 so that adding the two columns yields
# whichever value is present (and 0 when a row has neither).
rank_long = happiness["happiness rank"].fillna(0)
rank_short = happiness["rank"].fillna(0)
happiness["happiness rank"] = rank_long + rank_short

# Keep only the columns used from here on.
wanted = ["country", "happiness rank", "happiness score", "year"]
happiness = happiness[wanted]

# Preview the first rows.
happiness.head()


# Country names that occur in both the GDP table and the happiness table.
gdp_names = set(gdp["Entity"].tolist())
happy_names = set(happiness["country"].tolist())
countries = gdp_names & happy_names

# Restrict the happiness table to the shared countries.
happiness = happiness.loc[happiness["country"].isin(countries)]

# Restrict the GDP table the same way, then rename its key columns to the
# lower-case names used by the happiness table.
gdp = gdp.loc[gdp["Entity"].isin(countries)].rename(
    columns={"Entity": "country", "Year": "year"}
)

	Entity	Year	GDP per capita
65	Afghanistan	2015	1928.0000
66	Afghanistan	2016	1929.0000
67	Afghanistan	2017	2014.7452
68	Afghanistan	2018	1934.5550
140	Albania	2015	10032.0000

	country	happiness rank	happiness score	year
0	Switzerland	1.0	7587.0	2015
1	Iceland	2.0	7561.0	2015
2	Denmark	3.0	7527.0	2015
3	Norway	4.0	7522.0	2015
4	Canada	5.0	7427.0	2015

Money Can't Buy Happiness... but can GDP?

Description of Real World problem (1%)

Data Set (1%)

Data Dictionary

How will data be used?