#!pip install bubbly
#!pip install https://10473-14579099-gh.circle-artifacts.com/0/dist/plotly-3.6.1%2B3.g48f2ce4a.tar.gz
#https://en.wikipedia.org/wiki/List_of_countries_by_inequality-adjusted_HDI
import requests
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from bubbly.bubbly import bubbleplot
# The offline helpers are attributes of plotly.offline; a bare
# `import download_plotlyjs, ...` searches for top-level modules with those
# names and raises ModuleNotFoundError.
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly import __version__
import plotly as py
import plotly.graph_objs as go
from plotly.grid_objs import Grid, Column
# Enable inline rendering of plotly figures in the notebook.
py.offline.init_notebook_mode(connected=True)
from plotly import tools
import plotly.plotly
# SECURITY(review): this API key is committed in plain text. It should be
# rotated and supplied from outside the source (environment variable or the
# local plotly credentials file) rather than hard-coded here.
py.tools.set_credentials_file(username='as6140', api_key='AROO7t4LTQpnC9VBLSrp')
print(__version__)
#Import libraries
import requests
from bs4 import BeautifulSoup
#parse with BeautifulSoup & Requests
web_page = "https://en.wikipedia.org/wiki/List_of_countries_by_inequality-adjusted_HDI"
# requests has no default timeout, so a stalled connection would hang the
# notebook forever; bound the wait explicitly.
req = requests.get(web_page, timeout=30)
# Stop here on an HTTP error instead of parsing an error page and failing
# later with a confusing IndexError on the (empty) table list.
req.raise_for_status()
page = req.text
soup = BeautifulSoup(page, 'html.parser')
soup.title
# Every <table> element whose CSS class is "wikitable".
table = soup.find_all("table", "wikitable")
from IPython.display import IFrame, HTML
HTML(str(table))
# First wikitable: collect the text of the first link in every row except
# row 0.
ihdi = table[0]
table_rows = ihdi.find_all('tr')
header = table_rows[1]
header.a.get_text()
countries = [row.a.get_text() for row in table_rows[1:]]
# Second wikitable: same extraction.
ihdi2 = table[1]
table_rows2 = ihdi2.find_all('tr')
header2 = table_rows2[1]
header2.a.get_text()
countries2 = [row.a.get_text() for row in table_rows2[1:]]
countries[:5]
countries2
type(countries)
type(countries2)
# Join both name lists and compare the combined length with the sum of parts.
countries_all = [*countries, *countries2]
len(countries) + len(countries2)
len(countries_all)
# Pull the text of every <td> cell in both tables; any cell whose text
# contains a "." is treated as an IHDI value.
temp = ihdi.find_all('td')
temp[2].get_text()
temp2 = ihdi2.find_all('td')
temp2[2].get_text()
IHDI_1 = [text for text in (cell.get_text() for cell in temp) if "." in text]
IHDI_2 = [text for text in (cell.get_text() for cell in temp2) if "." in text]
IHDI_all = IHDI_1 + IHDI_2
#rank = list(range(len(countries)))
IHDI_all
#removing some errant values that contained a '.' in the country name itself
dotted_country_cells = {
    '\xa0Dem. Rep. of the Congo',
    '\xa0Rep. of the Congo',
    '\xa0Rep. of Macedonia',
}
IHDI_all = [value for value in IHDI_all if value not in dotted_country_cells]
len(IHDI_all)
# Build the IHDI table: one row per country, in page order.
# NOTE(review): Rank is 0-based here (first country gets 0) — confirm that is
# intended before relying on the column as a published rank.
rank = list(range(len(IHDI_all)))
data = zip(rank, countries_all, IHDI_all)
import pandas as pd
cols = ['Rank', 'Country', 'IHDI']
# index=rank also acts as a length check: if the name and value lists differ
# in length, zip() truncates and the DataFrame constructor raises.
ihdi_df = pd.DataFrame(list(data), columns=cols, index=rank)
ihdi_df.head()
# Strip the newline left over from the table cell, then convert to float.
ihdi_df['IHDI'] = ihdi_df['IHDI'].apply(lambda x: x.replace('\n', '')).astype(float)
# Exactly 20 rows each; the previous [0:21] / [-21:] slices selected 21 rows
# while the charts are titled "Top 20" / "Bottom 20".
ihdi_df_top20 = ihdi_df.head(20)
ihdi_df_bot20 = ihdi_df.tail(20)
ihdi_df.info()
# Bar chart for the leading slice of the ranking.
layout = go.Layout(title='Top 20 Countries Ranked by Inequality-Adjusted Human Development Index')
trace1 = go.Bar(x=ihdi_df_top20['Country'], y=ihdi_df_top20['IHDI'])
data = [trace1]
fig = go.Figure(data=data, layout=layout)
py.offline.iplot(fig);
# Bar chart for the trailing slice of the ranking.
layout2 = go.Layout(title='Bottom 20 Countries Ranked by Inequality-Adjusted Human Development Index')
trace2 = go.Bar(x=ihdi_df_bot20['Country'], y=ihdi_df_bot20['IHDI'])
data2 = [trace2]
fig2 = go.Figure(data=data2, layout=layout2)
py.offline.iplot(fig2);
# World map coloured by IHDI; countries are matched to map regions by name.
choropleth_trace = {
    'type': 'choropleth',
    'locations': ihdi_df['Country'],
    'locationmode': 'country names',
    'z': ihdi_df['IHDI'],
    'autocolorscale': True,
    'marker': {'line': {'color': 'rgb(255,255,255)', 'width': 2}},
    'colorbar': {'title': "2017 Inequality-Adjusted HDI"},
}
data = [choropleth_trace]
layout = {'title': 'Countries by 2017 Inequality-Adjusted HDI'}
fig = go.Figure(data=data, layout=layout)
py.offline.iplot(fig);
#Save Dataframe to CSV
ihdi_df.to_csv('2018_inequality_adjusted_hdi.csv')
#Link to Edited Plot.ly Map
# Import from the public IPython.display module; importing `display` from
# IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML("""<a href="https://plot.ly/~as6140/1/">Public Link to Edited Graph on Plot.ly</a>"""))
#DO PEOPLE FROM HIGH IHDI COUNTRIES LIVE LONGER?
# Columns kept from both CSVs: the country name plus one column per year
# from 2000 through 2015.
wb_keep_columns = ['Country Name'] + [str(year) for year in range(2000, 2016)]

le_df = pd.read_csv('life_expectancy_by_country_world_bank.csv')
le_df.head()
le_00_to_15_df = le_df[wb_keep_columns]
le_00_to_15_df.head()

pop_df = pd.read_csv('population_by_country_world_bank.csv')
pop_df.head()
pop_00_to_15_df = pop_df[wb_keep_columns]
pop_00_to_15_df.head()

# Join on country name; the clashing year columns get _le / _pop suffixes.
le_pop_df = le_00_to_15_df.merge(
    pop_00_to_15_df,
    how='inner',
    on='Country Name',
    suffixes=('_le', '_pop'),
)
le_pop_df.head()
# Load the HDI table and discard the 1990-1999 columns.
hdi_df = pd.read_csv('hdi_human_development_index.csv')
hdi_df = hdi_df.drop(columns=[str(year) for year in range(1990, 2000)])
hdi_df.head()
# Attach life expectancy and population to each HDI row by country name.
bubbly_df = hdi_df.merge(
    le_pop_df,
    how='inner',
    left_on='country',
    right_on='Country Name',
    suffixes=('_hdi', ''),
)
bubbly_df.info()
# Columns at positions 1..16 get an explicit _hdi suffix.
new_names = [(col, col + '_hdi') for col in bubbly_df.columns[1:17]]
bubbly_df = bubbly_df.rename(columns=dict(new_names))
bubbly_df.head()
bubbly_df.info()
# Keep only rows with no missing values, then renumber the index.
bubbly_df = bubbly_df.dropna(axis=0, how='any').reset_index(drop=True)
bubbly_df.info()
# Count of rows where both country-name columns agree.
sum(bubbly_df['country'] == bubbly_df['Country Name'])
bubbly_df = bubbly_df.drop(columns='Country Name')
bubbly_df.columns
# Reshape the wide per-year columns into long (country, year, value) frames,
# one per metric, then join them back together on (country, year).
melt_years = range(2000, 2016)
bdf_melt1 = bubbly_df.melt(id_vars=['country'],
                           value_vars=['{}_hdi'.format(y) for y in melt_years],
                           var_name='year', value_name='hdi')
bdf_melt2 = bubbly_df.melt(id_vars=['country'],
                           value_vars=['{}_le'.format(y) for y in melt_years],
                           var_name='year', value_name='life_expectancy')
bdf_melt3 = bubbly_df.melt(id_vars=['country'],
                           value_vars=['{}_pop'.format(y) for y in melt_years],
                           var_name='year', value_name='population')
# The melted "year" holds labels such as "2000_hdi"; keep the 4-digit year.
bdf_melt1['year'] = bdf_melt1['year'].str[:4].astype(int)
bdf_melt2['year'] = bdf_melt2['year'].str[:4].astype(int)
bdf_melt3['year'] = bdf_melt3['year'].str[:4].astype(int)
# Order rows by country, then year. A plain two-key sort gives the same
# ordering as the previous groupby("country").apply(sort_values("year")),
# without depending on apply() receiving the grouping column (behaviour
# pandas has deprecated), which would otherwise drop "country" from the result.
grouped_hdi = bdf_melt1.sort_values(['country', 'year']).reset_index(drop=True)
grouped_le = bdf_melt2.sort_values(['country', 'year']).reset_index(drop=True)
grouped_pop = bdf_melt3.sort_values(['country', 'year']).reset_index(drop=True)
temp = pd.merge(grouped_hdi, grouped_le, on=['country', 'year'], how='inner')
temp = pd.merge(temp, grouped_pop, on=['country', 'year'], how='inner')
temp.head()
data = temp.copy()
# Continent lookup: keep only the continent and country columns.
continents_df = pd.read_csv('gapminder_continents.csv')
continents_df.head()
continents_df = continents_df.loc[:, ['continent', 'country']]
temp.info()
# Left join so countries missing from the lookup are kept (continent = NaN).
temp = temp.merge(continents_df, how='left', on='country')
temp.info()
temp['continent'].unique()
# Countries that still have a missing value in any column.
temp.loc[temp.isna().any(axis=1), 'country'].unique()
# Manual continent assignments, grouped by continent. Each listed country's
# "continent" value is overwritten with the key it appears under.
manual_continents = {
    'Asia': [
        'Armenia', 'Azerbaijan', 'Georgia', 'Kazakhstan', 'Kyrgyz Republic',
        'Maldives', 'Qatar', 'Tajikistan', 'United Arab Emirates', 'Uzbekistan',
    ],
    'Americas': [
        'Barbados', 'Belize', 'Guyana', 'St. Lucia',
        'St. Vincent and the Grenadines',
    ],
    'Europe': [
        'Belarus', 'Cyprus', 'Estonia', 'Latvia', 'Liechtenstein',
        'Lithuania', 'Luxembourg', 'Moldova', 'Malta', 'Ukraine',
    ],
    'Africa': ['Congo, Dem. Rep.', 'Congo, Rep.', 'Seychelles'],
    # NOTE(review): Timor-Leste is filed under Oceania here — confirm intended.
    'Oceania': [
        'Fiji', 'Micronesia, Fed. Sts.', 'Papua New Guinea', 'Samoa',
        'Solomon Islands', 'Timor-Leste', 'Tonga',
    ],
}
for continent_name, country_names in manual_continents.items():
    temp.loc[temp['country'].isin(country_names), 'continent'] = continent_name
temp.info()
# Countries currently labelled "Americas".
temp.loc[temp['continent'] == 'Americas', 'country'].unique()
# Re-label the listed countries as North or South America. Countries not
# listed keep whatever continent value they already have.
americas_split = {
    'North America': [
        'Barbados', 'Belize', 'Canada', 'Costa Rica', 'Cuba',
        'Dominican Republic', 'El Salvador', 'Guatemala', 'Haiti', 'Honduras',
        'Jamaica', 'Mexico', 'Nicaragua', 'Panama', 'St. Lucia',
        'St. Vincent and the Grenadines', 'United States',
    ],
    # NOTE(review): Trinidad and Tobago is filed under South America here —
    # confirm intended.
    'South America': [
        'Argentina', 'Bolivia', 'Brazil', 'Chile', 'Colombia', 'Ecuador',
        'Guyana', 'Paraguay', 'Peru', 'Trinidad and Tobago', 'Uruguay',
    ],
}
for region_name, region_countries in americas_split.items():
    temp.loc[temp['country'].isin(region_countries), 'continent'] = region_name
data = temp.copy()
# The HDI vs. life-expectancy bubble chart below is disabled (kept as
# commented-out code); only a link to the hosted version is displayed.
#from bubbly.bubbly import bubbleplot
#figure = bubbleplot(dataset=data, x_column='hdi', y_column='life_expectancy',
# bubble_column='country', time_column='year', size_column='population', color_column='continent',
# x_title="Human Development Index (HDI)", y_title="Life Expectancy",
# title='Human Development vs. Life Expectancy by Year, Country, & Population',
# x_logscale=False, scale_bubble=3, height=650, x_range = (0.2,1), y_range = (30,95))
#py.offline.iplot(figure, config={'scrollzoom': True})
#py.plotly.icreate_animations(figure, config={'scrollzoom': True})
# Render a clickable link to the published chart in the notebook output.
display(HTML("""<a href="https://plot.ly/~as6140/4/">Public Link to Edited Bubble Graph on Plot.ly</a>"""))
# Load the footprint data and keep only the columns used below.
eco_df = pd.read_csv('NFA 2018 Edition.csv')
eco_df = eco_df.loc[:, ['country', 'year', 'record', 'total']]
eco_df.head()
# One frame per record type, with "total" renamed to the record's name, then
# joined back into a single wide frame keyed on (country, year).
eco_df1 = eco_df[(eco_df.record == "EFConsPerCap")].rename(columns={"total": "EFConsPerCap"}).drop(columns='record')
eco_df2 = eco_df[(eco_df.record == "EFConsTotGHA")].rename(columns={"total": "EFConsTotGHA"}).drop(columns='record')
eco_df3 = eco_df[(eco_df.record == "BiocapPerCap")].rename(columns={"total": "BiocapPerCap"}).drop(columns='record')
eco_df4 = eco_df[(eco_df.record == "BiocapTotGHA")].rename(columns={"total": "BiocapTotGHA"}).drop(columns='record')
eco_df5 = pd.merge(eco_df1, eco_df2, on=['country', 'year'])
eco_df6 = pd.merge(eco_df5, eco_df3, on=['country', 'year'])
eco_df = pd.merge(eco_df6, eco_df4, on=['country', 'year'])
# Keep 2000-2014. The comparison is done on numeric values: the previous
# isin(['2000', ...]) used strings, which never match an integer column in
# current pandas and would leave the frame empty. "year" is presumably read
# as integers from the CSV (the later merge on the integer year column relies
# on that); to_numeric makes the filter work for either representation.
year_numeric = pd.to_numeric(eco_df['year'], errors='coerce')
# .copy() so the column assignment below acts on an independent frame rather
# than a slice (avoids SettingWithCopyWarning).
eco_df = eco_df[year_numeric.isin(range(2000, 2015))].copy()
# Align country spellings with the names used in the HDI/World Bank frames.
country_renames = {
    'Viet Nam': 'Vietnam',
    'Tanzania, United Republic of': 'Tanzania',
    'United States of America': 'United States',
    'Congo, Democratic Republic of': 'Congo, Dem. Rep.',
    'Congo': 'Congo, Rep.',
    'Saint Lucia': 'St. Lucia',
    'Libyan Arab Jamahiriya': 'Libya',
    'Micronesia, Federated States of': 'Micronesia, Fed. Sts.',
    'Kyrgyzstan': 'Kyrgyz Republic',
    'Slovakia': 'Slovak Republic',
    'Sudan (former)': 'Sudan',
    'Serbia and Montenegro': 'Serbia',
}
eco_df['country'] = eco_df['country'].replace(country_renames)
temp.info()
eco_df['country'].unique()
# Drop 2015 rows, then attach the footprint columns by (country, year).
temp = temp[temp['year'] != 2015]
temp = temp.merge(eco_df, how='inner', on=['country', 'year'])
temp.info()
temp.head()
# Same 2015 filter applied again to the merged frame.
temp = temp[temp['year'] != 2015]
# Per-country count of rows that still contain a missing value.
temp.loc[temp.isnull().any(axis=1), 'country'].value_counts()
# Snapshot used for plotting, also written to disk.
data1 = temp.copy()
data1.to_csv('data_for_plotly_grid.csv')
# Biocapacity Per Capita vs. EF Per Capita
# Animated bubble chart: one bubble per country, one frame per year, bubble
# size taken from EFConsTotGHA, colour from continent.
# NOTE(review): the title says "& Population" but the size column is
# EFConsTotGHA — confirm which is intended.
from bubbly.bubbly import bubbleplot
figure = bubbleplot(dataset=data1, x_column='BiocapPerCap', y_column='EFConsPerCap',
    bubble_column='country', time_column='year', size_column='EFConsTotGHA', color_column='continent',
    x_title="Biocapacity Per Capita (global hectares)", y_title="Ecological Footprint Per Capita (global hectares)",
    title='Biocapacity (Per Capita) vs. Ecological Footprint (Per Capita) by Year, Country, & Population',
    x_logscale=False, scale_bubble=3, height=650, x_range=(-2, 18), y_range=(-2, 18))
# The plotly config key is camelCase "scrollZoom"; the lowercase spelling is
# not a recognised option and is ignored. Called through py.offline, like the
# other charts in this file.
py.offline.iplot(figure, config={'scrollZoom': True})
#py.plotly.icreate_animations(figure)
# HDI vs. ecological footprint per capita
# Animated bubble chart: one bubble per country, one frame per year, bubble
# size taken from EFConsTotGHA, colour from continent.
from bubbly.bubbly import bubbleplot
figure = bubbleplot(dataset=data1, x_column='hdi', y_column='EFConsPerCap',
    bubble_column='country', time_column='year', size_column='EFConsTotGHA', color_column='continent',
    x_title="Human Development Index (HDI)", y_title="Ecological Footprint Per Capita (global hectares)",
    title='Human Development Index vs. Ecological Footprint by Year, Country, & Population (Bubble Size = Total Footprint)',
    x_logscale=False, scale_bubble=3, height=650, x_range=(0.25, 1), y_range=(-2, 20))
# The plotly config key is camelCase "scrollZoom"; the lowercase spelling is
# not a recognised option and is ignored. Called through py.offline, like the
# other charts in this file.
py.offline.iplot(figure, config={'scrollZoom': True})
#,x_range = (0.2,1), y_range = (-100000,6000000000)