In [84]:
#!pip install bubbly
#!pip install https://10473-14579099-gh.circle-artifacts.com/0/dist/plotly-3.6.1%2B3.g48f2ce4a.tar.gz
Collecting https://10473-14579099-gh.circle-artifacts.com/0/dist/plotly-3.6.1%2B3.g48f2ce4a.tar.gz
  Downloading https://10473-14579099-gh.circle-artifacts.com/0/dist/plotly-3.6.1%2B3.g48f2ce4a.tar.gz
     \ 36.7MB 67.2MB/ss
Requirement already satisfied: decorator>=4.0.6 in /anaconda3/envs/learn-env/lib/python3.6/site-packages (from plotly==3.6.1+3.g48f2ce4a) (4.3.0)
Requirement already satisfied: nbformat>=4.2 in /anaconda3/envs/learn-env/lib/python3.6/site-packages (from plotly==3.6.1+3.g48f2ce4a) (4.4.0)
Requirement already satisfied: pytz in /anaconda3/envs/learn-env/lib/python3.6/site-packages (from plotly==3.6.1+3.g48f2ce4a) (2018.5)
Requirement already satisfied: requests in /anaconda3/envs/learn-env/lib/python3.6/site-packages (from plotly==3.6.1+3.g48f2ce4a) (2.19.1)
Requirement already satisfied: retrying>=1.3.3 in /anaconda3/envs/learn-env/lib/python3.6/site-packages (from plotly==3.6.1+3.g48f2ce4a) (1.3.3)
Requirement already satisfied: six in /anaconda3/envs/learn-env/lib/python3.6/site-packages (from plotly==3.6.1+3.g48f2ce4a) (1.11.0)
Requirement already satisfied: ipython_genutils in /anaconda3/envs/learn-env/lib/python3.6/site-packages (from nbformat>=4.2->plotly==3.6.1+3.g48f2ce4a) (0.2.0)
Requirement already satisfied: traitlets>=4.1 in /anaconda3/envs/learn-env/lib/python3.6/site-packages (from nbformat>=4.2->plotly==3.6.1+3.g48f2ce4a) (4.3.2)
Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in /anaconda3/envs/learn-env/lib/python3.6/site-packages (from nbformat>=4.2->plotly==3.6.1+3.g48f2ce4a) (2.6.0)
Requirement already satisfied: jupyter_core in /anaconda3/envs/learn-env/lib/python3.6/site-packages (from nbformat>=4.2->plotly==3.6.1+3.g48f2ce4a) (4.4.0)
Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /anaconda3/envs/learn-env/lib/python3.6/site-packages (from requests->plotly==3.6.1+3.g48f2ce4a) (3.0.4)
Requirement already satisfied: certifi>=2017.4.17 in /anaconda3/envs/learn-env/lib/python3.6/site-packages (from requests->plotly==3.6.1+3.g48f2ce4a) (2018.8.13)
Requirement already satisfied: urllib3<1.24,>=1.21.1 in /anaconda3/envs/learn-env/lib/python3.6/site-packages (from requests->plotly==3.6.1+3.g48f2ce4a) (1.23)
Requirement already satisfied: idna<2.8,>=2.5 in /anaconda3/envs/learn-env/lib/python3.6/site-packages (from requests->plotly==3.6.1+3.g48f2ce4a) (2.7)
Building wheels for collected packages: plotly
  Building wheel for plotly (setup.py) ... done
  Stored in directory: /Users/flatironschool/Library/Caches/pip/wheels/27/29/22/87e9991ed2841d1ff61cfa9ec411b8fea6b5d3eb6e9bf8011a
Successfully built plotly
Installing collected packages: plotly
  Found existing installation: plotly 3.1.0
    Uninstalling plotly-3.1.0:
      Successfully uninstalled plotly-3.1.0
Successfully installed plotly-3.6.1+3.g48f2ce4a
In [136]:
#https://en.wikipedia.org/wiki/List_of_countries_by_inequality-adjusted_HDI
import requests
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from bubbly.bubbly import bubbleplot

 import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly import __version__
import plotly as py
import plotly.graph_objs as go
from plotly.grid_objs import Grid, Column
py.offline.init_notebook_mode(connected=True)
from plotly import tools
import plotly.plotly
py.tools.set_credentials_file(username='as6140', api_key='AROO7t4LTQpnC9VBLSrp')

print(__version__)
3.1.0
In [3]:
#Import libraries
import requests
from bs4 import BeautifulSoup

#parse with BeautifulSoup & Requests
web_page = "https://en.wikipedia.org/wiki/List_of_countries_by_inequality-adjusted_HDI"

req=requests.get(web_page)
page = req.text
soup = BeautifulSoup(page, 'html.parser')
soup.title
Out[3]:
<title>List of countries by inequality-adjusted HDI - Wikipedia</title>
In [4]:
table = soup.find_all("table", "wikitable")

from IPython.display import IFrame, HTML
HTML(str(table))
Out[4]:
[
Rank Country IHDI
1  Iceland 0.878
2  Japan 0.876
2  Norway 0.876
4   Switzerland 0.871
5  Finland 0.868
6  Sweden 0.864
7  Australia 0.861
8  Germany 0.861
9  Denmark 0.860
10  Netherlands 0.857
11  Ireland 0.854
12  Canada 0.852
13  New Zealand 0.846
14  Slovenia 0.846
15  Czech Republic 0.840
16  Belgium 0.836
17  Austria 0.835
18  United Kingdom 0.835
19  Singapore 0.816
20  Luxembourg 0.811
21  Hong Kong 0.809
22  France 0.808
23  Malta 0.805
24  Slovakia 0.797
25  United States 0.797
26  Estonia 0.794
27  Israel 0.787
28  Poland 0.787
29  South Korea 0.773
30  Hungary 0.772
31  Italy 0.771
32  Cyprus 0.769
33  Latvia 0.759
34  Lithuania 0.757
35  Croatia 0.756
36  Belarus 0.755
37  Spain 0.754
38  Greece 0.753
39  Montenegro 0.741
40  Russia 0.738
41  Kazakhstan 0.737
42  Portugal 0.732
43  Romania 0.717
44  Bulgaria 0.710
45  Chile 0.710
46  Iran 0.707
47  Argentina 0.707
48  Albania 0.706
49  Ukraine 0.701
50  Uruguay 0.689
51  Mauritius 0.683
52  Georgia 0.682
53  Azerbaijan 0.681
54  Armenia 0.680
55  Barbados 0.669
56  Turkey 0.669
57  Serbia 0.667
58  Sri Lanka 0.664
59  Rep. of Macedonia 0.661
60  Costa Rica 0.651
61  Bosnia and Herzegovina 0.649
62  China 0.643
63  Mongolia 0.639
64  Venezuela 0.636
65  Thailand 0.636
66  Moldova 0.627
67  Panama 0.623
68  Saint Lucia 0.622
68  Jordan 0.617
70  Mexico 0.609
71  Jamaica 0.608
72  Kyrgyzstan 0.606
73  Peru 0.606
74  Ecuador 0.603
75  Algeria 0.598
76  Palestine 0.583
,
Rank Country IHDI
77  Dominican Republic 0.581
78  Brazil 0.578
79  Turkmenistan 0.575
80  Philippines 0.574
81  Vietnam 0.574
82  Tunisia 0.573
83  Colombia 0.571
84  Indonesia 0.563
85  Tajikistan 0.562
86  Suriname 0.557
87  Belize 0.550
88  Maldives 0.549
89  Iraq 0.546
90  Gabon 0.545
91  Guyana 0.532
92  El Salvador 0.524
93  Paraguay 0.522
94  Bolivia 0.514
95  Nicaragua 0.507
96  Vanuatu 0.499
97  Egypt 0.493
98  São Tomé and Príncipe 0.473
99  Cambodia 0.469
100  Rep. of the Congo 0.469
101  India 0.468
102  Guatemala 0.467
103  South Africa 0.467
104  Myanmar 0.466
105  Bangladesh 0.462
106  Honduras 0.459
107  Timor-Leste 0.452
108  Bhutan 0.446
109  Laos 0.445
110  Kenya 0.434
111    Nepal 0.427
112  Namibia 0.422
113  Ghana 0.420
114  Swaziland 0.414
115  Tanzania 0.404
116  Angola 0.393
117  Zambia 0.388
118  Pakistan 0.387
119  Madagascar 0.385
120  Uganda 0.370
121  Rwanda 0.367
122  Cameroon 0.366
123  Lesotho 0.359
124  Afghanistan 0.350
125  Mauritania 0.348
126  Nigeria 0.347
127  Togo 0.344
128  Senegal 0.340
129  Malawi 0.332
130  Ethiopia 0.331
131  Sudan 0.328
132  Benin 0.326
133  Dem. Rep. of the Congo 0.319
134  Côte d'Ivoire 0.311
135  Yemen 0.308
136  Djibouti 0.306
137  Guinea 0.306
138  Haiti 0.304
139  Liberia 0.298
140  Mozambique 0.294
141  Gambia 0.289
142  Burkina Faso 0.288
143  Mali 0.282
144  Burundi 0.278
145  Guinea-Bissau 0.276
146  Comoros 0.275
147  Sierra Leone 0.266
148  Niger 0.250
149  Chad 0.249
150  South Sudan 0.247
151  Central African Republic 0.212
]
In [5]:
ihdi = table[0]
table_rows = ihdi.find_all('tr')
header = table_rows[1]
table_rows[1].a.get_text()
Out[5]:
'Iceland'
In [6]:
countries = [table_rows[i].a.get_text() for i in range(len(table_rows))[1:]]
In [7]:
ihdi2 = table[1]
table_rows2 = ihdi2.find_all('tr')
header2 = table_rows2[1]
table_rows2[1].a.get_text()
Out[7]:
'Dominican Republic'
In [8]:
countries2 = [table_rows2[i].a.get_text() for i in range(len(table_rows2))[1:]]
In [9]:
countries[:5]
Out[9]:
['Iceland', 'Japan', 'Norway', 'Switzerland', 'Finland']
In [10]:
countries2
Out[10]:
['Dominican Republic',
 'Brazil',
 'Turkmenistan',
 'Philippines',
 'Vietnam',
 'Tunisia',
 'Colombia',
 'Indonesia',
 'Tajikistan',
 'Suriname',
 'Belize',
 'Maldives',
 'Iraq',
 'Gabon',
 'Guyana',
 'El Salvador',
 'Paraguay',
 'Bolivia',
 'Nicaragua',
 'Vanuatu',
 'Egypt',
 'São Tomé and Príncipe',
 'Cambodia',
 'Rep. of the Congo',
 'India',
 'Guatemala',
 'South Africa',
 'Myanmar',
 'Bangladesh',
 'Honduras',
 'Timor-Leste',
 'Bhutan',
 'Laos',
 'Kenya',
 'Nepal',
 'Namibia',
 'Ghana',
 'Swaziland',
 'Tanzania',
 'Angola',
 'Zambia',
 'Pakistan',
 'Madagascar',
 'Uganda',
 'Rwanda',
 'Cameroon',
 'Lesotho',
 'Afghanistan',
 'Mauritania',
 'Nigeria',
 'Togo',
 'Senegal',
 'Malawi',
 'Ethiopia',
 'Sudan',
 'Benin',
 'Dem. Rep. of the Congo',
 "Côte d'Ivoire",
 'Yemen',
 'Djibouti',
 'Guinea',
 'Haiti',
 'Liberia',
 'Mozambique',
 'Gambia',
 'Burkina Faso',
 'Mali',
 'Burundi',
 'Guinea-Bissau',
 'Comoros',
 'Sierra Leone',
 'Niger',
 'Chad',
 'South Sudan',
 'Central African Republic']
In [11]:
type(countries)
Out[11]:
list
In [12]:
type(countries2)
Out[12]:
list
In [13]:
countries_all = countries + countries2 #concat lists
In [14]:
len(countries) + len(countries2)
Out[14]:
151
In [15]:
len(countries_all)
Out[15]:
151
In [16]:
temp = ihdi.find_all('td')
temp[2].get_text()
Out[16]:
'0.878\n'
In [17]:
temp2 = ihdi2.find_all('td')
temp2[2].get_text()
Out[17]:
'0.581\n'
In [18]:
IHDI_1 = [temp[i].get_text() for i in range(len(temp)) if "." in temp[i].get_text()]
IHDI_2 = [temp2[i].get_text() for i in range(len(temp2)) if "." in temp2[i].get_text()]
IHDI_all = IHDI_1 + IHDI_2



#rank = list(range(len(countries)))
In [19]:
IHDI_all
Out[19]:
['0.878\n',
 '0.876\n',
 '0.876\n',
 '0.871\n',
 '0.868\n',
 '0.864\n',
 '0.861\n',
 '0.861\n',
 '0.860\n',
 '0.857\n',
 '0.854\n',
 '0.852\n',
 '0.846\n',
 '0.846\n',
 '0.840\n',
 '0.836\n',
 '0.835\n',
 '0.835\n',
 '0.816\n',
 '0.811\n',
 '0.809\n',
 '0.808\n',
 '0.805\n',
 '0.797\n',
 '0.797\n',
 '0.794\n',
 '0.787\n',
 '0.787\n',
 '0.773\n',
 '0.772\n',
 '0.771\n',
 '0.769\n',
 '0.759\n',
 '0.757\n',
 '0.756\n',
 '0.755\n',
 '0.754\n',
 '0.753\n',
 '0.741\n',
 '0.738\n',
 '0.737\n',
 '0.732\n',
 '0.717\n',
 '0.710\n',
 '0.710\n',
 '0.707\n',
 '0.707\n',
 '0.706\n',
 '0.701\n',
 '0.689\n',
 '0.683\n',
 '0.682\n',
 '0.681\n',
 '0.680\n',
 '0.669\n',
 '0.669\n',
 '0.667\n',
 '0.664\n',
 '\xa0Rep. of Macedonia',
 '0.661\n',
 '0.651\n',
 '0.649\n',
 '0.643\n',
 '0.639\n',
 '0.636\n',
 '0.636\n',
 '0.627\n',
 '0.623\n',
 '0.622\n',
 '0.617\n',
 '0.609\n',
 '0.608\n',
 '0.606\n',
 '0.606\n',
 '0.603\n',
 '0.598\n',
 '0.583\n',
 '0.581\n',
 '0.578\n',
 '0.575\n',
 '0.574\n',
 '0.574\n',
 '0.573\n',
 '0.571\n',
 '0.563\n',
 '0.562\n',
 '0.557\n',
 '0.550\n',
 '0.549\n',
 '0.546\n',
 '0.545\n',
 '0.532\n',
 '0.524\n',
 '0.522\n',
 '0.514\n',
 '0.507\n',
 '0.499\n',
 '0.493\n',
 '0.473\n',
 '0.469\n',
 '\xa0Rep. of the Congo',
 '0.469\n',
 '0.468\n',
 '0.467\n',
 '0.467\n',
 '0.466\n',
 '0.462\n',
 '0.459\n',
 '0.452\n',
 '0.446\n',
 '0.445\n',
 '0.434\n',
 '0.427\n',
 '0.422\n',
 '0.420\n',
 '0.414\n',
 '0.404\n',
 '0.393\n',
 '0.388\n',
 '0.387\n',
 '0.385\n',
 '0.370\n',
 '0.367\n',
 '0.366\n',
 '0.359\n',
 '0.350\n',
 '0.348\n',
 '0.347\n',
 '0.344\n',
 '0.340\n',
 '0.332\n',
 '0.331\n',
 '0.328\n',
 '0.326\n',
 '\xa0Dem. Rep. of the Congo',
 '0.319\n',
 '0.311\n',
 '0.308\n',
 '0.306\n',
 '0.306\n',
 '0.304\n',
 '0.298\n',
 '0.294\n',
 '0.289\n',
 '0.288\n',
 '0.282\n',
 '0.278\n',
 '0.276\n',
 '0.275\n',
 '0.266\n',
 '0.250\n',
 '0.249\n',
 '0.247\n',
 '0.212\n']
In [20]:
#removing some errant values that contained a '.' in the country name itself
IHDI_all = [e for e in IHDI_all if e not in ('\xa0Dem. Rep. of the Congo', 
                                             '\xa0Rep. of the Congo','\xa0Rep. of Macedonia')]
len(IHDI_all)
Out[20]:
151
In [21]:
rank = list(range(len(IHDI_all)))
In [22]:
data = zip(rank,countries_all, IHDI_all)

import pandas as pd
cols = ['Rank', 'Country', 'IHDI']
ihdi_df = pd.DataFrame(list(data), columns = cols, index=rank)
ihdi_df.head()
Out[22]:
Rank Country IHDI
0 0 Iceland 0.878\n
1 1 Japan 0.876\n
2 2 Norway 0.876\n
3 3 Switzerland 0.871\n
4 4 Finland 0.868\n
In [23]:
ihdi_df['IHDI'] = ihdi_df['IHDI'].apply(lambda x: x.replace('\n', '')).astype(float)
In [24]:
ihdi_df_top20 = ihdi_df[0:21]
ihdi_df_bot20 = ihdi_df[-21:]
In [25]:
ihdi_df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 151 entries, 0 to 150
Data columns (total 3 columns):
Rank       151 non-null int64
Country    151 non-null object
IHDI       151 non-null float64
dtypes: float64(1), int64(1), object(1)
memory usage: 4.7+ KB
In [26]:
trace1 = go.Bar(x = ihdi_df_top20.Country, y = ihdi_df_top20.IHDI)
data = [trace1]
layout = go.Layout(title='Top 20 Countries Ranked by Inequality-Adjusted Human Development Index')
fig = go.Figure(data = data, layout = layout)
py.offline.iplot(fig);
In [27]:
trace2 = go.Bar(x = ihdi_df_bot20.Country, y = ihdi_df_bot20.IHDI)
data2 = [trace2]
layout2 = go.Layout(title='Bottom 20 Countries Ranked by Inequality-Adjusted Human Development Index')
fig2 = go.Figure(data = data2, layout = layout2)
py.offline.iplot(fig2);
In [110]:
data = [dict(type='choropleth',locations = ihdi_df['Country'],
             autocolorscale = True,z = ihdi_df['IHDI'],
             locationmode = 'country names',
             marker = dict(line = dict (color = 'rgb(255,255,255)',width = 2)),
             colorbar = dict(title = "2017 Inequality-Adjusted HDI")
            ) 
       ]
layout = dict(title = 'Countries by 2017 Inequality-Adjusted HDI')
fig = go.Figure(data = data, layout = layout)
py.offline.iplot(fig);
In [29]:
#Save Dataframe to CSV
ihdi_df.to_csv('2018_inequality_adjusted_hdi.csv')

#Link to Edited Plot.ly Map
from IPython.core.display import display, HTML
display(HTML("""<a href="https://plot.ly/~as6140/1/">Public Link to Edited Graph on Plot.ly</a>"""))
In [30]:
#DO PEOPLE FROM HIGH IHDI COUNTRIES LIVE LONGER?
le_df = pd.read_csv('life_expectancy_by_country_world_bank.csv')
le_df.head()
Out[30]:
Country Name Country Code Indicator Name Indicator Code 1960 1961 1962 1963 1964 1965 ... 2010 2011 2012 2013 2014 2015 2016 2017 2018 Unnamed: 63
0 Aruba ABW Life expectancy at birth, total (years) SP.DYN.LE00.IN 65.662 66.074 66.444 66.787 67.113 67.435 ... 75.016 75.158 75.299 75.440 75.582 75.725 75.867 76.010 NaN NaN
1 Afghanistan AFG Life expectancy at birth, total (years) SP.DYN.LE00.IN 32.292 32.742 33.185 33.624 34.060 34.495 ... 61.226 61.666 62.086 62.494 62.895 63.288 63.673 64.047 NaN NaN
2 Angola AGO Life expectancy at birth, total (years) SP.DYN.LE00.IN 33.251 33.573 33.914 34.272 34.645 35.031 ... 58.192 59.042 59.770 60.373 60.858 61.241 61.547 61.809 NaN NaN
3 Albania ALB Life expectancy at birth, total (years) SP.DYN.LE00.IN 62.279 63.298 64.187 64.911 65.461 65.848 ... 76.652 77.031 77.389 77.702 77.963 78.174 78.345 78.495 NaN NaN
4 Andorra AND Life expectancy at birth, total (years) SP.DYN.LE00.IN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

5 rows × 64 columns

In [31]:
le_00_to_15_df = le_df[['Country Name','2000','2001','2002','2003','2004','2005',
                    '2006','2007','2008','2009','2010','2011','2012','2013','2014','2015']]
le_00_to_15_df.head()
Out[31]:
Country Name 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015
0 Aruba 73.787 73.853 73.937 74.038 74.156 74.287 74.429 74.576 74.725 74.872 75.016 75.158 75.299 75.440 75.582 75.725
1 Afghanistan 55.482 56.044 56.637 57.250 57.875 58.500 59.110 59.694 60.243 60.754 61.226 61.666 62.086 62.494 62.895 63.288
2 Angola 47.113 48.200 49.341 50.508 51.676 52.833 53.974 55.096 56.189 57.231 58.192 59.042 59.770 60.373 60.858 61.241
3 Albania 73.955 74.286 74.575 74.820 75.028 75.217 75.418 75.656 75.943 76.281 76.652 77.031 77.389 77.702 77.963 78.174
4 Andorra NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
In [32]:
pop_df = pd.read_csv('population_by_country_world_bank.csv')
pop_df.head()
Out[32]:
Country Name Country Code Indicator Name Indicator Code 1960 1961 1962 1963 1964 1965 ... 2010 2011 2012 2013 2014 2015 2016 2017 2018 Unnamed: 63
0 Aruba ABW Population, total SP.POP.TOTL 54211.0 55438.0 56225.0 56695.0 57032.0 57360.0 ... 101669.0 102053.0 102577.0 103187.0 103795.0 104341.0 104822.0 105264.0 NaN NaN
1 Afghanistan AFG Population, total SP.POP.TOTL 8996351.0 9166764.0 9345868.0 9533954.0 9731361.0 9938414.0 ... 28803167.0 29708599.0 30696958.0 31731688.0 32758020.0 33736494.0 34656032.0 35530081.0 NaN NaN
2 Angola AGO Population, total SP.POP.TOTL 5643182.0 5753024.0 5866061.0 5980417.0 6093321.0 6203299.0 ... 23369131.0 24218565.0 25096150.0 25998340.0 26920466.0 27859305.0 28813463.0 29784193.0 NaN NaN
3 Albania ALB Population, total SP.POP.TOTL 1608800.0 1659800.0 1711319.0 1762621.0 1814135.0 1864791.0 ... 2913021.0 2905195.0 2900401.0 2895092.0 2889104.0 2880703.0 2876101.0 2873457.0 NaN NaN
4 Andorra AND Population, total SP.POP.TOTL 13411.0 14375.0 15370.0 16412.0 17469.0 18549.0 ... 84449.0 83751.0 82431.0 80788.0 79223.0 78014.0 77281.0 76965.0 NaN NaN

5 rows × 64 columns

In [33]:
pop_00_to_15_df = pop_df[['Country Name','2000','2001','2002','2003','2004','2005',
                    '2006','2007','2008','2009','2010','2011','2012','2013','2014','2015']]
pop_00_to_15_df.head()
Out[33]:
Country Name 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015
0 Aruba 90853.0 92898.0 94992.0 97017.0 98737.0 100031.0 100832.0 101220.0 101353.0 101453.0 101669.0 102053.0 102577.0 103187.0 103795.0 104341.0
1 Afghanistan 20093756.0 20966463.0 21979923.0 23064851.0 24118979.0 25070798.0 25893450.0 26616792.0 27294031.0 28004331.0 28803167.0 29708599.0 30696958.0 31731688.0 32758020.0 33736494.0
2 Angola 16440924.0 16983266.0 17572649.0 18203369.0 18865716.0 19552542.0 20262399.0 20997687.0 21759420.0 22549547.0 23369131.0 24218565.0 25096150.0 25998340.0 26920466.0 27859305.0
3 Albania 3089027.0 3060173.0 3051010.0 3039616.0 3026939.0 3011487.0 2992547.0 2970017.0 2947314.0 2927519.0 2913021.0 2905195.0 2900401.0 2895092.0 2889104.0 2880703.0
4 Andorra 65390.0 67341.0 70049.0 73182.0 76244.0 78867.0 80991.0 82683.0 83861.0 84462.0 84449.0 83751.0 82431.0 80788.0 79223.0 78014.0
In [34]:
le_pop_df = pd.merge(le_00_to_15_df, pop_00_to_15_df, how='inner', on='Country Name', suffixes=('_le', '_pop'))
In [35]:
le_pop_df.head()
Out[35]:
Country Name 2000_le 2001_le 2002_le 2003_le 2004_le 2005_le 2006_le 2007_le 2008_le ... 2006_pop 2007_pop 2008_pop 2009_pop 2010_pop 2011_pop 2012_pop 2013_pop 2014_pop 2015_pop
0 Aruba 73.787 73.853 73.937 74.038 74.156 74.287 74.429 74.576 74.725 ... 100832.0 101220.0 101353.0 101453.0 101669.0 102053.0 102577.0 103187.0 103795.0 104341.0
1 Afghanistan 55.482 56.044 56.637 57.250 57.875 58.500 59.110 59.694 60.243 ... 25893450.0 26616792.0 27294031.0 28004331.0 28803167.0 29708599.0 30696958.0 31731688.0 32758020.0 33736494.0
2 Angola 47.113 48.200 49.341 50.508 51.676 52.833 53.974 55.096 56.189 ... 20262399.0 20997687.0 21759420.0 22549547.0 23369131.0 24218565.0 25096150.0 25998340.0 26920466.0 27859305.0
3 Albania 73.955 74.286 74.575 74.820 75.028 75.217 75.418 75.656 75.943 ... 2992547.0 2970017.0 2947314.0 2927519.0 2913021.0 2905195.0 2900401.0 2895092.0 2889104.0 2880703.0
4 Andorra NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 80991.0 82683.0 83861.0 84462.0 84449.0 83751.0 82431.0 80788.0 79223.0 78014.0

5 rows × 33 columns

In [36]:
hdi_df = pd.read_csv('hdi_human_development_index.csv')
hdi_df = hdi_df.drop(columns=['1990','1991','1992','1993','1994','1995','1996','1997','1998','1999'])
hdi_df.head()
Out[36]:
country 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015
0 Afghanistan 0.340 0.341 0.373 0.381 0.396 0.405 0.415 0.433 0.434 0.448 0.454 0.463 0.470 0.476 0.479 0.479
1 Albania 0.662 0.670 0.674 0.681 0.685 0.696 0.703 0.713 0.721 0.725 0.738 0.752 0.759 0.761 0.762 0.764
2 Algeria 0.644 0.653 0.663 0.673 0.680 0.686 0.690 0.697 0.705 0.714 0.724 0.732 0.737 0.741 0.743 0.745
3 Andorra NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN 0.819 0.819 0.843 0.850 0.857 0.858
4 Angola 0.391 0.401 0.406 0.415 0.426 0.439 0.454 0.468 0.480 0.488 0.495 0.508 0.523 0.527 0.531 0.533
In [37]:
bubbly_df = pd.merge(hdi_df, le_pop_df, how='inner', left_on='country', right_on='Country Name', suffixes=('_hdi', ''))
bubbly_df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 172 entries, 0 to 171
Data columns (total 50 columns):
country         172 non-null object
2000            154 non-null float64
2001            154 non-null float64
2002            154 non-null float64
2003            156 non-null float64
2004            158 non-null float64
2005            166 non-null float64
2006            166 non-null float64
2007            166 non-null float64
2008            166 non-null float64
2009            166 non-null float64
2010            172 non-null float64
2011            172 non-null float64
2012            172 non-null float64
2013            172 non-null float64
2014            172 non-null float64
2015            172 non-null float64
Country Name    172 non-null object
2000_le         169 non-null float64
2001_le         168 non-null float64
2002_le         170 non-null float64
2003_le         168 non-null float64
2004_le         168 non-null float64
2005_le         169 non-null float64
2006_le         168 non-null float64
2007_le         168 non-null float64
2008_le         168 non-null float64
2009_le         168 non-null float64
2010_le         168 non-null float64
2011_le         168 non-null float64
2012_le         168 non-null float64
2013_le         168 non-null float64
2014_le         168 non-null float64
2015_le         168 non-null float64
2000_pop        172 non-null float64
2001_pop        172 non-null float64
2002_pop        172 non-null float64
2003_pop        172 non-null float64
2004_pop        172 non-null float64
2005_pop        172 non-null float64
2006_pop        172 non-null float64
2007_pop        172 non-null float64
2008_pop        172 non-null float64
2009_pop        172 non-null float64
2010_pop        172 non-null float64
2011_pop        172 non-null float64
2012_pop        171 non-null float64
2013_pop        171 non-null float64
2014_pop        171 non-null float64
2015_pop        171 non-null float64
dtypes: float64(48), object(2)
memory usage: 68.5+ KB
In [38]:
new_names = [(i,i+'_hdi') for i in bubbly_df.iloc[:, 1:17].columns.values]
bubbly_df.rename(columns = dict(new_names), inplace=True)
In [39]:
bubbly_df.head()
Out[39]:
country 2000_hdi 2001_hdi 2002_hdi 2003_hdi 2004_hdi 2005_hdi 2006_hdi 2007_hdi 2008_hdi ... 2006_pop 2007_pop 2008_pop 2009_pop 2010_pop 2011_pop 2012_pop 2013_pop 2014_pop 2015_pop
0 Afghanistan 0.340 0.341 0.373 0.381 0.396 0.405 0.415 0.433 0.434 ... 25893450.0 26616792.0 27294031.0 28004331.0 28803167.0 29708599.0 30696958.0 31731688.0 32758020.0 33736494.0
1 Albania 0.662 0.670 0.674 0.681 0.685 0.696 0.703 0.713 0.721 ... 2992547.0 2970017.0 2947314.0 2927519.0 2913021.0 2905195.0 2900401.0 2895092.0 2889104.0 2880703.0
2 Algeria 0.644 0.653 0.663 0.673 0.680 0.686 0.690 0.697 0.705 ... 33777915.0 34300076.0 34860715.0 35465760.0 36117637.0 36819558.0 37565847.0 38338562.0 39113313.0 39871528.0
3 Andorra NaN NaN NaN NaN NaN NaN NaN NaN NaN ... 80991.0 82683.0 83861.0 84462.0 84449.0 83751.0 82431.0 80788.0 79223.0 78014.0
4 Angola 0.391 0.401 0.406 0.415 0.426 0.439 0.454 0.468 0.480 ... 20262399.0 20997687.0 21759420.0 22549547.0 23369131.0 24218565.0 25096150.0 25998340.0 26920466.0 27859305.0

5 rows × 50 columns

In [40]:
bubbly_df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 172 entries, 0 to 171
Data columns (total 50 columns):
country         172 non-null object
2000_hdi        154 non-null float64
2001_hdi        154 non-null float64
2002_hdi        154 non-null float64
2003_hdi        156 non-null float64
2004_hdi        158 non-null float64
2005_hdi        166 non-null float64
2006_hdi        166 non-null float64
2007_hdi        166 non-null float64
2008_hdi        166 non-null float64
2009_hdi        166 non-null float64
2010_hdi        172 non-null float64
2011_hdi        172 non-null float64
2012_hdi        172 non-null float64
2013_hdi        172 non-null float64
2014_hdi        172 non-null float64
2015_hdi        172 non-null float64
Country Name    172 non-null object
2000_le         169 non-null float64
2001_le         168 non-null float64
2002_le         170 non-null float64
2003_le         168 non-null float64
2004_le         168 non-null float64
2005_le         169 non-null float64
2006_le         168 non-null float64
2007_le         168 non-null float64
2008_le         168 non-null float64
2009_le         168 non-null float64
2010_le         168 non-null float64
2011_le         168 non-null float64
2012_le         168 non-null float64
2013_le         168 non-null float64
2014_le         168 non-null float64
2015_le         168 non-null float64
2000_pop        172 non-null float64
2001_pop        172 non-null float64
2002_pop        172 non-null float64
2003_pop        172 non-null float64
2004_pop        172 non-null float64
2005_pop        172 non-null float64
2006_pop        172 non-null float64
2007_pop        172 non-null float64
2008_pop        172 non-null float64
2009_pop        172 non-null float64
2010_pop        172 non-null float64
2011_pop        172 non-null float64
2012_pop        171 non-null float64
2013_pop        171 non-null float64
2014_pop        171 non-null float64
2015_pop        171 non-null float64
dtypes: float64(48), object(2)
memory usage: 68.5+ KB
In [41]:
bubbly_df = bubbly_df.dropna(axis = 0, how ='any') 
In [42]:
bubbly_df = bubbly_df.reset_index(drop=True)
In [43]:
bubbly_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 152 entries, 0 to 151
Data columns (total 50 columns):
country         152 non-null object
2000_hdi        152 non-null float64
2001_hdi        152 non-null float64
2002_hdi        152 non-null float64
2003_hdi        152 non-null float64
2004_hdi        152 non-null float64
2005_hdi        152 non-null float64
2006_hdi        152 non-null float64
2007_hdi        152 non-null float64
2008_hdi        152 non-null float64
2009_hdi        152 non-null float64
2010_hdi        152 non-null float64
2011_hdi        152 non-null float64
2012_hdi        152 non-null float64
2013_hdi        152 non-null float64
2014_hdi        152 non-null float64
2015_hdi        152 non-null float64
Country Name    152 non-null object
2000_le         152 non-null float64
2001_le         152 non-null float64
2002_le         152 non-null float64
2003_le         152 non-null float64
2004_le         152 non-null float64
2005_le         152 non-null float64
2006_le         152 non-null float64
2007_le         152 non-null float64
2008_le         152 non-null float64
2009_le         152 non-null float64
2010_le         152 non-null float64
2011_le         152 non-null float64
2012_le         152 non-null float64
2013_le         152 non-null float64
2014_le         152 non-null float64
2015_le         152 non-null float64
2000_pop        152 non-null float64
2001_pop        152 non-null float64
2002_pop        152 non-null float64
2003_pop        152 non-null float64
2004_pop        152 non-null float64
2005_pop        152 non-null float64
2006_pop        152 non-null float64
2007_pop        152 non-null float64
2008_pop        152 non-null float64
2009_pop        152 non-null float64
2010_pop        152 non-null float64
2011_pop        152 non-null float64
2012_pop        152 non-null float64
2013_pop        152 non-null float64
2014_pop        152 non-null float64
2015_pop        152 non-null float64
dtypes: float64(48), object(2)
memory usage: 59.5+ KB
In [44]:
sum(bubbly_df.country == bubbly_df['Country Name'])
Out[44]:
152
In [45]:
bubbly_df = bubbly_df.drop(columns=['Country Name'])
In [46]:
bubbly_df.columns
Out[46]:
Index(['country', '2000_hdi', '2001_hdi', '2002_hdi', '2003_hdi', '2004_hdi',
       '2005_hdi', '2006_hdi', '2007_hdi', '2008_hdi', '2009_hdi', '2010_hdi',
       '2011_hdi', '2012_hdi', '2013_hdi', '2014_hdi', '2015_hdi', '2000_le',
       '2001_le', '2002_le', '2003_le', '2004_le', '2005_le', '2006_le',
       '2007_le', '2008_le', '2009_le', '2010_le', '2011_le', '2012_le',
       '2013_le', '2014_le', '2015_le', '2000_pop', '2001_pop', '2002_pop',
       '2003_pop', '2004_pop', '2005_pop', '2006_pop', '2007_pop', '2008_pop',
       '2009_pop', '2010_pop', '2011_pop', '2012_pop', '2013_pop', '2014_pop',
       '2015_pop'],
      dtype='object')
In [47]:
bdf_melt1 = bubbly_df.melt(id_vars=['country'], 
              value_vars=['2000_hdi', '2001_hdi', '2002_hdi', '2003_hdi', '2004_hdi',
                        '2005_hdi', '2006_hdi', '2007_hdi', '2008_hdi', '2009_hdi', '2010_hdi',
                        '2011_hdi', '2012_hdi', '2013_hdi', '2014_hdi', '2015_hdi'],
              var_name='year', value_name='hdi')
bdf_melt2 = bubbly_df.melt(id_vars=['country'], 
              value_vars=['2000_le','2001_le', '2002_le', '2003_le', '2004_le', '2005_le', 
                        '2006_le','2007_le', '2008_le', '2009_le', '2010_le', '2011_le', 
                        '2012_le','2013_le', '2014_le', '2015_le'],
              var_name='year', value_name='life_expectancy')
bdf_melt3 = bubbly_df.melt(id_vars=['country'], 
              value_vars=['2000_pop', '2001_pop', '2002_pop', '2003_pop', '2004_pop', 
                        '2005_pop', '2006_pop', '2007_pop', '2008_pop','2009_pop', 
                        '2010_pop', '2011_pop', '2012_pop', '2013_pop', '2014_pop','2015_pop'],
              var_name='year', value_name='population')
In [48]:
bdf_melt1.year = bdf_melt1.year.apply(lambda x: x[0:4]).astype(int)
bdf_melt2.year = bdf_melt2.year.apply(lambda x: x[0:4]).astype(int)
bdf_melt3.year = bdf_melt3.year.apply(lambda x: x[0:4]).astype(int)
In [49]:
grouped_hdi = bdf_melt1.groupby(["country"]).apply(lambda x: x.sort_values(["year"], ascending = True)).reset_index(drop=True)
grouped_le = bdf_melt2.groupby(["country"]).apply(lambda x: x.sort_values(["year"], ascending = True)).reset_index(drop=True)
grouped_pop = bdf_melt3.groupby(["country"]).apply(lambda x: x.sort_values(["year"], ascending = True)).reset_index(drop=True)
In [50]:
temp = pd.merge(grouped_hdi, grouped_le, on = ['country', 'year'], how = 'inner')
temp = pd.merge(temp, grouped_pop, on = ['country', 'year'], how = 'inner')
temp.head()
Out[50]:
country year hdi life_expectancy population
0 Afghanistan 2000 0.340 55.482 20093756.0
1 Afghanistan 2001 0.341 56.044 20966463.0
2 Afghanistan 2002 0.373 56.637 21979923.0
3 Afghanistan 2003 0.381 57.250 23064851.0
4 Afghanistan 2004 0.396 57.875 24118979.0
In [51]:
data = temp.copy()
In [52]:
continents_df = pd.read_csv('gapminder_continents.csv')
continents_df.head()
Out[52]:
continent country gdpPercap_1952 gdpPercap_1957 gdpPercap_1962 gdpPercap_1967 gdpPercap_1972 gdpPercap_1977 gdpPercap_1982 gdpPercap_1987 ... pop_1962 pop_1967 pop_1972 pop_1977 pop_1982 pop_1987 pop_1992 pop_1997 pop_2002 pop_2007
0 Africa Algeria 2449.008185 3013.976023 2550.816880 3246.991771 4182.663766 4910.416756 5745.160213 5681.358539 ... 11000948.0 12760499.0 14760787.0 17152804.0 20033753.0 23254956.0 26298373.0 29072015.0 31287142 33333216
1 Africa Angola 3520.610273 3827.940465 4269.276742 5522.776375 5473.288005 3008.647355 2756.953672 2430.208311 ... 4826015.0 5247469.0 5894858.0 6162675.0 7016384.0 7874230.0 8735988.0 9875024.0 10866106 12420476
2 Africa Benin 1062.752200 959.601080 949.499064 1035.831411 1085.796879 1029.161251 1277.897616 1225.856010 ... 2151895.0 2427334.0 2761407.0 3168267.0 3641603.0 4243788.0 4981671.0 6066080.0 7026113 8078314
3 Africa Botswana 851.241141 918.232535 983.653976 1214.709294 2263.611114 3214.857818 4551.142150 6205.883850 ... 512764.0 553541.0 619351.0 781472.0 970347.0 1151184.0 1342614.0 1536536.0 1630347 1639131
4 Africa Burkina Faso 543.255241 617.183465 722.512021 794.826560 854.735976 743.387037 807.198586 912.063142 ... 4919632.0 5127935.0 5433886.0 5889574.0 6634596.0 7586551.0 8878303.0 10352843.0 12251209 14326203

5 rows × 38 columns

In [53]:
continents_df = continents_df[['continent','country']]
In [54]:
temp.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 2432 entries, 0 to 2431
Data columns (total 5 columns):
country            2432 non-null object
year               2432 non-null int64
hdi                2432 non-null float64
life_expectancy    2432 non-null float64
population         2432 non-null float64
dtypes: float64(3), int64(1), object(1)
memory usage: 114.0+ KB
In [55]:
temp = pd.merge(temp,continents_df, how='left', on='country')
temp.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 2432 entries, 0 to 2431
Data columns (total 6 columns):
country            2432 non-null object
year               2432 non-null int64
hdi                2432 non-null float64
life_expectancy    2432 non-null float64
population         2432 non-null float64
continent          1872 non-null object
dtypes: float64(3), int64(1), object(2)
memory usage: 133.0+ KB
In [56]:
temp.continent.unique()
Out[56]:
array(['Asia', 'Europe', 'Africa', 'Americas', nan, 'Oceania'],
      dtype=object)
In [57]:
temp['country'][temp.isna().any(axis=1)].unique()
Out[57]:
array(['Armenia', 'Azerbaijan', 'Barbados', 'Belarus', 'Belize',
       'Congo, Dem. Rep.', 'Congo, Rep.', 'Cyprus', 'Estonia', 'Fiji',
       'Georgia', 'Guyana', 'Kazakhstan', 'Kyrgyz Republic', 'Latvia',
       'Liechtenstein', 'Lithuania', 'Luxembourg', 'Maldives', 'Malta',
       'Micronesia, Fed. Sts.', 'Moldova', 'Papua New Guinea', 'Qatar',
       'Samoa', 'Seychelles', 'Solomon Islands', 'St. Lucia',
       'St. Vincent and the Grenadines', 'Tajikistan', 'Timor-Leste',
       'Tonga', 'Ukraine', 'United Arab Emirates', 'Uzbekistan'],
      dtype=object)
In [58]:
temp.loc[temp['country'] == 'Armenia','continent'] = 'Asia'
temp.loc[temp['country'] == 'Azerbaijan','continent'] = 'Asia'
temp.loc[temp['country'] == 'Barbados','continent'] = 'Americas'
temp.loc[temp['country'] == 'Belarus','continent'] = 'Europe'
temp.loc[temp['country'] == 'Belize','continent'] = 'Americas'
temp.loc[temp['country'] == 'Congo, Dem. Rep.','continent'] = 'Africa'
temp.loc[temp['country'] == 'Congo, Rep.','continent'] = 'Africa'
temp.loc[temp['country'] == 'Cyprus','continent'] = 'Europe'
temp.loc[temp['country'] == 'Estonia','continent'] = 'Europe'
temp.loc[temp['country'] == 'Fiji','continent'] = 'Oceania'
temp.loc[temp['country'] == 'Georgia','continent'] = 'Asia'
temp.loc[temp['country'] == 'Guyana','continent'] = 'Americas'
temp.loc[temp['country'] == 'Kazakhstan','continent'] = 'Asia'
temp.loc[temp['country'] == 'Kyrgyz Republic','continent'] = 'Asia'
temp.loc[temp['country'] == 'Latvia','continent'] = 'Europe'
temp.loc[temp['country'] == 'Liechtenstein','continent'] = 'Europe'
temp.loc[temp['country'] == 'Lithuania','continent'] = 'Europe'
temp.loc[temp['country'] == 'Luxembourg','continent'] = 'Europe'
temp.loc[temp['country'] == 'Maldives','continent'] = 'Asia'
temp.loc[temp['country'] == 'Moldova','continent'] = 'Europe'
temp.loc[temp['country'] == 'Malta','continent'] = 'Europe'
temp.loc[temp['country'] == 'Micronesia, Fed. Sts.','continent'] = 'Oceania'
temp.loc[temp['country'] == 'Papua New Guinea','continent'] = 'Oceania'
temp.loc[temp['country'] == 'Qatar','continent'] = 'Asia'
temp.loc[temp['country'] == 'Samoa','continent'] = 'Oceania'
temp.loc[temp['country'] == 'Seychelles','continent'] = 'Africa'
temp.loc[temp['country'] == 'Solomon Islands','continent'] = 'Oceania'
temp.loc[temp['country'] == 'St. Lucia','continent'] = 'Americas'
temp.loc[temp['country'] == 'St. Vincent and the Grenadines','continent'] = 'Americas'
temp.loc[temp['country'] == 'Tajikistan','continent'] = 'Asia'
temp.loc[temp['country'] == 'Timor-Leste','continent'] = 'Oceania'
temp.loc[temp['country'] == 'Tonga','continent'] = 'Oceania'
temp.loc[temp['country'] == 'Ukraine','continent'] = 'Europe'
temp.loc[temp['country'] == 'United Arab Emirates','continent'] = 'Asia'
temp.loc[temp['country'] == 'Uzbekistan','continent'] = 'Asia'
In [59]:
temp.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 2432 entries, 0 to 2431
Data columns (total 6 columns):
country            2432 non-null object
year               2432 non-null int64
hdi                2432 non-null float64
life_expectancy    2432 non-null float64
population         2432 non-null float64
continent          2432 non-null object
dtypes: float64(3), int64(1), object(2)
memory usage: 133.0+ KB
In [60]:
temp['country'].loc[temp['continent'] == 'Americas'].unique()
Out[60]:
array(['Argentina', 'Barbados', 'Belize', 'Bolivia', 'Brazil', 'Canada',
       'Chile', 'Colombia', 'Costa Rica', 'Cuba', 'Dominican Republic',
       'Ecuador', 'El Salvador', 'Guatemala', 'Guyana', 'Haiti',
       'Honduras', 'Jamaica', 'Mexico', 'Nicaragua', 'Panama', 'Paraguay',
       'Peru', 'St. Lucia', 'St. Vincent and the Grenadines',
       'Trinidad and Tobago', 'United States', 'Uruguay'], dtype=object)
In [61]:
temp.loc[temp['country'] == 'Argentina','continent'] = 'South America'
temp.loc[temp['country'] == 'Barbados','continent'] = 'North America'
temp.loc[temp['country'] == 'Belize','continent'] = 'North America'
temp.loc[temp['country'] == 'Bolivia','continent'] = 'South America'
temp.loc[temp['country'] == 'Brazil','continent'] = 'South America'
temp.loc[temp['country'] == 'Canada','continent'] = 'North America'
temp.loc[temp['country'] == 'Chile','continent'] = 'South America'
temp.loc[temp['country'] == 'Colombia','continent'] = 'South America'
temp.loc[temp['country'] == 'Costa Rica','continent'] = 'North America'
temp.loc[temp['country'] == 'Cuba','continent'] = 'North America'
temp.loc[temp['country'] == 'Dominican Republic','continent'] = 'North America'
temp.loc[temp['country'] == 'Ecuador','continent'] = 'South America'
temp.loc[temp['country'] == 'El Salvador','continent'] = 'North America'
temp.loc[temp['country'] == 'Guatemala','continent'] = 'North America'
temp.loc[temp['country'] == 'Guyana','continent'] = 'South America'
temp.loc[temp['country'] == 'Haiti','continent'] = 'North America'
temp.loc[temp['country'] == 'Honduras','continent'] = 'North America'
temp.loc[temp['country'] == 'Jamaica','continent'] = 'North America'
temp.loc[temp['country'] == 'Mexico','continent'] = 'North America'
temp.loc[temp['country'] == 'Nicaragua','continent'] = 'North America'
temp.loc[temp['country'] == 'Panama','continent'] = 'North America'
temp.loc[temp['country'] == 'Paraguay','continent'] = 'South America'
temp.loc[temp['country'] == 'Peru','continent'] = 'South America'
temp.loc[temp['country'] == 'St. Lucia','continent'] = 'North America'
temp.loc[temp['country'] == 'St. Vincent and the Grenadines','continent'] = 'North America'
temp.loc[temp['country'] == 'Trinidad and Tobago','continent'] = 'South America'
temp.loc[temp['country'] == 'United States','continent'] = 'North America'
temp.loc[temp['country'] == 'Uruguay','continent'] = 'South America'
In [62]:
data = temp.copy()
In [115]:
#from bubbly.bubbly import bubbleplot
#figure = bubbleplot(dataset=data, x_column='hdi', y_column='life_expectancy', 
#    bubble_column='country', time_column='year', size_column='population', color_column='continent', 
#    x_title="Human Development Index (HDI)", y_title="Life Expectancy", 
#    title='Human Development vs. Life Expectancy by Year, Country, & Population',
#    x_logscale=False, scale_bubble=3, height=650, x_range = (0.2,1), y_range = (30,95))
#py.offline.iplot(figure, config={'scrollzoom': True})
#py.plotly.icreate_animations(figure, config={'scrollzoom': True})
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-115-894760a7cc04> in <module>()
      5     x_title="Human Development Index (HDI)", y_title="Life Expectancy",
      6     title='Human Development vs. Life Expectancy by Year, Country, & Population',
----> 7     x_logscale=False, scale_bubble=3, height=650, x_range = (0.2,1), y_range = (30,95))
      8 py.offline.iplot(figure, config={'scrollzoom': True})
      9 #py.plotly.icreate_animations(figure, config={'scrollzoom': True})

/anaconda3/envs/learn-env/lib/python3.6/site-packages/bubbly/bubbly.py in bubbleplot(dataset, x_column, y_column, bubble_column, z_column, time_column, size_column, color_column, x_logscale, y_logscale, z_logscale, x_range, y_range, z_range, x_title, y_title, z_title, title, colorbar_title, scale_bubble, colorscale, marker_opacity, marker_border_width, show_slider, show_button, show_colorbar, show_legend, width, height)
     16     category_column = None
     17     if color_column: # Can be numerical or categorical
---> 18         if dataset[color_column].dtype.name in ['category', 'object', 'bool']:
     19             category_column = color_column
     20             color_column = None

TypeError: list indices must be integers or slices, not str
In [64]:
display(HTML("""<a href="https://plot.ly/~as6140/4/">Public Link to Edited Bubble Graph on Plot.ly</a>"""))
In [65]:
eco_df = pd.read_csv('NFA 2018 Edition.csv')
In [66]:
eco_df = eco_df.loc[:,['country','year','record','total']]
In [67]:
eco_df.head()
Out[67]:
country year record total
0 Armenia 1992 AreaPerCap 5.022540e-01
1 Armenia 1992 AreaTotHA 1.732525e+06
2 Armenia 1992 BiocapPerCap 4.273741e-01
3 Armenia 1992 BiocapTotGHA 1.474227e+06
4 Armenia 1992 EFConsPerCap 1.730092e+00
In [68]:
eco_df1 = eco_df[(eco_df.record == "EFConsPerCap")].rename(columns={"total": "EFConsPerCap"}).drop(columns='record')
eco_df2 = eco_df[(eco_df.record == "EFConsTotGHA")].rename(columns={"total": "EFConsTotGHA"}).drop(columns='record')
eco_df3 = eco_df[(eco_df.record == "BiocapPerCap")].rename(columns={"total": "BiocapPerCap"}).drop(columns='record')
eco_df4 = eco_df[(eco_df.record == "BiocapTotGHA")].rename(columns={"total": "BiocapTotGHA"}).drop(columns='record')
In [69]:
eco_df5 = pd.merge(eco_df1,eco_df2, on=['country', 'year'])
eco_df6 = pd.merge(eco_df5,eco_df3, on=['country', 'year'])
eco_df = pd.merge(eco_df6,eco_df4, on=['country', 'year'])
In [70]:
eco_df = eco_df[eco_df['year'].isin(['2000','2001','2002','2003','2004','2005','2006','2007','2008','2009',
                                    '2010','2011','2012','2013','2014'])]
In [71]:
eco_df.loc[eco_df.country == 'Viet Nam','country'] = 'Vietnam'
eco_df.loc[eco_df.country == 'Tanzania, United Republic of','country'] = 'Tanzania'
eco_df.loc[eco_df.country == 'United States of America','country'] = 'United States'
eco_df.loc[eco_df.country == 'Congo, Democratic Republic of','country'] = 'Congo, Dem. Rep.'
eco_df.loc[eco_df.country == 'Congo','country'] = 'Congo, Rep.'
eco_df.loc[eco_df.country == 'Saint Lucia','country'] = 'St. Lucia'
eco_df.loc[eco_df.country == 'Libyan Arab Jamahiriya','country'] = 'Libya'
eco_df.loc[eco_df.country == 'Micronesia, Federated States of','country'] = 'Micronesia, Fed. Sts.'
eco_df.loc[eco_df.country == 'Kyrgyzstan','country'] = 'Kyrgyz Republic'
eco_df.loc[eco_df.country == 'Slovakia','country'] = 'Slovak Republic'
eco_df.loc[eco_df.country == 'Sudan (former)','country'] = 'Sudan'
eco_df.loc[eco_df.country == 'Serbia and Montenegro','country'] = 'Serbia'
In [72]:
temp.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 2432 entries, 0 to 2431
Data columns (total 6 columns):
country            2432 non-null object
year               2432 non-null int64
hdi                2432 non-null float64
life_expectancy    2432 non-null float64
population         2432 non-null float64
continent          2432 non-null object
dtypes: float64(3), int64(1), object(2)
memory usage: 133.0+ KB
In [73]:
eco_df['country'].unique()
Out[73]:
array(['Armenia', 'Afghanistan', 'Albania', 'Algeria', 'Angola',
       'Antigua and Barbuda', 'Argentina', 'Australia', 'Austria',
       'Bahamas', 'Bahrain', 'Barbados', 'Bangladesh', 'Bermuda',
       'Bhutan', 'Bolivia', 'Botswana', 'Brazil', 'Aruba', 'Belize',
       'Brunei Darussalam', 'Bulgaria', 'Myanmar', 'Burundi', 'Cameroon',
       'Canada', 'Cabo Verde', 'Cayman Islands',
       'Central African Republic', 'Sri Lanka', 'Chad', 'Chile',
       'Colombia', 'Comoros', 'Congo, Rep.', 'Cook Islands', 'Costa Rica',
       'Cuba', 'Cyprus', 'Azerbaijan', 'Benin', 'Denmark', 'Dominica',
       'Dominican Republic', 'Belarus', 'Ecuador', 'Egypt', 'El Salvador',
       'Equatorial Guinea', 'Estonia', 'Fiji', 'Finland', 'France',
       'French Guiana', 'French Polynesia', 'Djibouti', 'Georgia',
       'Gabon', 'Gambia', 'Germany', 'Bosnia and Herzegovina', 'Ghana',
       'Kiribati', 'Greece', 'Grenada', 'Guadeloupe', 'Guatemala',
       'Guinea', 'Guyana', 'Haiti', 'Honduras', 'Hungary', 'Croatia',
       'India', 'Indonesia', 'Iran, Islamic Republic of', 'Iraq',
       'Ireland', 'Israel', 'Italy', "Cote d'Ivoire", 'Kazakhstan',
       'Jamaica', 'Japan', 'Jordan', 'Kyrgyz Republic', 'Kenya',
       'Cambodia', "Korea, Democratic People's Republic of",
       'Korea, Republic of', 'Kuwait', 'Latvia',
       "Lao People's Democratic Republic", 'Lebanon', 'Lesotho',
       'Liberia', 'Libya', 'Lithuania', 'Madagascar', 'Malawi',
       'Malaysia', 'Mali', 'Malta', 'Martinique', 'Mauritania',
       'Mauritius', 'Mexico', 'Mongolia', 'Montserrat', 'Morocco',
       'Mozambique', 'Micronesia, Fed. Sts.', 'Moldova', 'Namibia',
       'Nepal', 'Netherlands', 'Macedonia TFYR', 'Vanuatu', 'New Zealand',
       'Nicaragua', 'Niger', 'Nigeria', 'Norway', 'Pakistan', 'Panama',
       'Czech Republic', 'Papua New Guinea', 'Paraguay', 'Peru',
       'Philippines', 'Poland', 'Portugal', 'Guinea-Bissau',
       'Timor-Leste', 'Eritrea', 'Qatar', 'Zimbabwe', 'Reunion',
       'Romania', 'Rwanda', 'Russian Federation', 'Serbia', 'St. Lucia',
       'Sao Tome and Principe', 'Saudi Arabia', 'Senegal', 'Sierra Leone',
       'Slovenia', 'Slovak Republic', 'Singapore', 'Somalia',
       'South Africa', 'Spain', 'Sudan', 'Suriname', 'Tajikistan',
       'Swaziland', 'Sweden', 'Switzerland', 'Syrian Arab Republic',
       'Turkmenistan', 'Tanzania', 'Thailand', 'Togo', 'Tonga',
       'Trinidad and Tobago', 'Oman', 'Tunisia', 'Turkey',
       'United Arab Emirates', 'Uganda', 'United Kingdom', 'Ukraine',
       'United States', 'Burkina Faso', 'Uruguay', 'Uzbekistan',
       'Venezuela, Bolivarian Republic of', 'Vietnam', 'Ethiopia',
       'Samoa', 'Yemen', 'Congo, Dem. Rep.', 'Zambia', 'Belgium',
       'Luxembourg', 'Montenegro', 'South Sudan', 'China', 'World'],
      dtype=object)
In [74]:
temp = temp[temp.year != 2015]
temp = pd.merge(temp,eco_df, how='inner', on=['country', 'year'])
In [75]:
temp.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 2157 entries, 0 to 2156
Data columns (total 10 columns):
country            2157 non-null object
year               2157 non-null int64
hdi                2157 non-null float64
life_expectancy    2157 non-null float64
population         2157 non-null float64
continent          2157 non-null object
EFConsPerCap       2157 non-null float64
EFConsTotGHA       2157 non-null float64
BiocapPerCap       2157 non-null float64
BiocapTotGHA       2157 non-null float64
dtypes: float64(7), int64(1), object(2)
memory usage: 185.4+ KB
In [76]:
temp.head()
Out[76]:
country year hdi life_expectancy population continent EFConsPerCap EFConsTotGHA BiocapPerCap BiocapTotGHA
0 Afghanistan 2000 0.340 55.482 20093756.0 Asia 0.648085 12768498.74 0.509767 10043372.21
1 Afghanistan 2001 0.341 56.044 20966463.0 Asia 0.605820 12438205.89 0.500063 10266894.13
2 Afghanistan 2002 0.373 56.637 21979923.0 Asia 0.704061 15128227.46 0.570591 12260349.44
3 Afghanistan 2003 0.381 57.250 23064851.0 Asia 0.708856 15954511.65 0.591033 13302606.63
4 Afghanistan 2004 0.396 57.875 24118979.0 Asia 0.619414 14556109.86 0.502825 11816295.92
In [77]:
temp = temp[temp.year != 2015]
In [78]:
temp.country[temp.isnull().any(axis=1)].value_counts()
Out[78]:
Series([], Name: country, dtype: int64)
In [79]:
data1 = temp.copy()
In [98]:
data1.to_csv('data_for_plotly_grid.csv')
In [140]:
# Biocapacity Per Capita vs. EF Per Capita
from bubbly.bubbly import bubbleplot
figure = bubbleplot(dataset=data1, x_column='BiocapPerCap', y_column='EFConsPerCap', 
    bubble_column='country', time_column='year', size_column='EFConsTotGHA', color_column='continent', 
    x_title="Biocapacity Per Capita (global hectares)", y_title="Ecological Footprint Per Capita (global hectares)", 
    title='Biocapacity (Per Capita) vs. Ecological Footprint (Per Capita) by Year, Country, & Population',
    x_logscale=False, scale_bubble=3, height=650,x_range = (-2,18), y_range = (-2,18))
iplot(figure, config={'scrollzoom': True})
#py.plotly.icreate_animations(figure)
In [88]:
from bubbly.bubbly import bubbleplot
figure = bubbleplot(dataset=data1, x_column='hdi', y_column='EFConsPerCap', 
    bubble_column='country', time_column='year', size_column='EFConsTotGHA', color_column='continent', 
    x_title="Human Development Index (HDI)", y_title="Ecological Footprint Per Capita (global hectares)", 
    title='Human Development Index vs. Ecological Footprint by Year, Country, & Population (Bubble Size = Total Footprint)',
    x_logscale=False, scale_bubble=3, height=650,x_range = (0.25,1), y_range = (-2,20))
iplot(figure, config={'scrollzoom': True})
#,x_range = (0.2,1), y_range = (-100000,6000000000)
In [ ]: