# Create year 2000 dataset from input data
import numpy as np
import pandas as pd
# Load the fertility / human-development-index table and preview its first rows.
df = pd.read_csv(
    filepath_or_buffer='children-per-woman-vs-human-development-index.csv'
)
df.head(n=5)
| | Entity | Code | Year | Fertility Rate | Human Development Index | Population | Region |
|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | AFG | 1950 | 7.248 | NaN | 7776133.0 | NaN |
| 1 | Afghanistan | AFG | 1951 | 7.260 | NaN | 7879295.0 | NaN |
| 2 | Afghanistan | AFG | 1952 | 7.260 | NaN | 7987737.0 | NaN |
| 3 | Afghanistan | AFG | 1953 | 7.266 | NaN | 8096656.0 | NaN |
| 4 | Afghanistan | AFG | 1954 | 7.254 | NaN | 8207910.0 | NaN |
# Select the rows for the year 2000 that have a (non-missing) country code.
is_year_2000 = df['Year'] == 2000
has_code = df['Code'].notna()
y2k_rows = df[is_year_2000 & has_code]

# Output columns, in the order they should appear.
y2k_columns = [
    'Code',
    'Human Development Index',
    'Fertility Rate',
    'Population',
    'Country Name',
]

# Region is dropped before dropna(), so missing Region values do not remove
# rows; any row still missing a value in the remaining columns is discarded.
y2k_df = (
    y2k_rows
    .drop(columns='Region')
    .rename(columns={'Entity': 'Country Name'})
    .dropna()
    [y2k_columns]
)
y2k_df
| | Code | Human Development Index | Fertility Rate | Population | Country Name |
|---|---|---|---|---|---|
| 50 | AFG | 0.340 | 7.566 | 2.013028e+07 | Afghanistan |
| 654 | ALB | 0.678 | 2.217 | 3.166104e+06 | Albania |
| 915 | DZA | 0.652 | 2.590 | 3.090385e+07 | Algeria |
| 1366 | AND | 0.815 | 1.273 | 6.565300e+04 | Andorra |
| 1627 | AGO | 0.380 | 6.639 | 1.619481e+07 | Angola |
| ... | ... | ... | ... | ... | ... |
| 58465 | VNM | 0.599 | 2.028 | 7.715397e+07 | Vietnam |
| 58926 | OWID_WRL | 0.645 | 2.754 | 6.171703e+09 | World |
| 59052 | YEM | 0.434 | 6.317 | 1.962409e+07 | Yemen |
| 59439 | ZMB | 0.418 | 5.921 | 1.001758e+07 | Zambia |
| 59700 | ZWE | 0.426 | 4.009 | 1.189200e+07 | Zimbabwe |
177 rows × 5 columns
# Country codes of the 15 rows with the largest `gdp_us_billion` in the
# Gender Stats table, sorted by code.
gender_stats = pd.read_csv('gender_stats.csv')
top_by_gdp = gender_stats.sort_values('gdp_us_billion', ascending=False).head(15)
wealthy_codes = top_by_gdp['country_code'].sort_values()
wealthy_codes
10 AUS
26 BRA
32 CAN
35 CHN
49 DEU
58 ESP
63 FRA
67 GBR
88 IND
94 ITA
97 JPN
104 KOR
124 MEX
164 RUS
202 USA
Name: country_code, dtype: str
# Keep only the selected countries, then order the rows by country code.
# The index is reset *before* sorting, so the row labels shown in the
# displayed table follow the pre-sort order rather than running 0..n-1.
is_wealthy = y2k_df['Code'].isin(wealthy_codes)
y2k_out = y2k_df[is_wealthy].reset_index(drop=True)
y2k_out = y2k_out.sort_values('Code')
y2k_out
| | Code | Human Development Index | Fertility Rate | Population | Country Name |
|---|---|---|---|---|---|
| 0 | AUS | 0.896 | 1.764 | 1.913243e+07 | Australia |
| 1 | BRA | 0.668 | 2.247 | 1.740182e+08 | Brazil |
| 2 | CAN | 0.890 | 1.510 | 3.089176e+07 | Canada |
| 3 | CHN | 0.586 | 1.628 | 1.269581e+09 | China |
| 5 | DEU | 0.890 | 1.386 | 8.179720e+07 | Germany |
| 12 | ESP | 0.828 | 1.210 | 4.101972e+07 | Spain |
| 4 | FRA | 0.844 | 1.876 | 5.948367e+07 | France |
| 13 | GBR | 0.863 | 1.641 | 5.905728e+07 | United Kingdom |
| 6 | IND | 0.490 | 3.350 | 1.057923e+09 | India |
| 7 | ITA | 0.842 | 1.249 | 5.727216e+07 | Italy |
| 8 | JPN | 0.883 | 1.346 | 1.270278e+08 | Japan |
| 11 | KOR | 0.824 | 1.467 | 4.676661e+07 | South Korea |
| 9 | MEX | 0.709 | 2.714 | 9.862552e+07 | Mexico |
| 10 | RUS | 0.733 | 1.190 | 1.467177e+08 | Russia |
| 14 | USA | 0.894 | 2.030 | 2.814841e+08 | United States |
# Express Population in millions, rounded to four decimal places.
y2k_out['Population'] = y2k_out['Population'].div(1_000_000).round(4)
y2k_out
| | Code | Human Development Index | Fertility Rate | Population | Country Name |
|---|---|---|---|---|---|
| 0 | AUS | 0.896 | 1.764 | 19.1324 | Australia |
| 1 | BRA | 0.668 | 2.247 | 174.0182 | Brazil |
| 2 | CAN | 0.890 | 1.510 | 30.8918 | Canada |
| 3 | CHN | 0.586 | 1.628 | 1269.5811 | China |
| 5 | DEU | 0.890 | 1.386 | 81.7972 | Germany |
| 12 | ESP | 0.828 | 1.210 | 41.0197 | Spain |
| 4 | FRA | 0.844 | 1.876 | 59.4837 | France |
| 13 | GBR | 0.863 | 1.641 | 59.0573 | United Kingdom |
| 6 | IND | 0.490 | 3.350 | 1057.9227 | India |
| 7 | ITA | 0.842 | 1.249 | 57.2722 | Italy |
| 8 | JPN | 0.883 | 1.346 | 127.0278 | Japan |
| 11 | KOR | 0.824 | 1.467 | 46.7666 | South Korea |
| 9 | MEX | 0.709 | 2.714 | 98.6255 | Mexico |
| 10 | RUS | 0.733 | 1.190 | 146.7177 | Russia |
| 14 | USA | 0.894 | 2.030 | 281.4841 | United States |
# Write the selected year-2000 rows to CSV, then read the file back to
# confirm what was written.
out_fname = 'year_2000_hdi_fert.csv'
# `index` is a boolean flag; False leaves the row labels out of the file
# (the original passed None, which only worked because None is falsy).
y2k_out.to_csv(out_fname, index=False)
pd.read_csv(out_fname)
| | Code | Human Development Index | Fertility Rate | Population | Country Name |
|---|---|---|---|---|---|
| 0 | AUS | 0.896 | 1.764 | 19.1324 | Australia |
| 1 | BRA | 0.668 | 2.247 | 174.0182 | Brazil |
| 2 | CAN | 0.890 | 1.510 | 30.8918 | Canada |
| 3 | CHN | 0.586 | 1.628 | 1269.5811 | China |
| 4 | DEU | 0.890 | 1.386 | 81.7972 | Germany |
| 5 | ESP | 0.828 | 1.210 | 41.0197 | Spain |
| 6 | FRA | 0.844 | 1.876 | 59.4837 | France |
| 7 | GBR | 0.863 | 1.641 | 59.0573 | United Kingdom |
| 8 | IND | 0.490 | 3.350 | 1057.9227 | India |
| 9 | ITA | 0.842 | 1.249 | 57.2722 | Italy |
| 10 | JPN | 0.883 | 1.346 | 127.0278 | Japan |
| 11 | KOR | 0.824 | 1.467 | 46.7666 | South Korea |
| 12 | MEX | 0.709 | 2.714 | 98.6255 | Mexico |
| 13 | RUS | 0.733 | 1.190 | 146.7177 | Russia |
| 14 | USA | 0.894 | 2.030 | 281.4841 | United States |