Create year 2000 dataset from input data

Create year 2000 dataset from input data

import numpy as np
import pandas as pd
# Load the raw fertility-rate / Human Development Index table and preview
# its first rows.
df = pd.read_csv(
    filepath_or_buffer='children-per-woman-vs-human-development-index.csv',
)
df.head()
Entity Code Year Fertility Rate Human Development Index Population Region
0 Afghanistan AFG 1950 7.248 NaN 7776133.0 NaN
1 Afghanistan AFG 1951 7.260 NaN 7879295.0 NaN
2 Afghanistan AFG 1952 7.260 NaN 7987737.0 NaN
3 Afghanistan AFG 1953 7.266 NaN 8096656.0 NaN
4 Afghanistan AFG 1954 7.254 NaN 8207910.0 NaN
# Build the year-2000 table: keep the rows for the year 2000 that have a
# code, discard the 'Region' column, rename 'Entity' to 'Country Name', drop
# every row that still has a missing value, and put the remaining columns in
# output order.
y2k_df = (
    df.loc[df['Year'].eq(2000) & df['Code'].notna()]
    .drop(columns='Region')
    .rename(columns={'Entity': 'Country Name'})
    .dropna()
    .loc[:, ['Code', 'Human Development Index', 'Fertility Rate',
             'Population', 'Country Name']]
)
y2k_df
Code Human Development Index Fertility Rate Population Country Name
50 AFG 0.340 7.566 2.013028e+07 Afghanistan
654 ALB 0.678 2.217 3.166104e+06 Albania
915 DZA 0.652 2.590 3.090385e+07 Algeria
1366 AND 0.815 1.273 6.565300e+04 Andorra
1627 AGO 0.380 6.639 1.619481e+07 Angola
... ... ... ... ... ...
58465 VNM 0.599 2.028 7.715397e+07 Vietnam
58926 OWID_WRL 0.645 2.754 6.171703e+09 World
59052 YEM 0.434 6.317 1.962409e+07 Yemen
59439 ZMB 0.418 5.921 1.001758e+07 Zambia
59700 ZWE 0.426 4.009 1.189200e+07 Zimbabwe

177 rows × 5 columns

# Selected country codes: take the 15 rows of the gender statistics table
# with the largest 'gdp_us_billion' values, then sort their codes in
# ascending order.
wealthy_codes = (
    pd.read_csv('gender_stats.csv')
    .sort_values(by='gdp_us_billion', ascending=False)
    .iloc[:15]
    .loc[:, 'country_code']
    .sort_values()
)
wealthy_codes
10     AUS
26     BRA
32     CAN
35     CHN
49     DEU
58     ESP
63     FRA
67     GBR
88     IND
94     ITA
97     JPN
104    KOR
124    MEX
164    RUS
202    USA
Name: country_code, dtype: str
# Restrict the year-2000 table to the selected country codes and order the
# rows by code.  The index is renumbered *before* sorting, so the displayed
# index labels follow the pre-sort row order rather than running 0..n-1.
y2k_out = (
    y2k_df.loc[y2k_df['Code'].isin(wealthy_codes)]
    .reset_index(drop=True)
    .sort_values(by='Code')
)
y2k_out
Code Human Development Index Fertility Rate Population Country Name
0 AUS 0.896 1.764 1.913243e+07 Australia
1 BRA 0.668 2.247 1.740182e+08 Brazil
2 CAN 0.890 1.510 3.089176e+07 Canada
3 CHN 0.586 1.628 1.269581e+09 China
5 DEU 0.890 1.386 8.179720e+07 Germany
12 ESP 0.828 1.210 4.101972e+07 Spain
4 FRA 0.844 1.876 5.948367e+07 France
13 GBR 0.863 1.641 5.905728e+07 United Kingdom
6 IND 0.490 3.350 1.057923e+09 India
7 ITA 0.842 1.249 5.727216e+07 Italy
8 JPN 0.883 1.346 1.270278e+08 Japan
11 KOR 0.824 1.467 4.676661e+07 South Korea
9 MEX 0.709 2.714 9.862552e+07 Mexico
10 RUS 0.733 1.190 1.467177e+08 Russia
14 USA 0.894 2.030 2.814841e+08 United States
# Express population in millions, rounded to 4 decimal places.
y2k_out['Population'] = y2k_out['Population'].div(1_000_000).round(4)
y2k_out
Code Human Development Index Fertility Rate Population Country Name
0 AUS 0.896 1.764 19.1324 Australia
1 BRA 0.668 2.247 174.0182 Brazil
2 CAN 0.890 1.510 30.8918 Canada
3 CHN 0.586 1.628 1269.5811 China
5 DEU 0.890 1.386 81.7972 Germany
12 ESP 0.828 1.210 41.0197 Spain
4 FRA 0.844 1.876 59.4837 France
13 GBR 0.863 1.641 59.0573 United Kingdom
6 IND 0.490 3.350 1057.9227 India
7 ITA 0.842 1.249 57.2722 Italy
8 JPN 0.883 1.346 127.0278 Japan
11 KOR 0.824 1.467 46.7666 South Korea
9 MEX 0.709 2.714 98.6255 Mexico
10 RUS 0.733 1.190 146.7177 Russia
14 USA 0.894 2.030 281.4841 United States
# Write the final table to CSV without the index column, then read the file
# back to check what was written.
out_fname = 'year_2000_hdi_fert.csv'
# `index` is documented as a bool; pass False explicitly rather than relying
# on None being treated as falsy.
y2k_out.to_csv(out_fname, index=False)
pd.read_csv(out_fname)
Code Human Development Index Fertility Rate Population Country Name
0 AUS 0.896 1.764 19.1324 Australia
1 BRA 0.668 2.247 174.0182 Brazil
2 CAN 0.890 1.510 30.8918 Canada
3 CHN 0.586 1.628 1269.5811 China
4 DEU 0.890 1.386 81.7972 Germany
5 ESP 0.828 1.210 41.0197 Spain
6 FRA 0.844 1.876 59.4837 France
7 GBR 0.863 1.641 59.0573 United Kingdom
8 IND 0.490 3.350 1057.9227 India
9 ITA 0.842 1.249 57.2722 Italy
10 JPN 0.883 1.346 127.0278 Japan
11 KOR 0.824 1.467 46.7666 South Korea
12 MEX 0.709 2.714 98.6255 Mexico
13 RUS 0.733 1.190 146.7177 Russia
14 USA 0.894 2.030 281.4841 United States