Create year 2000 dataset from input data

Create year 2000 dataset from input data

import numpy as np
import pandas as pd

# Load the input CSV into a DataFrame and preview its first five rows.
in_fname = 'children-per-woman-vs-human-development-index.csv'
df = pd.read_csv(in_fname)
df.head(5)

	Entity	Code	Year	Fertility Rate	Human Development Index	Population	Region
0	Afghanistan	AFG	1950	7.248	NaN	7776133.0	NaN
1	Afghanistan	AFG	1951	7.260	NaN	7879295.0	NaN
2	Afghanistan	AFG	1952	7.260	NaN	7987737.0	NaN
3	Afghanistan	AFG	1953	7.266	NaN	8096656.0	NaN
4	Afghanistan	AFG	1954	7.254	NaN	8207910.0	NaN

# Build the year-2000 table: rows for the year 2000 that have a code and no
# missing values, with columns in the order listed below.
y2k_columns = [
    'Code',
    'Human Development Index',
    'Fertility Rate',
    'Population',
    'Country Name',
]
y2k_df = (
    df.loc[lambda d: d['Year'].eq(2000) & d['Code'].notna()]
    # Drop 'Region' before dropna() so missing values in that column cannot
    # remove rows.
    .drop(columns='Region')
    .dropna()
    .rename(columns={'Entity': 'Country Name'})
    .loc[:, y2k_columns]
)
y2k_df

	Code	Human Development Index	Fertility Rate	Population	Country Name
50	AFG	0.340	7.566	2.013028e+07	Afghanistan
654	ALB	0.678	2.217	3.166104e+06	Albania
915	DZA	0.652	2.590	3.090385e+07	Algeria
1366	AND	0.815	1.273	6.565300e+04	Andorra
1627	AGO	0.380	6.639	1.619481e+07	Angola
...	...	...	...	...	...
58465	VNM	0.599	2.028	7.715397e+07	Vietnam
58926	OWID_WRL	0.645	2.754	6.171703e+09	World
59052	YEM	0.434	6.317	1.962409e+07	Yemen
59439	ZMB	0.418	5.921	1.001758e+07	Zambia
59700	ZWE	0.426	4.009	1.189200e+07	Zimbabwe

177 rows × 5 columns

# Select country codes: the 'country_code' values of the 15 rows with the
# largest 'gdp_us_billion' in the gender statistics table, sorted by code.
gdp_ranked = pd.read_csv('gender_stats.csv').sort_values(
    by='gdp_us_billion', ascending=False
)
wealthy_codes = gdp_ranked.iloc[:15].loc[:, 'country_code'].sort_values()
wealthy_codes

   AUS
   BRA
   CAN
   CHN
   DEU
   ESP
   FRA
   GBR
   IND
   ITA
   JPN
  KOR
  MEX
  RUS
  USA
Name: country_code, dtype: str

# Keep only the rows whose code is among the selected codes, ordered by code.
# Note: the index is reset *before* the sort, so the row labels of the sorted
# frame follow the pre-sort row order rather than running 0..n-1.
in_wealthy = y2k_df['Code'].isin(wealthy_codes)
y2k_out = y2k_df.loc[in_wealthy].reset_index(drop=True).sort_values(by='Code')
y2k_out

	Code	Human Development Index	Fertility Rate	Population	Country Name
0	AUS	0.896	1.764	1.913243e+07	Australia
1	BRA	0.668	2.247	1.740182e+08	Brazil
2	CAN	0.890	1.510	3.089176e+07	Canada
3	CHN	0.586	1.628	1.269581e+09	China
5	DEU	0.890	1.386	8.179720e+07	Germany
12	ESP	0.828	1.210	4.101972e+07	Spain
4	FRA	0.844	1.876	5.948367e+07	France
13	GBR	0.863	1.641	5.905728e+07	United Kingdom
6	IND	0.490	3.350	1.057923e+09	India
7	ITA	0.842	1.249	5.727216e+07	Italy
8	JPN	0.883	1.346	1.270278e+08	Japan
11	KOR	0.824	1.467	4.676661e+07	South Korea
9	MEX	0.709	2.714	9.862552e+07	Mexico
10	RUS	0.733	1.190	1.467177e+08	Russia
14	USA	0.894	2.030	2.814841e+08	United States

# Express population in millions, rounded to 4 decimal places.
people_per_million = 1_000_000
y2k_out['Population'] = y2k_out['Population'].div(people_per_million).round(4)
y2k_out

	Code	Human Development Index	Fertility Rate	Population	Country Name
0	AUS	0.896	1.764	19.1324	Australia
1	BRA	0.668	2.247	174.0182	Brazil
2	CAN	0.890	1.510	30.8918	Canada
3	CHN	0.586	1.628	1269.5811	China
5	DEU	0.890	1.386	81.7972	Germany
12	ESP	0.828	1.210	41.0197	Spain
4	FRA	0.844	1.876	59.4837	France
13	GBR	0.863	1.641	59.0573	United Kingdom
6	IND	0.490	3.350	1057.9227	India
7	ITA	0.842	1.249	57.2722	Italy
8	JPN	0.883	1.346	127.0278	Japan
11	KOR	0.824	1.467	46.7666	South Korea
9	MEX	0.709	2.714	98.6255	Mexico
10	RUS	0.733	1.190	146.7177	Russia
14	USA	0.894	2.030	281.4841	United States

# Write the table to CSV without the row index, then read the file back to
# check what was written.
out_fname = 'year_2000_hdi_fert.csv'
# `index` is documented as a bool; pass False explicitly instead of relying on
# None being treated as falsy.
y2k_out.to_csv(out_fname, index=False)
pd.read_csv(out_fname)

	Code	Human Development Index	Fertility Rate	Population	Country Name
0	AUS	0.896	1.764	19.1324	Australia
1	BRA	0.668	2.247	174.0182	Brazil
2	CAN	0.890	1.510	30.8918	Canada
3	CHN	0.586	1.628	1269.5811	China
4	DEU	0.890	1.386	81.7972	Germany
5	ESP	0.828	1.210	41.0197	Spain
6	FRA	0.844	1.876	59.4837	France
7	GBR	0.863	1.641	59.0573	United Kingdom
8	IND	0.490	3.350	1057.9227	India
9	ITA	0.842	1.249	57.2722	Italy
10	JPN	0.883	1.346	127.0278	Japan
11	KOR	0.824	1.467	46.7666	South Korea
12	MEX	0.709	2.714	98.6255	Mexico
13	RUS	0.733	1.190	146.7177	Russia
14	USA	0.894	2.030	281.4841	United States