Code
library(tidyverse)
library(openxlsx)
library(readxl)
library(janitor)
library(plotly)
Tony Duan
August 8, 2023
# A tibble: 6 × 67
`Data Source` World Development In…¹ ...3 ...4 ...5 ...6 ...7 ...8 ...9
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 Last Updated… 45132 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
2 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
3 Country Name Country Code Indi… Indi… 1960 1961 1962 1963 1964
4 Taiwan <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
5 Aruba ABW GDP … NY.G… <NA> <NA> <NA> <NA> <NA>
6 Africa Easte… AFE GDP … NY.G… 2112… 2161… 2350… 2804… 2592…
# ℹ abbreviated name: ¹`World Development Indicators`
# ℹ 58 more variables: ...10 <chr>, ...11 <chr>, ...12 <chr>, ...13 <chr>,
# ...14 <chr>, ...15 <chr>, ...16 <chr>, ...17 <chr>, ...18 <chr>,
# ...19 <chr>, ...20 <chr>, ...21 <chr>, ...22 <chr>, ...23 <chr>,
# ...24 <chr>, ...25 <chr>, ...26 <chr>, ...27 <chr>, ...28 <chr>,
# ...29 <chr>, ...30 <chr>, ...31 <chr>, ...32 <chr>, ...33 <chr>,
# ...34 <chr>, ...35 <chr>, ...36 <chr>, ...37 <chr>, ...38 <chr>, …
read in excel and start from 3rd row and then clean up column names
# A tibble: 6 × 67
country_name country_code indicator_name indicator_code x1960 x1961
<chr> <chr> <chr> <chr> <dbl> <dbl>
1 Taiwan <NA> <NA> <NA> NA NA
2 Aruba ABW GDP (current … NY.GDP.MKTP.CD NA NA
3 Africa Eastern a… AFE GDP (current … NY.GDP.MKTP.CD 2.11e10 2.16e10
4 Afghanistan AFG GDP (current … NY.GDP.MKTP.CD 5.38e 8 5.49e 8
5 Africa Western a… AFW GDP (current … NY.GDP.MKTP.CD 1.04e10 1.12e10
6 Angola AGO GDP (current … NY.GDP.MKTP.CD NA NA
# ℹ 61 more variables: x1962 <dbl>, x1963 <dbl>, x1964 <dbl>, x1965 <dbl>,
# x1966 <dbl>, x1967 <dbl>, x1968 <dbl>, x1969 <dbl>, x1970 <dbl>,
# x1971 <dbl>, x1972 <dbl>, x1973 <dbl>, x1974 <dbl>, x1975 <dbl>,
# x1976 <dbl>, x1977 <dbl>, x1978 <dbl>, x1979 <dbl>, x1980 <dbl>,
# x1981 <dbl>, x1982 <dbl>, x1983 <dbl>, x1984 <dbl>, x1985 <dbl>,
# x1986 <dbl>, x1987 <dbl>, x1988 <dbl>, x1989 <dbl>, x1990 <dbl>,
# x1991 <dbl>, x1992 <dbl>, x1993 <dbl>, x1994 <dbl>, x1995 <dbl>, …
delete column
wide to long
Rows: 16,821
Columns: 3
$ country_name <chr> "Taiwan", "Taiwan", "Taiwan", "Taiwan", "Taiwan", "Taiwan…
$ year <chr> "x1960", "x1961", "x1962", "x1963", "x1964", "x1965", "x1…
$ GDP <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
Rows: 63
Columns: 4
$ year <chr> "x1960", "x1961", "x1962", "x1963", "x1964", "x1965", "x1966"…
$ V1 <chr> "59716251765", "50056688015", "47209188356", "50706616610", "…
$ V2 <chr> " 1320796652", " 1383681651", " 1612346412", " 1935298266", "…
$ year_num <dbl> 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1…
Rows: 252
Columns: 3
$ country_name <chr> "Taiwan", "Taiwan", "Taiwan", "Taiwan", "Taiwan", "Taiwan…
$ GDP <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
$ year_num <dbl> 1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 196…
from imf
# A tibble: 6 × 50
gdp_per_capita_current…¹ x1980 x1981 x1982 x1983 x1984 x1985 x1986 x1987 x1988
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
2 Afghanistan no d… no d… no d… no d… no d… no d… no d… no d… no d…
3 Albania 728.… 817.… 824.… 815.… 788.… 788.… 855.… 832.… 805.…
4 Algeria 2268… 2305… 2254… 2316… 2432… 2753… 2698… 2705… 2143…
5 Andorra no d… no d… no d… no d… no d… no d… no d… no d… no d…
6 Angola 802.… 731.… 712.… 723.… 747.… 817.… 743.… 828.… 875.…
# ℹ abbreviated name: ¹gdp_per_capita_current_prices_u_s_dollars_per_capita
# ℹ 40 more variables: x1989 <chr>, x1990 <chr>, x1991 <chr>, x1992 <chr>,
# x1993 <chr>, x1994 <chr>, x1995 <chr>, x1996 <chr>, x1997 <chr>,
# x1998 <chr>, x1999 <chr>, x2000 <chr>, x2001 <chr>, x2002 <chr>,
# x2003 <chr>, x2004 <chr>, x2005 <chr>, x2006 <chr>, x2007 <chr>,
# x2008 <chr>, x2009 <chr>, x2010 <chr>, x2011 <chr>, x2012 <chr>,
# x2013 <chr>, x2014 <chr>, x2015 <chr>, x2016 <chr>, x2017 <chr>, …
delete first empty row
wide to long
Rows: 294
Columns: 3
$ country_name <chr> "China, People's Republic of", "China, People's Republic …
$ GDP <dbl> 306.980, 288.491, 279.971, 296.509, 301.111, 292.990, 279…
$ year_num <dbl> 1980, 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 198…
https://data.worldbank.org/indicator/NY.GDP.MKTP.CD
https://zh.wikipedia.org/zh-cn/%E4%BA%9A%E6%B4%B2%E5%9B%9B%E5%B0%8F%E9%BE%99
https://www.imf.org/external/datamapper/PPPGDP@WEO/TWN?zoom=TWN&highlight=TWN
---
title: "Global GDP"
subtitle: "Global GDP"
author: "Tony Duan"
date: "2023-08-08"
categories: [analysis]
execute:
  warning: false
  error: false
format:
  html:
    toc: true
    code-fold: show
    code-tools: true
    number-sections: true
    code-block-bg: true
    code-block-border-left: "#31BAE9"
---
:max_bytes(150000):strip_icc()/GDP_final-c532743acd98498baa8c8da493017af8.png)
# input data
```{r}
library(tidyverse)
library(openxlsx)
library(readxl)
library(janitor)
library(plotly)
```
```{r}
# A bare `library()` call (no arguments) only lists the installed packages;
# it loads nothing and is not needed for the analysis, so it is disabled.
# library()
```
```{r}
# Fetch the World Bank GDP workbook only when no local copy exists, so an
# existing 'output.xls' is never overwritten. mode = "wb" keeps the binary
# Excel file intact on Windows.
# NOTE(review): the rendered output shows a "Taiwan" row without a country
# code, which suggests the local copy may have been edited by hand; a fresh
# download may not contain that row -- verify.
if (!file.exists('output.xls')) {
  download.file(
    'https://api.worldbank.org/v2/en/indicator/NY.GDP.MKTP.CD?downloadformat=excel',
    'output.xls',
    mode = "wb"
  )
}
```
```{r}
# First look at the workbook exactly as stored, before any cleaning.
data001 <- "output.xls" %>%
  read_excel()
data001 %>%
  head()
```
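Read the Excel file again, skipping the first two rows (the data-source and last-updated metadata) so that the "Country Name" row becomes the header, then clean up the column names.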
```{r}
# Re-read the workbook with the first two rows skipped, then let janitor
# normalise the column names (e.g. "Country Name" -> country_name,
# "1960" -> x1960).
data001 <- "output.xls" %>%
  read_excel(skip = 2) %>%
  clean_names()
data001 %>%
  head()
```
Drop the country code and indicator columns, which are not needed for the analysis.
```{r}
# Keep only the country name and the yearly value columns.
data002 <- select(
  data001,
  !c(country_code, indicator_name, indicator_code)
)
```
Reshape from wide to long: one row per country and year.
```{r}
# Every column except country_name is a year column; stack them into
# (year, GDP) pairs.
data003 <- pivot_longer(
  data002,
  cols = -country_name,
  names_to = "year",
  values_to = "GDP"
)
```
```{r}
# Inspect the long table before the year column is converted.
data003 %>%
  glimpse()
```
```{r}
# Turn labels such as "x1960" into the number 1960 and drop the text column.
data003 <- data003 %>%
  mutate(
    year_num = year %>%
      str_remove("x") %>%
      as.numeric()
  ) %>%
  select(!year)
```
```{r}
# Inspect the long table again, now with the numeric year_num column.
data003 %>%
  glimpse()
```
# Mainland China and Hong Kong
```{r}
# Long-format GDP series for the two economies being compared.
data004 <- filter(
  data003,
  country_name %in% c("China", "Hong Kong SAR, China")
)
```
```{r}
# GDP over time, one line per economy.
# scales::label_number_si() is deprecated; label_number() with
# cut_short_scale() produces the same K / M / B / T style axis labels.
ggplot(data004, aes(x = year_num, y = GDP, colour = country_name)) +
  geom_line() +
  scale_y_continuous(
    labels = scales::label_number(scale_cut = scales::cut_short_scale())
  )
```
```{r}
# Same two economies, taken from the wide table (one column per year).
data_cn_hk001 <- filter(
  data002,
  country_name %in% c("China", "Hong Kong SAR, China")
)
```
```{r}
# Transpose so that each original column becomes a row and each filtered
# country row becomes a column (named V1, V2 by as.data.frame()).
cn_hk_matrix <- t(data_cn_hk001)
data_cn_hk002 <- as.data.frame(cn_hk_matrix)
```
```{r}
library(tibble)
# Move the row names (the former column names) into a regular "year" column.
data_cn_hk002 <- data_cn_hk002 %>%
  rownames_to_column(var = "year")
```
```{r}
# The first row of the transposed table holds the country_name values, not
# a year, so remove it before converting "x1960"-style labels to numbers.
data_cn_hk003 <- data_cn_hk002[-1, ]
data_cn_hk003 <- mutate(
  data_cn_hk003,
  year_num = as.numeric(str_remove(year, "x"))
)
```
```{r}
# Check the transposed table: V1 and V2 are still character at this point.
data_cn_hk003 %>%
  glimpse()
```
```{r}
# V1 and V2 follow the row order of data_cn_hk001.
# NOTE(review): presumably V1 = China and V2 = Hong Kong -- confirm against
# the filtered table, since the mapping depends on row order.
data_cn_hk003 <- data_cn_hk003 %>%
  mutate(across(c(V1, V2), as.numeric)) %>%
  mutate(
    total = V1 + V2,
    V1_share = V1 / total,
    # Share of V2 in the combined total (name kept for the plots below).
    V12_share = V2 / total
  )
```
```{r}
# V12_share is V2 / (V1 + V2), i.e. a ratio between 0 and 1, so the axis is
# labelled as a percentage. This also removes the deprecated
# scales::label_number_si(), whose magnitude suffixes never apply to a ratio.
ggplot(data_cn_hk003, aes(x = year_num, y = V12_share)) +
  geom_line() +
  scale_y_continuous(labels = scales::label_percent())
```
```{r}
# Interactive version of the share chart. The trace type is stated
# explicitly; without it plotly has to guess and prints a
# "No trace type specified" message into the rendered document.
plot_ly(
  data = data_cn_hk003,
  x = ~year_num,
  y = ~V12_share,
  type = "scatter",
  mode = "lines"
)
```
# Asia's Four Dragons
```{r}
# Total-GDP series for the four economies, using World Bank naming.
Asia_4_drgan001 <- filter(
  data003,
  country_name %in% c(
    "Taiwan",
    "Korea, Rep.",
    "Singapore",
    "Hong Kong SAR, China"
  )
)
```
```{r}
# Quick structural check of the filtered series.
Asia_4_drgan001 %>%
  glimpse()
```
```{r}
# GDP over time, one line per economy.
# scales::label_number_si() is deprecated; label_number() with
# cut_short_scale() produces the same K / M / B / T style axis labels.
ggplot(Asia_4_drgan001, aes(x = year_num, y = GDP, colour = country_name)) +
  geom_line() +
  scale_y_continuous(
    labels = scales::label_number(scale_cut = scales::cut_short_scale())
  )
```
# GDP per capita
Data from the IMF DataMapper: GDP per capita, current prices (U.S. dollars per capita).
```{r}
# Fetch the IMF GDP-per-capita workbook only when no local copy exists, so
# an existing file is never overwritten. mode = "wb" keeps the binary Excel
# file intact on Windows.
if (!file.exists('output_gdp_per_capita.xls')) {
  download.file(
    'https://www.imf.org/external/datamapper//export/excel.php?indicator=NGDPDPC',
    'output_gdp_per_capita.xls',
    mode = "wb"
  )
}
```
```{r}
# Load the IMF workbook and normalise its column names.
# Note: this reuses (and overwrites) the name data001 from the World Bank
# section above.
data001 <- "output_gdp_per_capita.xls" %>%
  read_excel() %>%
  clean_names()
data001 %>%
  head()
```
Delete the first row, which is empty.
```{r}
# Row 1 of the IMF sheet contains only missing values; remove it by position.
data002 <- data001 %>%
  slice(-1)
```
Reshape from wide to long: one row per country and year.
```{r}
# Reshape the IMF table to one row per country and year.
# The sheet marks missing values with the text "no data", so every year
# column is read as character. Convert that marker to NA explicitly before
# parsing, instead of relying on as.numeric() coercion warnings.
data003 <- data002 %>%
  rename(
    country_name = gdp_per_capita_current_prices_u_s_dollars_per_capita
  ) %>%
  pivot_longer(!country_name, names_to = "year", values_to = "GDP") %>%
  mutate(GDP = as.numeric(na_if(GDP, "no data")))
```
```{r}
# Turn labels such as "x1980" into the number 1980 and drop the text column.
data003 <- data003 %>%
  mutate(
    year_num = year %>%
      str_remove("x") %>%
      as.numeric()
  ) %>%
  select(!year)
```
# Asia's Four Dragons and more
```{r}
# Per-capita series for the four economies plus China and Japan, using the
# IMF's country naming.
Asia_4_drgan001 <- filter(
  data003,
  country_name %in% c(
    "Taiwan Province of China",
    "Korea, Republic of",
    "Singapore",
    "Hong Kong SAR",
    "China, People's Republic of",
    "Japan"
  )
)
```
```{r}
# Quick structural check of the filtered per-capita series.
Asia_4_drgan001 %>%
  glimpse()
```
```{r}
# GDP per capita over time, one line per economy.
# scales::label_number_si() is deprecated; label_number() with
# cut_short_scale() produces the same K / M / B / T style axis labels.
ggplot(Asia_4_drgan001, aes(x = year_num, y = GDP, colour = country_name)) +
  geom_line() +
  scale_y_continuous(
    labels = scales::label_number(scale_cut = scales::cut_short_scale())
  )
```
# Reference
https://data.worldbank.org/indicator/NY.GDP.MKTP.CD
https://zh.wikipedia.org/zh-cn/%E4%BA%9A%E6%B4%B2%E5%9B%9B%E5%B0%8F%E9%BE%99
https://www.imf.org/external/datamapper/PPPGDP@WEO/TWN?zoom=TWN&highlight=TWN