-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmapping-largest-african-cities.R
127 lines (105 loc) · 3.84 KB
/
mapping-largest-african-cities.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#==============
# LOAD PACKAGES
#==============
library(rvest)
library(tidyverse)
library(ggmap)
library(stringr)
#===========================
# SCRAPE DATA FROM WIKIPEDIA
#===========================
# Download the Wikipedia page and parse it into an HTML document.
html.population <- read_html(
  "https://en.wikipedia.org/wiki/List_of_Asian_cities_by_population_within_city_limits"
)

# Collect every <table> node on the page, keep the third one, and convert it
# to a data frame (`fill = TRUE` is passed through to html_table()).
# NOTE(review): the index 3 is positional, so it presumably selects the wrong
# table if the page layout changes -- confirm against the live page.
df.asian_cities <- html.population %>%
  html_nodes("table") %>%
  .[[3]] %>%
  html_table(fill = TRUE)

# inspect
head(df.asian_cities)
names(df.asian_cities)
str(df.asian_cities)
#============================
# REMOVE EXTRANEOUS VARIABLES
#============================
# Drop columns 3, 5 and 6 by position; the columns that remain are renamed
# just below.
df.asian_cities <- df.asian_cities[, -c(3, 5, 6)]
names(df.asian_cities)
str(df.asian_cities)

# Give the remaining columns descriptive names.
names(df.asian_cities) <- c("City", "Country", "Population")
head(df.asian_cities)

# Discard the first row, which is an extra row at the top of the table.
df.asian_cities <- df.asian_cities[-1, ]
head(df.asian_cities)
# Strip footnote markers from the population strings before parsing them:
# square-bracket references (e.g. 23,500,000[4]) as well as parenthesised
# notes. Each marker is matched on its own (no greedy ".*"), so digits that
# sit between two markers are not swallowed. parse_number() then converts the
# cleaned text to a numeric value.
df.asian_cities <- df.asian_cities %>%
  mutate(
    Population = Population %>%
      str_replace_all("\\[[^\\]]*\\]|\\([^)]*\\)", "") %>%
      parse_number()
  )

# inspect
head(df.asian_cities)
#=============================================
# create "City, Country" column for geocoding
#=============================================
# Columns are referenced by bare name inside mutate() so the values come from
# the data frame flowing through the pipe, not from a separate `$` lookup on
# the global object.
df.asian_cities <- df.asian_cities %>%
  mutate(Full_City_Name = str_c(City, Country, sep = ", "))

# inspect
head(df.asian_cities)
#=====================================================
# REORDER VARIABLES
# - select() keeps the listed columns, in the order given
#=====================================================
df.asian_cities <- select(
  df.asian_cities,
  City, Country, Full_City_Name, Population
)

# inspect
head(df.asian_cities)
#========================================
# COERCE TO TIBBLE
# - only so that the data prints more nicely
#========================================
df.asian_cities <- as_tibble(df.asian_cities)
#===============================================
# GEOCODE
# - get longitude, latitude via ggmap::geocode()
#================================================
# Look up coordinates for every "City, Country" string.
# NOTE(review): geocode() presumably needs a registered Google API key
# (ggmap::register_google()) -- confirm before running.
geocodes <- geocode(df.asian_cities$Full_City_Name)
print(geocodes)
class(geocodes)

# Attach the coordinates to the city data column-wise. bind_cols() is used
# rather than cbind() so the result stays a tibble; cbind() would hand back a
# plain data.frame.
df.asian_cities <- bind_cols(df.asian_cities, geocodes)

# inspect
head(df.asian_cities)
#=============
# GET ASIA MAP
#=============
# Fetch a basemap for the location string "Asia" at zoom level 3.
# NOTE(review): recent ggmap releases reportedly serve these tiles through
# Stadia Maps instead of source = "stamen" -- verify against the installed
# ggmap version.
map.asia <- get_map(
  "Asia",
  zoom = 3,
  source = "stamen",
  maptype = "watercolor"
)

# map it ...
ggmap(map.asia)
#========================================
# PLOT CITIES ON MAP
#========================================
# just the points: a translucent red point per city, sized by population,
# with a second layer drawn in shape 1 on top of it.
ggmap(map.asia) +
  geom_point(
    data = df.asian_cities,
    mapping = aes(x = lon, y = lat, size = Population),
    colour = "red",
    alpha = 0.3
  ) +
  geom_point(
    data = df.asian_cities,
    mapping = aes(x = lon, y = lat, size = Population),
    colour = "red",
    shape = 1
  )
# final map: the same two point layers, plus titles, legend scaling and theme.
ggmap(map.asia) +
  geom_point(
    data = df.asian_cities,
    aes(x = lon, y = lat, size = Population),
    color = "red", alpha = .3
  ) +
  geom_point(
    data = df.asian_cities,
    aes(x = lon, y = lat, size = Population),
    color = "red", shape = 1
  ) +
  labs(
    x = NULL,
    y = NULL,
    # The legend labels below are full, comma-formatted counts, so the legend
    # title must not claim the values are in millions.
    size = "Population",
    title = "Largest Cities in Asia",
    subtitle = "source: https://en.wikipedia.org/wiki/List_of_Asian_cities_by_population_within_city_limits"
  ) +
  scale_size_continuous(
    range = c(.6, 18),
    labels = scales::comma_format(),
    breaks = c(1500000, 10000000, 20000000)
  ) +
  theme(
    text = element_text(color = "#464646", family = "American Typewriter"),
    axis.text = element_blank(), # remove axis labels
    axis.ticks = element_blank(), # remove axis tick marks
    plot.title = element_text(size = 32),
    plot.subtitle = element_text(size = 10),
    legend.key = element_rect(fill = "white") # white background for key
  )