March 1, 2022
# Load the packages used by the examples below.
library(tidyverse)
library(socviz)
library(ggthemes)

# Make the minimal theme the default for every plot that follows.
theme_set(theme_minimal())
# The socviz::election dataset has various measures of the vote and vote
# shares by state.
# Print five randomly chosen rows, restricted to a handful of columns.
election %>%
  select(state, total_vote, r_points, pct_trump, party, census) %>%
  sample_n(5)
## # A tibble: 5 × 6
##   state          total_vote r_points pct_trump party      census
##   <chr>               <dbl>    <dbl>     <dbl> <chr>      <chr>
## 1 Wisconsin         2976150    0.770      47.2 Republican Midwest
## 2 New York          7721795  -22.5        36.5 Democratic Northeast
## 3 South Carolina    2103027   14.3        54.9 Republican South
## 4 Nebraska           844227   25.0        58.8 Republican Midwest
## 5 Louisiana         2029032   19.6        58.1 Republican South
# Hex color codes: Democratic blue first, Republican red second.
party_colors <- c("#2E74C0", "#CB454A")

# Tick positions and their labels for the point-margin axis. The labels show
# unsigned values and name a candidate at each end of the axis.
margin_breaks <- seq(-30, 40, by = 10)
margin_labels <- c(
  "30\n (Clinton)", "20", "10", "0",
  "10", "20", "30", "40\n(Trump)"
)

# Dot plot of r_points by state (DC left out), with states ordered by
# r_points and points colored by party.
p0 <- ggplot(
  data = subset(election, st %nin% "DC"),
  mapping = aes(
    x = r_points,
    y = reorder(state, r_points),
    color = party
  )
)

# Reference line at zero, drawn before the points so it sits underneath them.
p1 <- p0 +
  geom_vline(xintercept = 0, color = "gray30") +
  geom_point(size = 2)

p2 <- p1 + scale_color_manual(values = party_colors)

p3 <- p2 +
  scale_x_continuous(breaks = margin_breaks, labels = margin_labels)

# One panel per census region stacked in a single column; each panel keeps
# only its own states on the y axis. The color legend is dropped.
p3 +
  facet_wrap(~ census, ncol = 1, scales = "free_y") +
  guides(color = "none") +
  labs(x = "Point Margin", y = "") +
  theme(axis.text = element_text(size = 8))
# State outline data in data-frame form.
us_states <- map_data(map = "state")

# First six rows.
head(us_states, n = 6)
##        long      lat group order  region subregion
## 1 -87.46201 30.38968     1     1 alabama      <NA>
## 2 -87.48493 30.37249     1     2 alabama      <NA>
## 3 -87.52503 30.37249     1     3 alabama      <NA>
## 4 -87.53076 30.33239     1     4 alabama      <NA>
## 5 -87.57087 30.32665     1     5 alabama      <NA>
## 6 -87.58806 30.32665     1     6 alabama      <NA>

# Number of rows and columns.
dim(us_states)
## [1] 15537     6
# Plain outline map: one polygon per `group`, white fill, black borders.
outline_aes <- aes(x = long, y = lat, group = group)
p <- ggplot(data = us_states, mapping = outline_aes)

p +
  geom_polygon(fill = "white", color = "black")
# Fill the map: map `region` to the fill aesthetic so the polygons are
# colored by region.
p <- ggplot(
  data = us_states,
  aes(x = long, y = lat, group = group, fill = region)
)

# Thin light-gray borders; the fill legend is suppressed. `"none"` replaces
# the deprecated `fill = FALSE` form.
p +
  geom_polygon(color = "gray90", size = 0.1) +
  guides(fill = "none")
# Same region-filled map, redrawn with coord_map() using the Albers
# projection and its two latitude parameters.
p <- ggplot(
  data = us_states,
  mapping = aes(x = long, y = lat, group = group, fill = region)
)

# `"none"` replaces the deprecated `fill = FALSE` form for hiding a legend.
p +
  geom_polygon(color = "gray90", size = 0.1) +
  coord_map(projection = "albers", lat0 = 39, lat1 = 45) +
  guides(fill = "none")
We can transform the default projection used by geom_polygon(), via the coord_map() function.
# The Albers projection used above takes two latitude parameters,
# lat0 and lat1.

# Merge the election data on to the map.
# Add a lower-case copy of the state name so it lines up with the `region`
# column of us_states.
election$region <- tolower(election$state)

# Name the join key explicitly instead of relying on auto-detected
# common columns.
us_states_elec <- left_join(us_states, election, by = "region")
In the map data, us_states, the state names (in a variable named region) are in lower case.
Here we can create a variable in the election data frame to correspond to this, using the tolower() function to convert the state names.
It is important to know your data and variables well enough to check that they have merged properly.
# State map filled by `party`, drawn from the merged data with the default
# fill colors.
party_aes <- aes(x = long, y = lat, group = group, fill = party)
p0 <- ggplot(data = us_states_elec, party_aes)

p0 +
  geom_polygon(color = "gray90", size = 0.1) +
  coord_map(projection = "albers", lat0 = 39, lat1 = 45)
# Use the party colors for the fill.
p1 <- p0 +
  geom_polygon(color = "gray90", size = 0.1) +
  coord_map(projection = "albers", lat0 = 39, lat1 = 45)

# Manual fill scale from party_colors; the legend title is removed.
p2 <- p1 +
  scale_fill_manual(values = party_colors) +
  labs(title = "Election Results 2016", fill = NULL)

p2 + theme_map()
# For the fill aesthetic, let's try a continuous measure, such as the
# percentage of the vote received by Donald Trump (pct_trump).
p0 <- ggplot(
  data = us_states_elec,
  mapping = aes(x = long, y = lat, group = group, fill = pct_trump)
)

p1 <- p0 +
  geom_polygon(color = "gray90", size = 0.1) +
  coord_map(projection = "albers", lat0 = 39, lat1 = 45)

# Drawn with the default continuous fill scale.
p1 + labs(title = "Trump vote") + theme_map() + labs(fill = "Percent")
Blue is not the color we want here.
The color gradient runs in the wrong direction.
Let’s fix these problems using scale_fill_gradient():
# Two-color gradient running from white (low) to the Republican red (high).
trump_fill <- scale_fill_gradient(low = "white", high = "#CB454A")

p2 <- p1 +
  trump_fill +
  labs(title = "Trump vote")

p2 +
  theme_map() +
  labs(fill = "Percent")
# The scale_fill_gradient2() function gives us a blue-red spectrum that
# passes through white by default. We can also re-specify the high and low
# colors.
p0 <- ggplot(
  data = us_states_elec,
  mapping = aes(x = long, y = lat, group = group, fill = d_points)
)

p1 <- p0 +
  geom_polygon(color = "gray90", size = 0.1) +
  coord_map(projection = "albers", lat0 = 39, lat1 = 45)

# Diverging scale with all defaults.
p2 <- p1 + scale_fill_gradient2() + labs(title = "Winning margins")

p2 + theme_map() + labs(fill = "Percent")
# With the scale_fill_gradient2() function, we can also re-specify the
# mid-level color along with the high and low colors.
p3 <- p1 +
  scale_fill_gradient2(
    low = "red",
    mid = scales::muted("purple"),
    high = "blue",
    breaks = c(-25, 0, 25, 50, 75)
  )

p3 + theme_map() + labs(fill = "Percent", title = "Winning margins")
If you look at the gradient scale for p3, you’ll see that it extends very high on the Blue side.
# Redraw the map with the District of Columbia rows removed.
p0 <- ggplot(
  data = subset(us_states_elec, region %nin% "district of columbia"),
  aes(x = long, y = lat, group = group, fill = d_points)
)

# Rebuild p1 from the filtered p0. Without this step p3 would still be built
# from the earlier p1, which includes DC, and the caption would be wrong.
p1 <- p0 +
  geom_polygon(color = "gray90", size = 0.1) +
  coord_map(projection = "albers", lat0 = 39, lat1 = 45)

p3 <- p1 +
  scale_fill_gradient2(
    low = "red",
    mid = scales::muted("purple"),
    high = "blue",
    breaks = c(-25, 0, 25, 50, 75)
  )

p3 +
  theme_map() +
  labs(
    fill = "Percent",
    title = "Winning margins",
    caption = "DC is omitted."
  )
Choropleth maps display divided geographical areas or regions that are colored, shaded or patterned in relation to a data variable.
County-level US choropleth maps can be aesthetically pleasing, because of the added detail they bring to a national map.
The county-level datasets (county_map and county_data) are included in the socviz library.
# The map data, county_map, has been processed a little in order to
# transform it to an Albers projection, and also to relocate (and re-scale)
# Alaska and Hawaii.
county_map %>% sample_n(5)

# Five random rows of the county-level data, limited to a few columns.
county_data %>%
  select(id, name, state, pop_dens, pct_black) %>%
  sample_n(5)

# Attach the county data to the map polygons through the shared `id` column.
county_full <- left_join(county_map, county_data, by = "id")
The id field is the FIPS code for the county.
pop_dens is population density.
pct_black is percent of African-American population.
We merge the data frames using the shared FIPS id column.
# County map filled by `pop_dens`, using the default fill scale.
density_aes <- aes(x = long, y = lat, fill = pop_dens, group = group)
p <- ggplot(data = county_full, mapping = density_aes)

# Very thin light-gray borders; coord_equal() fixes the x/y aspect ratio.
p1 <- p +
  geom_polygon(color = "gray90", size = 0.05) +
  coord_equal()

p1
The p1 object produces a legible map, but by default it chooses an unordered categorical layout.
This is because the pop_dens variable is not ordered.
pop_dens is an un-ordered discrete variable.
# Summarise the pop_dens column of the merged county data.
summary(county_full[["pop_dens"]])
The use of coord_equal() makes sure that the relative scale of our map does not change even if we alter the overall dimensions of the plot.
# Legend labels for the seven population-density categories, in order.
density_labels <- c(
  "0-10", "10-50", "50-100", "100-500",
  "500-1,000", "1,000-5,000", ">5,000"
)

# Sequential "Blues" palette with the labels above.
p2 <- p1 + scale_fill_brewer(palette = "Blues", labels = density_labels)

# Legend at the bottom, with all of its keys on a single row.
p2 +
  labs(fill = "Population per\nsquare mile") +
  theme_map() +
  guides(fill = guide_legend(nrow = 1)) +
  theme(legend.position = "bottom")
We can manually supply the right sort of scale using the scale_fill_brewer() function, together with a nicer set of labels.
We can also use the guides() function to make sure each element of the key in the legend appears on the same row.
# pct_black is an un-ordered factor variable.
summary(county_full$pct_black)

# County map filled by `pct_black`.
p <- ggplot(
  data = county_full,
  mapping = aes(x = long, y = lat, fill = pct_black, group = group)
)

p1 <- p +
  geom_polygon(color = "gray90", size = 0.05) +
  coord_equal()

# Sequential "Greens" palette; legend at the bottom on a single row.
p2 <- p1 + scale_fill_brewer(palette = "Greens")

p2 +
  labs(fill = "US Population, Percent Black") +
  guides(fill = guide_legend(nrow = 1)) +
  theme_map() +
  theme(legend.position = "bottom")
Let’s draw two new county-level choropleths.
We have a pop_dens6 variable that divides the population density into six categories.
We will map the color scale to the value of this variable.
# Six-color "Oranges" palette; print it to see the hex codes.
orange_pal <- RColorBrewer::brewer.pal(n = 6, name = "Oranges")
orange_pal

# The same six colors in reverse order.
orange_rev <- rev(orange_pal)
orange_rev
We use the RColorBrewer::brewer.pal() function to manually create two palettes.
# The brewer.pal() function produces evenly-spaced color schemes.
# We use the rev() function to reverse the order of a color vector.
pop_p <- ggplot(
  data = county_full,
  mapping = aes(x = long, y = lat, fill = pop_dens6, group = group)
)

pop_p1 <- pop_p +
  geom_polygon(color = "gray90", size = 0.05) +
  coord_equal()

# First map: palette in its original order.
pop_p2 <- pop_p1 + scale_fill_manual(values = orange_pal)

pop_p2 +
  labs(
    title = "Population Density",
    fill = "People per square mile"
  ) +
  theme_map() +
  theme(legend.position = "bottom")

# Second map: same data with the reversed palette.
pop_p2_rev <- pop_p1 + scale_fill_manual(values = orange_rev)

pop_p2_rev +
  labs(
    title = "Reverse-coded Population Density",
    fill = "People per square mile"
  ) +
  theme_map() +
  theme(legend.position = "bottom")
per_gop_2016.
# The variable's type can be checked with class(county_full$per_gop_2016).

# County map filled by `per_gop_2016`, using the default fill scale.
gop_p <- ggplot(
  data = county_full,
  mapping = aes(x = long, y = lat, fill = per_gop_2016, group = group)
)

gop_p1 <- gop_p +
  geom_polygon(color = "gray70", size = 0.05) +
  coord_equal()

gop_p1
For a continuous fill variable, use the scale_fill_gradient(), scale_fill_gradient2(), or scale_fill_gradientn() function:
# scale_fill_gradient() produces a two-color gradient.
# scale_fill_gradient2() produces a three-color gradient with specified
# midpoint.
# scale_fill_gradientn() produces an n-color gradient.
# With scale_fill_gradient2(), choose the value and color for midpoint
# carefully.
gop_p2 <- gop_p1 +
  scale_fill_gradient2(
    low = "#2E74C0",      # from party_colors for DEM
    mid = "#FFFFFF",      # white
    high = "#CB454A",     # from party_colors for GOP
    na.value = "grey50",  # fill used for missing values
    midpoint = .5         # value of per_gop_2016 that gets the mid color
  )

gop_p2 +
  labs(
    title = "US Presidential Election 2016",
    fill = "Trump vote share"
  ) +
  theme_map() +
  theme(legend.position = "bottom")
# State-level maps with the statebins package.
# install.packages("statebins")
library(statebins)

# geom_statebins() identifies states through the `state` aesthetic.
p <- ggplot(
  data = election,
  mapping = aes(state = state, fill = pct_trump)
)

p1 <- p +
  geom_statebins(
    lbl_size = 5,
    border_col = "grey90",
    border_size = 1
  )

# Horizontal legend placed at the given coordinate.
p2 <- p1 +
  labs(fill = "Percent Trump") +
  coord_equal() +
  theme_statebins(legend_position = c(.45, 1)) +
  theme(legend.direction = "horizontal")

p2 +
  scale_fill_gradient2(
    low = "#2E74C0",      # from party_colors for DEM
    mid = "#FFFFFF",      # white
    high = "#CB454A",     # from party_colors for GOP
    na.value = "grey50",  # fill used for missing values
    midpoint = 50         # set the midpoint value
  )
# A two-color fill with scale_fill_gradient().
# DC is left out of this plot.
p <- ggplot(
  data = subset(election, st != "DC"),
  mapping = aes(state = state, fill = pct_clinton)
)

p1 <- p +
  geom_statebins(
    lbl_size = 5,
    border_col = "grey90",
    border_size = 1
  )

p2 <- p1 +
  labs(fill = "Percent Clinton") +
  coord_equal() +
  theme_statebins(legend_position = c(.45, 1)) +
  theme(legend.direction = "horizontal")

# A two-color gradient has no midpoint argument; only the end colors and the
# missing-value color are set here.
p2 +
  scale_fill_gradient(
    low = "#FFFFFF",      # white
    high = "#2E74C0",     # from party_colors for DEM
    na.value = "grey50"   # fill used for missing values
  )
Let’s use scale_fill_manual() to fill color by party.
legend_position allows for adjusting a coordinate for the legend position.
# Statebins map filled by the `party` column.
winner_aes <- aes(state = state, fill = party)
p <- ggplot(data = election, mapping = winner_aes)

p1 <- p +
  geom_statebins(lbl_size = 5, border_col = "grey90", border_size = 1)

# Horizontal legend with enlarged title and text, placed at (.25, 1).
big_legend <- theme(
  legend.direction = "horizontal",
  legend.title = element_text(size = 30),
  legend.text = element_text(size = 30)
)

p2 <- p1 +
  labs(fill = "Winner") +
  coord_equal() +
  theme_statebins(legend_position = c(.25, 1)) +
  big_legend

# Named colors, matched to the values of `party` by name.
winner_fill <- c(Republican = "darkred", Democratic = "royalblue")

p2 + scale_fill_manual(values = winner_fill)
# scale_fill_gradient() with breaks.
p <- ggplot(
  data = election,
  mapping = aes(state = state, fill = pct_trump)
)

p1 <- p +
  geom_statebins(
    lbl_size = 5,
    border_col = "grey90",
    border_size = 1
  )

p2 <- p1 +
  labs(fill = "Percent Trump") +
  coord_equal() +
  theme_statebins(legend_position = c(.2, 1)) +
  theme(legend.direction = "horizontal")

# guide_legend() shows the gradient as discrete keys, one per break.
# NOTE(review): the label "41-58" does not line up with the breaks 48 and 57;
# confirm the intended ranges before changing it.
p2 +
  scale_fill_gradient(
    breaks = c(5, 21, 41, 48, 57),
    labels = c("< 5", "5-21", "21-41", "41-58", "> 57"),
    low = "#f9ecec",
    high = "#CB454A"
  ) +
  guides(fill = guide_legend())
# Read the New York poverty data (comma-separated, with a header row) from
# the course site, converting character columns to factors. The argument
# name is spelled out in full rather than relying on partial matching.
NY_socioecon_geo_poverty <- read.table(
  "https://bcecon.github.io/NY_socioecon_geo_poverty.csv",
  sep = ",",
  header = TRUE,
  stringsAsFactors = TRUE
)

library(viridis)
The viridis colors run in low-to-high sequences and combine perceptually uniform colors with easy-to-see, easily-contrasted hues along their scales.
# The scale_fill_viridis_c() function is for continuous data.
# The scale_fill_viridis_d() function is for discrete data.
p <- ggplot(
  data = NY_socioecon_geo_poverty,
  mapping = aes(x = long, y = lat, group = group, fill = c04_058)
)

p1 <- p +
  geom_polygon(color = "grey", size = 0.1) +
  coord_map(projection = "albers", lat0 = 39, lat1 = 45)

# Continuous viridis fill using the "plasma" option.
p2 <- p1 + scale_fill_viridis_c(option = "plasma") + theme_map()

# One panel per year, three panels per row; facet strips have no background.
p2 +
  facet_wrap(~ year, ncol = 3) +
  theme(
    legend.position = "bottom",
    strip.background = element_blank()
  ) +
  labs(
    fill = "Poverty rate in NY (%)",
    title = "Poverty rate for the male population 25 years and over \nfor whom the highest educational attainment is bachelor's degree"
  )
facet_wrap().