# install.packages(c(
# "tidyverse", "this.path", "janitor", "sf", "leaflet", "cowplot"
# ))
library(tidyverse)
library(this.path)
# Re-import and clean the ILINet data (same steps as intro)
ili <- read_csv(here("ILINet.csv"), skip = 1)
pop <- read_csv(here("population.csv"))Visualizations & Maps in R Worksheet
Welcome back!
Use this worksheet to follow along with the live coding demonstration. This workshop picks up where the Intro to R workshop left off.
Today we’ll cover two big topics: how ggplot2 really works, and making maps in R. A lot of what we’ll show you today would normally require paid software like Power BI or ArcGIS.
Setup & Motivation
Rebuild your workspace and load new data
ili <- ili |>
select(
REGION, YEAR, WEEK, `%UNWEIGHTED ILI`, ILITOTAL, `NUM. OF PROVIDERS`,
`TOTAL PATIENTS`
) |>
janitor::clean_names()
ili <- ili |>
mutate(
wk = case_when(
year == 2024 ~ week - 39,
year == 2025 ~ week + 13
),
ili_per_provider = ilitotal / num_of_providers
) |>
filter(region %in% c("Michigan", "Ohio", "Wisconsin")) |>
rename(
ili_total = ilitotal,
pct_unweighted_ili = percent_unweighted_ili
)
ili <- left_join(ili, pop, join_by(region == state))Now load the libraries and data for the mapping portion.
The new data here that we’ll be working with is a CDC-produced estimate of age-adjusted diabetes prevalence from their CDC PLACES dataset, December 2025 release. Its estimates are county-level, so will allow us to make a map of Michigan using: - county boundaries from the state’s ArcGIS portal - ZIP code boundaries (ZCTAs) from data.gov
No need to download these yourself! We’ve packaged and slightly trimmed them for you already.
library(sf)
places <- read_csv(here("PLACES__Michigan_County_Data_2025_release_20260313.csv"))
mi_counties <- st_read(here("MI_counties.geojson"))
mi_zips <- st_read(here("MI_ZIPs.geojson"))Run glimpse(ili) and glimpse(mi_counties) to confirm everything loaded.
ggplot2
In the intro workshop, we made plots. Now let’s understand the system behind them. ggplot2 builds plots in layers: data, geometry, scales, labels, themes.
Building a plot layer by layer
# Step 1: Just the plot and axes
ggplot(ili, aes(x = wk, y = ili_per_provider))# Step 2: A line! Well...
ggplot(ili, aes(x = wk, y = ili_per_provider)) +
geom_line()# Step 3: 3 lines, even better.
ggplot(ili, aes(x = wk, y = ili_per_provider, color = region)) +
geom_line()# Step 4: Points on lines. Lookin' great!
ggplot(ili, aes(x = wk, y = ili_per_provider, color = region)) +
geom_line() +
geom_point()Aesthetic mappings
Anything inside aes() maps a variable to a visual property. Common aesthetics: x, y, color, fill, size, shape, linetype, alpha. (color = outlines/lines; fill = filled areas.)
ggplot(ili, aes(x = wk, y = ili_per_provider, color = region)) +
geom_point(aes(size = ili_total), alpha = 0.6)# size is INSIDE aes() because it's mapped to a variable.
# alpha is OUTSIDE aes() because it's a fixed value.Different geoms
# Histogram
ggplot(ili, aes(x = ili_per_provider)) +
geom_histogram(bins = 30)
# Boxplot
ggplot(ili, aes(x = region, y = ili_per_provider)) +
geom_boxplot()
# Smoothed trend line
ggplot(ili, aes(x = wk, y = ili_per_provider, color = region)) +
geom_point(alpha = 0.3) +
geom_smooth(method = "loess", se = FALSE)There are dozens of geoms! Check out the ggplot2 reference for the full list.
Color scales
# Manual: pick exact colors
ggplot(ili, aes(x = wk, y = ili_per_provider, color = region)) +
geom_line() +
scale_color_manual(values = c(
"Michigan" = "blue", "Ohio" = "red", "Wisconsin" = "green"
))
# Brewer palettes: pre-built, colorblind-friendly
ggplot(ili, aes(x = wk, y = ili_per_provider, color = region)) +
geom_line(linewidth = 1) +
scale_color_brewer(palette = "Set2")
# For continuous variables, gradient scales
ggplot(ili, aes(x = wk, y = ili_per_provider, color = ili_total)) +
geom_point(size = 2) +
scale_color_viridis_c()The naming convention is scale_{aesthetic}_{type}(). So scale_color_manual() controls color with manually specified values, while scale_fill_viridis_c() controls fill with a continuous viridis palette. This pattern will come back when we make maps!
Axis scales
ggplot(ili, aes(x = wk, y = ili_per_provider, color = region)) +
geom_line() +
scale_x_continuous(
breaks = seq(0, 52, by = 4),
labels = \(x) glue::glue("Wk {x}")
) +
scale_y_continuous(
limits = c(0, NA),
labels = \(y) paste0(round(y, 1))
)Labels
ggplot(ili, aes(x = wk, y = ili_per_provider, color = region)) +
geom_line() +
labs(
title = "ILI per provider by state",
subtitle = "2024-2025 respiratory season",
x = "Week of season",
y = "ILI cases per provider",
color = "State",
caption = "Source: CDC ILINet"
)Themes
Built-in themes change the overall look. Try: theme_minimal(), theme_bw(), theme_classic(), theme_void() (we’ll use this one for maps!).
Fine-tune with theme():
ggplot(ili, aes(x = wk, y = ili_per_provider, color = region)) +
geom_line() +
theme_minimal() +
theme(
legend.position = "top",
legend.title = element_blank(),
panel.grid.minor = element_blank(),
axis.text = element_text(size = 10),
plot.title = element_text(face = "bold")
) +
labs(title = "ILI per provider by state")theme() has a lot of arguments. Don’t memorize them! Check the theme documentation when you need to tweak something. (We hope you memorized this, though: ?)
Faceting
facet_wrap() creates “small multiples,” splitting your plot into panels:
ggplot(ili, aes(x = wk, y = ili_per_provider)) +
geom_line(color = "steelblue") +
facet_wrap(~ region) +
theme_minimal()Multi-plots with cowplot
cowplot lets you combine separate plots with plot_grid():
library(cowplot)
Attaching package: 'cowplot'
The following object is masked from 'package:lubridate':
stamp
p1 <- ggplot(ili, aes(x = wk, y = ili_per_provider, color = region)) +
geom_line() + theme_minimal() + labs(title = "ILI per provider")
p2 <- ggplot(ili, aes(x = wk, y = ili_total, color = region)) +
geom_line() + theme_minimal() + labs(title = "Total ILI cases")
plot_grid(p1, p2, ncol = 2)There’s a whole ecosystem of packages that extend ggplot2: ggsankeyfier for Sankey diagrams, gganimate for animated plots, ggridges for ridge plots. They all follow the same layering pattern.
Custom Ordering with forcats
Factors are character values with a defined order. ggplot2 uses factor order for bars, legend entries, facets, etc. forcats is the tidyverse library for factors. Every function starts with fct_.
The problem: unordered characters
data(starwars)
sw_homeworlds <- starwars |>
filter(!is.na(homeworld)) |>
count(homeworld, sort = TRUE) |>
slice_head(n = 10)
# Alphabetical (not useful!)
sw_homeworlds |>
ggplot(aes(x = homeworld, y = n)) +
geom_col() +
coord_flip()Ordering factors
Some options:
fct_infreq(): order by frequencyfct_reorder(): order by another variablefct_relevel(): manual orderfct_lump_n(): collapse rare levels into “Other”
Try using fct_reorder() to fix the bar chart above:
Code
sw_homeworlds |>
mutate(homeworld = homeworld |> fct_reorder(n)) |>
ggplot(aes(x = homeworld, y = n)) +
geom_col() +
coord_flip() +
labs(title = "Top 10 homeworlds in starwars")Chaining fct_*() functions
starwars |>
filter(!is.na(homeworld)) |>
mutate(
homeworld = homeworld |>
fct_infreq() |>
fct_lump_n(n = 5) |>
fct_rev()
) |>
ggplot(aes(y = homeworld)) +
geom_col(stat = "count")Introduction to Geographic Data
The sf package lets R work with geographic data. The key idea: you’ve got your geo data (shapes, boundaries, points), and you’ve got your geo-associated metrics (health outcomes, counts). Mapping is about joining them together.
Loading geographic data
mi_counties <- st_read(here("data/MI_counties.geojson"))Reading layer `MI_counties' from data source
`C:\Users\jckjcbs\Documents\mi_support\workshop_dataviz_maps_R\data\MI_counties.geojson'
using driver `GeoJSON'
Simple feature collection with 83 features and 13 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -90.41828 ymin: 41.69612 xmax: -82.41347 ymax: 48.26268
Geodetic CRS: WGS 84
It’s just a data frame with a special geometry column:
class(mi_counties)[1] "sf" "data.frame"
glimpse(mi_counties)Rows: 83
Columns: 14
$ OBJECTID <int> 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353,…
$ FIPSCode <chr> "001", "003", "005", "007", "009", "011", "013", "017", "01…
$ Name <chr> "Alcona", "Alger", "Allegan", "Alpena", "Antrim", "Arenac",…
$ FeatureID <chr> "db9600c4-8fab-4c75-9a52-3ebe807e7ec2", "93326140-a6c5-441b…
$ MapLayout <chr> "landscape", "landscape", "landscape", "landscape", "landsc…
$ FIPSNum <int> 1, 3, 5, 7, 9, 11, 13, 17, 15, 19, 21, 23, 25, 27, 29, 31, …
$ Label <chr> "Alcona County", "Alger County", "Allegan County", "Alpena …
$ Type <chr> "County", "County", "County", "County", "County", "County",…
$ CntyCode <chr> "1", "3", "5", "7", "9", "11", "13", "17", "15", "19", "21"…
$ Peninsula <chr> "Lower", "Upper", "Lower", "Lower", "Lower", "Lower", "Uppe…
$ MGFVersion <chr> "V25", "V25", "V25", "V25", "V25", "V25", "V25", "V25", "V2…
$ Shape__Are <dbl> 1798557219, 2424879220, 2180969194, 1539372961, 1358721028,…
$ Shape__Len <dbl> 172519.8, 390876.7, 192581.6, 288786.0, 180700.6, 227329.9,…
$ geometry <MULTIPOLYGON [°]> MULTIPOLYGON (((-83.31858 4..., MULTIPOLYGON (…
plot(mi_counties["Name"])Check the coordinate reference system:
st_crs(mi_counties)Coordinate Reference System:
User input: WGS 84
wkt:
GEOGCRS["WGS 84",
DATUM["World Geodetic System 1984",
ELLIPSOID["WGS 84",6378137,298.257223563,
LENGTHUNIT["metre",1]]],
PRIMEM["Greenwich",0,
ANGLEUNIT["degree",0.0174532925199433]],
CS[ellipsoidal,2],
AXIS["geodetic latitude (Lat)",north,
ORDER[1],
ANGLEUNIT["degree",0.0174532925199433]],
AXIS["geodetic longitude (Lon)",east,
ORDER[2],
ANGLEUNIT["degree",0.0174532925199433]],
ID["EPSG",4326]]
Joining health data onto geographic data
Filter PLACES to age-adjusted diabetes prevalence, then join onto county geometries:
diabetes <- places |>
filter(
Short_Question_Text == "Diabetes",
Data_Value_Type == "Age-adjusted prevalence"
) |>
select(LocationName, Data_Value)
mi_diabetes <- mi_counties |>
left_join(diabetes, join_by(Name == LocationName))This works just like any left_join() because an sf object is just a data frame!
st_intersects() vs st_intersection()
What if we want ZIPs that overlap Oakland County? Let’s try a spatial filter first:
oakland <- mi_counties |>
filter(Name == "Oakland")
oakland_zips <- mi_zips |>
filter(st_intersects(mi_zips, oakland, sparse = FALSE)[, 1])
plot(oakland_zips["zip_code"])That looks bad! The ZIPs aren’t clipped to the county boundary. The fix is st_intersection(), which clips:
oakland_zips <- mi_zips |>
st_intersection(oakland)Warning: attribute variables are assumed to be spatially constant throughout
all geometries
plot(oakland_zips["zip_code"])st_intersection() clips geometries to the boundary. ZIPs crossing the county line get cut. Usually what you want for mapping, but be aware of it for analysis!
As ever, the documentation for sf is clarifying here. st_intersects() is a “binary predicate” (gives you TRUE/FALSE) while st_intersection() is a “geometric operation” (gives you a geo object).
Geographic Visualization Concepts
Types of maps
- Point map: dots on a map (e.g., hospital locations)
- Line map: lines on a map (e.g., roads, rivers)
- Choropleth: filled areas colored by a variable (this is what we’re making!)
- Raster: gridded pixel data (e.g., satellite imagery)
- Basemap: background layer for context (streets, terrain)
More specialized: spatial flow diagrams, cartograms, hexbin maps.
Security concerns with geographic health data
Geocoding converts addresses to lat/long. If you send patient addresses to a third-party API (Google, Esri, etc.), you may be exposing PHI.
MI-Support might be able to help you if you ever need to geocode addresses!
Small-number suppression: Mapping health data to small geographies (Census tracts, blocks) can make individuals re-identifiable. Most health departments suppress cells with fewer than 5 cases.
Making Maps
Choropleth with geom_sf()
ggplot(mi_diabetes) +
geom_sf(aes(fill = Data_Value))Same layering system as before: geom, scale, labs, theme. Because it’s specific to sf, you (usually) don’t even need to specify geometry as an aesthetic!
ggplot(mi_diabetes) +
geom_sf(aes(fill = Data_Value), color = "white", linewidth = 0.2) +
scale_fill_viridis_c(
name = "Prevalence (%)",
option = "plasma"
) +
labs(
title = "Diabetes Prevalence by County",
subtitle = "Age-adjusted, Michigan 2023",
caption = "Source: CDC PLACES 2025 release"
) +
theme_void() +
theme(
plot.title = element_text(face = "bold", size = 14),
legend.position = "right"
)Zooming in
Use coord_sf() to set bounding box limits:
ggplot(mi_diabetes) +
geom_sf(aes(fill = Data_Value), color = "white", linewidth = 0.2) +
scale_fill_viridis_c(name = "Prevalence (%)", option = "plasma") +
coord_sf(
xlim = c(-84.5, -82.5),
ylim = c(41.7, 43.0)
) +
labs(title = "Diabetes Prevalence: SE Michigan") +
theme_void()Oakland County ZIPs
ggplot(oakland_zips) +
geom_sf(fill = "lightblue", color = "white", linewidth = 0.3) +
labs(title = "ZIP Codes in Oakland County") +
theme_void()Interactive maps with leaflet
leaflet makes zoomable, interactive maps. It needs data in WGS 84 (EPSG:4326).
library(leaflet)
oakland_zips_4326 <- st_transform(oakland_zips, crs = 4326)
leaflet(oakland_zips_4326) |>
addTiles() |>
addPolygons(
weight = 1,
color = "navy",
fillColor = "lightblue",
fillOpacity = 0.5,
label = ~zip_code
)Thank you for following along!
Don’t forget that MI-Support offers R office hours if you’d ever like to continue the conversation about R or discuss a project (free of charge!) to integrate it into your workflow.