# Load required packages
library(tidyverse)
library(tidycensus)
library(janitor)
library(sf)
library(tigris)
library(scales)
library(patchwork)
library(RColorBrewer)
library(units)
library(knitr)
library(caret)Demographic Changes at Census Tract Level - Sharswood
URBS 4000: Urban Studies Thesis
Project Title: Mixed by Design
Part 1: ACS Data
American Community Survey (ACS) Estimates
The first ACS 5-year estimate was released in late 2010 as a replacement of the longform decennial Census. Every year, the ACS surveys about 1 in 40 housing units to produce 1-year estimates, but these housing units must not have been surveyed in the previous five years. Hence, the 5-year estimate incorporates five years of non-overlapping data (approximately 1 in 8 housing units) to recreate the longform of the decennial census.
In this exploratory data analysis, I used three separate (i.e. no overlapping years) ACS 5-year estimates to provide pre- and post-redevelopment snapshots: 2009-2012, 2013-17, and 2018-2022.
There are two main limitations with this methodology:
- The census tracts do not align exactly with the boundaries of the Sharswood redevelopment area
- 5-year estimates also smooth out significant redevelopment events over multiple years, and it is challenging to identify the critical junctures in the redevelopment process and their direct impact on the neighborhood
Step 1: Load Philadelphia Demographic Data using tidycensus
# Load all available variables for ACS 5-year 2022
acs_vars_2022 <- load_variables(2022, "acs5", cache = TRUE)# Helper Variables for Summing Population Values for Children aged 5-19
child_pop = c("B01001_004", "B01001_005", "B01001_006", "B01001_007",
"B01001_028", "B01001_029", "B01001_030", "B01001_031")
# Helper Variable for Summing Population Values for Elderly Population
elderly_pop = c("B01001_020", "B01001_021", "B01001_022", "B01001_023",
"B01001_024", "B01001_025",
"B01001_044", "B01001_045", "B01001_046",
"B01001_047", "B01001_048", "B01001_049")
# Helper Function for Variables
vars <- c(
total_pop = "B01003_001",
child_pop = child_pop,
elderly_pop = elderly_pop,
median_income = "B19013_001",
poverty_total = "B17001_001",
poverty_level = "B17001_002",
White = "B03002_003",
Black = "B03002_004",
Hispanic = "B03002_012"
)
# Helper Function for Summarizing ACS
summarize_acs <- function(df) {
df |>
mutate(
# totals (E) and MOEs (M; combined by quadrature)
elderly_popE = rowSums(across(matches("^elderly_pop\\d+E$")), na.rm = TRUE),
elderly_popM = sqrt(rowSums(across(matches("^elderly_pop\\d+M$"), ~ (.x)^2), na.rm = TRUE)),
child_popE = rowSums(across(matches("^child_pop\\d+E$")), na.rm = TRUE),
child_popM = sqrt(rowSums(across(matches("^child_pop\\d+M$"), ~ (.x)^2), na.rm = TRUE)),
# percentages
pct_elderly = round((elderly_popE / total_popE) * 100, 2),
pct_child = round((child_popE / total_popE) * 100, 2),
pct_white = round((WhiteE / total_popE) * 100, 2),
pct_black = round((BlackE / total_popE) * 100, 2),
pct_hispanic = round((HispanicE / total_popE) * 100, 2),
pct_poverty = round((poverty_levelE / poverty_totalE) * 100, 2)
) |>
select(any_of(c(
"GEOID", "tract_name", "county_name", "total_popE", "median_incomeE",
"elderly_popE", "elderly_popM", "pct_elderly",
"child_popE", "child_popM", "pct_child",
"pct_poverty", "pct_white", "pct_black", "pct_hispanic", "geometry")))
}# Get tract-level demographic data from 2022 ACS 5-Yr Estimates for Philadelphia
phl_tract_2022_data <- get_acs(
geography = "tract",
variables = vars,
state = "PA",
county = "Philadelphia",
year = 2022,
survey = "acs5",
output = "wide",
geometry = TRUE
)
# Clean the county names to remove state name and "County"
phl_tract_2022_clean <- phl_tract_2022_data |>
separate(
NAME,
into = c("tract_name", "county_name", "state_name"),
sep = "; "
) |>
mutate(
tract_name = str_remove(tract_name, "Census Tract "),
county_name = str_remove(county_name, " County")
)
phl_tract_2022_summary <- summarize_acs(phl_tract_2022_clean)Step 2: Filter Data for Sharswood
# Filter Census Tracts for Sharswood
sharswood_2022_tract_sf <- phl_tract_2022_summary |>
filter(tract_name %in% c("138", "139"))
sharswood_2022_tract <- sharswood_2022_tract_sf |>
st_drop_geometry()
sharswood_2022_tractStep 3: Repeat Process for 2013-2017 ACS 5-Yr Estimates
# Get tract-level demographic data from 2017 ACS 5-Yr Estimates for Philadelphia
phl_2017_tract_data <- get_acs(
geography = "tract",
variables = vars,
state = "PA",
county = "Philadelphia",
year = 2017,
survey = "acs5",
output = "wide",
geometry = TRUE
)
# Clean the county names to remove state name and "County"
phl_2017_tract_clean <- phl_2017_tract_data |>
separate(
NAME,
into = c("tract_name", "county_name", "state_name"),
sep = ", "
) |>
mutate(
tract_name = str_remove(tract_name, "Census Tract "),
county_name = str_remove(county_name, " County")
)
phl_2017_tract_summary <- summarize_acs(phl_2017_tract_clean)# Filter Census Tracts for Sharswood
sharswood_2017_tract_sf <- phl_2017_tract_summary |>
filter(tract_name == c("138", "139"))
sharswood_2017_tract <- sharswood_2017_tract_sf |>
st_drop_geometry()
sharswood_2017_tractStep 4: Repeat Process for 2008-2012 ACS 5-Yr Estimates
# Get tract-level demographic data from 2022 ACS 5-Yr Estimates for Philadelphia
phl_2012_tract_data <- get_acs(
geography = "tract",
variables = vars,
state = "PA",
county = "Philadelphia",
year = 2012,
survey = "acs5",
output = "wide",
geometry = TRUE
)
# Clean the county names to remove state name and "County"
phl_2012_tract_clean <- phl_2012_tract_data |>
separate(
NAME,
into = c("tract_name", "county_name", "state_name"),
sep = ", "
) |>
mutate(
tract_name = str_remove(tract_name, "Census Tract "),
county_name = str_remove(county_name, " County")
)
phl_2012_tract_summary <- summarize_acs(phl_2012_tract_clean)# Filter Census Tracts for Sharswood
sharswood_2012_tract_sf <- phl_2012_tract_summary |>
filter(tract_name %in% c("138", "139"))
sharswood_2012_tract <- sharswood_2012_tract_sf |>
st_drop_geometry()
sharswood_2012_tractPart 2: Comprehensive Visualization and Analysis
Step 1: Rename Columns for ACS Year
cols_to_suffix <- c(
"total_popE","median_incomeE",
"elderly_popE","elderly_popM","pct_elderly",
"child_popE","child_popM","pct_child", "pct_poverty",
"pct_white", "pct_black", "pct_hispanic"
)
s12_sf <- sharswood_2012_tract_sf |>
rename_with(~ paste0(.x, "_2012"), any_of(cols_to_suffix))
s12 <- sharswood_2012_tract |>
rename_with(~ paste0(.x, "_2012"), any_of(cols_to_suffix))
s17_sf <- sharswood_2017_tract_sf |>
rename_with(~ paste0(.x, "_2017"), any_of(cols_to_suffix))
s17 <- sharswood_2017_tract |>
rename_with(~ paste0(.x, "_2017"), any_of(cols_to_suffix))
s22_sf <- sharswood_2022_tract_sf |>
rename_with(~ paste0(.x, "_2022"), any_of(cols_to_suffix))
s22 <- sharswood_2022_tract |>
rename_with(~ paste0(.x, "_2022"), any_of(cols_to_suffix))compare_sf <- s12_sf |>
select(GEOID, tract_name, county_name, geometry, ends_with("_2012")) |>
left_join(select(s17, GEOID, ends_with("_2017")), by = "GEOID") |>
left_join(select(s22, GEOID, ends_with("_2022")), by = "GEOID") |>
mutate(
pop_change_12_22 = total_popE_2022 - total_popE_2012,
pop_pct_change_12_22 = 100 * (total_popE_2022 - total_popE_2012) / total_popE_2012,
d_pct_child_12_22 = pct_child_2022 - pct_child_2012,
d_pct_elderly_12_22 = pct_elderly_2022 - pct_elderly_2012,
d_pct_poverty_12_22 = pct_poverty_2022 - pct_poverty_2012,
d_pct_white_12_22 = pct_white_2022 - pct_white_2012,
d_pct_black_12_22 = pct_black_2022 - pct_black_2012,
d_pct_hispanic_12_22 = pct_hispanic_2022 - pct_hispanic_2012,
d_income_12_17 = median_incomeE_2017 - median_incomeE_2012,
d_income_17_22 = median_incomeE_2022 - median_incomeE_2017,
d_income_12_22 = median_incomeE_2022 - median_incomeE_2012
)# Table 1: Population
tbl_pop <- compare_sf |>
st_drop_geometry() |>
select(
GEOID, total_popE_2012, total_popE_2017, total_popE_2022,
pop_change_12_22, pop_pct_change_12_22,
pct_child_2012, pct_child_2017, pct_child_2022, d_pct_child_12_22,
pct_elderly_2012, pct_elderly_2017, pct_elderly_2022, d_pct_elderly_12_22
) |>
rename(
`Census Tract` = GEOID,
`Total Pop (2012)` = total_popE_2012,
`Total Pop (2017)` = total_popE_2017,
`Total Pop (2022)` = total_popE_2022,
`Δ pop 12–22` = pop_change_12_22,
`% change 12–22` = pop_pct_change_12_22,
`Child % 2012` = pct_child_2012,
`Child % 2017` = pct_child_2017,
`Child % 2022` = pct_child_2022,
`Δ Child (pp) 12–22` = d_pct_child_12_22,
`Elderly % 2012` = pct_elderly_2012,
`Elderly % 2017` = pct_elderly_2017,
`Elderly % 2022` = pct_elderly_2022,
`Δ Elderly (pp) 12–22` = d_pct_elderly_12_22
)
kable(tbl_pop, digits = 1, caption = "Sharswood: Changes in Population",
format.args = list(big.mark = ","))# Table 2: Race
tbl_race <- compare_sf |>
st_drop_geometry() |>
select(
GEOID, pct_white_2012, pct_white_2017, pct_white_2022, d_pct_white_12_22,
pct_black_2012, pct_black_2017, pct_black_2022, d_pct_black_12_22,
pct_hispanic_2012, pct_hispanic_2017, pct_hispanic_2022, d_pct_hispanic_12_22
) |>
rename(
`Census Tract` = GEOID,
`White % 2012` = pct_white_2012,
`White % 2017` = pct_white_2017,
`White % 2022` = pct_white_2022,
`Δ White (pp) 12–22` = d_pct_white_12_22,
`Black % 2012` = pct_black_2012,
`Black % 2017` = pct_black_2017,
`Black % 2022` = pct_black_2022,
`Δ Black (pp) 12–22` = d_pct_black_12_22,
`Hispanic % 2012` = pct_hispanic_2012,
`Hispanic % 2017` = pct_hispanic_2017,
`Hispanic % 2022` = pct_hispanic_2022,
`Δ Hispanic (pp) 12–22` = d_pct_hispanic_12_22
)
kable(tbl_race, digits = 1, caption = "Sharswood: Changes in Race")# Table 3: Income
tbl_income <- compare_sf |>
st_drop_geometry() |>
select(
GEOID,
pct_poverty_2012, pct_poverty_2017, pct_poverty_2022, d_pct_poverty_12_22,
median_incomeE_2012, median_incomeE_2017, median_incomeE_2022,
d_income_12_17, d_income_17_22, d_income_12_22
) |>
rename(
`Census Tract` = GEOID,
`Poverty % 2012` = pct_poverty_2012,
`Poverty % 2017` = pct_poverty_2017,
`Poverty % 2022` = pct_poverty_2022,
`Δ Poverty (pp) 12–22`= d_pct_poverty_12_22,
`Median income (2012)`= median_incomeE_2012,
`Median income (2017)`= median_incomeE_2017,
`Median income (2022)`= median_incomeE_2022,
`Δ income 12–17` = d_income_12_17,
`Δ income 17–22` = d_income_17_22,
`Δ income 12–22` = d_income_12_22
)
kable(tbl_income, digits = 1, caption = "Sharswood: Changes in Income & Poverty",
format.args = list(big.mark = ","))Step 2: Facet Maps by Period
# Convert from Wide to Long
long_panel <- compare_sf |>
pivot_longer(
cols = matches("_(2012|2017|2022)$"),
names_to = c("metric","period"),
names_pattern = "(.+)_([0-9]{4})$",
values_to = "value"
)
long_panel_pts <- long_panel |> st_point_on_surface()
# Facet Helper with Boxed Labels
facet_metric <- function(metric_name, title, fill_lab,
fill_lab_fmt = NULL, text_fmt = NULL,
text_size = 3, box_fill = alpha("white", 0.60)) {
dat <- long_panel |> filter(metric == metric_name)
lab <- long_panel_pts |> filter(metric == metric_name)
if (is.null(text_fmt)) text_fmt <- label_number(accuracy = 1)
lab <- lab |> mutate(lbl = text_fmt(value))
ggplot() +
geom_sf(data = dat, aes(fill = value), color = "grey25", linewidth = 0.25) +
geom_sf_label(data = lab, aes(label = lbl), size = text_size,
label.size = 0, label.padding = unit(1.2, "pt"),
fill = box_fill, color = "black", label.r = unit(1.5, "pt"),
check_overlap = TRUE, na.rm = TRUE, show.legend = FALSE) +
facet_wrap(~ period, nrow = 1) +
labs(
title = title,
subtitle = "Panels: 2008-2012, 2013-2017, 2018-2022",
fill = fill_lab,
caption = "Data Source: ACS 5-year estimates"
) +
theme_void() +
theme(legend.position = "right") +
{
if (is.null(fill_lab_fmt)) {
scale_fill_viridis_c()
} else {
scale_fill_viridis_c(labels = fill_lab_fmt)
}
}
}
# Format Data Labels
pct_fmt <- label_number(accuracy = 0.1, suffix = "%") # your % values are 0–100
dol_fmt <- label_dollar(accuracy = 1)
# Facet Maps
facet_metric("pct_poverty", "Sharswood: % in Poverty", "%",
fill_lab_fmt = pct_fmt, text_fmt = pct_fmt)
facet_metric("pct_white", "Sharswood: % White (race alone)", "%",
fill_lab_fmt = pct_fmt, text_fmt = pct_fmt)
facet_metric("pct_black", "Sharswood: % Black (race alone)", "%",
fill_lab_fmt = pct_fmt, text_fmt = pct_fmt)
facet_metric("pct_hispanic", "Sharswood: % Hispanic (any race)", "%",
fill_lab_fmt = pct_fmt, text_fmt = pct_fmt)
facet_metric("pct_child", "Sharswood: % Children (5–19)", "%",
fill_lab_fmt = pct_fmt, text_fmt = pct_fmt)
facet_metric("pct_elderly", "Sharswood: % Elderly (65+)", "%",
fill_lab_fmt = pct_fmt, text_fmt = pct_fmt)
facet_metric("median_incomeE", "Sharswood: Median Household Income", "$",
fill_lab_fmt = dol_fmt, text_fmt = dol_fmt)Step 3: Change Maps
# Aesthetic Formatting
period_lab <- c("2012" = "2008–12", "2017" = "2013–17", "2022" = "2018–22")
period_pal <- c("2008–12" = "#8DA0CB", "2013–17" = "#66C2A5", "2018–22" = "#FC8D62")# Demographic Change
bars <- long_panel |>
filter(metric %in% c("pct_child","pct_elderly")) |>
mutate(metric = recode(metric, pct_child = "% Children (5–19)", pct_elderly = "% Elderly (65+)"),
period = recode(period, !!!period_lab),
tract_name = paste("Tract", tract_name))
ggplot(bars, aes(x = period, y = value, fill = period)) +
geom_col(width = 0.7) +
facet_grid(metric ~ tract_name, scales = "free_y") +
scale_fill_manual(values = period_pal, guide = "none") +
scale_y_continuous(labels = label_number(accuracy = 0.1)) +
labs(title = "Before–After Comparison by Tract and Period",
subtitle = "Data Source: ACS 5-Year Estimates",
x = NULL, y = "Percent") +
theme_minimal() + theme(legend.position = "none")# Plot Median Income
bars_income <- long_panel |>
filter(metric == "median_incomeE") |>
mutate(period = recode(period, !!!period_lab),
tract_name = paste("Tract", tract_name))
ggplot(bars_income, aes(x = period, y = value, fill = period)) +
geom_col(width = 0.7) +
facet_grid(~ tract_name, scales = "free_y") +
scale_fill_manual(values = period_pal, guide = "none") +
scale_y_continuous(labels = label_dollar(accuracy = 1)) +
labs(
title = "Before–After: Median Household Income by Tract",
subtitle = "Data Source: ACS 5-Year Estimates",
x = NULL, y = "Dollars"
) +
theme_minimal()
# Plot Poverty Rate
bars_poverty <- long_panel |>
filter(metric == "pct_poverty") |>
mutate(period = recode(period, !!!period_lab),
tract_name = paste("Tract", tract_name))
ggplot(bars_poverty, aes(x = period, y = value, fill = period)) +
geom_col(width = 0.7) +
facet_grid(~ tract_name, scales = "free_y") +
scale_fill_manual(values = period_pal, guide = "none") +
scale_y_continuous(labels = label_number(accuracy = 0.1, suffix = "%")) +
labs(
title = "Before–After: Poverty Rate by Tract",
x = NULL, y = "Percent"
) +
theme_minimal()Step 4: Composite Maps
Multi-metric × Period Grid
grid_metrics <- c("pct_poverty","pct_child","pct_elderly","pct_black","pct_white","pct_hispanic")
metric_labels <- c("Poverty %","Children %","Elderly %",
"Black %","White %","Hispanic %")
long_subset <- long_panel |>
filter(metric %in% grid_metrics) |>
mutate(
metric = factor(metric, levels = grid_metrics, labels = metric_labels),
period = factor(period,
levels = c("2012","2017","2022"),
labels = c("2008–12","2013–17","2018–22"))
)
long_subset_pts <- long_panel_pts |>
filter(metric %in% grid_metrics) |>
mutate(
metric = factor(metric, levels = grid_metrics, labels = metric_labels),
period = factor(period,
levels = c("2012","2017","2022"),
labels = c("2008–12","2013–17","2018–22")),
lbl = label_number(accuracy = 0.1, suffix = "%")(value)
)
ggplot() +
geom_sf(data = long_subset, aes(fill = value),
color = "grey25", linewidth = 0.25) +
geom_sf_label(data = long_subset_pts, aes(label = lbl),
size = 3, label.size = 0,
label.padding = unit(1, "pt"), fill = alpha("white", 0.60),
color = "black", show.legend = FALSE) +
facet_grid(metric ~ period) +
scale_fill_distiller(palette = "Spectral", breaks = c(0, 25, 50, 75, 100),
limits = c(0, 100), direction = 1,
labels = label_number(accuracy = 1, suffix = "%")) +
guides(fill = guide_colorbar(title.position = "top", ticks = TRUE,
barheight = unit(80, "pt"),
bardwidth = unit(5, "pt"))) +
labs(title = "Sharswood: ACS 5-Yr Estimates by Census Tract",
subtitle = "Tract 138 is the left polygon and Tract 139 is the right polygon") +
theme_void() +
theme(legend.position = "right",
plot.title = element_text(face = "bold", size = 12))Bivariate change map
library(forcats)
bivar <- compare_sf %>%
st_drop_geometry() %>%
transmute(GEOID,
d_pov = d_pct_poverty_12_22,
d_inc = d_income_12_22) %>%
mutate(
c_pov = cut_number(d_pov, n = 3, labels = c("low Δpov","mid Δpov","high Δpov")),
c_inc = cut_number(d_inc, n = 3, labels = c("low Δinc","mid Δinc","high Δinc")),
bivar_class = interaction(fct_rev(c_pov), c_inc, sep = " / ", drop = TRUE)
)
# simple 3x3 palette
pal <- c(
"high Δpov / low Δinc" = "#3F2949","high Δpov / mid Δinc" = "#435786","high Δpov / high Δinc" = "#4885C1",
"mid Δpov / low Δinc" = "#77324C","mid Δpov / mid Δinc" = "#806A8A","mid Δpov / high Δinc" = "#89A1C8",
"low Δpov / low Δinc" = "#AE3A4E","low Δpov / mid Δinc" = "#BC7C8F","low Δpov / high Δinc" = "#CABED0"
)
compare_bivar <- compare_sf %>% left_join(bivar, by = "GEOID")
ggplot(compare_bivar) +
geom_sf(aes(fill = bivar_class), color = "grey25", linewidth = 0.25) +
scale_fill_manual(values = pal, na.value = "grey85", name = "Δ Poverty vs Δ Income\n(2012→2022)") +
labs(title = "Bivariate Change Map: Poverty (pct-pts) vs Income ($)",
subtitle = "Data Source: ACS 5-Year Estimates") +
theme_void()