Demographic Changes at Census Tract Level - Sharswood

URBS 4000: Urban Studies Thesis

Author

Jed Chew

Published

December 8, 2025

Project Title: Mixed by Design

A Case Study of the Philadelphia Housing Authority’s (PHA) Choice Neighborhoods Redevelopment of Sharswood

For my Urban Studies thesis, I am researching the PHA’s Choice Neighborhoods redevelopment of the Sharswood neighborhood in North Philadelphia. I have two main research questions about the process and outcome of this redevelopment:

(1) the process by which the PHA aligned the politics, finance, and design for the redevelopment of Sharswood; and

(2) the early redevelopment outcomes relative to the Choice Neighborhoods Initiative (CNI) vision of mixed-partners, mixed-use, and mixed-income

Part 1: ACS Data

American Community Survey (ACS) Estimates

The first ACS 5-year estimate was released in late 2010 as a replacement of the longform decennial Census. Every year, the ACS surveys about 1 in 40 housing units to produce 1-year estimates, but these housing units must not have been surveyed in the previous five years. Hence, the 5-year estimate incorporates five years of non-overlapping data (approximately 1 in 8 housing units) to recreate the longform of the decennial census.

In this exploratory data analysis, I used three separate (i.e. no overlapping years) ACS 5-year estimates to provide pre- and post-redevelopment snapshots: 2009-2012, 2013-17, and 2018-2022.

There are two main limitations with this methodology:

The census tracts do not align exactly with the boundaries of the Sharswood redevelopment area
5-year estimates also smooth out significant redevelopment events over multiple years, and it is challenging to identify the critical junctures in the redevelopment process and their direct impact on the neighborhood

Step 1: Load Philadelphia Demographic Data using `tidycensus`

# Load required packages
library(tidyverse)
library(tidycensus)
library(janitor)
library(sf)
library(tigris)
library(scales)
library(patchwork)
library(RColorBrewer)
library(units)
library(knitr)
library(caret)

# Load all available variables for ACS 5-year 2022
acs_vars_2022 <- load_variables(2022, "acs5", cache = TRUE)

# Helper Variables for Summing Population Values for Children aged 5-19
child_pop = c("B01001_004", "B01001_005", "B01001_006", "B01001_007", 
                "B01001_028", "B01001_029", "B01001_030", "B01001_031")


# Helper Variable for Summing Population Values for Elderly Population
elderly_pop = c("B01001_020", "B01001_021", "B01001_022", "B01001_023", 
                "B01001_024", "B01001_025", 
                "B01001_044", "B01001_045", "B01001_046", 
                "B01001_047", "B01001_048", "B01001_049")

# Helper Function for Variables
vars <- c(
  total_pop = "B01003_001",
  child_pop = child_pop,
  elderly_pop = elderly_pop,
  median_income = "B19013_001",
  poverty_total = "B17001_001",
  poverty_level = "B17001_002",
  White     = "B03002_003",
  Black     = "B03002_004",
  Hispanic  = "B03002_012"
)

# Helper Function for Summarizing ACS
summarize_acs <- function(df) {
  df |> 
       mutate(
      # totals (E) and MOEs (M; combined by quadrature)
      elderly_popE = rowSums(across(matches("^elderly_pop\\d+E$")), na.rm = TRUE),
      elderly_popM = sqrt(rowSums(across(matches("^elderly_pop\\d+M$"), ~ (.x)^2), na.rm = TRUE)),
      child_popE   = rowSums(across(matches("^child_pop\\d+E$")),   na.rm = TRUE),
      child_popM   = sqrt(rowSums(across(matches("^child_pop\\d+M$"),   ~ (.x)^2), na.rm = TRUE)),

      # percentages
      pct_elderly  = round((elderly_popE / total_popE) * 100, 2),
      pct_child    = round((child_popE   / total_popE) * 100, 2),
      pct_white    = round((WhiteE       / total_popE) * 100, 2),
      pct_black    = round((BlackE       / total_popE) * 100, 2),
      pct_hispanic = round((HispanicE    / total_popE) * 100, 2),
      pct_poverty  = round((poverty_levelE / poverty_totalE) * 100, 2)
    ) |>
    select(any_of(c(
      "GEOID", "tract_name", "county_name", "total_popE", "median_incomeE",
      "elderly_popE", "elderly_popM", "pct_elderly",
      "child_popE", "child_popM", "pct_child",
      "pct_poverty", "pct_white", "pct_black", "pct_hispanic", "geometry")))
}

# Get tract-level demographic data from 2022 ACS 5-Yr Estimates for Philadelphia
phl_tract_2022_data <- get_acs(
  geography = "tract",
  variables = vars,
  state = "PA",
  county = "Philadelphia",
  year = 2022,
  survey = "acs5",
  output = "wide",
  geometry = TRUE
)

# Clean the county names to remove state name and "County" 
phl_tract_2022_clean <- phl_tract_2022_data |> 
  separate(
    NAME, 
    into = c("tract_name", "county_name", "state_name"), 
    sep = "; "
  ) |> 
  mutate(
    tract_name = str_remove(tract_name, "Census Tract "),
    county_name = str_remove(county_name, " County")
  )
phl_tract_2022_summary <- summarize_acs(phl_tract_2022_clean)

Step 2: Filter Data for Sharswood

# Filter Census Tracts for Sharswood
sharswood_2022_tract_sf <- phl_tract_2022_summary |> 
  filter(tract_name %in% c("138", "139"))

sharswood_2022_tract <- sharswood_2022_tract_sf |> 
  st_drop_geometry()
sharswood_2022_tract

Step 3: Repeat Process for 2013-2017 ACS 5-Yr Estimates

# Get tract-level demographic data from 2017 ACS 5-Yr Estimates for Philadelphia
phl_2017_tract_data <- get_acs(
  geography = "tract",
  variables = vars,
  state = "PA",
  county = "Philadelphia",
  year = 2017,
  survey = "acs5",
  output = "wide",
  geometry = TRUE
)

# Clean the county names to remove state name and "County" 
phl_2017_tract_clean <- phl_2017_tract_data |> 
  separate(
    NAME, 
    into = c("tract_name", "county_name", "state_name"), 
    sep = ", "
  ) |> 
  mutate(
    tract_name = str_remove(tract_name, "Census Tract "),
    county_name = str_remove(county_name, " County")
  )
phl_2017_tract_summary <- summarize_acs(phl_2017_tract_clean)

# Filter Census Tracts for Sharswood
sharswood_2017_tract_sf <- phl_2017_tract_summary |> 
  filter(tract_name == c("138", "139"))

sharswood_2017_tract <- sharswood_2017_tract_sf |> 
  st_drop_geometry()
sharswood_2017_tract

Step 4: Repeat Process for 2008-2012 ACS 5-Yr Estimates

# Get tract-level demographic data from 2022 ACS 5-Yr Estimates for Philadelphia
phl_2012_tract_data <- get_acs(
  geography = "tract",
  variables = vars,
  state = "PA",
  county = "Philadelphia",
  year = 2012,
  survey = "acs5",
  output = "wide",
  geometry = TRUE
)

# Clean the county names to remove state name and "County" 
phl_2012_tract_clean <- phl_2012_tract_data |> 
  separate(
    NAME, 
    into = c("tract_name", "county_name", "state_name"), 
    sep = ", "
  ) |> 
  mutate(
    tract_name = str_remove(tract_name, "Census Tract "),
    county_name = str_remove(county_name, " County")
  )
phl_2012_tract_summary <- summarize_acs(phl_2012_tract_clean)

# Filter Census Tracts for Sharswood
sharswood_2012_tract_sf <- phl_2012_tract_summary |> 
  filter(tract_name %in% c("138", "139"))

sharswood_2012_tract <- sharswood_2012_tract_sf |> 
  st_drop_geometry()
sharswood_2012_tract

Part 2: Comprehensive Visualization and Analysis

Step 1: Rename Columns for ACS Year

cols_to_suffix <- c(
  "total_popE","median_incomeE",
  "elderly_popE","elderly_popM","pct_elderly",
  "child_popE","child_popM","pct_child", "pct_poverty",
  "pct_white", "pct_black", "pct_hispanic"
)

s12_sf <- sharswood_2012_tract_sf |>
  rename_with(~ paste0(.x, "_2012"), any_of(cols_to_suffix))
s12 <- sharswood_2012_tract |>
  rename_with(~ paste0(.x, "_2012"), any_of(cols_to_suffix))
s17_sf <- sharswood_2017_tract_sf |>
  rename_with(~ paste0(.x, "_2017"), any_of(cols_to_suffix))
s17 <- sharswood_2017_tract |>
  rename_with(~ paste0(.x, "_2017"), any_of(cols_to_suffix))
s22_sf <- sharswood_2022_tract_sf |>
  rename_with(~ paste0(.x, "_2022"), any_of(cols_to_suffix))
s22 <- sharswood_2022_tract |>
  rename_with(~ paste0(.x, "_2022"), any_of(cols_to_suffix))

compare_sf <- s12_sf |>
  select(GEOID, tract_name, county_name, geometry, ends_with("_2012")) |>
  left_join(select(s17, GEOID, ends_with("_2017")), by = "GEOID") |>
  left_join(select(s22, GEOID, ends_with("_2022")), by = "GEOID") |>
  mutate(
    pop_change_12_22       = total_popE_2022 - total_popE_2012,
    pop_pct_change_12_22   = 100 * (total_popE_2022 - total_popE_2012) / total_popE_2012,

    d_pct_child_12_22    = pct_child_2022    - pct_child_2012,
    d_pct_elderly_12_22  = pct_elderly_2022  - pct_elderly_2012,
    d_pct_poverty_12_22  = pct_poverty_2022  - pct_poverty_2012,
    d_pct_white_12_22    = pct_white_2022    - pct_white_2012,
    d_pct_black_12_22    = pct_black_2022    - pct_black_2012,
    d_pct_hispanic_12_22 = pct_hispanic_2022 - pct_hispanic_2012,

    d_income_12_17 = median_incomeE_2017 - median_incomeE_2012,
    d_income_17_22 = median_incomeE_2022 - median_incomeE_2017,
    d_income_12_22 = median_incomeE_2022 - median_incomeE_2012
  )

# Table 1: Population
tbl_pop <- compare_sf |>
  st_drop_geometry() |>
  select(
    GEOID, total_popE_2012, total_popE_2017, total_popE_2022,
    pop_change_12_22, pop_pct_change_12_22,
    pct_child_2012, pct_child_2017, pct_child_2022, d_pct_child_12_22,
    pct_elderly_2012, pct_elderly_2017, pct_elderly_2022, d_pct_elderly_12_22
  ) |>
  rename(
    `Census Tract` = GEOID,
    `Total Pop (2012)` = total_popE_2012,
    `Total Pop (2017)` = total_popE_2017,
    `Total Pop (2022)` = total_popE_2022,
    `Δ pop 12–22`      = pop_change_12_22,
    `% change 12–22`   = pop_pct_change_12_22,
    `Child % 2012`     = pct_child_2012,
    `Child % 2017`     = pct_child_2017,
    `Child % 2022`     = pct_child_2022,
    `Δ Child (pp) 12–22` = d_pct_child_12_22,
    `Elderly % 2012`     = pct_elderly_2012,
    `Elderly % 2017`     = pct_elderly_2017,
    `Elderly % 2022`     = pct_elderly_2022,
    `Δ Elderly (pp) 12–22` = d_pct_elderly_12_22
  )

kable(tbl_pop, digits = 1, caption = "Sharswood: Changes in Population",
      format.args = list(big.mark = ","))

# Table 2: Race
tbl_race <- compare_sf |>
  st_drop_geometry() |>
  select(
    GEOID, pct_white_2012, pct_white_2017, pct_white_2022, d_pct_white_12_22,
    pct_black_2012, pct_black_2017, pct_black_2022, d_pct_black_12_22,
    pct_hispanic_2012, pct_hispanic_2017, pct_hispanic_2022, d_pct_hispanic_12_22
  ) |>
  rename(
    `Census Tract` = GEOID,
    `White % 2012` = pct_white_2012,
    `White % 2017` = pct_white_2017,
    `White % 2022` = pct_white_2022,
    `Δ White (pp) 12–22` = d_pct_white_12_22,
    `Black % 2012` = pct_black_2012,
    `Black % 2017` = pct_black_2017,
    `Black % 2022` = pct_black_2022,
    `Δ Black (pp) 12–22` = d_pct_black_12_22,
    `Hispanic % 2012` = pct_hispanic_2012,
    `Hispanic % 2017` = pct_hispanic_2017,
    `Hispanic % 2022` = pct_hispanic_2022,
    `Δ Hispanic (pp) 12–22` = d_pct_hispanic_12_22
  )

kable(tbl_race, digits = 1, caption = "Sharswood: Changes in Race")

# Table 3: Income
tbl_income <- compare_sf |>
  st_drop_geometry() |>
  select(
    GEOID, 
    pct_poverty_2012, pct_poverty_2017, pct_poverty_2022, d_pct_poverty_12_22,
    median_incomeE_2012, median_incomeE_2017, median_incomeE_2022,
    d_income_12_17, d_income_17_22, d_income_12_22
  ) |>
  rename(
    `Census Tract` = GEOID,
    `Poverty % 2012`      = pct_poverty_2012,
    `Poverty % 2017`      = pct_poverty_2017,
    `Poverty % 2022`      = pct_poverty_2022,
    `Δ Poverty (pp) 12–22`= d_pct_poverty_12_22,
    `Median income (2012)`= median_incomeE_2012,
    `Median income (2017)`= median_incomeE_2017,
    `Median income (2022)`= median_incomeE_2022,
    `Δ income 12–17`      = d_income_12_17,
    `Δ income 17–22`      = d_income_17_22,
    `Δ income 12–22`      = d_income_12_22
  )

kable(tbl_income, digits = 1, caption = "Sharswood: Changes in Income & Poverty",
      format.args = list(big.mark = ","))

Step 2: Facet Maps by Period

# Convert from Wide to Long
long_panel <- compare_sf |>
  pivot_longer(
    cols = matches("_(2012|2017|2022)$"),
    names_to = c("metric","period"),
    names_pattern = "(.+)_([0-9]{4})$",
    values_to = "value"
  ) 

long_panel_pts <- long_panel |> st_point_on_surface()

# Facet Helper with Boxed Labels
facet_metric <- function(metric_name, title, fill_lab,
                         fill_lab_fmt = NULL, text_fmt = NULL,
                         text_size = 3, box_fill = alpha("white", 0.60)) {

  dat <- long_panel     |> filter(metric == metric_name)
  lab <- long_panel_pts |> filter(metric == metric_name)

  if (is.null(text_fmt)) text_fmt <- label_number(accuracy = 1)
  lab <- lab |> mutate(lbl = text_fmt(value))

  ggplot() +
    geom_sf(data  = dat, aes(fill = value), color = "grey25", linewidth = 0.25) +
    geom_sf_label(data  = lab, aes(label = lbl), size = text_size, 
                  label.size = 0, label.padding = unit(1.2, "pt"),    
                  fill = box_fill, color   = "black", label.r = unit(1.5, "pt"),
                  check_overlap = TRUE, na.rm   = TRUE, show.legend = FALSE) +
    facet_wrap(~ period, nrow = 1) +
    labs(
      title = title, 
      subtitle = "Panels: 2008-2012, 2013-2017, 2018-2022",
      fill = fill_lab,
      caption = "Data Source: ACS 5-year estimates"
    ) +
    theme_void() +
    theme(legend.position = "right") +
    {
      if (is.null(fill_lab_fmt)) {
        scale_fill_viridis_c()
      } else {
        scale_fill_viridis_c(labels = fill_lab_fmt)
      }
    }
}

# Format Data Labels
pct_fmt <- label_number(accuracy = 0.1, suffix = "%")  # your % values are 0–100
dol_fmt <- label_dollar(accuracy = 1)

# Facet Maps
facet_metric("pct_poverty",    "Sharswood: % in Poverty", "%", 
             fill_lab_fmt = pct_fmt, text_fmt = pct_fmt)
facet_metric("pct_white",      "Sharswood: % White (race alone)", "%", 
             fill_lab_fmt = pct_fmt, text_fmt = pct_fmt)
facet_metric("pct_black",      "Sharswood: % Black (race alone)", "%", 
             fill_lab_fmt = pct_fmt, text_fmt = pct_fmt)
facet_metric("pct_hispanic",   "Sharswood: % Hispanic (any race)", "%", 
             fill_lab_fmt = pct_fmt, text_fmt = pct_fmt)
facet_metric("pct_child",      "Sharswood: % Children (5–19)", "%", 
             fill_lab_fmt = pct_fmt, text_fmt = pct_fmt)
facet_metric("pct_elderly",    "Sharswood: % Elderly (65+)", "%", 
             fill_lab_fmt = pct_fmt, text_fmt = pct_fmt)
facet_metric("median_incomeE", "Sharswood: Median Household Income", "$", 
             fill_lab_fmt = dol_fmt, text_fmt = dol_fmt)

Step 3: Change Maps

# Aesthetic Formatting
period_lab <- c("2012" = "2008–12", "2017" = "2013–17", "2022" = "2018–22")
period_pal <- c("2008–12" = "#8DA0CB", "2013–17" = "#66C2A5", "2018–22" = "#FC8D62")

# Demographic Change
bars <- long_panel |>
  filter(metric %in% c("pct_child","pct_elderly")) |>
  mutate(metric = recode(metric, pct_child = "% Children (5–19)", pct_elderly = "% Elderly (65+)"),
         period = recode(period, !!!period_lab),
         tract_name = paste("Tract", tract_name))

ggplot(bars, aes(x = period, y = value, fill = period)) +
  geom_col(width = 0.7) +
  facet_grid(metric ~ tract_name, scales = "free_y") +
  scale_fill_manual(values = period_pal, guide = "none") +
  scale_y_continuous(labels = label_number(accuracy = 0.1)) +
  labs(title = "Before–After Comparison by Tract and Period",
       subtitle = "Data Source: ACS 5-Year Estimates",
       x = NULL, y = "Percent") +
  theme_minimal() + theme(legend.position = "none")

# Plot Median Income
bars_income <- long_panel |>
  filter(metric == "median_incomeE") |>
  mutate(period = recode(period, !!!period_lab),
         tract_name = paste("Tract", tract_name))

ggplot(bars_income, aes(x = period, y = value, fill = period)) +
  geom_col(width = 0.7) +
  facet_grid(~ tract_name, scales = "free_y") +
  scale_fill_manual(values = period_pal, guide = "none") +
  scale_y_continuous(labels = label_dollar(accuracy = 1)) +
  labs(
    title = "Before–After: Median Household Income by Tract",
    subtitle = "Data Source: ACS 5-Year Estimates",
    x = NULL, y = "Dollars"
  ) +
  theme_minimal()

# Plot Poverty Rate
bars_poverty <- long_panel |>
  filter(metric == "pct_poverty") |>
  mutate(period = recode(period, !!!period_lab),
         tract_name = paste("Tract", tract_name))

ggplot(bars_poverty, aes(x = period, y = value, fill = period)) +
  geom_col(width = 0.7) +
  facet_grid(~ tract_name, scales = "free_y") +
  scale_fill_manual(values = period_pal, guide = "none") +
  scale_y_continuous(labels = label_number(accuracy = 0.1, suffix = "%")) +
  labs(
    title = "Before–After: Poverty Rate by Tract",
    x = NULL, y = "Percent"
  ) +
  theme_minimal()

Step 4: Composite Maps

Multi-metric × Period Grid

grid_metrics <- c("pct_poverty","pct_child","pct_elderly","pct_black","pct_white","pct_hispanic")

metric_labels <- c("Poverty %","Children %","Elderly %",
                   "Black %","White %","Hispanic %")

long_subset <- long_panel |>
  filter(metric %in% grid_metrics) |>
  mutate(
    metric = factor(metric, levels = grid_metrics, labels = metric_labels),
    period = factor(period, 
                    levels = c("2012","2017","2022"),
                    labels  = c("2008–12","2013–17","2018–22"))
  )

long_subset_pts <- long_panel_pts |>
  filter(metric %in% grid_metrics) |>
  mutate(
    metric = factor(metric, levels = grid_metrics, labels = metric_labels),
    period = factor(period, 
                    levels = c("2012","2017","2022"),
                    labels  = c("2008–12","2013–17","2018–22")),
    lbl = label_number(accuracy = 0.1, suffix = "%")(value)  
  )

ggplot() +
  geom_sf(data = long_subset, aes(fill = value), 
          color = "grey25", linewidth = 0.25) +
  geom_sf_label(data = long_subset_pts, aes(label = lbl), 
                size = 3, label.size = 0,
                label.padding = unit(1, "pt"), fill = alpha("white", 0.60),
                color = "black", show.legend = FALSE) +
  facet_grid(metric ~ period) +
  scale_fill_distiller(palette = "Spectral", breaks = c(0, 25, 50, 75, 100),
                       limits = c(0, 100), direction = 1, 
                       labels = label_number(accuracy = 1, suffix = "%")) +

  guides(fill = guide_colorbar(title.position = "top", ticks = TRUE, 
                               barheight = unit(80, "pt"),
                               bardwidth = unit(5, "pt"))) +
  labs(title = "Sharswood: ACS 5-Yr Estimates by Census Tract",
       subtitle = "Tract 138 is the left polygon and Tract 139 is the right polygon") +
  theme_void() + 
  theme(legend.position = "right",
        plot.title = element_text(face = "bold", size = 12))

Bivariate change map

library(forcats)

bivar <- compare_sf %>%
  st_drop_geometry() %>%
  transmute(GEOID,
            d_pov = d_pct_poverty_12_22,
            d_inc = d_income_12_22) %>%
  mutate(
    c_pov = cut_number(d_pov, n = 3, labels = c("low Δpov","mid Δpov","high Δpov")),
    c_inc = cut_number(d_inc, n = 3, labels = c("low Δinc","mid Δinc","high Δinc")),
    bivar_class = interaction(fct_rev(c_pov), c_inc, sep = " / ", drop = TRUE)
  )

# simple 3x3 palette
pal <- c(
  "high Δpov / low Δinc"  = "#3F2949","high Δpov / mid Δinc" = "#435786","high Δpov / high Δinc" = "#4885C1",
  "mid Δpov / low Δinc"   = "#77324C","mid Δpov / mid Δinc"  = "#806A8A","mid Δpov / high Δinc"  = "#89A1C8",
  "low Δpov / low Δinc"   = "#AE3A4E","low Δpov / mid Δinc"  = "#BC7C8F","low Δpov / high Δinc"  = "#CABED0"
)

compare_bivar <- compare_sf %>% left_join(bivar, by = "GEOID")

ggplot(compare_bivar) +
  geom_sf(aes(fill = bivar_class), color = "grey25", linewidth = 0.25) +
  scale_fill_manual(values = pal, na.value = "grey85", name = "Δ Poverty vs Δ Income\n(2012→2022)") +
  labs(title = "Bivariate Change Map: Poverty (pct-pts) vs Income ($)",
       subtitle = "Data Source: ACS 5-Year Estimates") +
  theme_void()