# Spatial join tracts to counties
library(dplyr)
library(sf)
library(rlang)
pop_col <- "total_pop"
# 如果 vuln_pts 还没有 total_pop 这一列(只有 vuln_tracts 有),先合并
if (!pop_col %in% names(vuln_pts)) {
vuln_pts <- vuln_pts |>
left_join(
st_drop_geometry(vuln_tracts[, c("geoid", pop_col)]),
by = "geoid"
)
}
# 创建“脆弱人口估计”:只有 vulnerable==TRUE 的 tract 才计入 total_pop
vuln_pts <- vuln_pts |>
mutate(vuln_pop_est = ifelse(vulnerable, total_pop, 0))
# 如果 vuln_pts 不含人口列,则从 vuln_tracts 合并
if (!pop_col %in% names(vuln_pts) && !is.na(pop_col)) {
join_cols <- intersect(names(vuln_tracts), c(tract_id, pop_col, "vulnerable"))
vuln_pts <- vuln_pts |>
left_join(st_drop_geometry(vuln_tracts[, join_cols]), by = setNames(tract_id, tract_id))
}
# 创建脆弱人口估计(仅在 vulnerable == TRUE 的地块计入人口,否则为 0)
vuln_pts <- vuln_pts |>
mutate(
vuln_pop_est = dplyr::case_when(
!is.na(pop_col) ~ ifelse(vulnerable, as.numeric(.data[[pop_col]]), 0),
TRUE ~ NA_real_
)
)
counties <- get_acs(
geography = "county",
state = "PA",
variables = "B01003_001", # 总人口,只是用来取 geometry
geometry = TRUE,
year = 2023
)
# --- 1) 县级空间连接 ---
# 假定你已有 counties (sf) 图层;确保 CRS 一致
if (st_crs(vuln_pts) != st_crs(counties)) {
counties <- st_transform(counties, st_crs(vuln_pts))
}
# 抽取常见县字段,做一个健壮的 county_id 与 county_name
county_id_candidates <- c("GEOID","geoid","GEOID10","GEOID20","COUNTYFP","FIPS")
county_name_candidates <- c("NAME","name","NAMELSAD","County","county")
county_id <- intersect(county_id_candidates, names(counties))
county_name <- intersect(county_name_candidates, names(counties))
county_keep <- unique(c(head(county_id,1), head(county_name,1)))
county_keep <- county_keep[!is.na(county_keep)]
vuln_pts_c <- st_join(
vuln_pts,
counties[, county_keep],
left = TRUE,
join = st_within # 质心→县,多面边界更稳
)
# 标准化县识别列名
if (length(county_id) == 0) stop("counties 缺少可用的 GEOID/FIPS 字段。")
if (length(county_name) == 0) {
vuln_pts_c <- vuln_pts_c |>
mutate(.county_name = NA_character_)
} else {
vuln_pts_c <- vuln_pts_c |>
mutate(.county_name = .data[[county_name[1]]])
}
vuln_pts_c <- vuln_pts_c |>
mutate(.county_id = .data[[county_id[1]]])
# Aggregate statistics by county
# --- 2) 县级聚合统计 ---
county_stats <- vuln_pts_c |>
st_drop_geometry() |>
group_by(.county_id, .county_name) |>
summarise(
n_vuln_tracts = sum(vulnerable, na.rm = TRUE),
n_underserved_tracts = sum(underserved, na.rm = TRUE),
pct_underserved_vuln = ifelse(n_vuln_tracts > 0, n_underserved_tracts / n_vuln_tracts, NA_real_),
avg_dist_mi_vuln = suppressWarnings(mean(dist_mi[vulnerable], na.rm = TRUE)),
vuln_pop_total = suppressWarnings(sum(vuln_pop_est, na.rm = TRUE)),
und_vuln_pop = suppressWarnings(sum(ifelse(underserved, vuln_pop_est, 0), na.rm = TRUE)),
.groups = "drop"
) |>
arrange(desc(pct_underserved_vuln), desc(und_vuln_pop))
# --- 3) 回答两个问题(输出前 5 名) ---
top5_pct_underserved <- county_stats |>
arrange(desc(pct_underserved_vuln)) |>
slice_head(n = 5)
top5_und_vuln_pop <- county_stats |>
arrange(desc(und_vuln_pop)) |>
slice_head(n = 5)
list(
top5_highest_pct_underserved = top5_pct_underserved,
top5_most_vulnerable_people_far = top5_und_vuln_pop
)