# Load required packages (hint: you need tidycensus, tidyverse, and knitr)
library(tidycensus)
library(tidyverse)
library(knitr)
# Set your Census API key
census_api_key("YOUR_CENSUS_API_KEY") # replace with your own key; do not publish it in a public portfolio
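# Tip: census_api_key("YOUR_CENSUS_API_KEY", install = TRUE) saves the key to
# your .Renviron once, so it never needs to appear in the rendered document.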
# Choose your state for analysis - assign it to a variable called my_state
my_state <- "Pennsylvania"
Assignment 1: Census Data Quality for Policy Decisions
Evaluating Data Reliability for Algorithmic Decision-Making
Assignment Overview
Scenario
You are a data analyst for the Pennsylvania Department of Human Services. The department is considering implementing an algorithmic system to identify communities that should receive priority for social service funding and outreach programs. Your supervisor has asked you to evaluate the quality and reliability of available census data to inform this decision.
Drawing on our Week 2 discussion of algorithmic bias, you need to assess not just what the data shows, but how reliable it is and what communities might be affected by data quality issues.
Learning Objectives
- Apply dplyr functions to real census data for policy analysis
- Evaluate data quality using margins of error
- Connect technical analysis to algorithmic decision-making
- Identify potential equity implications of data reliability issues
- Create professional documentation for policy stakeholders
Submission Instructions
Submit by posting your updated portfolio link on Canvas. Your assignment should be accessible at your-portfolio-url/assignments/assignment_1/
Make sure to update your _quarto.yml navigation to include this assignment under an "Assignments" menu.
Part 1: Portfolio Integration
Create this assignment in your portfolio repository under an assignments/assignment_1/ folder structure. Update your navigation menu to include:
- text: Assignments
  menu:
    - href: assignments/assignment_1/your_file_name.qmd
      text: "Assignment 1: Census Data Exploration"
If an entry contains a special character such as a colon or comma (e.g., "Assignment 1: Census Data Exploration"), you need to wrap it in double quotes so that Quarto can parse it as text.
Setup
State Selection: I have chosen Pennsylvania for this analysis because: [Brief explanation of why you chose this state]
Part 2: County-Level Resource Assessment
2.1 Data Retrieval
Your Task: Use get_acs() to retrieve county-level data for your chosen state.
Requirements:
- Geography: county level
- Variables: median household income (B19013_001) and total population (B01003_001)
- Year: 2022
- Survey: acs5
- Output format: wide
Hint: Remember to give your variables descriptive names using the variables = c(name = "code") syntax.
# Write your get_acs() code here
state_pop <- get_acs(
  geography = "state",
  variables = "B01003_001", # Total population
  year = 2022,
  survey = "acs5"
)

median_income <- get_acs(
  geography = "state",
  variables = "B19013_001", # Median household income
  year = 2022,
  survey = "acs5"
)
glimpse(state_pop)
Rows: 52
Columns: 5
$ GEOID <chr> "01", "02", "04", "05", "06", "08", "09", "10", "11", "12", "…
$ NAME <chr> "Alabama", "Alaska", "Arizona", "Arkansas", "California", "Co…
$ variable <chr> "B01003_001", "B01003_001", "B01003_001", "B01003_001", "B010…
$ estimate <dbl> 5028092, 734821, 7172282, 3018669, 39356104, 5770790, 3611317…
$ moe <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
pa_data <- get_acs(
  geography = "county",
  variables = c(
    total_pop = "B01003_001",
    median_income = "B19013_001"
  ),
  state = "PA",
  year = 2022,
  output = "wide" # Makes analysis easier
)
head(pa_data)
# A tibble: 6 × 6
GEOID NAME total_popE total_popM median_incomeE median_incomeM
<chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 42001 Adams County, Penns… 104604 NA 78975 3334
2 42003 Allegheny County, P… 1245310 NA 72537 869
3 42005 Armstrong County, P… 65538 NA 61011 2202
4 42007 Beaver County, Penn… 167629 NA 67194 1531
5 42009 Bedford County, Pen… 47613 NA 58337 2606
6 42011 Berks County, Penns… 428483 NA 74617 1191
# Clean the county names to remove state name and "County"
# Hint: use mutate() with str_remove()
pa_clean <- pa_data %>%
  mutate(
    # Remove state name from county names
    county_name = str_remove(NAME, ", Pennsylvania"),
    # Remove "County" word
    county_name = str_remove(county_name, " County")
  )

# Display the first few rows
head(pa_clean)
# A tibble: 6 × 7
GEOID NAME total_popE total_popM median_incomeE median_incomeM county_name
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
1 42001 Adams C… 104604 NA 78975 3334 Adams
2 42003 Alleghe… 1245310 NA 72537 869 Allegheny
3 42005 Armstro… 65538 NA 61011 2202 Armstrong
4 42007 Beaver … 167629 NA 67194 1531 Beaver
5 42009 Bedford… 47613 NA 58337 2606 Bedford
6 42011 Berks C… 428483 NA 74617 1191 Berks
2.2 Data Quality Assessment
Your Task: Calculate margin of error percentages and create reliability categories.
Requirements:
- Calculate MOE percentage: (margin of error / estimate) * 100
- Create reliability categories:
  - High Confidence: MOE < 5%
  - Moderate Confidence: MOE 5-10%
  - Low Confidence: MOE > 10%
- Create a flag for unreliable estimates (MOE > 10%)

Hint: Use mutate() with case_when() for the categories.
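As a quick sanity check of the formula, here is a one-off calculation using Forest County's published values (they appear in the table in Section 2.3):

# (margin of error / estimate) * 100
round(4612 / 46188 * 100, 2) # Forest County: 9.99, just under the 10% cutoff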
# First, verify the cleaned county names
select(pa_clean, NAME, county_name)
# A tibble: 67 × 2
NAME county_name
<chr> <chr>
1 Adams County, Pennsylvania Adams
2 Allegheny County, Pennsylvania Allegheny
3 Armstrong County, Pennsylvania Armstrong
4 Beaver County, Pennsylvania Beaver
5 Bedford County, Pennsylvania Bedford
6 Berks County, Pennsylvania Berks
7 Blair County, Pennsylvania Blair
8 Bradford County, Pennsylvania Bradford
9 Bucks County, Pennsylvania Bucks
10 Butler County, Pennsylvania Butler
# ℹ 57 more rows
# Calculate MOE percentage and reliability categories using mutate()
pa_reliability <- pa_clean %>%
  mutate(
    # Calculate MOE as percentage of estimate
    moe_percentage = round((median_incomeM / median_incomeE) * 100, 2),
    # Create reliability categories
    reliability = case_when(
      moe_percentage < 5 ~ "High Confidence",
      moe_percentage >= 5 & moe_percentage <= 10 ~ "Moderate",
      moe_percentage > 10 ~ "Low Confidence"
    )
  )
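The requirements also call for an explicit flag for unreliable estimates (MOE > 10%), which the pipeline above does not yet create. A minimal addition, reusing pa_reliability:

pa_reliability <- pa_reliability %>%
  mutate(unreliable_flag = moe_percentage > 10) # TRUE when the estimate is unreliable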
# Create a summary showing count of counties in each reliability category
# Hint: use count() and mutate() to add percentages
pa_reliability_summary <- pa_reliability %>%
  count(reliability) %>%
  mutate(percent = n / sum(n) * 100)

pa_reliability_summary
# A tibble: 2 × 3
reliability n percent
<chr> <int> <dbl>
1 High Confidence 57 85.1
2 Moderate 10 14.9
2.3 High Uncertainty Counties
Your Task: Identify the 5 counties with the highest MOE percentages.
Requirements:
- Sort by MOE percentage (highest first)
- Select the top 5 counties
- Display: county name, median income, margin of error, MOE percentage, reliability category
- Format as a professional table using kable()

Hint: Use the arrange(), slice(), and select() functions.
top5_uncertain <- pa_reliability %>%
  arrange(desc(moe_percentage)) %>%
  slice(1:5) %>%
  select(
    County = county_name,
    `Median Income` = median_incomeE,
    `Margin of Error` = median_incomeM,
    `MOE (%)` = moe_percentage,
    Reliability = reliability
  )

kable(
  top5_uncertain,
  digits = 2,
  caption = "Top 5 Pennsylvania Counties with Highest Income Estimate Uncertainty"
)
County | Median Income | Margin of Error | MOE (%) | Reliability |
---|---|---|---|---|
Forest | 46188 | 4612 | 9.99 | Moderate |
Sullivan | 62910 | 5821 | 9.25 | Moderate |
Union | 64914 | 4753 | 7.32 | Moderate |
Montour | 72626 | 5146 | 7.09 | Moderate |
Elk | 61672 | 4091 | 6.63 | Moderate |
Data Quality Commentary:
[Write 2-3 sentences explaining what these results mean for algorithmic decision-making. Consider: Which counties might be poorly served by algorithms that rely on this income data? What factors might contribute to higher uncertainty?]
Part 3: Neighborhood-Level Analysis
3.1 Focus Area Selection
Your Task: Select 2-3 counties from your reliability analysis for detailed tract-level study.
Strategy: Choose counties that represent different reliability levels (e.g., 1 high confidence, 1 moderate, 1 low confidence) to compare how data quality varies.
# Use filter() to select 2-3 counties from your county_reliability data
# Store the selected counties in a variable called selected_counties
selected_counties <- pa_reliability %>%
  filter(county_name %in% c("Allegheny", "Elk")) %>%
  select(county_name, median_incomeE, reliability, moe_percentage)
# Display the selected counties with their key characteristics
# Show: county name, median income, MOE percentage, reliability category
selected_counties
# A tibble: 2 × 4
county_name median_incomeE reliability moe_percentage
<chr> <dbl> <chr> <dbl>
1 Allegheny 72537 High Confidence 1.2
2 Elk 61672 Moderate 6.63
Comment on the output: [write something :)]
3.2 Tract-Level Demographics
Your Task: Get demographic data for census tracts in your selected counties.
Requirements:
- Geography: tract level
- Variables: white alone (B03002_003), Black/African American (B03002_004), Hispanic/Latino (B03002_012), total population (B03002_001)
- Use the same state and year as before
- Output format: wide
- Challenge: You'll need county codes, not names. Look at the GEOID patterns in your county data for hints (see the sketch below).
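On the GEOID challenge: a county GEOID is five digits, the two-digit state FIPS ("42" for Pennsylvania) followed by the three-digit county code, so the codes can be derived from the Part 2 data rather than typed by hand. A minimal sketch, assuming the pa_reliability table from above:

selected_county_codes <- pa_reliability %>%
  filter(county_name %in% c("Allegheny", "Elk")) %>%
  mutate(county_code = str_sub(GEOID, 3, 5)) %>% # drop the "42" state prefix
  pull(county_code)

selected_county_codes # expected: "003" "047"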
# Define your race/ethnicity variables with descriptive names
race_vars <- c(
  total_pop = "B03002_001",
  white = "B03002_003",
  black = "B03002_004",
  hispanic = "B03002_012"
)

# Use get_acs() to retrieve tract-level data
# Hint: You may need to specify county codes in the county parameter
tract_data <- get_acs(
  geography = "tract",
  state = "PA",
  county = c("003", "047"), # Selected county codes (Allegheny, Elk)
  variables = race_vars,
  year = 2022,
  survey = "acs5",
  output = "wide"
)

# Calculate percentage of each group using mutate()
# Create percentages for white, Black, and Hispanic populations
tract_data <- tract_data %>%
  mutate(
    pct_white = round((whiteE / total_popE) * 100, 2),
    pct_black = round((blackE / total_popE) * 100, 2),
    pct_hispanic = round((hispanicE / total_popE) * 100, 2)
  )

# Add readable tract and county name columns using str_extract() or similar
# Note: tract NAMEs are semicolon-delimited, e.g.
# "Census Tract 1203; Allegheny County; Pennsylvania",
# so extract the county segment rather than removing a comma-delimited suffix
tract_data <- tract_data %>%
  mutate(
    tract_name = str_extract(NAME, "Tract [0-9\\.]+"),
    county_name = str_trim(str_extract(NAME, "[^;]+ County"))
  )

head(tract_data)
# A tibble: 6 × 15
GEOID NAME total_popE total_popM whiteE whiteM blackE blackM hispanicE
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 42003010301 Censu… 2028 61 755 108 1086 125 60
2 42003010302 Censu… 4631 198 3283 254 678 174 269
3 42003020100 Censu… 4310 368 2915 319 541 142 192
4 42003020300 Censu… 1471 206 1170 201 15 23 44
5 42003030500 Censu… 3044 535 724 179 1821 451 135
6 42003040200 Censu… 1843 235 947 234 493 140 53
# ℹ 6 more variables: hispanicM <dbl>, pct_white <dbl>, pct_black <dbl>,
# pct_hispanic <dbl>, tract_name <chr>, county_name <chr>
3.3 Demographic Analysis
Your Task: Analyze the demographic patterns in your selected areas.
# Find the tract with the highest percentage of Hispanic/Latino residents
# Hint: use arrange() and slice() to get the top tract
top_hispanic_tract <- tract_data %>%
  arrange(desc(pct_hispanic)) %>%
  slice(1) %>%
  select(county_name, tract_name, pct_hispanic)

top_hispanic_tract
# A tibble: 1 × 3
  county_name      tract_name pct_hispanic
  <chr>            <chr>             <dbl>
1 Allegheny County Tract 1203         16.4
# Calculate average demographics by county using group_by() and summarize()
county_summary <- tract_data %>%
  filter(substr(GEOID, 1, 5) %in% c("42003", "42047")) %>%
  group_by(county_geoid = substr(GEOID, 1, 5)) %>%
  summarize(
    county_name = first(county_name),
    n_tracts = n(),
    avg_pct_white = round(mean(pct_white, na.rm = TRUE), 2),
    avg_pct_black = round(mean(pct_black, na.rm = TRUE), 2),
    avg_pct_hispanic = round(mean(pct_hispanic, na.rm = TRUE), 2),
    .groups = "drop"
  )
# Show: number of tracts, average percentage for each racial/ethnic group
# Create a nicely formatted table of your results using kable()
kable(
  county_summary,
  caption = "Average Tract-Level Demographics by Selected County",
  digits = 2
)
county_geoid | county_name | n_tracts | avg_pct_white | avg_pct_black | avg_pct_hispanic |
---|---|---|---|---|---|
42003 | Allegheny County | 394 | 74.45 | 15.42 | 2.42 |
42047 | Elk County | 9 | 95.91 | 0.47 | 0.73 |
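A design note on these figures: they are unweighted tract averages, so a 500-person tract counts as much as a 5,000-person one. If a population-weighted picture is preferred, weighted.mean() with total_popE as the weight is a drop-in alternative; a sketch under that assumption:

county_summary_weighted <- tract_data %>%
  group_by(county_geoid = substr(GEOID, 1, 5)) %>%
  summarize(
    wtd_pct_white = round(weighted.mean(pct_white, w = total_popE, na.rm = TRUE), 2),
    wtd_pct_black = round(weighted.mean(pct_black, w = total_popE, na.rm = TRUE), 2),
    wtd_pct_hispanic = round(weighted.mean(pct_hispanic, w = total_popE, na.rm = TRUE), 2),
    .groups = "drop"
  )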
Part 4: Comprehensive Data Quality Evaluation
4.1 MOE Analysis for Demographic Variables
Your Task: Examine margins of error for demographic variables to see if some communities have less reliable data.
Requirements:
- Calculate MOE percentages for each demographic variable
- Flag tracts where any demographic variable has MOE > 15%
- Create summary statistics
# Calculate MOE percentages for white, Black, and Hispanic variables
# Hint: use the same formula as before (margin/estimate * 100)
tract_data <- tract_data %>%
  mutate(
    moe_pct_white = ifelse(whiteE > 0, round((whiteM / whiteE) * 100, 2), NA),
    moe_pct_black = ifelse(blackE > 0, round((blackM / blackE) * 100, 2), NA),
    moe_pct_hispanic = ifelse(hispanicE > 0, round((hispanicM / hispanicE) * 100, 2), NA)
  )

# Create a flag for tracts with high MOE on any demographic variable
# Use logical operators (| for OR) in an ifelse() statement
tract_data <- tract_data %>%
  mutate(
    high_moe_flag = ifelse(
      moe_pct_white > 15 | moe_pct_black > 15 | moe_pct_hispanic > 15,
      1, 0
    )
  )
# Create summary statistics showing how many tracts have data quality issues
moe_summary <- tract_data %>%
  mutate(county_geoid = substr(GEOID, 1, 5)) %>%
  filter(county_geoid %in% c("42003", "42047")) %>%
  group_by(county_geoid) %>%
  summarize(
    n_tracts = n(),
    n_high_moe = sum(high_moe_flag, na.rm = TRUE),
    pct_high_moe = round((n_high_moe / n_tracts) * 100, 2),
    avg_moe_white = round(mean(moe_pct_white, na.rm = TRUE), 2),
    avg_moe_black = round(mean(moe_pct_black, na.rm = TRUE), 2),
    avg_moe_hispanic = round(mean(moe_pct_hispanic, na.rm = TRUE), 2),
    .groups = "drop"
  ) %>%
  mutate(
    county_name = case_when(
      county_geoid == "42003" ~ "Allegheny County",
      county_geoid == "42047" ~ "Elk County"
    )
  ) %>%
  select(county_name, everything())

# Nicely formatted summary table
kable(
  moe_summary,
  caption = "Summary of MOE Reliability Issues by County",
  digits = 2
)
county_name | county_geoid | n_tracts | n_high_moe | pct_high_moe | avg_moe_white | avg_moe_black | avg_moe_hispanic |
---|---|---|---|---|---|---|---|
Allegheny County | 42003 | 394 | 385 | 97.72 | 19.11 | 80.39 | 108.18 |
Elk County | 42047 | 9 | 7 | 77.78 | 8.07 | 152.39 | 96.69 |
4.2 Pattern Analysis
Your Task: Investigate whether data quality problems are randomly distributed or concentrated in certain types of communities.
# Group tracts by whether they have high MOE issues
# Calculate average characteristics for each group:
# - population size, demographic percentages
pattern_summary <- tract_data %>%
  filter(substr(GEOID, 1, 5) %in% c("42003", "42047")) %>%
  group_by(high_moe_flag) %>%
  summarize(
    n_tracts = n(),
    avg_total_pop = round(mean(total_popE, na.rm = TRUE), 0),
    avg_pct_white = round(mean(pct_white, na.rm = TRUE), 2),
    avg_pct_black = round(mean(pct_black, na.rm = TRUE), 2),
    avg_pct_hispanic = round(mean(pct_hispanic, na.rm = TRUE), 2),
    .groups = "drop"
  ) %>%
  mutate(
    moe_group = case_when(
      high_moe_flag == 1 ~ "High MOE Tracts",
      high_moe_flag == 0 ~ "Low MOE Tracts",
      is.na(high_moe_flag) ~ "Low MOE Tracts" # treat NA as Low MOE
    )
  ) %>%
  select(moe_group, everything(), -high_moe_flag)

# Use group_by() and summarize() to create this comparison
# Create a professional table showing the patterns
kable(
  pattern_summary,
  caption = "Comparison of Demographic Patterns by Data Quality Group",
  digits = 2
)
moe_group | n_tracts | avg_total_pop | avg_pct_white | avg_pct_black | avg_pct_hispanic |
---|---|---|---|---|---|
High MOE Tracts | 392 | 3240 | 74.78 | 15.19 | 2.4 |
Low MOE Tracts | 11 | 567 | 96.18 | 0.00 | 0.0 |
Pattern Analysis: [Describe any patterns you observe. Do certain types of communities have less reliable data? What might explain this?]
Part 5: Policy Recommendations
5.1 Analysis Integration and Professional Summary
Your Task: Write an executive summary that integrates findings from all four analyses.
Executive Summary Requirements:
1. Overall Pattern Identification: What are the systematic patterns across all your analyses?
2. Equity Assessment: Which communities face the greatest risk of algorithmic bias based on your findings?
3. Root Cause Analysis: What underlying factors drive both data quality issues and bias risk?
4. Strategic Recommendations: What should the Department implement to address these systematic issues?
Executive Summary:
[Your integrated 4-paragraph summary here]
5.2 Specific Recommendations
Your Task: Create a decision framework for algorithm implementation.
# Create a summary table using your county reliability data
# Include: county name, median income, MOE percentage, reliability category
recommendations_table <- pa_reliability %>%
  # Add algorithm recommendation based on reliability
  mutate(
    algorithm_recommendation = case_when(
      reliability == "High Confidence" ~ "Safe for algorithmic decisions",
      reliability == "Moderate" ~ "Use with caution - monitor outcomes",
      reliability == "Low Confidence" ~ "Requires manual review or additional data",
      TRUE ~ "Unknown"
    )
  ) %>%
  # Keep only the desired columns
  select(county_name, median_incomeE, moe_percentage, reliability, algorithm_recommendation)
# Add a new column with algorithm recommendations using case_when():
# - High Confidence: "Safe for algorithmic decisions"
# - Moderate Confidence: "Use with caution - monitor outcomes"
# - Low Confidence: "Requires manual review or additional data"
# Format as a professional table with kable()
kable(
  recommendations_table,
  caption = "Algorithm Implementation Recommendations by County",
  digits = 2
)
county_name | median_incomeE | moe_percentage | reliability | algorithm_recommendation |
---|---|---|---|---|
Adams | 78975 | 4.22 | High Confidence | Safe for algorithmic decisions |
Allegheny | 72537 | 1.20 | High Confidence | Safe for algorithmic decisions |
Armstrong | 61011 | 3.61 | High Confidence | Safe for algorithmic decisions |
Beaver | 67194 | 2.28 | High Confidence | Safe for algorithmic decisions |
Bedford | 58337 | 4.47 | High Confidence | Safe for algorithmic decisions |
Berks | 74617 | 1.60 | High Confidence | Safe for algorithmic decisions |
Blair | 59386 | 3.47 | High Confidence | Safe for algorithmic decisions |
Bradford | 60650 | 3.57 | High Confidence | Safe for algorithmic decisions |
Bucks | 107826 | 1.41 | High Confidence | Safe for algorithmic decisions |
Butler | 82932 | 2.61 | High Confidence | Safe for algorithmic decisions |
Cambria | 54221 | 3.34 | High Confidence | Safe for algorithmic decisions |
Cameron | 46186 | 5.64 | Moderate | Use with caution - monitor outcomes |
Carbon | 64538 | 5.31 | Moderate | Use with caution - monitor outcomes |
Centre | 70087 | 2.77 | High Confidence | Safe for algorithmic decisions |
Chester | 118574 | 1.70 | High Confidence | Safe for algorithmic decisions |
Clarion | 58690 | 4.37 | High Confidence | Safe for algorithmic decisions |
Clearfield | 56982 | 2.79 | High Confidence | Safe for algorithmic decisions |
Clinton | 59011 | 3.86 | High Confidence | Safe for algorithmic decisions |
Columbia | 59457 | 3.76 | High Confidence | Safe for algorithmic decisions |
Crawford | 58734 | 3.91 | High Confidence | Safe for algorithmic decisions |
Cumberland | 82849 | 2.20 | High Confidence | Safe for algorithmic decisions |
Dauphin | 71046 | 2.27 | High Confidence | Safe for algorithmic decisions |
Delaware | 86390 | 1.53 | High Confidence | Safe for algorithmic decisions |
Elk | 61672 | 6.63 | Moderate | Use with caution - monitor outcomes |
Erie | 59396 | 2.55 | High Confidence | Safe for algorithmic decisions |
Fayette | 55579 | 4.16 | High Confidence | Safe for algorithmic decisions |
Forest | 46188 | 9.99 | Moderate | Use with caution - monitor outcomes |
Franklin | 71808 | 3.00 | High Confidence | Safe for algorithmic decisions |
Fulton | 63153 | 3.65 | High Confidence | Safe for algorithmic decisions |
Greene | 66283 | 6.41 | Moderate | Use with caution - monitor outcomes |
Huntingdon | 61300 | 4.72 | High Confidence | Safe for algorithmic decisions |
Indiana | 57170 | 4.65 | High Confidence | Safe for algorithmic decisions |
Jefferson | 56607 | 3.41 | High Confidence | Safe for algorithmic decisions |
Juniata | 61915 | 4.79 | High Confidence | Safe for algorithmic decisions |
Lackawanna | 63739 | 2.58 | High Confidence | Safe for algorithmic decisions |
Lancaster | 81458 | 1.79 | High Confidence | Safe for algorithmic decisions |
Lawrence | 57585 | 3.07 | High Confidence | Safe for algorithmic decisions |
Lebanon | 72532 | 2.69 | High Confidence | Safe for algorithmic decisions |
Lehigh | 74973 | 2.00 | High Confidence | Safe for algorithmic decisions |
Luzerne | 60836 | 2.35 | High Confidence | Safe for algorithmic decisions |
Lycoming | 63437 | 4.39 | High Confidence | Safe for algorithmic decisions |
McKean | 57861 | 4.75 | High Confidence | Safe for algorithmic decisions |
Mercer | 57353 | 3.63 | High Confidence | Safe for algorithmic decisions |
Mifflin | 58012 | 3.43 | High Confidence | Safe for algorithmic decisions |
Monroe | 80656 | 3.17 | High Confidence | Safe for algorithmic decisions |
Montgomery | 107441 | 1.27 | High Confidence | Safe for algorithmic decisions |
Montour | 72626 | 7.09 | Moderate | Use with caution - monitor outcomes |
Northampton | 82201 | 1.93 | High Confidence | Safe for algorithmic decisions |
Northumberland | 55952 | 2.67 | High Confidence | Safe for algorithmic decisions |
Perry | 76103 | 3.17 | High Confidence | Safe for algorithmic decisions |
Philadelphia | 57537 | 1.38 | High Confidence | Safe for algorithmic decisions |
Pike | 76416 | 4.90 | High Confidence | Safe for algorithmic decisions |
Potter | 56491 | 4.42 | High Confidence | Safe for algorithmic decisions |
Schuylkill | 63574 | 2.40 | High Confidence | Safe for algorithmic decisions |
Snyder | 65914 | 5.56 | Moderate | Use with caution - monitor outcomes |
Somerset | 57357 | 2.78 | High Confidence | Safe for algorithmic decisions |
Sullivan | 62910 | 9.25 | Moderate | Use with caution - monitor outcomes |
Susquehanna | 63968 | 3.14 | High Confidence | Safe for algorithmic decisions |
Tioga | 59707 | 3.23 | High Confidence | Safe for algorithmic decisions |
Union | 64914 | 7.32 | Moderate | Use with caution - monitor outcomes |
Venango | 59278 | 3.45 | High Confidence | Safe for algorithmic decisions |
Warren | 57925 | 5.19 | Moderate | Use with caution - monitor outcomes |
Washington | 74403 | 2.38 | High Confidence | Safe for algorithmic decisions |
Wayne | 59240 | 4.79 | High Confidence | Safe for algorithmic decisions |
Westmoreland | 69454 | 1.99 | High Confidence | Safe for algorithmic decisions |
Wyoming | 67968 | 3.85 | High Confidence | Safe for algorithmic decisions |
York | 79183 | 1.79 | High Confidence | Safe for algorithmic decisions |
Key Recommendations:
Your Task: Use your analysis results to provide specific guidance to the department.
Counties suitable for immediate algorithmic implementation: [List counties with high confidence data and explain why they’re appropriate]
Counties requiring additional oversight: [List counties with moderate confidence data and describe what kind of monitoring would be needed]
Counties needing alternative approaches: [List counties with low confidence data and suggest specific alternatives - manual review, additional surveys, etc.]
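If helpful, these three lists can be pulled directly from recommendations_table (Section 5.2) rather than transcribed by hand; a minimal sketch:

# Counties by recommendation tier, reusing recommendations_table
recommendations_table %>%
  filter(reliability == "High Confidence") %>%
  pull(county_name) # candidates for immediate implementation

recommendations_table %>%
  filter(reliability == "Moderate") %>%
  pull(county_name) # candidates for additional oversight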
Questions for Further Investigation
[List 2-3 questions that your analysis raised that you’d like to explore further in future assignments. Consider questions about spatial patterns, time trends, or other demographic factors.]
Technical Notes
Data Sources:
- U.S. Census Bureau, American Community Survey 2018-2022 5-Year Estimates
- Retrieved via tidycensus R package on [date]

Reproducibility:
- All analysis conducted in R version [your version]
- Census API key required for replication
- Complete code and documentation available at: [your portfolio URL]
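The bracketed version and date placeholders can be filled at render time rather than by hand; a small sketch:

R.version.string # e.g., "R version 4.3.2 (2023-10-31)" for the notes above
Sys.Date()       # the date the data were retrieved / the document was rendered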
Methodology Notes: [Describe any decisions you made about data processing, county selection, or analytical choices that might affect reproducibility]
Limitations: [Note any limitations in your analysis - sample size issues, geographic scope, temporal factors, etc.]
Submission Checklist
Before submitting your portfolio link on Canvas:
Remember: Submit your portfolio URL on Canvas, not the file itself. Your assignment should be accessible at your-portfolio-url/assignments/assignment_1/your_file_name.html