# Toolbox Tuesday, August 2025
# Contact Echo Zheng (zheng@scag.ca.gov) for questions or additional data resources 

# In this session, you will:
# (1) set up your working environment in R;
# (2) find and extract your census variables using functions in R;
# (3) get descriptive statistics for your variables;
# (4) visualize changes over time;
# (5) get annual data from 1-year ACS for all years available; and
# (6) clean and prepare data for your analysis

#-------------------------------------------------------------------------------
#####                         Getting started in R                         #####
#-------------------------------------------------------------------------------

# To get started, install the needed packages - only need to do it once
# Setting check: Tools—>Global Options—>Packages, and then uncheck “use secure download method for HTTP”
# Press shift+ctrl+c to comment/un-comment MULTIPLE lines
# Press ctrl+enter to run code, or press "Run" (upper right of this panel)

options(pkgType = "binary")             # One trick to make package installation quicker, works for most packages
#install.packages("tidyverse")          # A suite of packages designed for data wrangling

# Packages used in this session.
# dplyr, tidyr, purrr and ggplot2 are part of the "tidyverse" package;
# mapview is for interactive viewing of spatial data in R.
required_packages <- c(
  "dplyr", "tidyr", "purrr", "ggplot2",
  "tidycensus", "statar", "mapview", "sf", "leafsync", "scales"
)

# Install only the packages that are not available yet, so that re-running
# the script does not download and reinstall everything each time.
is_installed <- vapply(
  required_packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
missing_packages <- required_packages[!is_installed]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

# If you have any issue installing a package, try adding the argument dependencies = TRUE
# Example:
# install.packages("mapview", dependencies = TRUE)

# Alternative solution:
# 1. Find your package here: https://cran.r-project.org/web/packages/available_packages_by_name.html
# 2. Download its tar.gz file manually
# 3. In RStudio, choose the option "Package Archive File (.zip; .tar.gz)" when installing packages
# 4. Browse to the folder where the file was downloaded and click Install


# Load packages - do it each time you start your analysis
#library(tidyverse)
library(dplyr)
library(tidyr)
library(purrr)
library(ggplot2)
library(tidycensus)
library(statar)
library(mapview)
library(sf)
library(leafsync)
library(scales)

# Optional (Recommended): get and activate your Census API key
# visit https://api.census.gov/data/key_signup.html to request a key, then activate the key from the link in your email.
# Set your API key in R
# census_api_key("YOUR_CENSUS_API_KEY", install = TRUE, overwrite = TRUE)   # paste your own key; never share it in a script


# Check current working directory
getwd()

# Set your working directory.
# Edit `project_dir` so it points to a folder that exists on YOUR computer.
project_dir <- "C:/Users/zheng/OneDrive - Southern CA Association of Governments/Desktop/ToolboxTuesday2025"
if (!dir.exists(project_dir)) {
  # Fail early with an actionable message instead of setwd()'s generic error
  stop(
    "Folder not found: ", project_dir,
    "\nEdit `project_dir` to a folder on your computer before continuing.",
    call. = FALSE
  )
}
setwd(project_dir)

# Folder for exported files. showWarnings = FALSE keeps re-runs quiet when the
# folder already exists.
dir.create("output", showWarnings = FALSE)

#-------------------------------------------------------------------------------
#####         Objective 1a: Find your census variable: ACS example         #####
#-------------------------------------------------------------------------------
## Open the help page, then download the variable dictionaries for the
## 5-year ACS releases of 2023 and 2013
?load_variables
censusvars_2023 <- load_variables(year = 2023, dataset = "acs5")
censusvars_2013 <- load_variables(year = 2013, dataset = "acs5")

## "%>%" is called the pipe operator. It chains operations step by step.
## Keyboard shortcut: Ctrl + Shift + M (Windows); Cmd + Shift + M (Mac)

## List every distinct concept whose name mentions "transportation"
## (case-insensitive), printing all rows at full width
filter(
  censusvars_2023,
  grepl("transportation", concept, ignore.case = TRUE)
) %>%
  distinct(concept) %>%
  print(n = Inf, width = Inf)

## Browse the variables that belong to one specific concept
censusvars_2023 %>%
  filter(
    concept == "Sex of Workers by Means of Transportation to Work"
  ) %>%
  View()

#-------------------------------------------------------------------------------
#####         Objective 1b: Extract my ACS variables directly in R         #####
#-------------------------------------------------------------------------------
## Let's start with one variable
## Variables used for demonstration:
##   B08006_017  Workers who worked at home
##   B19013_001  Median Household Income
?get_acs()

## Tract-level estimates of B08006_017 for Orange County, CA (2023 5-year ACS).
## The estimate column is renamed to WFH and tagged with its survey period.
acsdata1 <- get_acs(
  geography = "tract",
  variables = "B08006_017",
  state = "CA",
  county = "Orange",
  year = 2023,
  survey = "acs5",
  geometry = TRUE # geometry = TRUE to enable mapping
) %>%
  rename(WFH = estimate) %>%
  mutate(period = "2019-2023")

## Try with a different time period:
## same variable and geography, taken from the 2013 5-year ACS
acsdata2 <- get_acs(
  geography = "tract",
  variables = "B08006_017",
  state = "CA",
  county = "Orange",
  year = 2013,
  survey = "acs5",
  geometry = TRUE # geometry = TRUE to enable mapping
) %>%
  rename(WFH = estimate) %>%
  mutate(period = "2009-2013")

## Try with a different variable:
## B19013_001 from the 2023 5-year ACS, stored in the Income column
acsdata3 <- get_acs(
  geography = "tract",
  variables = "B19013_001",
  state = "CA",
  county = "Orange",
  year = 2023,
  survey = "acs5",
  geometry = TRUE # geometry = TRUE to enable mapping
) %>%
  rename(Income = estimate) %>%
  mutate(period = "2019-2023")

## Save both work-from-home tract layers as shapefiles in the output folder.
## delete_dsn = TRUE removes an existing file of the same name before writing.
st_write(
  obj = acsdata1,
  dsn = "output/WFH_OCtracts_2023.shp",
  delete_dsn = TRUE
)
st_write(
  obj = acsdata2,
  dsn = "output/WFH_OCtracts_2013.shp",
  delete_dsn = TRUE
)

#-------------------------------------------------------------------------------
#####       Objective 2: Get quick descriptive statistics of the data      #####
#-------------------------------------------------------------------------------
## Summary statistics of the Income column (d = TRUE asks sum_up for details).
## The spatial object is converted to a plain data frame first.
sum_up(as.data.frame(acsdata3), Income, d = TRUE)

## Stack the two WFH periods so they can be summarized by period
acs_combined <- bind_rows(acsdata1, acsdata2)

as.data.frame(acs_combined) %>%
  group_by(period) %>%
  sum_up(WFH, d = TRUE)
#-------------------------------------------------------------------------------
#####               Objective 3: Visualize changes over time               #####
#-------------------------------------------------------------------------------
## Side-by-side maps can show changes over time and across space.
## Both WFH maps use the same breaks (0 to 2200 in steps of 550).
map1 <- mapview(
  acsdata1,
  zcol = "WFH",
  layer.name = "Workers WFH, 2019-2023",
  legend = TRUE,
  at = seq(from = 0, to = 2200, by = 550)
)
map1

map2 <- mapview(
  acsdata2,
  zcol = "WFH",
  layer.name = "Workers WFH, 2009-2013",
  legend = TRUE,
  at = seq(from = 0, to = 2200, by = 550)
)
map2

## Show the two periods next to each other in synchronized panels
sync(map1, map2)

## Map of the Income column, with breaks every 50,000 up to 250,000
map3 <- mapview(
  acsdata3,
  zcol = "Income",
  layer.name = "Median HH Income, 2019-2023",
  legend = TRUE,
  at = seq(from = 0, to = 250000, by = 50000)
)
map3

## Compare the 2019-2023 WFH map with the income map in synchronized panels
sync(map1, map3)
  
#-------------------------------------------------------------------------------
#####               Objective 4: Extract annual data from ACS              #####
#-------------------------------------------------------------------------------
## Reminder: For annual data we need to turn to 1-year ACS, which is available for geographies with 65000+ pop.

## Let's double check our variable names.
## The 1-year dictionaries get their own names so that the 5-year
## `censusvars_2023` loaded earlier in this script is not overwritten.
censusvars_acs1_2023 <- load_variables(2023, "acs1", cache = TRUE)
censusvars_acs1_2006 <- load_variables(2006, "acs1", cache = TRUE)

## Download one table per year, tag each with its year, then stack them.
## 2020 is left out: the Census Bureau did not release standard 1-year ACS
## estimates for that year.
acs_annual <- map(
  c(2006:2019, 2021:2023),
  function(yr) {
    get_acs(
      geography = "county",
      variables = c("B08006_001", "B08006_017"),
      state = "CA",
      county = c("Los Angeles", "Orange", "Fresno"),
      year = yr,
      survey = "acs1",
      geometry = FALSE
    ) %>%
      mutate(year = yr)
  }
) %>%
  bind_rows()

## Wrangle the data to fit analysis needs, in a single pipeline:
## 1. Keep the identifying columns and pivot wider so each variable
##    becomes a column
## 2. Rename the variable columns and compute the share of workers who
##    worked from home (a proportion between 0 and 1)
acs_annual2 <- acs_annual %>%
  select(GEOID, NAME, year, variable, estimate) %>%
  pivot_wider(
    names_from = variable,
    values_from = estimate
  ) %>%
  rename(
    Workers = B08006_001,
    WFH = B08006_017
  ) %>%
  mutate(p_WFH = WFH / Workers)

## What if we want to look at different or additional counties?
## => Just go back and edit the `county` vector in the get_acs() call above!

## Export time-series data to a CSV file (opens in Excel).
## row.names = FALSE keeps R's row index out of the file, so the first column
## is GEOID rather than an unnamed counter.
write.csv(acs_annual2, "output/WFH_annual.csv", row.names = FALSE)

## Graph trends in R: example
## One panel per county, stacked vertically, with the share of workers who
## worked from home on the y axis. The bar color is a fixed "orange", so no
## fill aesthetic is mapped in aes().
ggplot(acs_annual2, aes(x = year, y = p_WFH)) +
  geom_col(fill = "orange") +
  facet_wrap(~ NAME, ncol = 1) +
  scale_y_continuous(labels = scales::percent_format()) +
  scale_x_continuous(breaks = 2006:2023) +
  labs(title = "Percent of Workers Who Worked from Home by County",
       x = NULL,
       y = NULL) +
  theme_minimal()

