```
# Load libraries
library(tidyverse)
### ACW data
# Read the two FIPS columns as text so leading zeros (e.g. "02") cannot be
# lost to readr's type guessing; all other columns are still guessed.
acw.df <- read_csv(
  "lonesome449.csv",
  col_types = cols(
    stfips = col_character(),
    placefip = col_character(),
    .default = col_guess()
  )
)
# Build the state+place key used for joins further down.
# Assumes the standard FIPS layout (2-digit state, 5-digit place); padding is
# a no-op when the file already stores zero-padded codes.
acw.df$stfips <- str_pad(acw.df$stfips, width = 2, side = "left", pad = "0")
acw.df$placefip <- paste0(
  acw.df$stfips,
  str_pad(acw.df$placefip, width = 5, side = "left", pad = "0")
)
# Note, ACW's lowest pop is 4k not 2k and is for year 2000
min(acw.df$population)
# Also, AK is still in the data (number of rows with state FIPS "02")
sum(acw.df$stfips == "02", na.rm = TRUE)
### NHGIS data on places in 2000
# Read the extract, lower-case the headers, build the state+place FIPS key,
# then keep (and rename) only the fields used later in the script.
# NOTE(review): `intplon` is kept exactly as the script had it; confirm it
# matches the longitude header in this NHGIS extract.
place.00.dat <- read_csv("nhgis0058_ds146_2000_place.csv") %>%
  rename_with(tolower) %>%
  mutate(place.fip = paste0(statea, placea)) %>%
  select(
    year,
    stabbr = stusab,
    state.fip = statea,
    place.nm = place,
    place.fip,
    placecc,
    placedc,
    place.area.land = arealand,
    place.area.water = areawatr,
    place.lat = intptlat,
    place.lng = intplon,
    pop = fl5001,
    households = fnh001,
    housing.units = fki001
  )
### NBER data on place distances in 2000
# Prefix each place code with its state code so both ends of a pair use the
# same state+place key as the NHGIS table, then keep only the pair and the
# `mi_to_place` distance column.
place.00.dist <- read_csv("sf12000placedistance50miles.csv") %>%
  mutate(
    place1 = paste0(state1, place1),
    place2 = paste0(state2, place2)
  ) %>%
  select(place1, mi_to_place, place2)
### Drop HI and other areas
# ACW says it is only continental US, but its data has AK.
# The NHGIS file only has the 50 states plus DC, so HI is all that is
# filtered out here.
place.00.dat <- filter(place.00.dat, stabbr != "HI")
# Filter the NBER file to match: keep rows whose first place has a state
# prefix (first two characters of the key) still present in the NHGIS table.
place.00.dist <- place.00.dist %>%
  filter(str_sub(place1, 1, 2) %in% unique(place.00.dat$state.fip))
### Combine the NHGIS and NBER
# need to add pop data to the NBER file
# Temporary lookup keyed as `place1` so it can be joined onto either end of a
# place pair; it is removed again at the end of this section.
place.00.dat.use <- place.00.dat %>%
  select(place1 = place.fip, pop, place.nm)
# First join: attributes of the origin place (columns end up with suffix .x),
# keeping NHGIS places that have no rows in the distance file.
# Second join: attributes of the neighbor place2 (suffix .y).
# The join key is spelled out rather than left to dplyr's natural-join guess.
place.00.df <- place.00.dat.use %>%
  full_join(place.00.dist, by = "place1") %>%
  left_join(place.00.dat.use, by = c("place2" = "place1"))
# add places with minimum distances greater than 50
# Rows with no distance get sentinel neighbor values instead of NA.
place.00.df <- place.00.df %>%
  mutate(
    tmp = is.na(mi_to_place),
    place2 = replace(place2, tmp, "9999999"),
    place.nm.y = replace(place.nm.y, tmp, "none"),
    pop.y = replace(pop.y, tmp, 0),
    mi_to_place = replace(mi_to_place, tmp, 999)
  ) %>%
  select(-tmp)
# Drop the helper; R reclaims the memory on its own, so no explicit gc().
rm(place.00.dat.use)
#### Combine Dist with ACW
# One row per (ACW place, neighbor) pair; ACW columns not needed for the
# distance check are dropped.
acw.dist <- acw.df %>%
  left_join(place.00.df, by = c("placefip" = "place1")) %>%
  select(-c(class, stfips, nbr30, nbr40, autoroute))
# how many of the places have neighbors with pop>4000 within (5,15) miles
# note: use (5,15) since there could be some error in the distances from
# NBER and ACW
# filter() drops rows where a condition is NA (e.g. ACW places with no match
# in place.00.df). Base logical subsetting would instead return an NA element
# for such rows, which unique() then counts as one extra "place".
acw.dist %>%
  filter(mi_to_place > 5, mi_to_place < 15, pop.y > 4000) %>%
  pull(placefip) %>%
  n_distinct()
# Author's recorded result: 20 (re-verify with this NA-safe count)
```