library(tidycensus)
library(tidyverse)
library(sf)
library(tmap)
# Look up the variable codes published for the 2010 decennial census
# (Summary File 1) and keep the lookup table as "decennial_variables".
decennial_variables <- load_variables(year = 2010, dataset = "sf1")
# Display the lookup table (columns: name, label, concept).
decennial_variables
## # A tibble: 8,959 × 3
## name label concept
## <chr> <chr> <chr>
## 1 H001001 Total HOUSING UNITS
## 2 H002001 Total URBAN AND RURAL
## 3 H002002 Total!!Urban URBAN AND RURAL
## 4 H002003 Total!!Urban!!Inside urbanized areas URBAN AND RURAL
## 5 H002004 Total!!Urban!!Inside urban clusters URBAN AND RURAL
## 6 H002005 Total!!Rural URBAN AND RURAL
## 7 H002006 Total!!Not defined for this file URBAN AND RURAL
## 8 H003001 Total OCCUPANCY STATUS
## 9 H003002 Total!!Occupied OCCUPANCY STATUS
## 10 H003003 Total!!Vacant OCCUPANCY STATUS
## # … with 8,949 more rows
# ---- get_decennial() ----
# Download total population (variable P001001) for every state from the 2010
# decennial census. geometry = TRUE asks tidycensus to attach the state
# boundaries, so the result is an sf object rather than a plain tibble.
state_population_2010 <- get_decennial(
  geography = "state",
  variables = "P001001",
  geometry = TRUE,
  year = 2010
)
# Prints "state_population_2010"
state_population_2010
## Simple feature collection with 52 features and 4 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -179.1473 ymin: 17.88481 xmax: 179.7785 ymax: 71.35256
## geographic CRS: NAD83
## # A tibble: 52 × 5
## GEOID NAME variable value geometry
## <chr> <chr> <chr> <dbl> <MULTIPOLYGON [°]>
## 1 23 Maine P001001 1328361 (((-67.61976 44.51975, -67.61541 44.5…
## 2 25 Massachusetts P001001 6547629 (((-70.83204 41.6065, -70.82373 41.59…
## 3 26 Michigan P001001 9883640 (((-88.68443 48.11578, -88.67563 48.1…
## 4 30 Montana P001001 989415 (((-104.0577 44.99743, -104.2501 44.9…
## 5 32 Nevada P001001 2700551 (((-114.0506 37.0004, -114.05 36.9577…
## 6 34 New Jersey P001001 8791894 (((-75.52684 39.65571, -75.52634 39.6…
## 7 36 New York P001001 19378102 (((-71.94356 41.28667, -71.9268 41.29…
## 8 37 North Carolina P001001 9535483 (((-82.60288 36.03983, -82.60074 36.0…
## 9 39 Ohio P001001 11536504 (((-82.81349 41.72347, -82.81049 41.7…
## 10 42 Pennsylvania P001001 12702379 (((-75.41504 39.80179, -75.42804 39.8…
## # … with 42 more rows
# Download total population (P001001) for each Colorado county from the
# 2010 decennial census.
CO_county_population_2010 <- get_decennial(
  geography = "county",
  state = "CO",
  variables = "P001001",
  year = 2010
)
# Display the county-level table.
CO_county_population_2010
## # A tibble: 64 × 4
## GEOID NAME variable value
## <chr> <chr> <chr> <dbl>
## 1 08023 Costilla County, Colorado P001001 3524
## 2 08025 Crowley County, Colorado P001001 5823
## 3 08027 Custer County, Colorado P001001 4255
## 4 08029 Delta County, Colorado P001001 30952
## 5 08031 Denver County, Colorado P001001 600158
## 6 08035 Douglas County, Colorado P001001 285465
## 7 08033 Dolores County, Colorado P001001 2064
## 8 08049 Grand County, Colorado P001001 14843
## 9 08039 Elbert County, Colorado P001001 23086
## 10 08041 El Paso County, Colorado P001001 622263
## # … with 54 more rows
# Tidy the Colorado county table with dplyr: drop the "variable" column
# and give the "value" column the more descriptive name "population".
CO_county_population_2010 <- CO_county_population_2010 %>%
  select(-variable) %>%
  rename(population = value)
# Display the tidied table.
CO_county_population_2010
## # A tibble: 64 × 3
## GEOID NAME population
## <chr> <chr> <dbl>
## 1 08023 Costilla County, Colorado 3524
## 2 08025 Crowley County, Colorado 5823
## 3 08027 Custer County, Colorado 4255
## 4 08029 Delta County, Colorado 30952
## 5 08031 Denver County, Colorado 600158
## 6 08035 Douglas County, Colorado 285465
## 7 08033 Dolores County, Colorado 2064
## 8 08049 Grand County, Colorado 14843
## 9 08039 Elbert County, Colorado 23086
## 10 08041 El Paso County, Colorado 622263
## # … with 54 more rows
# Download total population (P001001) and rural population (P002005) for
# every state from the 2010 decennial census. output = "wide" returns one
# column per variable; the columns are then given readable names and the
# rows are sorted from the largest to the smallest rural population.
state_pop_ruralpop_2010 <- get_decennial(
  geography = "state",
  variables = c("P001001", "P002005"),
  year = 2010,
  output = "wide"
) %>%
  rename(
    total_population = P001001,
    rural_population = P002005
  ) %>%
  arrange(desc(rural_population))
## Getting data from the 2010 decennial Census
## Using Census Summary File 1
# Display the sorted state table.
state_pop_ruralpop_2010
## # A tibble: 52 × 4
## GEOID NAME total_population rural_population
## <chr> <chr> <dbl> <dbl>
## 1 48 Texas 25145561 3847522
## 2 37 North Carolina 9535483 3233727
## 3 42 Pennsylvania 12702379 2711092
## 4 39 Ohio 11536504 2546810
## 5 26 Michigan 9883640 2513683
## 6 13 Georgia 9687653 2415502
## 7 36 New York 19378102 2349997
## 8 47 Tennessee 6346105 2132860
## 9 51 Virginia 8001024 1963930
## 10 01 Alabama 4779736 1957932
## # … with 42 more rows
# Derive a new column from existing ones with dplyr: "rural_pct" is the
# rural population expressed as a percentage of the total population.
# The table is then re-sorted in descending order of "rural_pct".
state_pop_ruralpop_2010 <- arrange(
  mutate(
    state_pop_ruralpop_2010,
    rural_pct = rural_population / total_population * 100
  ),
  desc(rural_pct)
)
# Display the table with the new percentage column.
state_pop_ruralpop_2010
## # A tibble: 52 × 5
## GEOID NAME total_population rural_population rural_pct
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 23 Maine 1328361 814819 61.3
## 2 50 Vermont 625741 382356 61.1
## 3 54 West Virginia 1852994 950184 51.3
## 4 28 Mississippi 2967297 1503073 50.7
## 5 30 Montana 989415 436401 44.1
## 6 05 Arkansas 2915918 1278329 43.8
## 7 46 South Dakota 814180 352933 43.3
## 8 21 Kentucky 4339367 1806024 41.6
## 9 01 Alabama 4779736 1957932 41.0
## 10 38 North Dakota 672591 269719 40.1
## # … with 42 more rows
# Keep only the states whose rural share is above 40% and store them in a
# new object named "rural_pct_over40".
rural_pct_over40 <- filter(state_pop_ruralpop_2010, rural_pct > 40)
rural_pct_over40
## # A tibble: 10 × 5
## GEOID NAME total_population rural_population rural_pct
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 23 Maine 1328361 814819 61.3
## 2 50 Vermont 625741 382356 61.1
## 3 54 West Virginia 1852994 950184 51.3
## 4 28 Mississippi 2967297 1503073 50.7
## 5 30 Montana 989415 436401 44.1
## 6 05 Arkansas 2915918 1278329 43.8
## 7 46 South Dakota 814180 352933 43.3
## 8 21 Kentucky 4339367 1806024 41.6
## 9 01 Alabama 4779736 1957932 41.0
## 10 38 North Dakota 672591 269719 40.1
# Student Exercise 1: Create a dataset of Colorado counties whose rural population percentage (with respect to the overall county population) exceeds 50% (based on the 2010 decennial census). Sort the dataset in descending order with respect to the rural percentage variable.
# Make a graph that visually conveys the median age in Colorado, by county, based on the 2010 census.
# Download median age (P013001) with county boundaries for Colorado from
# the 2010 decennial census, then:
#   - rename "value" to "median_age";
#   - add a short "County" label by stripping " County, Colorado" from NAME;
#   - drop the original NAME column.
median_age_CO <- get_decennial(
  geography = "county",
  state = "CO",
  variables = "P013001",
  year = 2010,
  geometry = TRUE
) %>%
  rename(median_age = value) %>%
  mutate(County = str_remove(NAME, " County, Colorado")) %>%
  select(-NAME)
median_age_CO
## Simple feature collection with 64 features and 4 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -109.0603 ymin: 36.99243 xmax: -102.0415 ymax: 41.00344
## geographic CRS: NAD83
## # A tibble: 64 × 5
## GEOID variable median_age geometry County
## <chr> <chr> <dbl> <MULTIPOLYGON [°]> <chr>
## 1 08053 P013001 47.1 (((-107.2116 37.42296, -107.2179 37.42298, … Hinsd…
## 2 08061 P013001 47.4 (((-102.1985 38.61522, -102.0688 38.61518, … Kiowa
## 3 08063 P013001 39.7 (((-102.0498 39.57406, -102.0496 39.53893, … Kit C…
## 4 08071 P013001 43.9 (((-104.4804 36.99372, -104.5193 36.99377, … Las A…
## 5 08073 P013001 40.9 (((-103.7149 39.26751, -103.7149 39.2677, -… Linco…
## 6 08075 P013001 38.4 (((-102.9049 41.00221, -102.9048 41.00221, … Logan
## 7 08079 P013001 53.1 (((-106.7108 37.40423, -106.7108 37.39624, … Miner…
## 8 08085 P013001 42 (((-109.0418 38.15302, -109.0418 38.16469, … Montr…
## 9 08087 P013001 36 (((-104.0706 40.5243, -104.0514 40.52432, -… Morgan
## 10 08089 P013001 40.9 (((-104.0597 37.85263, -104.0594 37.99616, … Otero
## # … with 54 more rows
# Dot plot of median age by county: one point per county, with counties
# ordered along the y-axis by their median age.
median_age_CO__visualization <-
  ggplot(
    median_age_CO,
    aes(x = median_age, y = reorder(County, median_age))
  ) +
  geom_point() +
  labs(
    title = "Median Age by County, CO",
    x = "Median Age",
    y = "County",
    caption = "Source: United States census accessed via tidycensus"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5), # centre the title
    plot.caption = element_text(size = 5)   # small source note
  )
median_age_CO__visualization
# Build a choropleth map object of median age by Colorado county.
#
# Class breaks are five-year bins starting at 30. The original breaks ended
# at 50, but the data contain older counties (e.g. a median age of 53.1), which
# made tmap warn that values were "higher than the highest break". The top
# break is therefore taken from the data, rounded up to the next multiple of
# five and never lower than the original 50, so every county falls in a class.
median_age_CO_map <-
  tm_shape(median_age_CO) +
  tm_polygons(
    col = "median_age",
    breaks = seq(
      30,
      max(50, 5 * ceiling(max(median_age_CO$median_age, na.rm = TRUE) / 5)),
      by = 5
    ),
    palette = "YlGnBu",
    # NOTE(review): tmap documents `midpoint` as a numeric value (or NA);
    # TRUE is kept from the original to avoid changing colours -- confirm intent.
    midpoint = TRUE
  ) +
  tm_layout(
    frame = FALSE,
    main.title = "Median Age by County,\nColorado",
    main.title.position = "left",
    legend.outside = TRUE,
    attr.outside = TRUE
  ) +
  tm_credits("Source: US Census via tidycensus", position = c("right", "bottom"))
median_age_CO_map
## Warning: Values have found that are higher than the highest break
# Switch tmap to its interactive "view" mode.
tmap_mode(mode = "view")
## tmap mode set to interactive viewing
# Draw "median_age_CO_map" again, this time as an interactive map.
median_age_CO_map
## Credits not supported in view mode.
## Warning: Values have found that are higher than the highest break
# Switch tmap back to its static "plot" mode.
tmap_mode(mode = "plot")
## tmap mode set to plotting
# Draw "median_age_CO_map" again, this time as a static map.
median_age_CO_map
## Warning: Values have found that are higher than the highest break
# Look up the variable codes for the 5-year American Community Survey
# ending in 2018 and keep the lookup table as "ACS_5_2018".
ACS_5_2018 <- load_variables(year = 2018, dataset = "acs5")
# Display the lookup table (columns: name, label, concept).
ACS_5_2018
## # A tibble: 26,997 × 3
## name label concept
## <chr> <chr> <chr>
## 1 B00001_001 Estimate!!Total UNWEIGHTED SAMPLE COUNT OF…
## 2 B00002_001 Estimate!!Total UNWEIGHTED SAMPLE HOUSING …
## 3 B01001_001 Estimate!!Total SEX BY AGE
## 4 B01001_002 Estimate!!Total!!Male SEX BY AGE
## 5 B01001_003 Estimate!!Total!!Male!!Under 5 years SEX BY AGE
## 6 B01001_004 Estimate!!Total!!Male!!5 to 9 years SEX BY AGE
## 7 B01001_005 Estimate!!Total!!Male!!10 to 14 years SEX BY AGE
## 8 B01001_006 Estimate!!Total!!Male!!15 to 17 years SEX BY AGE
## 9 B01001_007 Estimate!!Total!!Male!!18 and 19 years SEX BY AGE
## 10 B01001_008 Estimate!!Total!!Male!!20 years SEX BY AGE
## # … with 26,987 more rows
# Download the median income estimate (B19013_001) for each Colorado
# county from the ACS for year = 2018, rename "estimate" to "median_income",
# and sort the counties from highest to lowest median income.
median_income_CO_counties_2018 <- get_acs(
  geography = "county",
  state = "CO",
  variables = "B19013_001",
  year = 2018
) %>%
  rename(median_income = estimate) %>%
  arrange(desc(median_income))
## Getting data from the 2014-2018 5-year ACS
median_income_CO_counties_2018
## # A tibble: 64 × 5
## GEOID NAME variable median_income moe
## <chr> <chr> <chr> <dbl> <dbl>
## 1 08035 Douglas County, Colorado B19013_001 115314 2028
## 2 08039 Elbert County, Colorado B19013_001 96658 4279
## 3 08014 Broomfield County, Colorado B19013_001 89624 4013
## 4 08037 Eagle County, Colorado B19013_001 84685 4478
## 5 08059 Jefferson County, Colorado B19013_001 78943 1142
## 6 08013 Boulder County, Colorado B19013_001 78642 1583
## 7 08117 Summit County, Colorado B19013_001 77589 4772
## 8 08047 Gilpin County, Colorado B19013_001 75120 6107
## 9 08107 Routt County, Colorado B19013_001 74273 3839
## 10 08005 Arapahoe County, Colorado B19013_001 73925 902
## # … with 54 more rows
# Dot plot of median income by Colorado county, with horizontal error bars
# spanning median_income - moe to median_income + moe.
#
# The county label is built by stripping the full " County, Colorado" suffix,
# including its leading space, so labels carry no trailing whitespace and
# match the "County" column built for the median-age data earlier in the file.
median_income_CO_counties_2018_viz <-
  median_income_CO_counties_2018 %>%
  mutate(County_Name = str_remove(NAME, " County, Colorado")) %>%
  ggplot(aes(x = median_income, y = reorder(County_Name, median_income))) +
  # Error bars are drawn first so the points sit on top of them.
  geom_errorbarh(aes(xmin = median_income - moe, xmax = median_income + moe)) +
  geom_point(color = "blue", size = 3) +
  labs(
    title = "Median Income in Colorado, by County (2018)",
    y = "",
    x = "Median Income Estimate from 5 year ACS\n(Bars indicate margin of error)"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
# prints "median_income_CO_counties_2018_viz"
median_income_CO_counties_2018_viz