TigerFetch.jl

Download TIGER/Line shapefiles from the US Census Bureau
Log | Files | Refs | README | LICENSE

commit 4407b30b84018ef32e01671db4e1e762670a2c6d
parent a56463a7c0e281e95d7e088b7c18d06fb0e26684
Author: Erik Loualiche <eloualic@umn.edu>
Date:   Mon, 24 Feb 2025 09:20:08 -0600

cleaning up the log

Diffstat:
M docs/src/demo/simple_map.md | 32 ++++++++++++++++----------------
M src/download.jl | 37 +++++++++++++++++++++++++++----------
M src/main.jl | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
3 files changed, 101 insertions(+), 33 deletions(-)

diff --git a/docs/src/demo/simple_map.md b/docs/src/demo/simple_map.md @@ -1,4 +1,4 @@ -# Stata-like utilities +# Drawing a simple map ```@setup simplemap @@ -19,27 +19,27 @@ tmp_dir = mktempdir(); map_dir = joinpath(tmp_dir, "map"); We are downloading the county shapefiles (which is a national file), subsets it to Minnesota and plot it. ```@example simplemap; -tigerdownload("county"; output=map_dir) +tigerdownload("county"; output=map_dir) ; isfile(joinpath(map_dir, "tl_2024_us_county.zip")) ``` First, we process the file which is national to only keep counties in the state ```@example simplemap; -df_shp_cty = Shapefile.Table(joinpath(map_dir, "tl_2024_us_county.zip")) |> DataFrame -@rsubset!(df_shp_cty, :STATEFP=="27") +df_shp_cty = Shapefile.Table(joinpath(map_dir, "tl_2024_us_county.zip")) |> DataFrame; +@rsubset!(df_shp_cty, :STATEFP=="27"); ``` ```@example simplemap; fig = Figure(size=(750,900)); -homerule_centroid = GeometryOps.centroid(df_shp_cty.geometry) +homerule_centroid = GeometryOps.centroid(df_shp_cty.geometry); ga = GeoAxis(fig[1, 1]; dest = "+proj=ortho +lon_0=$(homerule_centroid[1]) +lat_0=$(homerule_centroid[2])", xticksvisible = false, xgridvisible = false, xticks=[0], - yticksvisible = false, ygridvisible = false, yticks=[0]) + yticksvisible = false, ygridvisible = false, yticks=[0]); poly!(ga, df_shp_cty.geometry; color=:white, strokecolor = :black, strokewidth = 0.5, shading = NoShading, - colormap = :dense, alpha = 0.5) + colormap = :dense, alpha = 0.5); save("p1.svg", fig); nothing # hide ``` ![](p1.svg) @@ -51,9 +51,9 @@ Roads are at the state level and water areas at the county level, so this will c From simple to more complicated we start with primary and secondary roads ```@example simplemap; -tigerdownload("primarysecondaryroads"; state="MN", output=map_dir) -df_shp_roads = Shapefile.Table(joinpath(map_dir, "tl_2024_27_prisecroads.zip")) |> DataFrame -lines!(ga, df_shp_roads.geometry; color=RGBf(255/255, 203/255, 71/255), alpha=0.75, 
linewidth=0.2) +tigerdownload("primarysecondaryroads"; state="MN", output=map_dir); +df_shp_roads = Shapefile.Table(joinpath(map_dir, "tl_2024_27_prisecroads.zip")) |> DataFrame; +lines!(ga, df_shp_roads.geometry; color=RGBf(255/255, 203/255, 71/255), alpha=0.75, linewidth=0.2); save("p2.svg", fig); nothing # hide ``` ![](p2.svg) @@ -62,23 +62,23 @@ save("p2.svg", fig); nothing # hide And the water areas; we download them in a separate directory because there is one file per county: ```@example simplemap; -mkpath(joinpath(map_dir, "MN")) -tigerdownload("areawater"; state="MN", output=joinpath(map_dir, "MN")) +mkpath(joinpath(map_dir, "MN")); +tigerdownload("areawater"; state="MN", output=joinpath(map_dir, "MN")); ``` Then we read all of the downloaded shapefiles in the dictionary and keep only the subset of the largest lakes or rivers: ```@example simplemap df_shp_water = [ DataFrame(Shapefile.Table(joinpath(map_dir, "MN", f))) - for f in readdir(joinpath(map_dir, "MN")) ] -df_shp_water = reduce(vcat, df_shp_water, cols=:union) -@rsubset!(df_shp_water, :AWATER > 1_000_000) # only keep larger water + for f in readdir(joinpath(map_dir, "MN")) ]; +df_shp_water = reduce(vcat, df_shp_water, cols=:union); +@rsubset!(df_shp_water, :AWATER > 1_000_000); # only keep larger water ``` And now the plot: ```@example simplemap poly!(ga, df_shp_water.geometry; color=RGBf(170/255, 218/255, 255/255), - strokewidth=0.5, strokecolor=RGBf(144/255, 202/255, 249/255)) + strokewidth=0.5, strokecolor=RGBf(144/255, 202/255, 249/255)); save("p3.svg", fig); nothing # hide ``` ![](p3.svg) diff --git a/src/download.jl b/src/download.jl @@ -1,11 +1,24 @@ +# -------------------------------------------------------------------------------------------------- +macro conditional_log(verbose, level, message, params...) + return quote + if $(esc(verbose)) + @info $(esc(message)) $(map(esc, params)...) + else + @debug $(esc(message)) $(map(esc, params)...) 
+ end + end +end +# -------------------------------------------------------------------------------------------------- + # -------------------------------------------------------------------------------------------------- # National scope (States, Counties nationally) function download_shapefile( geo::T; output_dir::String=pwd(), - force::Bool=false) where {T <: NationalGeography} + force::Bool=false, + verbose::Bool=false) where {T <: NationalGeography} geo_type = typeof(geo) filename = "tl_$(geo.year)_us_$(lowercase(tiger_name(geo_type))).zip" @@ -14,12 +27,12 @@ function download_shapefile( output_path = joinpath(output_dir, filename) if isfile(output_path) && !force - @info "File exists" path=output_path + @conditional_log verbose "File exists" path=output_path return output_path end try - @info "Downloading $(description(geo_type))" url=url + @conditional_log verbose "Downloading $(description(geo_type))" url=url mkpath(output_dir) Downloads.download(url, output_path) return output_path @@ -37,7 +50,8 @@ function download_shapefile( geo::T; state::Union{String, Integer, Nothing}=nothing, output_dir::String=pwd(), - force::Bool=false) where T<:StateGeography + force::Bool=false, + verbose::Bool=false) where T<:StateGeography # Get states to process if !isnothing(state) @@ -69,12 +83,12 @@ function download_shapefile( output_path = joinpath(output_dir, filename) if isfile(output_path) && !force - @info "File exists" state=state_name path=output_path + @conditional_log verbose "File exists" state=state_name path=output_path continue end try - @info "Downloading" state=state_name url=url + @conditional_log verbose "Downloading" state=state_name url=url Downloads.download(url, output_path) catch e if e isa InterruptException @@ -87,7 +101,7 @@ function download_shapefile( end catch e if e isa InterruptException - @info "Download process interrupted by user" + @warn "Download process interrupted by user" # Optional: Clean up partially downloaded file try 
isfile(output_path) && rm(output_path) @@ -110,7 +124,8 @@ function download_shapefile( state::Union{String, Integer, Nothing}=nothing, county::Union{String, Integer, Nothing}=nothing, output_dir::String=pwd(), - force::Bool=false) where {T <: CountyGeography} + force::Bool=false, + verbose::Bool=false) where {T <: CountyGeography} # Get states to process @@ -153,12 +168,12 @@ function download_shapefile( output_path = joinpath(output_dir, filename) if isfile(output_path) && !force - @info "File exists" state=state_name county=county_name path=output_path + @conditional_log verbose "File exists" state=state_name county=county_name path=output_path continue end try - @info "Downloading" state=state_name county=county_name url=url + @conditional_log verbose "Downloading" state=state_name county=county_name url=url mkpath(output_dir) Downloads.download(url, output_path) catch e @@ -174,3 +189,5 @@ function download_shapefile( end end # -------------------------------------------------------------------------------------------------- + + diff --git a/src/main.jl b/src/main.jl @@ -1,4 +1,3 @@ - # -------------------------------------------------------------------------------------------------- const GEOGRAPHY_TYPES = Dict( "state" => State, @@ -16,14 +15,66 @@ const GEOGRAPHY_TYPES = Dict( "road" => Roads, ) +# -------------------------------------------------------------------------------------------------- + + +# -------------------------------------------------------------------------------------------------- +""" + tigerdownload(type::String, year::Int=2024; + state::String="", county::String="", + output::String=pwd(), force::Bool=false, + verbose::Bool=false) + +Download TIGER/Line shapefiles from the U.S. Census Bureau. + +# Arguments +- `type::String`: Geography type. Available options: $(join(keys(GEOGRAPHY_TYPES), ", ")). +- `year::Int=2024`: Data year. Census typically provides shapefiles from 2000 onward. 
+ +# Keyword Arguments +- `state::String=""`: State identifier (name, abbreviation, or FIPS code). +- `county::String=""`: County identifier (name or FIPS code). Requires `state` to be specified. +- `output::String=pwd()`: Directory where shapefiles will be saved. +- `force::Bool=false`: If `true`, redownload files even if they already exist. +- `verbose::Bool=false`: If `true`, display more detailed progress information. + +# Returns +- `Vector{String}`: Paths to downloaded files. -# julia function +# Examples +```julia +# Download state boundaries for the entire USA +tigerdownload("state") + +# Download county subdivisions for California +tigerdownload("cousub", state="CA") + +# Download census tracts for Los Angeles County, California +tigerdownload("tract", state="California", county="Los Angeles") + +# Download with custom output directory and force redownload +tigerdownload("county", output="/path/to/data", force=true) +``` + +# Notes +- Geography types follow a hierarchy: national-level (state, county), state-level (cousub, place), + and county-level (tract, areawater). +- For national-level geographies, the `state` and `county` arguments are ignored. +- For state-level geographies, the `county` argument is ignored. +- For county-level geographies, both `state` and `county` are used when provided. +- If no state is specified when downloading state or county-level geographies, all states will be downloaded. +- TIGER/Line shapefiles are downloaded as ZIP archives containing multiple files (.shp, .dbf, .prj, etc.). 
+ +# See Also +- [Census TIGER/Line Documentation](https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.html) +""" function tigerdownload( type::String, year::Int=2024; state::String="", county::String="", output::String=pwd(), - force::Bool=false) + force::Bool=false, + verbose::Bool=false) type_lower = lowercase(type) if !haskey(GEOGRAPHY_TYPES, type_lower) @@ -60,9 +111,8 @@ function tigerdownload( end state_arg = isempty(state) ? nothing : state county_arg = isempty(county) ? nothing : county - download_shapefile(geo; state=state_arg, county=county_arg, output_dir=output, force=force) + download_shapefile(geo; state=state_arg, county=county_arg, output_dir=output, + force=force, verbose=verbose) end end -# -------------------------------------------------------------------------------------------------- - - +# --------------------------------------------------------------------------------------------------+ \ No newline at end of file