TigerFetch.jl

Download TIGER/Line shapefiles from the US Census Bureau
Log | Files | Refs | README | LICENSE

commit b4f0d3fe2688761fbce881deaf361b724b946dd5
parent 66c9c87654f46370f9e28f48a6b5aa0d1a309ef5
Author: Erik Loualiche <eloualic@umn.edu>
Date:   Sat, 22 Feb 2025 21:45:13 -0600

first code commit ... some divisions work well

Diffstat:
M .gitignore | 1 +
M src/TigerFetch.jl | 33 +++++++++++++++++++++++++++++++--
A src/artifacts.jl | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/cli.jl | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/download.jl | 155 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/geotypes.jl | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/reference.jl | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A test/UnitTests/assets.jl | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M test/runtests.jl | 23 +++++++++++++++++++++--
9 files changed, 548 insertions(+), 4 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -1 +1,2 @@ /Manifest.toml +.DS_Store diff --git a/src/TigerFetch.jl b/src/TigerFetch.jl @@ -1,5 +1,34 @@ module TigerFetch -# Write your package code here. -end +# -------------------------------------------------------------------------------------------------- +import Comonicon: @cast, @main +import Downloads +import Pkg +using Infiltrator +# -------------------------------------------------------------------------------------------------- + + +# -------------------------------------------------------------------------------------------------- +include("artifacts.jl") +include("geotypes.jl") # Internal type system +include("reference.jl") +include("download.jl") +include("cli.jl") +# -------------------------------------------------------------------------------------------------- + + +# --------------------------------------------------------------------------------------------------# +# Export types +# export download_shapefile # this actually relies on internal types ... that we might not want to export +# Export CLI function +export tigerfetch +# -------------------------------------------------------------------------------------------------- + + +# -------------------------------------------------------------------------------------------------- +# -------------------------------------------------------------------------------------------------- + + + +end # module diff --git a/src/artifacts.jl b/src/artifacts.jl @@ -0,0 +1,58 @@ +using Pkg.Artifacts + +""" +Create and bind the artifact for existing files. +Run this script once to set up the artifact. 
+""" +function create_artifacts() + # Create a new artifact + artifact_hash = create_artifact() do artifact_dir + # Copy your existing files into the artifact directory + mkpath(artifact_dir) + + # Assuming your files are in a directory named "assets" at the package root + source_dir = joinpath(@__DIR__, "..", "assets") + + # Copy the files + cp(joinpath(source_dir, "national_county2020.txt"), + joinpath(artifact_dir, "national_county2020.txt")) + cp(joinpath(source_dir, "national_state2020.txt"), + joinpath(artifact_dir, "national_state2020.txt")) + end + + # Bind the artifact in Artifacts.toml + bind_artifact!( + "Artifacts.toml", # Your Artifacts.toml file + "package_assets", # Name for your artifact + artifact_hash; # Hash from create_artifact + force=true # Overwrite if exists + ) +end + +""" +Get the directory containing the artifact files. +""" +function artifact_dir() + artifact_toml = joinpath(@__DIR__, "..", "Artifacts.toml") + # Get the hash from the Artifacts.toml file + hash = artifact_hash("package_assets", artifact_toml) + if hash === nothing + error("Could not find package_assets entry in Artifacts.toml") + end + # Ensure the artifact is installed + ensure_artifact_installed("package_assets", artifact_toml) + # Now use the hash to get the path + return artifact_path(hash) +end + + +""" +Get paths to specific reference files. +""" +function get_reference_data() + base_path = artifact_dir() + return Dict( + "county" => joinpath(base_path, "national_county2020.txt"), + "state" => joinpath(base_path, "national_state2020.txt") + ) +end diff --git a/src/cli.jl b/src/cli.jl @@ -0,0 +1,75 @@ + +const GEOGRAPHY_TYPES = Dict( + "state" => State, + "county" => County, + "cousub" => CountySubdivision, + "tract" => Tract, + "areawater" => AreaWater, +) + +""" +Download TIGER/Line shapefiles. 
+ +# Arguments + +- `type`: Geography type (state, county, cousub, tract) +- `year`: Data year (default: 2024) + +# Options + +- `--state`: State identifier (name, abbreviation, or FIPS) +- `--county`: County identifier (name or FIPS, requires --state) +- `--output`: Output directory (default: current directory) +- `--force`: Override existing files + +# Examples +tigerfetch state 2024 +tigerfetch cousub 2024 --state CA +tigerfetch tract 2024 --state "New York" + +""" +@main function tigerfetch( + type::String, year::Int=2024; + state::String="", + county::String="", + output::String=pwd(), + force::Bool=false) + + type_lower = lowercase(type) + if !haskey(GEOGRAPHY_TYPES, type_lower) + throw(ArgumentError("Invalid type. Choose from: $(join(keys(GEOGRAPHY_TYPES), ", "))")) + end + + # Get the type and create instance + geo_type = GEOGRAPHY_TYPES[type_lower] + geo = geo_type(year) # No need to pass scope anymore, it's inherent in the type + + # Dispatch based on the type's hierarchy + if geo isa NationalGeography + if !isempty(state) || !isempty(county) + @warn "State/county options ignored for national-level data" + end + download_shapefile(geo; output_dir=output, force=force) + + elseif geo isa StateGeography + if !isempty(county) + @warn "County option ignored for state-level data" + end + if isempty(state) + @warn "No state specified - downloading all states" + end + state_arg = isempty(state) ? nothing : state + download_shapefile(geo; state=state_arg, output_dir=output, force=force) + + elseif geo isa CountyGeography + if isempty(state) + @warn "No state specified - downloading all states" + end + if !isempty(county) && isempty(state) + throw(ArgumentError("--county option requires --state to be specified")) + end + state_arg = isempty(state) ? nothing : state + county_arg = isempty(county) ? 
nothing : county + download_shapefile(geo; state=state_arg, county=county_arg, output_dir=output, force=force) + end +end diff --git a/src/download.jl b/src/download.jl @@ -0,0 +1,155 @@ + + +# -------------------------------------------------------------------------------------------------- +# National scope (States, Counties nationally) +function download_shapefile( + geo::T; + output_dir::String=pwd(), + force::Bool=false) where {T <: NationalGeography} + + geo_type = typeof(geo) + filename = "tl_$(geo.year)_us_$(lowercase(tiger_name(geo_type))).zip" + + url = "https://www2.census.gov/geo/tiger/TIGER$(geo.year)/$(tiger_name(geo_type))/" * filename + output_path = joinpath(output_dir, filename) + + if isfile(output_path) && !force + @info "File exists" path=output_path + return output_path + end + + try + @info "Downloading $(description(geo_type))" url=url + mkpath(output_dir) + Downloads.download(url, output_path) + return output_path + catch e + @error "Download failed" exception=e + rethrow(e) + end +end +# -------------------------------------------------------------------------------------------------- +# +# +# -------------------------------------------------------------------------------------------------- +# State scope (CountySubdivisions, Places) +function download_shapefile( + geo::T; + state::Union{String, Integer, Nothing}=nothing, + output_dir::String=pwd(), + force::Bool=false) where T<:StateGeography + + # Get states to process + if !isnothing(state) + state_info = standardize_state_input(state) + if isnothing(state_info) + throw(ArgumentError("Invalid state identifier provided")) + end + states_to_process = [state_info] + else + @warn "No state specified - downloading all states" + states_to_process = get_state_list() + end + + # Use the type of geo to get tiger_name + geo_type = typeof(geo) + base_url = "https://www2.census.gov/geo/tiger/TIGER$(geo.year)/$(tiger_name(geo_type))/" + + # Process each state + for state_info in states_to_process + 
fips = state_info[2] + state_name = state_info[3] + filename = "tl_$(geo.year)_$(fips)_$(lowercase(tiger_name(geo_type))).zip" + url = base_url * filename + output_path = joinpath(output_dir, filename) + + if isfile(output_path) && !force + @info "File exists" state=state_name path=output_path + continue + end + + try + @info "Downloading" state=state_name url=url + Downloads.download(url, output_path) + catch e + @error "Download failed" state=state_name exception=e + continue + end + end +end +# -------------------------------------------------------------------------------------------------- + + +# -------------------------------------------------------------------------------------------------- +# County scope (Tracts, WaterAreas) +function download_shapefile( + geo::T; + state::Union{String, Integer, Nothing}=nothing, + county::Union{String, Integer, Nothing}=nothing, + output_dir::String=pwd(), + force::Bool=false) where {T <: CountyGeography} + + + # Get states to process + if !isnothing(state) + state_info = standardize_state_input(state) + if isnothing(state_info) + throw(ArgumentError("Invalid state identifier: $state")) + end + states_to_process = [state_info] + else + @warn "No state specified - downloading all states" + states_to_process = get_state_list() + end + + # Track failures + failed_downloads = String[] + + for state_info in states_to_process + state_fips = state_info[2] + state_name = state_info[3] + + # @infiltrate + + # Get counties for this state + counties = get_county_list(state) + + # Filter for specific county if provided + if !isnothing(county) + county_info = standardize_county_input(county, state_fips) + if isnothing(county_info) + throw(ArgumentError("Invalid county identifier for $(state_name)")) + end + counties = [county_info] + end + + for county_info in counties + county_fips = county_info[3] # Assuming similar structure to state_info + county_name = county_info[4] + + filename = 
"tl_$(geo.year)_$(state_fips)$(county_fips)_$(lowercase(tiger_name(geo))).zip" + url = "https://www2.census.gov/geo/tiger/TIGER$(geo.year)/$(tiger_name(geo))/" * filename + output_path = joinpath(output_dir, filename) + + if isfile(output_path) && !force + @info "File exists" state=state_name county=county_name path=output_path + continue + end + + try + @info "Downloading" state=state_name county=county_name url=url + mkpath(output_dir) + Downloads.download(url, output_path) + catch e + push!(failed_downloads, "$(state_name) - $(county_name)") + @error "Download failed" state=state_name county=county_name exception=e + continue + end + end + end + + if !isempty(failed_downloads) + @warn "Some downloads failed" failed_locations=failed_downloads + end +end +# -------------------------------------------------------------------------------------------------- diff --git a/src/geotypes.jl b/src/geotypes.jl @@ -0,0 +1,55 @@ +# Abstract base type +abstract type TigerGeography end + +# Abstract types for each scope +abstract type NationalGeography <: TigerGeography end +abstract type StateGeography <: TigerGeography end +abstract type CountyGeography <: TigerGeography end + +# Concrete types with their metadata as constants +struct State <: NationalGeography + year::Int +end +const STATE_META = (tiger_name = "STATE", description = "State Boundaries") + +struct County <: NationalGeography + year::Int +end +const COUNTY_META = (tiger_name = "COUNTY", description = "County Boundaries") + +struct CountySubdivision <: StateGeography + year::Int +end +const COUSUB_META = (tiger_name = "COUSUB", description = "County Subdivisions") + +struct Tract <: CountyGeography + year::Int +end +const TRACT_META = (tiger_name = "TRACT", description = "Census Tracts") + +struct AreaWater <: CountyGeography + year::Int +end +const AREAWATER_META = (tiger_name = "AREAWATER", description = "Area Water") + +# Helper methods to access metadata +tiger_name(::Type{State}) = STATE_META.tiger_name 
+tiger_name(::Type{County}) = COUNTY_META.tiger_name +tiger_name(::Type{CountySubdivision}) = COUSUB_META.tiger_name +tiger_name(::Type{Tract}) = TRACT_META.tiger_name +tiger_name(::Type{AreaWater}) = AREAWATER_META.tiger_name + +tiger_name(x::T) where T <: TigerGeography = tiger_name(T) + +description(::Type{State}) = STATE_META.description +description(::Type{County}) = COUNTY_META.description +description(::Type{CountySubdivision}) = COUSUB_META.description +description(::Type{Tract}) = TRACT_META.description +description(::Type{AreaWater}) = AREAWATER_META.description + +description(x::T) where T <: TigerGeography = description(T) + +# Helper methods now just reference the type hierarchy +scope(::Type{T}) where {T <: NationalGeography} = National +scope(::Type{T}) where {T <: StateGeography} = ByState +scope(::Type{T}) where {T <: CountyGeography} = ByCounty diff --git a/src/reference.jl b/src/reference.jl @@ -0,0 +1,86 @@ +function get_state_list()::Vector{Vector{String}} + paths = get_reference_data() + state_file = paths["state"] + + # we do not need to load CSV so we read the file by hand + state_list = readlines(state_file) |> + l -> split.(l, "|") |> # split by vertical bar + l -> map(s -> String.(s[ [1,2,4] ]), l) |> # select some columns + l -> l[2:end] # remove the header + + return unique(state_list) +end + +# Takes a string input (handles names and abbreviations) +function standardize_state_input(state_input::String)::Union{Vector{String}, Nothing} + normalized_input = uppercase(strip(state_input)) + states = get_state_list() + matched_state = findfirst(state -> + any(uppercase(identifier) == normalized_input for identifier in state), + states) + return isnothing(matched_state) ? 
nothing : states[matched_state] +end + +# Takes numeric input (handles FIPS codes) +function standardize_state_input(fips::Integer)::Union{Vector{String}, Nothing} + fips_str = lpad(string(fips), 2, '0') + states = get_state_list() + matched_state = findfirst(state -> state[2] == fips_str, states) + return isnothing(matched_state) ? nothing : states[matched_state] +end + +# Handles the default case +standardize_state_input(::Nothing) = nothing + + +# ------------------------------------------------------------------------------------------------- + +function get_county_list(state=nothing)::Vector{Vector{AbstractString}} + paths = get_reference_data() # Remove TigerFetch. prefix since we're inside the module + county_file = paths["county"] + + # we do not need to load CSV so we read the file by hand + county_list = readlines(county_file) |> + ( l -> split.(l, "|") ) |> # split by vertical bar + ( l -> map(s -> String.(s[ [1,2,3,5] ]), l) ) |> # select some columns + ( l -> l[2:end] ) # remove the header + + if isnothing(state) + return county_list + elseif !isnothing(tryparse(Int, state)) # then its the fips + return unique(filter(l -> l[2] == state, county_list)) + else # then its the abbreviation state name + return unique(filter(l -> l[1] == state, county_list)) + end + +end + + + +function standardize_county_input( + county_input::Union{String, Integer}, + state_fips::String)::Union{Vector{String}, Nothing} + + # Handle numeric input (FIPS code) + if county_input isa Integer + # Convert to three-digit string with leading zeros + county_fips = lpad(string(county_input), 3, '0') + return find_county(county_fips, state_fips) + end + + # Handle string input (name or FIPS) + normalized_input = uppercase(strip(county_input)) + return find_county(normalized_input, state_fips) +end + + +function find_county(identifier::String, state_fips::String)::Union{Vector{String}, Nothing} + counties = get_county_list(state_fips) + + # Try to match based on any identifier in the 
county vector + matched_county = findfirst(county -> + any(uppercase(id) == uppercase(identifier) for id in county), + counties) + + return isnothing(matched_county) ? nothing : counties[matched_county] +end diff --git a/test/UnitTests/assets.jl b/test/UnitTests/assets.jl @@ -0,0 +1,66 @@ +@testset "Asset Installation Tests" begin + + @testset "Artifact Existence" begin + + # Test that the Artifacts.toml file exists + artifact_toml = joinpath(pkgdir(TigerFetch), "Artifacts.toml") + @test isfile(artifact_toml) + + # Test that we can get the artifact directory + artifact_toml = joinpath(@__DIR__, "..", "..", "Artifacts.toml") + @test_nowarn ensure_artifact_installed("package_assets", artifact_toml) + + # Test that the artifact path is valid + artifact_path = TigerFetch.artifact_dir() + @test isdir(artifact_path) + end + + @testset "Reference Data Files" begin + # Get reference data paths + data_paths = TigerFetch.get_reference_data() + + @testset "County Data File" begin + county_path = data_paths["county"] + @test isfile(county_path) + + # Test county file content structure + content = readlines(county_path) + @test length(content) > 0 + @test occursin("|", first(content)) + first_line = split(first(content), "|") + @test length(first_line) >= 4 + end + + @testset "State Data File" begin + state_path = data_paths["state"] + @test isfile(state_path) + + # Test state file content structure + content = readlines(state_path) + @test length(content) > 0 + @test occursin("|", first(content)) + + first_line = split(first(content), "|") + @test length(first_line) >= 4 + end + end + + + + @testset "Data Accessibility" begin + # Test state list functionality + state_list = TigerFetch.get_state_list() + @test length(state_list) > 0 + @test all(x -> length(x) == 3, state_list) # Each state should have 3 identifiers + + # Test county list functionality + county_list = TigerFetch.get_county_list() + @test length(county_list) > 0 + @test all(x -> length(x) == 3, county_list) # 
Each county should have 3 identifiers + + # Test specific state county list + al_counties = TigerFetch.get_county_list("AL") + @test length(al_counties) > 0 + @test all(x -> x[1] == "AL", al_counties) # All counties should be from Alabama + end +end diff --git a/test/runtests.jl b/test/runtests.jl @@ -1,6 +1,25 @@ +# -------------------------------------------------------------------------------------------------- using TigerFetch using Test +using Pkg.Artifacts +# using LazyArtifacts -@testset "TigerFetch.jl" begin - # Write your tests here. + +const testsuite = [ + "assets", +] + +# -------------------------------------------------------------------------------------------------- + + +# -------------------------------------------------------------------------------------------------- +printstyled("Running tests:\n", color=:blue, bold=true) + +@testset verbose=true "TigerFetch.jl" begin + for test in testsuite + println("\033[1m\033[32m → RUNNING\033[0m: $(test)") + include("UnitTests/$(test).jl") + println("\033[1m\033[32m PASSED\033[0m") + end end +# --------------------------------------------------------------------------------------------------