TigerFetch.jl

Download TIGER/Line shapefiles from the US Census Bureau
Log | Files | Refs | README | LICENSE

reference.jl (3788B)


      1 # ABOUTME: State and county reference data lookup from bundled FIPS lists
      2 # ABOUTME: Provides standardization of state/county identifiers (name, abbreviation, FIPS code)
      3 
      # Lazily populated, module-level storage for the parsed reference tables.
      # Every row is a vector of strings extracted from one line of a reference file.
      const _STATE_LIST_CACHE = Ref{Vector{Vector{String}}}()
      const _COUNTY_LIST_CACHE = Ref{Vector{Vector{AbstractString}}}()
      const _CACHE_INITIALIZED = Ref(false)

      """
          _ensure_cache()

      Fill the state and county caches on the first call; later calls return immediately.

      File paths are taken from `get_reference_data()` under the keys `"state"` and
      `"county"`. Each file is read line by line and split on `|`. State rows keep
      fields 1, 2 and 4 and are de-duplicated; county rows keep fields 1, 2, 3 and 5.
      The first line of each file is discarded after field extraction (presumably a
      header row).
      """
      function _ensure_cache()
          if !_CACHE_INITIALIZED[]
              paths = get_reference_data()

              # State table: select fields, drop the first line, remove duplicate rows.
              state_lines = readlines(paths["state"])
              state_rows = [String.(split(line, "|")[[1, 2, 4]]) for line in state_lines]
              _STATE_LIST_CACHE[] = unique(state_rows[2:end])

              # County table: select fields, drop the first line.
              county_lines = readlines(paths["county"])
              county_rows = [
                  String.(split(line, "|")[[1, 2, 3, 5]]) for line in county_lines
              ]
              _COUNTY_LIST_CACHE[] = county_rows[2:end]

              _CACHE_INITIALIZED[] = true
          end
          return
      end
     31 
     """
         get_state_list()

     Return the cached state table, loading the reference data first if needed.

     The returned vector is the cache itself, not a copy.
     """
     function get_state_list()::Vector{Vector{String}}
         _ensure_cache()
         states = _STATE_LIST_CACHE[]
         return states
     end
     36 
     """
         standardize_state_input(state_input::String)

     Look up a state by any of its stored identifiers, ignoring case and surrounding
     whitespace in `state_input`.

     Every field of each row from `get_state_list()` is compared, so a name, an
     abbreviation, or a code given as a string can match. Returns the first matching
     row, or `nothing` when no row matches.
     """
     function standardize_state_input(state_input::String)::Union{Vector{String}, Nothing}
         wanted = uppercase(strip(state_input))
         for row in get_state_list()
             for field in row
                 uppercase(field) == wanted && return row
             end
         end
         return nothing
     end
     46 
     """
         standardize_state_input(fips::Integer)

     Look up a state by numeric FIPS code.

     The integer is rendered as a string, left-padded with `'0'` to two characters,
     and compared with the second field of each row from `get_state_list()`. Returns
     the first matching row, or `nothing` when no row matches.
     """
     function standardize_state_input(fips::Integer)::Union{Vector{String}, Nothing}
         code = lpad(string(fips), 2, '0')
         for row in get_state_list()
             row[2] == code && return row
         end
         return nothing
     end
     54 
     # No state supplied: there is nothing to look up, so `nothing` is passed through.
     function standardize_state_input(::Nothing)
         return nothing
     end
     57 
     58 
     59 # -------------------------------------------------------------------------------------------------
     60 
     """
         get_county_list(state=nothing)

     Return county rows from the cached county table, optionally limited to one state.

     # Arguments
     - `state`: with `nothing` (the default) the whole cached table is returned. An
       `Integer`, or a string that parses as an integer, is treated as a state FIPS
       code and left-padded with `'0'` to two characters before being compared with
       the second field of each row. Any other string is compared case-insensitively
       with the first field of each row (the state abbreviation). Whitespace around a
       string argument is ignored.

     # Returns
     A `Vector{Vector{AbstractString}}`. Filtered results are de-duplicated with
     `unique`; the unfiltered table is returned exactly as cached. An unknown state
     yields an empty vector.
     """
     function get_county_list(state=nothing)::Vector{Vector{AbstractString}}
         _ensure_cache()
         county_list = _COUNTY_LIST_CACHE[]
         isnothing(state) && return county_list

         # Integers take the same path as numeric strings; `tryparse(Int, ::Integer)`
         # has no method, so the value is rendered as text first.
         key = state isa Integer ? string(state) : String(strip(state))

         if !isnothing(tryparse(Int, key))  # numeric: match on the state FIPS code
             fips = lpad(key, 2, '0')
             return unique(filter(row -> row[2] == fips, county_list))
         else  # otherwise: match on the state abbreviation
             abbr = uppercase(key)
             return unique(filter(row -> uppercase(row[1]) == abbr, county_list))
         end
     end
     74 
     75 
     76 
     """
         standardize_county_input(county_input, state_fips)

     Normalise a county identifier and resolve it with `find_county`.

     # Arguments
     - `county_input`: an `Integer` is rendered as a string left-padded with `'0'` to
       three characters; a `String` is trimmed and upper-cased.
     - `state_fips`: forwarded unchanged to `find_county`.

     # Returns
     Whatever `find_county` returns: the matching county row, or `nothing`.
     """
     function standardize_county_input(
         county_input::Union{String, Integer},
         state_fips::String)::Union{Vector{String}, Nothing}

         identifier = if county_input isa Integer
             lpad(string(county_input), 3, '0')
         else
             uppercase(strip(county_input))
         end
         return find_county(identifier, state_fips)
     end
     92 
     93 
     # Trailing words that designate a county-equivalent; stripped before comparing names.
     const _COUNTY_SUFFIXES = [
         "COUNTY", "MUNICIPIO", "BOROUGH", "PARISH", "MUNICIPALITY", "CENSUS AREA",
     ]

     # Built once at load time instead of being recompiled for every comparison.
     const _COUNTY_SUFFIX_REGEX = Regex("\\s+(" * join(_COUNTY_SUFFIXES, "|") * ")\$")

     # Trim and upper-case `name`, then remove one trailing suffix from `_COUNTY_SUFFIXES`.
     _clean_county_name(name::AbstractString) =
         replace(uppercase(strip(name)), _COUNTY_SUFFIX_REGEX => "")

     """
         find_county(identifier::String, state_fips::String)

     Find a county row within one state.

     The rows come from `get_county_list(state_fips)`. `identifier` and the third and
     fourth elements of each row (county FIPS and county name) are trimmed,
     upper-cased and stripped of a trailing county-type suffix before being compared;
     the remaining elements are ignored to avoid false positives.

     # Returns
     The first matching row, or `nothing` when no row matches.
     """
     function find_county(identifier::String, state_fips::String)::Union{Vector{String}, Nothing}
         counties = get_county_list(state_fips)
         target = _clean_county_name(identifier)

         # A tuple avoids allocating a temporary vector for every row.
         matched_county = findfirst(
             county -> any(_clean_county_name(id) == target for id in (county[3], county[4])),
             counties)

         return isnothing(matched_county) ? nothing : counties[matched_county]
     end
    110 
    111 
    112 
    113 
    114 
    115 
    116 
    117 
    118