TigerFetch.jl

Download TIGER/Line shapefiles from the US Census Bureau
Log | Files | Refs | README | LICENSE

download.jl (8541B)


      1 # ABOUTME: Download logic for Census TIGER/Line shapefiles
      2 # ABOUTME: Dispatches on geography scope (national, state, county) to construct URLs and manage downloads
      3 
      4 # --------------------------------------------------------------------------------------------------
      5 macro conditional_log(verbose, level, message, params...)
      6     return quote
      7         if $(esc(verbose))
      8             @info $(esc(message)) $(map(esc, params)...)
      9         else
     10             @debug $(esc(message)) $(map(esc, params)...)
     11         end
     12     end
     13 end
     14 # --------------------------------------------------------------------------------------------------
     15 
     16 
     17 # --------------------------------------------------------------------------------------------------
     18 const MAX_RETRIES = 3
     19 const RETRY_BASE_DELAY = 2  # seconds
     20 
     21 """
     22     download_with_retry(url, output_path; max_retries=MAX_RETRIES)
     23 
     24 Download a file with exponential backoff retry on transient failures.
     25 Returns the output path on success, rethrows on persistent failure.
     26 """
     27 function download_with_retry(url::String, output_path::String; max_retries::Int=MAX_RETRIES)
     28     for attempt in 1:max_retries
     29         try
     30             Downloads.download(url, output_path)
     31             return output_path
     32         catch e
     33             e isa InterruptException && rethrow(e)
     34             if attempt == max_retries
     35                 rethrow(e)
     36             end
     37             delay = RETRY_BASE_DELAY * 2^(attempt - 1)
     38             @warn "Download attempt $attempt/$max_retries failed, retrying in $(delay)s" url=url
     39             sleep(delay)
     40             # Clean up partial download before retry
     41             isfile(output_path) && rm(output_path; force=true)
     42         end
     43     end
     44 end
     45 # --------------------------------------------------------------------------------------------------
     46 
     47 
     48 # --------------------------------------------------------------------------------------------------
     49 # National scope (States, Counties nationally)
     50 function download_shapefile(
     51     geo::T;
     52     output_dir::String=pwd(),
     53     force::Bool=false,
     54     verbose::Bool=false) where {T <: NationalGeography}
     55 
     56     geo_type = typeof(geo)
     57     filename = "tl_$(geo.year)_us_$(lowercase(tiger_name(geo_type))).zip"
     58 
     59     url = "https://www2.census.gov/geo/tiger/TIGER$(geo.year)/$(tiger_name(geo_type))/" * filename
     60     output_path = joinpath(output_dir, filename)
     61 
     62     if isfile(output_path) && !force
     63         @conditional_log verbose "File exists" path=output_path
     64         return output_path
     65     end
     66 
     67     try
     68         @conditional_log verbose "Downloading $(description(geo_type))" url=url
     69         mkpath(output_dir)
     70         download_with_retry(url, output_path)
     71         return output_path
     72     catch e
     73         @error "Download failed" exception=e
     74         rethrow(e)
     75     end
     76 end
     77 # --------------------------------------------------------------------------------------------------
     78 #
     79 #
     80 # --------------------------------------------------------------------------------------------------
     81 # State scope (CountySubdivisions, Places)
     82 function download_shapefile(
     83     geo::T;
     84     state::Union{String, Integer, Nothing}=nothing,
     85     output_dir::String=pwd(),
     86     force::Bool=false,
     87     verbose::Bool=false) where T<:StateGeography
     88 
     89     # Get states to process
     90     if !isnothing(state)
     91         state_info = standardize_state_input(state)
     92         if isnothing(state_info)
     93             throw(ArgumentError("Invalid state identifier provided"))
     94         end
     95         states_to_process = [state_info]
     96     else
     97         @warn "No state specified - downloading all states"
     98         states_to_process = get_state_list()
     99         
    100         # There are some exceptions because not everything is available all the time!
    101         (geo isa CountySubdivision) ? filter!(s -> s[2] != "74", states_to_process) : nothing
    102 
    103     end
    104 
    105     # Use the type of geo to get tiger_name
    106     geo_type = typeof(geo)
    107     base_url = "https://www2.census.gov/geo/tiger/TIGER$(geo.year)/$(tiger_name(geo_type))/"
    108 
    109     n_states = length(states_to_process)
    110 
    111     try
    112         # Process each state with total interrupt by user ...
    113         for (i, state_info) in enumerate(states_to_process)
    114             fips = state_info[2]
    115             state_name = state_info[3]
    116             n_states > 1 && @info "[$i/$n_states] $(state_name)"
    117             filename = "tl_$(geo.year)_$(fips)_$(lowercase(tiger_name(T))).zip"
    118             url = base_url * filename
    119             output_path = joinpath(output_dir, filename)
    120 
    121             if isfile(output_path) && !force
    122                 @conditional_log verbose "File exists" state=state_name path=output_path
    123                 continue
    124             end
    125 
    126             try
    127                 @conditional_log verbose "Downloading" state=state_name url=url
    128                 download_with_retry(url, output_path)
    129             catch e
    130                 if e isa InterruptException
    131                     # Re-throw interrupt to be caught by outer try block
    132                     rethrow(e)
    133                 end
    134                 @error "Download failed" state=state_name exception=e
    135                 continue
    136             end
    137         end
    138     catch e
    139         if e isa InterruptException
    140             @warn "Download process interrupted by user"
    141             # Optional: Clean up partially downloaded file
    142             try
    143                 isfile(output_path) && rm(output_path)
    144             catch
    145                 # Ignore cleanup errors
    146             end
    147             rethrow(e)  # This will exit the function
    148         end
    149         rethrow(e)  # Re-throw any other unexpected errors
    150     end
    151 
    152 end
    153 # --------------------------------------------------------------------------------------------------
    154 
    155 
    156 # --------------------------------------------------------------------------------------------------
    157 # County scope (Tracts, WaterAreas)
    158 function download_shapefile(
    159     geo::T;
    160     state::Union{String, Integer, Nothing}=nothing,
    161     county::Union{String, Integer, Nothing}=nothing,
    162     output_dir::String=pwd(),
    163     force::Bool=false,
    164     verbose::Bool=false) where {T <: CountyGeography}
    165 
    166 
    167     # Get states to process
    168     if !isnothing(state)
    169         state_info = standardize_state_input(state)
    170         if isnothing(state_info)
    171             throw(ArgumentError("Invalid state identifier: $state"))
    172         end
    173         states_to_process = [state_info]
    174     else
    175         @warn "No state specified - downloading all states"
    176         states_to_process = get_state_list()
    177     end
    178 
    179     # Track failures
    180     failed_downloads = String[]
    181 
    182     n_states = length(states_to_process)
    183 
    184     for (si, state_info) in enumerate(states_to_process)
    185         state_fips = state_info[2]
    186         state_name = state_info[3]
    187 
    188         # Get counties for this state
    189         counties = get_county_list(state_fips)
    190 
    191         # Filter for specific county if provided
    192         if !isnothing(county)
    193             county_info = standardize_county_input(county, state_fips)
    194             if isnothing(county_info)
    195                 throw(ArgumentError("Invalid county identifier for $(state_name)"))
    196             end
    197             counties = [county_info]
    198         end
    199 
    200         n_counties = length(counties)
    201         state_label = n_states > 1 ? "[state $si/$n_states] " : ""
    202 
    203         for (ci, county_info) in enumerate(counties)
    204             county_fips = county_info[3]  # Assuming similar structure to state_info
    205             county_name = county_info[4]
    206             n_counties > 1 && @info "$(state_label)$(state_name): [$ci/$n_counties] $(county_name)"
    207 
    208             filename = "tl_$(geo.year)_$(state_fips)$(county_fips)_$(lowercase(tiger_name(geo))).zip"
    209             url = "https://www2.census.gov/geo/tiger/TIGER$(geo.year)/$(tiger_name(geo))/" * filename
    210             output_path = joinpath(output_dir, filename)
    211 
    212             if isfile(output_path) && !force
    213                 @conditional_log verbose "File exists" state=state_name county=county_name path=output_path
    214                 continue
    215             end
    216 
    217             try
    218                 @conditional_log verbose "Downloading" state=state_name county=county_name url=url
    219                 mkpath(output_dir)
    220                 download_with_retry(url, output_path)
    221             catch e
    222                 push!(failed_downloads, "$(state_name) - $(county_name)")
    223                 @error "Download failed" state=state_name county=county_name exception=e
    224                 continue
    225             end
    226         end
    227     end
    228 
    229     if !isempty(failed_downloads)
    230         @warn "Some downloads failed" failed_locations=failed_downloads
    231     end
    232 end
    233 # --------------------------------------------------------------------------------------------------
    234 
    235