download.jl (8541B)
# ABOUTME: Download logic for Census TIGER/Line shapefiles
# ABOUTME: Dispatches on geography scope (national, state, county) to construct URLs and manage downloads

# --------------------------------------------------------------------------------------------------
"""
    @conditional_log verbose message [key=value ...]
    @conditional_log verbose level message [key=value ...]

Emit `message` (plus any `key=value` pairs) with `@info` when `verbose` is true and with
`@debug` otherwise.

The second, legacy form carries an extra `level` argument which is accepted and ignored.
It is recognised when the argument following the first one is not a `key=value` pair, so
attach extra data as `key=value` pairs rather than as bare positional values.

The generated log call is stamped with the caller's source location.
"""
macro conditional_log(verbose, args...)
    isempty(args) && throw(ArgumentError("@conditional_log requires a message"))

    is_kwarg(ex) = ex isa Expr && (ex.head === :(=) || ex.head === Symbol("..."))

    # Call sites pass `verbose "message" k=v ...`; the legacy shape is
    # `verbose level "message" k=v ...`. Tell them apart by whether the second
    # argument is already a key=value pair.
    if length(args) >= 2 && !is_kwarg(args[2])
        message, params = args[2], args[3:end]
    else
        message, params = args[1], args[2:end]
    end

    # Build the logging macro calls by hand so they carry the caller's line info and
    # receive the raw `k=v` expressions (the logging macros do their own escaping).
    info_call = Expr(:macrocall, Symbol("@info"), __source__, message, params...)
    debug_call = Expr(:macrocall, Symbol("@debug"), __source__, message, params...)
    return esc(Expr(:if, verbose, info_call, debug_call))
end
# --------------------------------------------------------------------------------------------------


# --------------------------------------------------------------------------------------------------
const MAX_RETRIES = 3
const RETRY_BASE_DELAY = 2  # seconds

"""
    download_with_retry(url, output_path; max_retries=MAX_RETRIES)

Download `url` to `output_path`, retrying with exponential backoff
(`RETRY_BASE_DELAY * 2^(attempt - 1)` seconds) when an attempt fails.

Returns `output_path` on success. After the last failed attempt the original exception is
rethrown. `InterruptException` is never retried. Any partial file at `output_path` is
removed after a failed attempt.

# Throws
- `ArgumentError` if `max_retries < 1`.
- Whatever `Downloads.download` throws once the attempts are exhausted.
"""
function download_with_retry(url::String, output_path::String; max_retries::Int=MAX_RETRIES)
    max_retries >= 1 ||
        throw(ArgumentError("max_retries must be at least 1, got $max_retries"))

    for attempt in 1:max_retries
        try
            Downloads.download(url, output_path)
            return output_path
        catch e
            # Callers treat any existing file at `output_path` as a finished download,
            # so never leave a partial one behind - including on the final failure.
            isfile(output_path) && rm(output_path; force=true)
            (e isa InterruptException || attempt == max_retries) && rethrow()

            delay = RETRY_BASE_DELAY * 2^(attempt - 1)
            @warn "Download attempt $attempt/$max_retries failed, retrying in $(delay)s" url=url
            sleep(delay)
        end
    end
end
# --------------------------------------------------------------------------------------------------


# --------------------------------------------------------------------------------------------------
# National scope (States, Counties nationally)
"""
    download_shapefile(geo::NationalGeography; output_dir=pwd(), force=false, verbose=false)

Download the single nationwide archive for `geo` into `output_dir` and return its path.

An existing file is reused unless `force` is true. `verbose` switches progress messages
from `@debug` to `@info`. Download errors are logged and rethrown.
"""
function download_shapefile(
    geo::T;
    output_dir::String=pwd(),
    force::Bool=false,
    verbose::Bool=false) where {T <: NationalGeography}

    geo_type = typeof(geo)
    filename = "tl_$(geo.year)_us_$(lowercase(tiger_name(geo_type))).zip"

    url = "https://www2.census.gov/geo/tiger/TIGER$(geo.year)/$(tiger_name(geo_type))/" * filename
    output_path = joinpath(output_dir, filename)

    if isfile(output_path) && !force
        @conditional_log verbose "File exists" path=output_path
        return output_path
    end

    try
        @conditional_log verbose "Downloading $(description(geo_type))" url=url
        mkpath(output_dir)
        download_with_retry(url, output_path)
        return output_path
    catch e
        @error "Download failed" exception=e
        rethrow()
    end
end
# --------------------------------------------------------------------------------------------------


# --------------------------------------------------------------------------------------------------
# State scope (CountySubdivisions, Places)
"""
    download_shapefile(geo::StateGeography; state=nothing, output_dir=pwd(),
                       force=false, verbose=false)

Download one archive per state for `geo` into `output_dir`. Returns `nothing`.

With `state` given, only that state is fetched; otherwise every entry of
`get_state_list()` is processed (a warning is logged). Existing files are skipped unless
`force` is true. A failed state is logged and skipped; a user interrupt aborts the whole
run, removing the file that was being written.

# Throws
- `ArgumentError` if `state` cannot be resolved by `standardize_state_input`.
- `InterruptException` if the user interrupts the run.
"""
function download_shapefile(
    geo::T;
    state::Union{String, Integer, Nothing}=nothing,
    output_dir::String=pwd(),
    force::Bool=false,
    verbose::Bool=false) where T<:StateGeography

    # Get states to process
    if !isnothing(state)
        state_info = standardize_state_input(state)
        if isnothing(state_info)
            throw(ArgumentError("Invalid state identifier provided"))
        end
        states_to_process = [state_info]
    else
        @warn "No state specified - downloading all states"
        states_to_process = get_state_list()

        # There are some exceptions because not everything is available all the time!
        # Non-mutating filter: the list from get_state_list() may be shared with other
        # callers, so it must not be modified in place.
        if geo isa CountySubdivision
            states_to_process = filter(s -> s[2] != "74", states_to_process)
        end
    end

    # Use the type of geo to get tiger_name
    geo_type = typeof(geo)
    base_url = "https://www2.census.gov/geo/tiger/TIGER$(geo.year)/$(tiger_name(geo_type))/"

    n_states = length(states_to_process)

    # Path of the file currently being written, if any. Kept in function scope so the
    # interrupt handler below can see it (loop-local variables are not visible there).
    in_flight = nothing

    try
        # Process each state with total interrupt by user ...
        for (i, state_info) in enumerate(states_to_process)
            fips = state_info[2]
            state_name = state_info[3]
            n_states > 1 && @info "[$i/$n_states] $(state_name)"
            filename = "tl_$(geo.year)_$(fips)_$(lowercase(tiger_name(T))).zip"
            url = base_url * filename
            output_path = joinpath(output_dir, filename)

            if isfile(output_path) && !force
                @conditional_log verbose "File exists" state=state_name path=output_path
                continue
            end

            try
                @conditional_log verbose "Downloading" state=state_name url=url
                mkpath(output_dir)
                in_flight = output_path
                download_with_retry(url, output_path)
                in_flight = nothing
            catch e
                # Interrupts are handled by the outer try block
                e isa InterruptException && rethrow()
                in_flight = nothing
                @error "Download failed" state=state_name exception=e
                continue
            end
        end
    catch e
        if e isa InterruptException
            @warn "Download process interrupted by user"
            # Best-effort removal of the partially downloaded file
            if !isnothing(in_flight)
                try
                    isfile(in_flight) && rm(in_flight; force=true)
                catch cleanup_error
                    @debug "Could not remove partial download" path=in_flight exception=cleanup_error
                end
            end
        end
        rethrow()  # Interrupts and any other unexpected errors both propagate
    end

    return nothing
end
# --------------------------------------------------------------------------------------------------


# --------------------------------------------------------------------------------------------------
# County scope (Tracts, WaterAreas)
"""
    download_shapefile(geo::CountyGeography; state=nothing, county=nothing,
                       output_dir=pwd(), force=false, verbose=false)

Download one archive per county for `geo` into `output_dir`. Returns `nothing`.

With `state` given, only that state is processed; otherwise every entry of
`get_state_list()` is (a warning is logged). With `county` given, only that county is
fetched in each processed state; otherwise all of `get_county_list(state_fips)`.
Existing files are skipped unless `force` is true. Failed counties are logged, skipped,
and summarised in a final warning; a user interrupt aborts the run.

# Throws
- `ArgumentError` if `state` or `county` cannot be resolved.
- `InterruptException` if the user interrupts the run.
"""
function download_shapefile(
    geo::T;
    state::Union{String, Integer, Nothing}=nothing,
    county::Union{String, Integer, Nothing}=nothing,
    output_dir::String=pwd(),
    force::Bool=false,
    verbose::Bool=false) where {T <: CountyGeography}

    # Get states to process
    if !isnothing(state)
        state_info = standardize_state_input(state)
        if isnothing(state_info)
            throw(ArgumentError("Invalid state identifier: $state"))
        end
        states_to_process = [state_info]
    else
        @warn "No state specified - downloading all states"
        states_to_process = get_state_list()
    end

    # Track failures
    failed_downloads = String[]

    n_states = length(states_to_process)

    for (si, state_info) in enumerate(states_to_process)
        state_fips = state_info[2]
        state_name = state_info[3]

        # Either the single requested county or every county of this state.
        # NOTE(review): with `county` set and `state` unset, this throws for the first
        # state in which the county does not resolve - confirm that is intended.
        if isnothing(county)
            counties = get_county_list(state_fips)
        else
            county_info = standardize_county_input(county, state_fips)
            if isnothing(county_info)
                throw(ArgumentError("Invalid county identifier for $(state_name)"))
            end
            counties = [county_info]
        end

        n_counties = length(counties)
        state_label = n_states > 1 ? "[state $si/$n_states] " : ""

        for (ci, county_info) in enumerate(counties)
            county_fips = county_info[3]  # Assuming similar structure to state_info
            county_name = county_info[4]
            n_counties > 1 && @info "$(state_label)$(state_name): [$ci/$n_counties] $(county_name)"

            # NOTE(review): tiger_name is called on the instance here but on the type in
            # the national/state methods - confirm both methods exist.
            filename = "tl_$(geo.year)_$(state_fips)$(county_fips)_$(lowercase(tiger_name(geo))).zip"
            url = "https://www2.census.gov/geo/tiger/TIGER$(geo.year)/$(tiger_name(geo))/" * filename
            output_path = joinpath(output_dir, filename)

            if isfile(output_path) && !force
                @conditional_log verbose "File exists" state=state_name county=county_name path=output_path
                continue
            end

            try
                @conditional_log verbose "Downloading" state=state_name county=county_name url=url
                mkpath(output_dir)
                download_with_retry(url, output_path)
            catch e
                # A user interrupt must stop the whole run, not count as one failed county
                e isa InterruptException && rethrow()
                push!(failed_downloads, "$(state_name) - $(county_name)")
                @error "Download failed" state=state_name county=county_name exception=e
                continue
            end
        end
    end

    if !isempty(failed_downloads)
        @warn "Some downloads failed" failed_locations=failed_downloads
    end

    return nothing
end
# --------------------------------------------------------------------------------------------------