commit 47116de2b63fc72e696d3462e732991c4adfdd30
parent c5a5c8172b0f1905f7754468bbe2e229e08da6e9
Author: Erik Loualiche <eloualic@umn.edu>
Date: Sun, 23 Feb 2025 20:39:56 -0600
Filling slowly all available shapes.
Diffstat:
7 files changed, 269 insertions(+), 38 deletions(-)
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
@@ -3,9 +3,8 @@ on:
push:
branches:
- main
- tags: ['*']
+ tags: ["*"]
pull_request:
- workflow_dispatch:
concurrency:
# Skip intermediate builds: always.
# Cancel intermediate builds: only if it is a pull request build.
@@ -13,29 +12,33 @@ concurrency:
cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
jobs:
test:
- name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }}
+ name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }}
runs-on: ${{ matrix.os }}
- timeout-minutes: 60
- permissions: # needed to allow julia-actions/cache to proactively delete old caches that it has created
- actions: write
- contents: read
+ env:
+ WRDS_USERNAME: ${{ secrets.WRDS_USERNAME }}
+ WRDS_PWD: ${{ secrets.WRDS_PWD }}
strategy:
fail-fast: false
matrix:
version:
- - '1.11'
- - '1.6'
- - 'pre'
+ - "1"
+ - "1.11"
os:
- ubuntu-latest
arch:
- x64
steps:
- - uses: actions/checkout@v4
- - uses: julia-actions/setup-julia@v2
+ - uses: actions/checkout@v2
+ - uses: julia-actions/setup-julia@v1
with:
version: ${{ matrix.version }}
arch: ${{ matrix.arch }}
- - uses: julia-actions/cache@v2
+ - uses: julia-actions/cache@v1
- uses: julia-actions/julia-buildpkg@v1
- uses: julia-actions/julia-runtest@v1
+ - uses: julia-actions/julia-processcoverage@v1
+ - uses: codecov/codecov-action@v5
+ with:
+ token: ${{ secrets.CODECOV_TOKEN }} # required
+ fail_ci_if_error: false
+ file: lcov.info
diff --git a/README.md b/README.md
@@ -6,8 +6,8 @@
Install the command line tool (you need a julia installation for this)
```bash
mkdir -p ~/.local/share/julia # or some other directory
-git clone git@github.com:eloualiche/TigerFetch.jl.git ~/.local/share/julia
-julia --project deps/build.jl install
+git clone git@github.com:eloualiche/TigerFetch.jl.git ~/.local/share/julia
+cd ~/.local/share/julia && julia --project deps/build.jl install
```
The binary will be available at `~/.julia/bin/tigerfetch`, but it also depends on the downloaded packages.
@@ -42,3 +42,7 @@ You can use it
~/.julia/bin/tigerfetch areawater --state "Minnesota" --county "Hennepin" --output tmp # works
```
+
+#### Julia package
+
+For now, look at the test suite (specifically `test/UnitTests/downloads.jl`) for usage examples.
diff --git a/src/download.jl b/src/download.jl
@@ -49,33 +49,56 @@ function download_shapefile(
else
@warn "No state specified - downloading all states"
states_to_process = get_state_list()
+
+ # Not every state-level file exists for every geography: county subdivisions have no file for FIPS "74", so skip it.
+ (geo isa CountySubdivision) ? filter!(s -> s[2] != "74", states_to_process) : nothing
+
end
# Use the type of geo to get tiger_name
geo_type = typeof(geo)
base_url = "https://www2.census.gov/geo/tiger/TIGER$(geo.year)/$(tiger_name(geo_type))/"
- # Process each state
- for state_info in states_to_process
- fips = state_info[2]
- state_name = state_info[3]
- filename = "tl_$(geo.year)_$(fips)_$(lowercase(tiger_name(geo_type))).zip"
- url = base_url * filename
- output_path = joinpath(output_dir, filename)
+ try
+ # Process each state; a user interrupt aborts the entire loop instead of being logged as a failed download.
+ for state_info in states_to_process
+ fips = state_info[2]
+ state_name = state_info[3]
+ filename = "tl_$(geo.year)_$(fips)_$(lowercase(tiger_name(T))).zip"
+ url = base_url * filename
+ output_path = joinpath(output_dir, filename)
- if isfile(output_path) && !force
- @info "File exists" state=state_name path=output_path
- continue
- end
+ if isfile(output_path) && !force
+ @info "File exists" state=state_name path=output_path
+ continue
+ end
- try
- @info "Downloading" state=state_name url=url
- Downloads.download(url, output_path)
- catch e
- @error "Download failed" state=state_name exception=e
- continue
+ try
+ @info "Downloading" state=state_name url=url
+ Downloads.download(url, output_path)
+ catch e
+ if e isa InterruptException
+ # Re-throw interrupt to be caught by outer try block
+ rethrow(e)
+ end
+ @error "Download failed" state=state_name exception=e
+ continue
+ end
end
+ catch e
+ if e isa InterruptException
+ @info "Download process interrupted by user"
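+ # Best-effort cleanup of a partially downloaded file. NOTE(review): output_path is assigned inside the for loop above; confirm it is in scope here, otherwise this cleanup is silently skipped.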
+ try
+ isfile(output_path) && rm(output_path)
+ catch
+ # Ignore cleanup errors
+ end
+ rethrow(e) # This will exit the function
+ end
+ rethrow(e) # Re-throw any other unexpected errors
end
+
end
# --------------------------------------------------------------------------------------------------
diff --git a/src/geotypes.jl b/src/geotypes.jl
@@ -1,3 +1,5 @@
+
+# --------------------------------------------------------------------------------------------------
# Abstract base type
abstract type TigerGeography end
@@ -5,7 +7,10 @@ abstract type TigerGeography end
abstract type NationalGeography <: TigerGeography end
abstract type StateGeography <: TigerGeography end
abstract type CountyGeography <: TigerGeography end
+# --------------------------------------------------------------------------------------------------
+
+# --------------------------------------------------------------------------------------------------
# Concrete types with their metadata as constants
struct State <: NationalGeography
year::Int
@@ -17,39 +22,112 @@ struct County <: NationalGeography
end
const COUNTY_META = (tiger_name = "COUNTY", description = "County Boundaries")
+struct ZipCode <: NationalGeography
+ year::Int
+end
+const ZIP_META = (tiger_name = "ZCTA520", description = "2020 5-Digit ZIP Code Tabulation Area")
+
+struct UrbanArea <: NationalGeography
+ year::Int
+end
+const URBANAREA_META = (tiger_name = "UAC20", description = "2020 Urban Area/Urban Cluster")
+
+struct PrimaryRoads <: NationalGeography
+ year::Int
+end
+const PRIMARYROADS_META = (tiger_name = "PRIMARYROADS", description = "Primary Roads")
+
+struct CBSA <: NationalGeography
+ year::Int
+end
+const CBSA_META = (tiger_name = "CBSA", description = "Core Based Statistical Area")
+
+struct METDIV <: NationalGeography
+ year::Int
+end
+const METDIV_META = (tiger_name = "METDIV", description = "Metropolitan Division")
+# --------------------------------------------------------------------------------------------------
+
+
+# --------------------------------------------------------------------------------------------------
struct CountySubdivision <: StateGeography
year::Int
end
-const COUSUB_META = (tiger_name = "COUSUB", description = "County Subdivisions")
+const COUSUB_META = (tiger_name = "COUSUB", description = "County Subdivision")
+
+struct Tract <: StateGeography
+ year::Int
+end
+const TRACT_META = (tiger_name = "TRACT", description = "Census Tract")
-struct Tract <: CountyGeography
+struct PrimarySecondaryRoads <: StateGeography
year::Int
end
-const TRACT_META = (tiger_name = "TRACT", description = "Census Tracts")
+const PSROADS_META = (tiger_name = "PRISECROADS", description = "Primary and Secondary Roads")
+# --------------------------------------------------------------------------------------------------
+
+# --------------------------------------------------------------------------------------------------
+# --- county geographies
struct AreaWater <: CountyGeography
year::Int
end
-const AREAWATER_META = (tiger_name = "AREAWATER", description = "Area Water")
+const AREAWATER_META = (tiger_name = "AREAWATER", description = "Area Hydrography")
+
+struct LinearWater <: CountyGeography
+ year::Int
+end
+const LINEARWATER_META = (tiger_name = "LINEARWATER", description = "Linear Hydrography")
+struct Roads <: CountyGeography
+ year::Int
+end
+const ROADS_META = (tiger_name = "ROADS", description = "Roads")
+# --------------------------------------------------------------------------------------------------
+
+
+# --------------------------------------------------------------------------------------------------
# Helper methods to access metadata
tiger_name(::Type{State}) = STATE_META.tiger_name
tiger_name(::Type{County}) = COUNTY_META.tiger_name
+tiger_name(::Type{ZipCode}) = ZIP_META.tiger_name
+tiger_name(::Type{UrbanArea}) = URBANAREA_META.tiger_name
+tiger_name(::Type{PrimaryRoads}) = PRIMARYROADS_META.tiger_name
+tiger_name(::Type{CBSA}) = CBSA_META.tiger_name
+tiger_name(::Type{METDIV}) = METDIV_META.tiger_name
+
tiger_name(::Type{CountySubdivision}) = COUSUB_META.tiger_name
tiger_name(::Type{Tract}) = TRACT_META.tiger_name
+tiger_name(::Type{PrimarySecondaryRoads}) = PSROADS_META.tiger_name
+
tiger_name(::Type{AreaWater}) = AREAWATER_META.tiger_name
+tiger_name(::Type{LinearWater}) = LINEARWATER_META.tiger_name
+tiger_name(::Type{Roads}) = ROADS_META.tiger_name
tiger_name(x::T) where T <: TigerGeography = tiger_name(T)
+# -- description
description(::Type{State}) = STATE_META.description
description(::Type{County}) = COUNTY_META.description
+description(::Type{ZipCode}) = ZIP_META.description
+description(::Type{UrbanArea}) = URBANAREA_META.description
+description(::Type{PrimaryRoads}) = PRIMARYROADS_META.description
+description(::Type{CBSA}) = CBSA_META.description
+description(::Type{METDIV}) = METDIV_META.description
+
description(::Type{CountySubdivision}) = COUSUB_META.description
description(::Type{Tract}) = TRACT_META.description
+description(::Type{PrimarySecondaryRoads}) = PSROADS_META.description
+
description(::Type{AreaWater}) = AREAWATER_META.description
+description(::Type{LinearWater}) = LINEARWATER_META.description
+description(::Type{Roads}) = ROADS_META.description
description(x::T) where T <: TigerGeography = description(T)
+# --
# Helper methods now just reference the type hierarchy
scope(::Type{T}) where {T <: NationalGeography} = National
scope(::Type{T}) where {T <: StateGeography} = ByState
scope(::Type{T}) where {T <: CountyGeography} = ByCounty
+# --------------------------------------------------------------------------------------------------+
\ No newline at end of file
diff --git a/src/main.jl b/src/main.jl
@@ -3,9 +3,18 @@
const GEOGRAPHY_TYPES = Dict(
"state" => State,
"county" => County,
+ "zipcode" => ZipCode,
+ "urbanarea" => UrbanArea,
+ "primaryroads" => PrimaryRoads,
+
"cousub" => CountySubdivision,
"tract" => Tract,
+ "primarysecondaryroads" => PrimarySecondaryRoads,
+
"areawater" => AreaWater,
+ "linearwater" => LinearWater,
+ "road" => Roads,
+
)
# julia function
diff --git a/test/UnitTests/downloads.jl b/test/UnitTests/downloads.jl
@@ -0,0 +1,111 @@
+@testset "Download Tests" begin
+
+
+# --------------------------------------------------------------------------------------------------
+ @testset "National Level Downloads" begin
+
+ test_dir = mktempdir()
+
+ # Download the states shapefiles
+ tigerdownload("state", 2024; state="MN", county="", output=test_dir, force=true)
+ state_file_download = joinpath(test_dir, "tl_2024_us_state.zip")
+ # stat(state_file_download)
+ @test bytes2hex(SHA.sha256(read(state_file_download))) ==
+ "e30bad8922b177b5991bf8606d3d95de8f5f0b4bab25848648de53b25f72c17f"
+
+ tigerdownload("county", 2024; state="MN", county="Hennepin", output=test_dir, force=true)
+ county_file_download = joinpath(test_dir, "tl_2024_us_county.zip")
+ # stat(county_file_download)
+ @test bytes2hex(SHA.sha256(read(county_file_download))) ==
+ "a344b72be48f2448df1ae1757098d94571b96556d3b9253cf9d6ee77bce8a0b4"
+
+ # -- still to test zcta520, urban area, cbsa
+
+
+ end
+# --------------------------------------------------------------------------------------------------
+
+
+# --------------------------------------------------------------------------------------------------
+ @testset "State Level Downloads" begin
+
+ test_dir = mktempdir()
+
+ # Download the county subdivisions shapefiles
+ tigerdownload("cousub", 2024; state="MN", county="", output=test_dir, force=true)
+ cousub_file_download = joinpath(test_dir, "tl_2024_27_cousub.zip")
+ # stat(cousub_file_download)
+ @test bytes2hex(SHA.sha256(read(cousub_file_download))) ==
+ "b1cf4855fe102d9ebc34e165457986b8d906052868da0079ea650d39d973ec98"
+
+ # for all the states ...
+ tigerdownload("cousub", 2024; output=test_dir, force=false)
+ cousub_file_list = [ "tl_2024_$(x[2])_cousub.zip"
+ for x in TigerFetch.get_state_list() ]
+ cousub_file_list = joinpath.(test_dir, cousub_file_list)
+ @test !all(isfile.(cousub_file_list)) # there should be one missing file
+ @test all(.!isfile.(filter(contains("tl_2024_74_cousub.zip"), cousub_file_list))) # there should be one missing file
+
+ cousub_file_download = filter(contains("tl_2024_28_cousub.zip"), cousub_file_list)[1]
+ round(stat(cousub_file_download).size / 1024, digits=2)
+ @test bytes2hex(SHA.sha256(read(cousub_file_download))) ==
+ "f91963513bf14f64267fefc5ffda24161e879bfb76a48c19517eba0f85c638ba"
+
+ # -- tracts
+ tigerdownload("tract", 2024; state="27", county="", output=test_dir, force=true)
+ tract_file_download = joinpath(test_dir, "tl_2024_27_tract.zip")
+ round(stat(tract_file_download).size / 1024, digits=2)
+ @test bytes2hex(SHA.sha256(read(tract_file_download))) ==
+ "83f784b2042d0af55723baaac37b2b29840d1485ac233b3bb73d6af4ec7246eb"
+
+ # -- roads
+ tigerdownload("primarysecondaryroads", 2024; state="27", county="", output=test_dir, force=true)
+ road_file_download = joinpath(test_dir, "tl_2024_27_prisecroads.zip")
+ round(stat(road_file_download).size / 1024, digits=2)
+ @test bytes2hex(SHA.sha256(read(road_file_download))) ==
+ "3c06a9b03ca06abf42db85b3b9ab3110d251d54ccf3d59335a2e5b98d2e6f52a"
+
+
+
+ end
+# --------------------------------------------------------------------------------------------------
+
+
+# --------------------------------------------------------------------------------------------------
+ @testset "County Level Downloads" begin
+
+ test_dir = mktempdir()
+
+ # Download the areawater shapefiles
+ tigerdownload("areawater", 2024; state="MN", county="Hennepin", output=test_dir, force=true)
+ areawater_file_download = joinpath(test_dir, "tl_2024_27053_areawater.zip")
+ # stat(cousub_file_download)
+ @test bytes2hex(SHA.sha256(read(areawater_file_download))) ==
+ "54a2825f26405fbb83bd4c5c7a96190867437bc46dc0d4a8155198890d63db54"
+
+ # Download the linear water shapefiles for all of Michigan
+ tigerdownload("linearwater", 2024; state="MI", output=test_dir, force=true)
+ linearwater_file_list = [ "tl_2024_$(x[2])$(x[3])_linearwater.zip"
+ for x in TigerFetch.get_county_list("MI") ]
+ linearwater_file_list = joinpath.(test_dir, linearwater_file_list)
+ @test all(isfile.(linearwater_file_list)) # test that all the files are there
+
+ linearwater_file_download = filter(contains("tl_2024_26089_linearwater.zip"), linearwater_file_list)[1]
+ round(stat(linearwater_file_download).size / 1024, digits=2)
+ @test bytes2hex(SHA.sha256(read(linearwater_file_download))) ==
+ "b05a58ddb37abdc9287c533a6f87110ef4b153dc4fbd20833d3d1cf56470cba7"
+
+ # roads
+ tigerdownload("road", 2024; state="MN", county="Hennepin", output=test_dir, force=true)
+ roads_file_download = joinpath(test_dir, "tl_2024_27053_roads.zip")
+ round(stat(roads_file_download).size / 1024, digits=2)
+ @test bytes2hex(SHA.sha256(read(roads_file_download))) ==
+ "b828ad38a8bc3cd3299efcc7e3b333ec2954229392eb254a460e596c1db78511"
+
+
+
+ end
+# --------------------------------------------------------------------------------------------------
+
+
+end+
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -1,14 +1,15 @@
# --------------------------------------------------------------------------------------------------
using TigerFetch
using Test
-using Pkg.Artifacts
+using Pkg.Artifacts
using SHA
-# using LazyArtifacts
+
const testsuite = [
"assets",
+ "downloads",
]
# --------------------------------------------------------------------------------------------------