commit a2b36d1f63d137ed45e6baaff0023c432a8aad56
parent 6b65bccfd07d30b22a6c8ca105cfb5445021f4e5
Author: Erik Loualiche <eloualic@umn.edu>
Date: Fri, 20 Feb 2026 09:42:56 -0600
Replace DuckDB CGo export with pure Go pgx + parquet-go pipeline
Drop the go-duckdb CGo dependency that embedded the entire DuckDB C++
library (55MB binary). Export now streams rows directly from Postgres
via pgx and writes Parquet (parquet-go with ZSTD) or CSV (encoding/csv).
This eliminates CGo, halves the binary size (~28MB, 19MB stripped), and
enables CGO_ENABLED=0 cross-compilation.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat:
7 files changed, 379 insertions(+), 147 deletions(-)
diff --git a/cmd/download.go b/cmd/download.go
@@ -83,7 +83,7 @@ func buildQuery() (string, error) {
if dlColumns != "" && dlColumns != "*" {
sel = dlColumns
}
- q := fmt.Sprintf("SELECT %s FROM wrds.%s.%s", sel, dlSchema, dlTable)
+ q := fmt.Sprintf("SELECT %s FROM %s.%s", sel, dlSchema, dlTable)
if dlWhere != "" {
q += " WHERE " + dlWhere
diff --git a/go.mod b/go.mod
@@ -3,13 +3,19 @@ module github.com/eloualiche/wrds-download
go 1.25.0
require (
- github.com/apache/arrow-go/v18 v18.1.0 // indirect
+ github.com/charmbracelet/bubbles v1.0.0
+ github.com/charmbracelet/bubbletea v1.3.10
+ github.com/charmbracelet/lipgloss v1.1.0
+ github.com/jackc/pgx/v5 v5.8.0
+ github.com/parquet-go/parquet-go v0.27.0
+ github.com/spf13/cobra v1.10.2
+)
+
+require (
+ github.com/andybalholm/brotli v1.1.1 // indirect
github.com/atotto/clipboard v0.1.4 // indirect
github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
- github.com/charmbracelet/bubbles v1.0.0 // indirect
- github.com/charmbracelet/bubbletea v1.3.10 // indirect
github.com/charmbracelet/colorprofile v0.4.1 // indirect
- github.com/charmbracelet/lipgloss v1.1.0 // indirect
github.com/charmbracelet/x/ansi v0.11.6 // indirect
github.com/charmbracelet/x/cellbuf v0.0.15 // indirect
github.com/charmbracelet/x/term v0.2.2 // indirect
@@ -17,37 +23,31 @@ require (
github.com/clipperhouse/stringish v0.1.1 // indirect
github.com/clipperhouse/uax29/v2 v2.5.0 // indirect
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect
- github.com/go-viper/mapstructure/v2 v2.2.1 // indirect
- github.com/goccy/go-json v0.10.5 // indirect
- github.com/google/flatbuffers v25.1.24+incompatible // indirect
+ github.com/google/go-cmp v0.6.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/jackc/pgpassfile v1.0.0 // indirect
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
- github.com/jackc/pgx/v5 v5.8.0 // indirect
github.com/jackc/puddle/v2 v2.2.2 // indirect
github.com/klauspost/compress v1.17.11 // indirect
- github.com/klauspost/cpuid/v2 v2.2.9 // indirect
github.com/lucasb-eyer/go-colorful v1.3.0 // indirect
- github.com/marcboeker/go-duckdb v1.8.5 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mattn/go-localereader v0.0.1 // indirect
github.com/mattn/go-runewidth v0.0.19 // indirect
github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect
github.com/muesli/cancelreader v0.2.2 // indirect
github.com/muesli/termenv v0.16.0 // indirect
+ github.com/parquet-go/bitpack v1.0.0 // indirect
+ github.com/parquet-go/jsonlite v1.0.0 // indirect
github.com/pierrec/lz4/v4 v4.1.22 // indirect
github.com/rivo/uniseg v0.4.7 // indirect
github.com/sahilm/fuzzy v0.1.1 // indirect
- github.com/spf13/cobra v1.10.2 // indirect
github.com/spf13/pflag v1.0.9 // indirect
+ github.com/twpayne/go-geom v1.6.1 // indirect
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
- github.com/zeebo/xxh3 v1.0.2 // indirect
golang.org/x/exp v0.0.0-20250128182459-e0ece0dbea4c // indirect
- golang.org/x/mod v0.27.0 // indirect
golang.org/x/sync v0.17.0 // indirect
golang.org/x/sys v0.38.0 // indirect
golang.org/x/text v0.29.0 // indirect
- golang.org/x/tools v0.36.0 // indirect
- golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect
+ google.golang.org/protobuf v1.36.1 // indirect
)
diff --git a/go.sum b/go.sum
@@ -1,9 +1,17 @@
-github.com/apache/arrow-go/v18 v18.1.0 h1:agLwJUiVuwXZdwPYVrlITfx7bndULJ/dggbnLFgDp/Y=
-github.com/apache/arrow-go/v18 v18.1.0/go.mod h1:tigU/sIgKNXaesf5d7Y95jBBKS5KsxTqYBKXFsvKzo0=
+github.com/DATA-DOG/go-sqlmock v1.5.2 h1:OcvFkGmslmlZibjAjaHm3L//6LiuBgolP7OputlJIzU=
+github.com/DATA-DOG/go-sqlmock v1.5.2/go.mod h1:88MAG/4G7SMwSE3CeA0ZKzrT5CiOU3OJ+JlNzwDqpNU=
+github.com/alecthomas/assert/v2 v2.10.0 h1:jjRCHsj6hBJhkmhznrCzoNpbA3zqy0fYiUcYZP/GkPY=
+github.com/alecthomas/assert/v2 v2.10.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k=
+github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc=
+github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4=
+github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA=
+github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA=
github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4=
github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI=
github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8=
+github.com/aymanbagabas/go-udiff v0.3.1 h1:LV+qyBQ2pqe0u42ZsUEtPiCaUoqgA9gYRDs3vj1nolY=
+github.com/aymanbagabas/go-udiff v0.3.1/go.mod h1:G0fsKmG+P6ylD0r6N/KgQD/nWzgfnl8ZBcNLgcbrw8E=
github.com/charmbracelet/bubbles v1.0.0 h1:12J8/ak/uCZEMQ6KU7pcfwceyjLlWsDLAxB5fXonfvc=
github.com/charmbracelet/bubbles v1.0.0/go.mod h1:9d/Zd5GdnauMI5ivUIVisuEm3ave1XwXtD1ckyV6r3E=
github.com/charmbracelet/bubbletea v1.3.10 h1:otUDHWMMzQSB0Pkc87rm691KZ3SWa4KUlvF9nRvCICw=
@@ -16,6 +24,8 @@ github.com/charmbracelet/x/ansi v0.11.6 h1:GhV21SiDz/45W9AnV2R61xZMRri5NlLnl6CVF
github.com/charmbracelet/x/ansi v0.11.6/go.mod h1:2JNYLgQUsyqaiLovhU2Rv/pb8r6ydXKS3NIttu3VGZQ=
github.com/charmbracelet/x/cellbuf v0.0.15 h1:ur3pZy0o6z/R7EylET877CBxaiE1Sp1GMxoFPAIztPI=
github.com/charmbracelet/x/cellbuf v0.0.15/go.mod h1:J1YVbR7MUuEGIFPCaaZ96KDl5NoS0DAWkskup+mOY+Q=
+github.com/charmbracelet/x/exp/golden v0.0.0-20241011142426-46044092ad91 h1:payRxjMjKgx2PaCWLZ4p3ro9y97+TVLZNaRZgJwSVDQ=
+github.com/charmbracelet/x/exp/golden v0.0.0-20241011142426-46044092ad91/go.mod h1:wDlXFlCrmJ8J+swcL/MnGUuYnqgQdW9rhSD61oNMb6U=
github.com/charmbracelet/x/term v0.2.2 h1:xVRT/S2ZcKdhhOuSP4t5cLi5o+JxklsoEObBSgfgZRk=
github.com/charmbracelet/x/term v0.2.2/go.mod h1:kF8CY5RddLWrsgVwpw4kAa6TESp6EB5y3uxGLeCqzAI=
github.com/clipperhouse/displaywidth v0.9.0 h1:Qb4KOhYwRiN3viMv1v/3cTBlz3AcAZX3+y9OLhMtAtA=
@@ -26,16 +36,16 @@ github.com/clipperhouse/uax29/v2 v2.5.0 h1:x7T0T4eTHDONxFJsL94uKNKPHrclyFI0lm7+w
github.com/clipperhouse/uax29/v2 v2.5.0/go.mod h1:Wn1g7MK6OoeDT0vL+Q0SQLDz/KpfsVRgg6W7ihQeh4g=
github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4=
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM=
-github.com/go-viper/mapstructure/v2 v2.2.1 h1:ZAaOCxANMuZx5RCeg0mBdEZk7DZasvvZIxtHqx8aGss=
-github.com/go-viper/mapstructure/v2 v2.2.1/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM=
-github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4=
-github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M=
-github.com/google/flatbuffers v25.1.24+incompatible h1:4wPqL3K7GzBd1CwyhSd3usxLKOaJN/AC6puCca6Jm7o=
-github.com/google/flatbuffers v25.1.24+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8=
+github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
+github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM=
+github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
@@ -48,12 +58,10 @@ github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo
github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc=
github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0=
-github.com/klauspost/cpuid/v2 v2.2.9 h1:66ze0taIn2H33fBvCkXuv9BmCwDfafmiIVpKV9kKGuY=
-github.com/klauspost/cpuid/v2 v2.2.9/go.mod h1:rqkxqrZ1EhYM9G+hXH7YdowN5R5RGN6NK4QwQ3WMXF8=
+github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
+github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/lucasb-eyer/go-colorful v1.3.0 h1:2/yBRLdWBZKrf7gB40FoiKfAWYQ0lqNcbuQwVHXptag=
github.com/lucasb-eyer/go-colorful v1.3.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
-github.com/marcboeker/go-duckdb v1.8.5 h1:tkYp+TANippy0DaIOP5OEfBEwbUINqiFqgwMQ44jME0=
-github.com/marcboeker/go-duckdb v1.8.5/go.mod h1:6mK7+WQE4P4u5AFLvVBmhFxY5fvhymFptghgJX6B+/8=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4=
@@ -66,8 +74,15 @@ github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELU
github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo=
github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc=
github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk=
+github.com/parquet-go/bitpack v1.0.0 h1:AUqzlKzPPXf2bCdjfj4sTeacrUwsT7NlcYDMUQxPcQA=
+github.com/parquet-go/bitpack v1.0.0/go.mod h1:XnVk9TH+O40eOOmvpAVZ7K2ocQFrQwysLMnc6M/8lgs=
+github.com/parquet-go/jsonlite v1.0.0 h1:87QNdi56wOfsE5bdgas0vRzHPxfJgzrXGml1zZdd7VU=
+github.com/parquet-go/jsonlite v1.0.0/go.mod h1:nDjpkpL4EOtqs6NQugUsi0Rleq9sW/OtC1NnZEnxzF0=
+github.com/parquet-go/parquet-go v0.27.0 h1:vHWK2xaHbj+v1DYps03yDRpEsdtOeKbhiXUaixoPb3g=
+github.com/parquet-go/parquet-go v0.27.0/go.mod h1:navtkAYr2LGoJVp141oXPlO/sxLvaOe3la2JEoD8+rg=
github.com/pierrec/lz4/v4 v4.1.22 h1:cKFw6uJDK+/gfw5BcDL0JL5aBsAFdsIT18eRtLj7VIU=
github.com/pierrec/lz4/v4 v4.1.22/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
@@ -81,15 +96,17 @@ github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
+github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
+github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
+github.com/twpayne/go-geom v1.6.1 h1:iLE+Opv0Ihm/ABIcvQFGIiFBXd76oBIar9drAwHFhR4=
+github.com/twpayne/go-geom v1.6.1/go.mod h1:Kr+Nly6BswFsKM5sd31YaoWS5PeDDH2NftJTK7Gd028=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
-github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0=
-github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA=
+github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
+github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg=
golang.org/x/exp v0.0.0-20250128182459-e0ece0dbea4c h1:KL/ZBHXgKGVmuZBZ01Lt57yE5ws8ZPSkkihmEyq7FXc=
golang.org/x/exp v0.0.0-20250128182459-e0ece0dbea4c/go.mod h1:tujkw807nyEEAamNbDrEGzRav+ilXA7PCRAd6xsmwiU=
-golang.org/x/mod v0.27.0 h1:kb+q2PyFnEADO2IEF935ehFUXlWiNjJWtRNgBLSfbxQ=
-golang.org/x/mod v0.27.0/go.mod h1:rWI627Fq0DEoudcK+MBkNkCe0EetEaDSwJJkCcjpazc=
golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug=
golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
@@ -98,9 +115,9 @@ golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk=
golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4=
-golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg=
-golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s=
-golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da h1:noIWHXmPHxILtqtCOPIhSt0ABwskkZKjD3bXGnZGpNY=
-golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90=
+google.golang.org/protobuf v1.36.1 h1:yBPeRvTftaleIgM3PZ/WBIZ7XM/eEYAaEyCwvyjq/gk=
+google.golang.org/protobuf v1.36.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/internal/db/client.go b/internal/db/client.go
@@ -5,7 +5,6 @@ import (
"errors"
"fmt"
"os"
- "strconv"
"github.com/jackc/pgx/v5/pgxpool"
)
@@ -41,16 +40,6 @@ func DSNFromEnv() (string, error) {
return dsn, nil
}
-// PortFromEnv returns the port as an integer (for DuckDB attach).
-func PortFromEnv() int {
- p := getenv("PGPORT", "9737")
- n, _ := strconv.Atoi(p)
- if n == 0 {
- n = 9737
- }
- return n
-}
-
func getenv(key, fallback string) string {
if v := os.Getenv(key); v != "" {
return v
diff --git a/internal/export/duckdb.go b/internal/export/duckdb.go
@@ -1,99 +0,0 @@
-package export
-
-import (
- "database/sql"
- "fmt"
- "os"
- "strings"
-
- _ "github.com/marcboeker/go-duckdb"
-)
-
-// Options controls the export behaviour.
-type Options struct {
- Format string // "parquet" or "csv"
-}
-
-// Export runs query against the WRDS PostgreSQL instance and writes output to outPath.
-// Format is determined by opts.Format (default: parquet).
-func Export(query, outPath string, opts Options) error {
- format := strings.ToLower(opts.Format)
- if format == "" {
- if strings.HasSuffix(strings.ToLower(outPath), ".csv") {
- format = "csv"
- } else {
- format = "parquet"
- }
- }
-
- db, err := sql.Open("duckdb", "")
- if err != nil {
- return fmt.Errorf("open duckdb: %w", err)
- }
- defer db.Close()
-
- // Install and load postgres extension.
- for _, stmt := range []string{
- "INSTALL postgres;",
- "LOAD postgres;",
- } {
- if _, err := db.Exec(stmt); err != nil {
- // Ignore "already installed" errors.
- if !strings.Contains(err.Error(), "already") {
- return fmt.Errorf("%s: %w", stmt, err)
- }
- }
- }
-
- // Build the ATTACH string from env.
- attachDSN := buildAttachDSN()
- attach := fmt.Sprintf("ATTACH '%s' AS wrds (TYPE POSTGRES, READ_ONLY);", attachDSN)
- if _, err := db.Exec(attach); err != nil {
- return fmt.Errorf("attach: %w", err)
- }
-
- // Wrap query in a COPY statement.
- var copySQL string
- switch format {
- case "csv":
- copySQL = fmt.Sprintf("COPY (%s) TO '%s' (FORMAT CSV, HEADER true);", query, outPath)
- default:
- copySQL = fmt.Sprintf("COPY (%s) TO '%s' (FORMAT PARQUET, COMPRESSION ZSTD);", query, outPath)
- }
-
- if _, err := db.Exec(copySQL); err != nil {
- return fmt.Errorf("copy: %w", err)
- }
- return nil
-}
-
-// buildAttachDSN builds the postgres attach DSN string from standard PG env vars.
-func buildAttachDSN() string {
- host := getenv("PGHOST", "wrds-pgdata.wharton.upenn.edu")
- port := getenv("PGPORT", "9737")
- user := getenv("PGUSER", "")
- password := getenv("PGPASSWORD", "")
- dbname := getenv("PGDATABASE", user)
-
- // DuckDB postgres attach DSN format.
- parts := []string{
- "host=" + host,
- "port=" + port,
- "dbname=" + dbname,
- "sslmode=require",
- }
- if user != "" {
- parts = append(parts, "user="+user)
- }
- if password != "" {
- parts = append(parts, "password="+password)
- }
- return strings.Join(parts, " ")
-}
-
-func getenv(key, fallback string) string {
- if v := os.Getenv(key); v != "" {
- return v
- }
- return fallback
-}
diff --git a/internal/export/export.go b/internal/export/export.go
@@ -0,0 +1,325 @@
+package export
+
+import (
+ "context"
+ "encoding/csv"
+ "fmt"
+ "math/big"
+ "os"
+ "strings"
+ "time"
+
+ "github.com/jackc/pgx/v5"
+ "github.com/jackc/pgx/v5/pgconn"
+ "github.com/jackc/pgx/v5/pgtype"
+ "github.com/parquet-go/parquet-go"
+ "github.com/parquet-go/parquet-go/compress/zstd"
+
+ "github.com/eloualiche/wrds-download/internal/db"
+)
+
+// Options controls the export behaviour.
+type Options struct {
+	// Format selects the output format: "parquet" or "csv". Export lowercases
+	// it before use. When empty, Export chooses "csv" if the output path ends
+	// in ".csv" (case-insensitive) and "parquet" otherwise. Any value other
+	// than "csv" results in Parquet output.
+	Format string
+}
+
+// rowGroupSize is the number of rows writeParquet accumulates in memory
+// before handing them to the Parquet writer in a single Write call.
+// NOTE(review): nothing in this file flushes the writer per batch, so this
+// may not correspond to the on-disk row-group size — confirm against
+// parquet-go's writer behaviour.
+const rowGroupSize = 10_000
+
+// Export executes query on the WRDS PostgreSQL server and streams the result
+// set into the file at outPath.
+//
+// The output format comes from opts.Format (case-insensitive). When that is
+// empty it is inferred from outPath: a ".csv" suffix selects CSV, anything
+// else selects Parquet. Connection settings are obtained from
+// db.DSNFromEnv. The returned error is wrapped with the stage that failed
+// ("dsn", "connect", "query") or comes from the format-specific writer.
+func Export(query, outPath string, opts Options) error {
+	// Resolve the format: an explicit option wins, then the file extension.
+	format := strings.ToLower(opts.Format)
+	if format == "" {
+		format = "parquet"
+		if strings.HasSuffix(strings.ToLower(outPath), ".csv") {
+			format = "csv"
+		}
+	}
+
+	dsn, dsnErr := db.DSNFromEnv()
+	if dsnErr != nil {
+		return fmt.Errorf("dsn: %w", dsnErr)
+	}
+
+	ctx := context.Background()
+	conn, connErr := pgx.Connect(ctx, dsn)
+	if connErr != nil {
+		return fmt.Errorf("connect: %w", connErr)
+	}
+	defer conn.Close(ctx)
+
+	rows, queryErr := conn.Query(ctx, query)
+	if queryErr != nil {
+		return fmt.Errorf("query: %w", queryErr)
+	}
+	defer rows.Close()
+
+	// Everything that is not CSV is written as Parquet.
+	if format == "csv" {
+		return writeCSV(rows, outPath)
+	}
+	return writeParquet(rows, outPath)
+}
+
+// writeCSV streams rows into a CSV file at outPath, preceded by a header row
+// built from the result set's column names.
+//
+// Each value is rendered with formatValue, so NULLs become empty fields.
+// Errors from creating the file, reading rows, writing records, flushing the
+// CSV writer, or closing the file are all returned. On error the partially
+// written file is left in place.
+func writeCSV(rows pgx.Rows, outPath string) (err error) {
+	f, err := os.Create(outPath)
+	if err != nil {
+		return fmt.Errorf("create csv: %w", err)
+	}
+	// Surface a failed Close: write-back errors can be reported only there,
+	// and swallowing them would pass off a truncated file as a success.
+	defer func() {
+		if cerr := f.Close(); cerr != nil && err == nil {
+			err = fmt.Errorf("close csv: %w", cerr)
+		}
+	}()
+
+	w := csv.NewWriter(f)
+
+	fds := rows.FieldDescriptions()
+	header := make([]string, len(fds))
+	for i, fd := range fds {
+		header[i] = fd.Name
+	}
+	if err := w.Write(header); err != nil {
+		return fmt.Errorf("write header: %w", err)
+	}
+
+	// record is reused for every row; csv.Writer copies it into its buffer.
+	record := make([]string, len(fds))
+	for rows.Next() {
+		vals, err := rows.Values()
+		if err != nil {
+			return fmt.Errorf("scan row: %w", err)
+		}
+		for i, v := range vals {
+			record[i] = formatValue(v)
+		}
+		if err := w.Write(record); err != nil {
+			return fmt.Errorf("write row: %w", err)
+		}
+	}
+	if err := rows.Err(); err != nil {
+		return fmt.Errorf("rows: %w", err)
+	}
+
+	// Flush reports nothing itself; any buffered write error shows up in Error.
+	w.Flush()
+	if err := w.Error(); err != nil {
+		return fmt.Errorf("flush csv: %w", err)
+	}
+	return nil
+}
+
+// writeParquet streams rows into a ZSTD-compressed Parquet file at outPath
+// using parquet-go.
+//
+// The schema is derived from the result set's field descriptions by
+// buildParquetSchema, and each value is converted with convertValue. Rows are
+// keyed by column name, so result sets with duplicate column names keep only
+// one of the duplicates. Rows are handed to the writer in batches of
+// rowGroupSize. Errors from creating the file, reading rows, writing batches,
+// closing the Parquet writer, or closing the file are all returned.
+func writeParquet(rows pgx.Rows, outPath string) (err error) {
+	fds := rows.FieldDescriptions()
+
+	schema, colTypes := buildParquetSchema(fds)
+
+	f, err := os.Create(outPath)
+	if err != nil {
+		return fmt.Errorf("create parquet: %w", err)
+	}
+	// Surface a failed Close so a truncated file is not reported as success.
+	defer func() {
+		if cerr := f.Close(); cerr != nil && err == nil {
+			err = fmt.Errorf("close parquet: %w", cerr)
+		}
+	}()
+
+	writer := parquet.NewGenericWriter[map[string]any](f,
+		schema,
+		parquet.Compression(&zstd.Codec{}),
+	)
+
+	batch := make([]map[string]any, 0, rowGroupSize)
+
+	for rows.Next() {
+		vals, err := rows.Values()
+		if err != nil {
+			return fmt.Errorf("scan row: %w", err)
+		}
+
+		row := make(map[string]any, len(fds))
+		for i, v := range vals {
+			row[fds[i].Name] = convertValue(v, colTypes[i])
+		}
+		batch = append(batch, row)
+
+		if len(batch) >= rowGroupSize {
+			if _, err := writer.Write(batch); err != nil {
+				return fmt.Errorf("write row group: %w", err)
+			}
+			// Drop references to the written rows so they can be collected
+			// while the backing array is reused for the next batch.
+			clear(batch)
+			batch = batch[:0]
+		}
+	}
+	if err := rows.Err(); err != nil {
+		return fmt.Errorf("rows: %w", err)
+	}
+
+	// Write whatever is left over from the last, partially filled batch.
+	if len(batch) > 0 {
+		if _, err := writer.Write(batch); err != nil {
+			return fmt.Errorf("write final rows: %w", err)
+		}
+	}
+
+	if err := writer.Close(); err != nil {
+		return fmt.Errorf("close parquet writer: %w", err)
+	}
+	return nil
+}
+
+// colType tags how we convert PG values for Parquet. buildParquetSchema
+// assigns one tag per result column and convertValue switches on it.
+type colType int
+
+const (
+	colString    colType = iota // rendered with formatValue; default for PG types without a dedicated mapping
+	colBool                     // bool
+	colInt32                    // int2 / int4 → int32
+	colInt64                    // int8 → int64
+	colFloat32                  // float4 → float32
+	colFloat64                  // float8 → float64
+	colDate                     // days since epoch → int32
+	colTimestamp                // microseconds since epoch → int64
+)
+
+// buildParquetSchema maps PG field descriptors to a parquet schema.
+//
+// It returns the schema (named "wrds") together with a slice, parallel to
+// fds, holding the colType that convertValue should use for each column.
+// Every column is declared optional so that NULLs can be stored. PostgreSQL
+// types without a dedicated mapping (text, varchar, char, numeric, ...) are
+// stored as strings. Columns are keyed by name in the schema group, so
+// duplicate column names collapse into a single Parquet column.
+func buildParquetSchema(fds []pgconn.FieldDescription) (*parquet.Schema, []colType) {
+	cols := make([]colType, len(fds))
+	group := make(parquet.Group, len(fds))
+
+	for i, fd := range fds {
+		var (
+			ct   colType
+			leaf parquet.Node
+		)
+
+		switch fd.DataTypeOID {
+		case pgtype.BoolOID:
+			ct, leaf = colBool, parquet.Leaf(parquet.BooleanType)
+		case pgtype.Int2OID, pgtype.Int4OID:
+			// Parquet has no 16-bit physical type; int2 is widened to int32.
+			ct, leaf = colInt32, parquet.Leaf(parquet.Int32Type)
+		case pgtype.Int8OID:
+			ct, leaf = colInt64, parquet.Leaf(parquet.Int64Type)
+		case pgtype.Float4OID:
+			ct, leaf = colFloat32, parquet.Leaf(parquet.FloatType)
+		case pgtype.Float8OID:
+			ct, leaf = colFloat64, parquet.Leaf(parquet.DoubleType)
+		case pgtype.DateOID:
+			ct, leaf = colDate, parquet.Date()
+		case pgtype.TimestampOID, pgtype.TimestamptzOID:
+			ct, leaf = colTimestamp, parquet.Timestamp(parquet.Microsecond)
+		default:
+			// text (25), varchar (1043), char (18, 1042), numeric (1700), etc.
+			ct, leaf = colString, parquet.String()
+		}
+
+		cols[i] = ct
+		group[fd.Name] = parquet.Optional(leaf)
+	}
+
+	return parquet.NewSchema("wrds", group), cols
+}
+
+// epoch is the Unix epoch, 1970-01-01 00:00:00 UTC.
+var epoch = time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC)
+
+// convertValue converts a pgx-scanned value to the appropriate Go type for parquet-go.
+//
+// v is one element of pgx's Rows.Values() and ct is the column's tag from
+// buildParquetSchema. nil (SQL NULL) is returned unchanged. Dates become the
+// number of whole days since the Unix epoch (int32) and timestamps the number
+// of microseconds since the Unix epoch (int64). When v's dynamic type does
+// not match what ct expects, the value is stringified with formatValue.
+func convertValue(v any, ct colType) any {
+	if v == nil {
+		return nil
+	}
+
+	switch ct {
+	case colBool:
+		if b, ok := v.(bool); ok {
+			return b
+		}
+	case colInt32:
+		switch n := v.(type) {
+		case int16:
+			return int32(n)
+		case int32:
+			return n
+		case int64:
+			return int32(n)
+		}
+	case colInt64:
+		switch n := v.(type) {
+		case int64:
+			return n
+		case int32:
+			return int64(n)
+		case int16:
+			return int64(n)
+		}
+	case colFloat32:
+		switch f := v.(type) {
+		case float32:
+			return f
+		case float64:
+			return float32(f)
+		}
+	case colFloat64:
+		switch f := v.(type) {
+		case float64:
+			return f
+		case float32:
+			return float64(f)
+		}
+	case colDate:
+		if t, ok := v.(time.Time); ok {
+			// Work from Unix seconds rather than a time.Duration: Duration
+			// saturates at roughly ±292 years, which would map far-away dates
+			// (e.g. a 9999-12-31 sentinel) to the same wrong day number.
+			const secondsPerDay = 24 * 60 * 60
+			secs := t.Unix()
+			days := secs / secondsPerDay
+			if secs%secondsPerDay < 0 {
+				days-- // floor, so instants before the epoch land on the right day
+			}
+			return int32(days)
+		}
+	case colTimestamp:
+		if t, ok := v.(time.Time); ok {
+			// UnixMicro avoids the time.Duration saturation described above.
+			return t.UnixMicro()
+		}
+	}
+
+	// colString, or a value whose type did not match its column tag: stringify.
+	return formatValue(v)
+}
+
+// formatValue converts any value to its string representation.
+//
+//   - nil yields "".
+//   - string and []byte are returned verbatim.
+//   - time.Time is formatted as "2006-01-02" when its time of day is exactly
+//     midnight, and as RFC 3339 otherwise.
+//   - pgtype.Numeric is rendered as a plain decimal (see formatNumeric).
+//   - anything else is formatted with fmt's %v verb.
+func formatValue(v any) string {
+	switch val := v.(type) {
+	case nil:
+		return ""
+	case string:
+		return val
+	case []byte:
+		return string(val)
+	case time.Time:
+		if val.Hour() == 0 && val.Minute() == 0 && val.Second() == 0 && val.Nanosecond() == 0 {
+			return val.Format("2006-01-02")
+		}
+		return val.Format(time.RFC3339)
+	case pgtype.Numeric:
+		return formatNumeric(val)
+	default:
+		return fmt.Sprintf("%v", val)
+	}
+}
+
+// formatNumeric renders n as a plain (non-scientific) decimal string.
+//
+// Invalid (NULL) numerics yield "", and NaN / ±Infinity yield "NaN",
+// "Infinity" and "-Infinity". Finite values are produced by placing the
+// decimal point directly in the coefficient's base-10 digits, so the result
+// is exact for any precision; going through binary floating point could
+// perturb the low-order digits of long numerics. Trailing zeros after the
+// decimal point are dropped.
+func formatNumeric(n pgtype.Numeric) string {
+	switch {
+	case !n.Valid:
+		return ""
+	case n.NaN:
+		return "NaN"
+	case n.InfinityModifier == pgtype.Infinity:
+		return "Infinity"
+	case n.InfinityModifier == pgtype.NegativeInfinity:
+		return "-Infinity"
+	}
+
+	coeff := n.Int
+	if coeff == nil {
+		coeff = new(big.Int)
+	}
+	if coeff.Sign() == 0 {
+		return "0"
+	}
+
+	// The value is coeff × 10^Exp; operate on the unsigned digit string.
+	digits := new(big.Int).Abs(coeff).String()
+	switch {
+	case n.Exp > 0:
+		digits += strings.Repeat("0", int(n.Exp))
+	case n.Exp < 0:
+		scale := int(-n.Exp)
+		if len(digits) <= scale {
+			// Left-pad so at least one digit precedes the decimal point.
+			digits = strings.Repeat("0", scale-len(digits)+1) + digits
+		}
+		point := len(digits) - scale
+		frac := strings.TrimRight(digits[point:], "0")
+		digits = digits[:point]
+		if frac != "" {
+			digits += "." + frac
+		}
+	}
+
+	if coeff.Sign() < 0 {
+		return "-" + digits
+	}
+	return digits
+}
diff --git a/internal/tui/app.go b/internal/tui/app.go
@@ -233,7 +233,7 @@ func (a *App) startDownload(msg DlSubmitMsg) tea.Cmd {
if msg.Columns != "" && msg.Columns != "*" {
sel = msg.Columns
}
- query := fmt.Sprintf("SELECT %s FROM wrds.%s.%s", sel, msg.Schema, msg.Table)
+ query := fmt.Sprintf("SELECT %s FROM %s.%s", sel, msg.Schema, msg.Table)
if msg.Where != "" {
query += " WHERE " + msg.Where
}