wrds-download

TUI/CLI tool for browsing and downloading WRDS data
Log | Files | Refs | README

commit 916fb139cc5c14a3d2f513fff60c7678e6afaaaf
Author: Erik Loualiche <eloualic@umn.edu>
Date:   Thu, 19 Feb 2026 23:51:24 -0600

Initial implementation of wrds-dl

TUI/CLI tool for browsing and downloading WRDS PostgreSQL data.
Includes Bubble Tea TUI, cobra CLI, pgx metadata queries, and
DuckDB-based Parquet/CSV export via postgres_scanner.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

Diffstat:
A .github/workflows/release.yml | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A .gitignore | 5 +++++
A README.md | 152 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A cmd/download.go | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A cmd/root.go | 28 ++++++++++++++++++++++++++++
A cmd/tui.go | 39 +++++++++++++++++++++++++++++++++++++++
A go.mod | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
A go.sum | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A internal/db/client.go | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A internal/db/meta.go | 162 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A internal/export/duckdb.go | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A internal/tui/app.go | 479 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A internal/tui/dlform.go | 145 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A internal/tui/styles.go | 47 +++++++++++++++++++++++++++++++++++++++++++++++
A main.go | 7 +++++++
15 files changed, 1547 insertions(+), 0 deletions(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml @@ -0,0 +1,61 @@ +name: Release + +on: + push: + tags: + - "v*" + workflow_dispatch: + +jobs: + build-linux: + runs-on: ubuntu-latest + container: almalinux:8 # glibc 2.28 — matches RHEL 8 HPC clusters + steps: + - uses: actions/checkout@v4 + + - name: Install build tools + run: dnf install -y gcc gcc-c++ git + + - uses: actions/setup-go@v5 + with: + go-version: "1.25" + + - name: Build + run: go build -o wrds-dl-linux-amd64 . + + - uses: actions/upload-artifact@v4 + with: + name: wrds-dl-linux-amd64 + path: wrds-dl-linux-amd64 + + build-macos: + runs-on: macos-14 # Apple Silicon (arm64) macOS runner + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-go@v5 + with: + go-version: "1.25" + + - name: Build + run: go build -o wrds-dl-darwin-arm64 . + + - uses: actions/upload-artifact@v4 + with: + name: wrds-dl-darwin-arm64 + path: wrds-dl-darwin-arm64 + + release: + needs: [build-linux, build-macos] + runs-on: ubuntu-latest + if: startsWith(github.ref, 'refs/tags/') + permissions: + contents: write + steps: + - uses: actions/download-artifact@v4 + + - uses: softprops/action-gh-release@v2 + with: + files: | + wrds-dl-linux-amd64/wrds-dl-linux-amd64 + wrds-dl-darwin-arm64/wrds-dl-darwin-arm64 diff --git a/.gitignore b/.gitignore @@ -0,0 +1,5 @@ +wrds-dl +wrds-dl-* +*.parquet +*.csv +.claude/ diff --git a/README.md b/README.md @@ -0,0 +1,152 @@ +# wrds-dl + +A terminal tool for browsing and downloading data from the [WRDS](https://wrds-www.wharton.upenn.edu/) PostgreSQL database. Comes with an interactive TUI for exploration and a CLI for scripted downloads. Output is Parquet (via DuckDB) or CSV. 
+ +## Features + +- **TUI** — browse schemas and tables, preview rows, trigger downloads without leaving the terminal +- **CLI** — scriptable `download` command with structured flags or raw SQL +- **Parquet output** — uses DuckDB's `postgres_scanner` for fast, efficient export with ZSTD compression +- **CSV output** — plain CSV alternative via the same DuckDB pipeline +- **Standard auth** — reads from `PG*` environment variables or `~/.pgpass`, no configuration file needed + +## Installation + +### Pre-built binaries (recommended) + +Download the latest release from the [Releases page](https://github.com/eloualiche/wrds-download/releases): + +| Platform | Binary | +|---|---| +| macOS (Apple Silicon) | `wrds-dl-darwin-arm64` | +| Linux x86-64 | `wrds-dl-linux-amd64` | + +```sh +# macOS example +curl -L https://github.com/eloualiche/wrds-download/releases/latest/download/wrds-dl-darwin-arm64 \ + -o /usr/local/bin/wrds-dl +chmod +x /usr/local/bin/wrds-dl +``` + +### Build from source + +Requires Go 1.25+ (the version declared in `go.mod`; Go 1.21+ can download the matching toolchain automatically when network access is available), CGo, and a C++ compiler (`gcc-c++` on Linux, Xcode CLT on macOS). + +```sh +git clone https://github.com/eloualiche/wrds-download +cd wrds-download +go build -o wrds-dl . +mv wrds-dl /usr/local/bin/ +``` + +## Authentication + +`wrds-dl` uses the standard PostgreSQL environment variables. 
Set them before running: + +```sh +export PGHOST=wrds-pgdata.wharton.upenn.edu +export PGPORT=9737 +export PGUSER=your_username +export PGPASSWORD=your_password +export PGDATABASE=your_username # on WRDS, database name = username +``` + +Alternatively, store credentials in `~/.pgpass` (no `PGPASSWORD` needed): + +``` +wrds-pgdata.wharton.upenn.edu:9737:your_username:your_username:your_password +``` + +## TUI + +Launch the interactive browser: + +```sh +wrds-dl tui +``` + +``` +┌─ WRDS ──────────────────────────────────────────────────────────┐ +│ Schemas │ Tables (crsp) │ Preview: crsp.dsf │ +│ ───────────── │ ───────────────── │ ────────────────────── │ +│ > crsp │ > dsf │ permno date prc │ +│ comp │ mse │ 10001 2020-01-02 45.23 │ +│ ibes │ ccm_final │ 10001 2020-01-03 47.11 │ +│ optionm │ ... │ ... │ +│ ... │ │ ~2.1M rows │ +│ │ │ │ +│ [tab] switch pane [d] download [/] filter [q] quit │ +└─────────────────────────────────────────────────────────────────┘ +``` + +### Keybindings + +| Key | Action | +|---|---| +| `tab` / `shift+tab` | Cycle focus between panes | +| `enter` | Drill into schema or table | +| `d` | Open download dialog for the selected table | +| `/` | Filter list | +| `esc` | Cancel / dismiss | +| `q` / `ctrl+c` | Quit | + +In the download dialog, `tab` moves between fields and `enter` on the last field confirms. + +## CLI + +### Structured download + +```sh +wrds-dl download \ + --schema crsp \ + --table dsf \ + --where "date >= '2020-01-01' AND date < '2021-01-01'" \ + --out crsp_dsf_2020.parquet +``` + +### Raw SQL + +Raw queries are executed by DuckDB with WRDS attached as the `wrds` catalog, so qualify tables as `wrds.<schema>.<table>`. + +```sh +wrds-dl download \ + --query "SELECT permno, date, prc FROM wrds.crsp.dsf WHERE date > '2020-01-01'" \ + --out crsp_dsf.parquet +``` + +### CSV output + +```sh +wrds-dl download \ + --schema comp \ + --table funda \ + --out funda.csv +``` + +Format is inferred from the output file extension (`.parquet` → Parquet, `.csv` → CSV). Override with `--format`. 
+ +### Flags + +| Flag | Description | +|---|---| +| `--schema` | Schema name (e.g. `crsp`, `comp`) | +| `--table` | Table name (e.g. `dsf`, `funda`) | +| `--where` | SQL `WHERE` clause, without the keyword (e.g. `date > '2020-01-01'`) | +| `--query` | Full SQL query — overrides `--schema`, `--table`, `--where` | +| `--out` | Output file path (required) | +| `--format` | `parquet` or `csv` (inferred from extension if omitted) | +| `--limit` | Row limit, useful for testing (default: no limit) | + +## How it works + +- **Metadata** (schema/table/column listing, row preview) uses a `pgx` connection pool talking directly to the WRDS PostgreSQL server. +- **Downloads** use [DuckDB](https://duckdb.org/) with the `postgres_scanner` extension. DuckDB attaches to WRDS as a read-only source and streams data directly into Parquet or CSV without loading it all into memory first. + +## Dependencies + +| Package | Purpose | +|---|---| +| `charmbracelet/bubbletea` | TUI framework | +| `charmbracelet/bubbles` | List, table, text-input, spinner components | +| `charmbracelet/lipgloss` | Layout and styling | +| `jackc/pgx/v5` | PostgreSQL driver for metadata and preview | +| `spf13/cobra` | CLI commands and flags | +| `marcboeker/go-duckdb` | Parquet/CSV export via `postgres_scanner` | diff --git a/cmd/download.go b/cmd/download.go @@ -0,0 +1,97 @@ +package cmd + +import ( + "fmt" + "os" + "strings" + + "github.com/eloualiche/wrds-download/internal/export" + "github.com/spf13/cobra" +) + +var ( + dlSchema string + dlTable string + dlWhere string + dlQuery string + dlOut string + dlFormat string + dlLimit int +) + +var downloadCmd = &cobra.Command{ + Use: "download", + Short: "Download WRDS data to parquet or CSV", + Long: `Download data from WRDS to a local file. 
+ +Examples: + wrds download --schema crsp --table dsf --where "date='2020-01-02'" --out crsp_dsf.parquet + wrds download --query "SELECT permno, date, prc FROM crsp.dsf LIMIT 1000" --out out.parquet + wrds download --schema comp --table funda --out funda.csv --format csv`, + RunE: runDownload, +} + +func init() { + rootCmd.AddCommand(downloadCmd) + + f := downloadCmd.Flags() + f.StringVar(&dlSchema, "schema", "", "Schema name (e.g. crsp)") + f.StringVar(&dlTable, "table", "", "Table name (e.g. dsf)") + f.StringVar(&dlWhere, "where", "", "SQL WHERE clause (without the WHERE keyword)") + f.StringVar(&dlQuery, "query", "", "Full SQL query (overrides --schema/--table/--where)") + f.StringVar(&dlOut, "out", "", "Output file path (required)") + f.StringVar(&dlFormat, "format", "", "Output format: parquet or csv (inferred from extension if omitted)") + f.IntVar(&dlLimit, "limit", 0, "Limit number of rows (0 = no limit)") + + _ = downloadCmd.MarkFlagRequired("out") +} + +func runDownload(cmd *cobra.Command, args []string) error { + query, err := buildQuery() + if err != nil { + return err + } + + format := resolveFormat(dlOut, dlFormat) + + fmt.Fprintf(os.Stderr, "Exporting to %s (%s)...\n", dlOut, format) + + opts := export.Options{Format: format} + if err := export.Export(query, dlOut, opts); err != nil { + return fmt.Errorf("export failed: %w", err) + } + + fmt.Fprintf(os.Stderr, "Done: %s\n", dlOut) + return nil +} + +func buildQuery() (string, error) { + if dlQuery != "" { + return dlQuery, nil + } + if dlSchema == "" || dlTable == "" { + return "", fmt.Errorf("either --query or both --schema and --table must be specified") + } + + q := fmt.Sprintf("SELECT * FROM wrds.%s.%s", dlSchema, dlTable) + + if dlWhere != "" { + q += " WHERE " + dlWhere + } + if dlLimit > 0 { + q += fmt.Sprintf(" LIMIT %d", dlLimit) + } + + return q, nil +} + +func resolveFormat(path, flag string) string { + if flag != "" { + return strings.ToLower(flag) + } + lower := strings.ToLower(path) + 
if strings.HasSuffix(lower, ".csv") { + return "csv" + } + return "parquet" +} diff --git a/cmd/root.go b/cmd/root.go @@ -0,0 +1,28 @@ +package cmd + +import ( + "fmt" + "os" + + "github.com/spf13/cobra" +) + +var rootCmd = &cobra.Command{ + Use: "wrds-dl", + Short: "WRDS data browser and downloader", + Long: `wrds is a CLI/TUI tool for navigating and downloading data +from the Wharton Research Data Services (WRDS) PostgreSQL database. + +Authentication is read from standard PostgreSQL environment variables: + PGHOST, PGPORT, PGUSER, PGPASSWORD, PGDATABASE + +Or from ~/.pgpass if PGPASSWORD is not set.`, +} + +// Execute runs the root command. +func Execute() { + if err := rootCmd.Execute(); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } +} diff --git a/cmd/tui.go b/cmd/tui.go @@ -0,0 +1,39 @@ +package cmd + +import ( + "context" + "fmt" + "os" + + "github.com/eloualiche/wrds-download/internal/db" + "github.com/eloualiche/wrds-download/internal/tui" + tea "github.com/charmbracelet/bubbletea" + "github.com/spf13/cobra" +) + +var tuiCmd = &cobra.Command{ + Use: "tui", + Short: "Launch the interactive TUI browser", + RunE: runTUI, +} + +func init() { + rootCmd.AddCommand(tuiCmd) +} + +func runTUI(cmd *cobra.Command, args []string) error { + ctx := context.Background() + client, err := db.New(ctx) + if err != nil { + return fmt.Errorf("connect to WRDS: %w", err) + } + defer client.Close() + + m := tui.NewApp(client) + p := tea.NewProgram(m, tea.WithAltScreen()) + if _, err := p.Run(); err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } + return nil +} diff --git a/go.mod b/go.mod @@ -0,0 +1,53 @@ +module github.com/eloualiche/wrds-download + +go 1.25.0 + +require ( + github.com/apache/arrow-go/v18 v18.1.0 // indirect + github.com/atotto/clipboard v0.1.4 // indirect + github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect + github.com/charmbracelet/bubbles v1.0.0 // indirect + github.com/charmbracelet/bubbletea v1.3.10 // indirect + 
github.com/charmbracelet/colorprofile v0.4.1 // indirect + github.com/charmbracelet/lipgloss v1.1.0 // indirect + github.com/charmbracelet/x/ansi v0.11.6 // indirect + github.com/charmbracelet/x/cellbuf v0.0.15 // indirect + github.com/charmbracelet/x/term v0.2.2 // indirect + github.com/clipperhouse/displaywidth v0.9.0 // indirect + github.com/clipperhouse/stringish v0.1.1 // indirect + github.com/clipperhouse/uax29/v2 v2.5.0 // indirect + github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f // indirect + github.com/go-viper/mapstructure/v2 v2.2.1 // indirect + github.com/goccy/go-json v0.10.5 // indirect + github.com/google/flatbuffers v25.1.24+incompatible // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/jackc/pgpassfile v1.0.0 // indirect + github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect + github.com/jackc/pgx/v5 v5.8.0 // indirect + github.com/jackc/puddle/v2 v2.2.2 // indirect + github.com/klauspost/compress v1.17.11 // indirect + github.com/klauspost/cpuid/v2 v2.2.9 // indirect + github.com/lucasb-eyer/go-colorful v1.3.0 // indirect + github.com/marcboeker/go-duckdb v1.8.5 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/mattn/go-localereader v0.0.1 // indirect + github.com/mattn/go-runewidth v0.0.19 // indirect + github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect + github.com/muesli/cancelreader v0.2.2 // indirect + github.com/muesli/termenv v0.16.0 // indirect + github.com/pierrec/lz4/v4 v4.1.22 // indirect + github.com/rivo/uniseg v0.4.7 // indirect + github.com/sahilm/fuzzy v0.1.1 // indirect + github.com/spf13/cobra v1.10.2 // indirect + github.com/spf13/pflag v1.0.9 // indirect + github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect + github.com/zeebo/xxh3 v1.0.2 // indirect + golang.org/x/exp v0.0.0-20250128182459-e0ece0dbea4c // indirect + golang.org/x/mod v0.27.0 // 
indirect + golang.org/x/sync v0.17.0 // indirect + golang.org/x/sys v0.38.0 // indirect + golang.org/x/text v0.29.0 // indirect + golang.org/x/tools v0.36.0 // indirect + golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect +) diff --git a/go.sum b/go.sum @@ -0,0 +1,106 @@ +github.com/apache/arrow-go/v18 v18.1.0 h1:agLwJUiVuwXZdwPYVrlITfx7bndULJ/dggbnLFgDp/Y= +github.com/apache/arrow-go/v18 v18.1.0/go.mod h1:tigU/sIgKNXaesf5d7Y95jBBKS5KsxTqYBKXFsvKzo0= +github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4= +github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI= +github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k= +github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8= +github.com/charmbracelet/bubbles v1.0.0 h1:12J8/ak/uCZEMQ6KU7pcfwceyjLlWsDLAxB5fXonfvc= +github.com/charmbracelet/bubbles v1.0.0/go.mod h1:9d/Zd5GdnauMI5ivUIVisuEm3ave1XwXtD1ckyV6r3E= +github.com/charmbracelet/bubbletea v1.3.10 h1:otUDHWMMzQSB0Pkc87rm691KZ3SWa4KUlvF9nRvCICw= +github.com/charmbracelet/bubbletea v1.3.10/go.mod h1:ORQfo0fk8U+po9VaNvnV95UPWA1BitP1E0N6xJPlHr4= +github.com/charmbracelet/colorprofile v0.4.1 h1:a1lO03qTrSIRaK8c3JRxJDZOvhvIeSco3ej+ngLk1kk= +github.com/charmbracelet/colorprofile v0.4.1/go.mod h1:U1d9Dljmdf9DLegaJ0nGZNJvoXAhayhmidOdcBwAvKk= +github.com/charmbracelet/lipgloss v1.1.0 h1:vYXsiLHVkK7fp74RkV7b2kq9+zDLoEU4MZoFqR/noCY= +github.com/charmbracelet/lipgloss v1.1.0/go.mod h1:/6Q8FR2o+kj8rz4Dq0zQc3vYf7X+B0binUUBwA0aL30= +github.com/charmbracelet/x/ansi v0.11.6 h1:GhV21SiDz/45W9AnV2R61xZMRri5NlLnl6CVF7ihZW8= +github.com/charmbracelet/x/ansi v0.11.6/go.mod h1:2JNYLgQUsyqaiLovhU2Rv/pb8r6ydXKS3NIttu3VGZQ= +github.com/charmbracelet/x/cellbuf v0.0.15 h1:ur3pZy0o6z/R7EylET877CBxaiE1Sp1GMxoFPAIztPI= +github.com/charmbracelet/x/cellbuf v0.0.15/go.mod h1:J1YVbR7MUuEGIFPCaaZ96KDl5NoS0DAWkskup+mOY+Q= 
+github.com/charmbracelet/x/term v0.2.2 h1:xVRT/S2ZcKdhhOuSP4t5cLi5o+JxklsoEObBSgfgZRk= +github.com/charmbracelet/x/term v0.2.2/go.mod h1:kF8CY5RddLWrsgVwpw4kAa6TESp6EB5y3uxGLeCqzAI= +github.com/clipperhouse/displaywidth v0.9.0 h1:Qb4KOhYwRiN3viMv1v/3cTBlz3AcAZX3+y9OLhMtAtA= +github.com/clipperhouse/displaywidth v0.9.0/go.mod h1:aCAAqTlh4GIVkhQnJpbL0T/WfcrJXHcj8C0yjYcjOZA= +github.com/clipperhouse/stringish v0.1.1 h1:+NSqMOr3GR6k1FdRhhnXrLfztGzuG+VuFDfatpWHKCs= +github.com/clipperhouse/stringish v0.1.1/go.mod h1:v/WhFtE1q0ovMta2+m+UbpZ+2/HEXNWYXQgCt4hdOzA= +github.com/clipperhouse/uax29/v2 v2.5.0 h1:x7T0T4eTHDONxFJsL94uKNKPHrclyFI0lm7+w94cO8U= +github.com/clipperhouse/uax29/v2 v2.5.0/go.mod h1:Wn1g7MK6OoeDT0vL+Q0SQLDz/KpfsVRgg6W7ihQeh4g= +github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4= +github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM= +github.com/go-viper/mapstructure/v2 v2.2.1 h1:ZAaOCxANMuZx5RCeg0mBdEZk7DZasvvZIxtHqx8aGss= +github.com/go-viper/mapstructure/v2 v2.2.1/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= +github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4= +github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= +github.com/google/flatbuffers v25.1.24+incompatible h1:4wPqL3K7GzBd1CwyhSd3usxLKOaJN/AC6puCca6Jm7o= +github.com/google/flatbuffers v25.1.24+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= 
+github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= +github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg= +github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 h1:iCEnooe7UlwOQYpKFhBabPMi4aNAfoODPEFNiAnClxo= +github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM= +github.com/jackc/pgx/v5 v5.8.0 h1:TYPDoleBBme0xGSAX3/+NujXXtpZn9HBONkQC7IEZSo= +github.com/jackc/pgx/v5 v5.8.0/go.mod h1:QVeDInX2m9VyzvNeiCJVjCkNFqzsNb43204HshNSZKw= +github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo= +github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= +github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= +github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= +github.com/klauspost/cpuid/v2 v2.2.9 h1:66ze0taIn2H33fBvCkXuv9BmCwDfafmiIVpKV9kKGuY= +github.com/klauspost/cpuid/v2 v2.2.9/go.mod h1:rqkxqrZ1EhYM9G+hXH7YdowN5R5RGN6NK4QwQ3WMXF8= +github.com/lucasb-eyer/go-colorful v1.3.0 h1:2/yBRLdWBZKrf7gB40FoiKfAWYQ0lqNcbuQwVHXptag= +github.com/lucasb-eyer/go-colorful v1.3.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0= +github.com/marcboeker/go-duckdb v1.8.5 h1:tkYp+TANippy0DaIOP5OEfBEwbUINqiFqgwMQ44jME0= +github.com/marcboeker/go-duckdb v1.8.5/go.mod h1:6mK7+WQE4P4u5AFLvVBmhFxY5fvhymFptghgJX6B+/8= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4= +github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88= +github.com/mattn/go-runewidth v0.0.19 
h1:v++JhqYnZuu5jSKrk9RbgF5v4CGUjqRfBm05byFGLdw= +github.com/mattn/go-runewidth v0.0.19/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs= +github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI= +github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6/go.mod h1:CJlz5H+gyd6CUWT45Oy4q24RdLyn7Md9Vj2/ldJBSIo= +github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA= +github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo= +github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc= +github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk= +github.com/pierrec/lz4/v4 v4.1.22 h1:cKFw6uJDK+/gfw5BcDL0JL5aBsAFdsIT18eRtLj7VIU= +github.com/pierrec/lz4/v4 v4.1.22/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= +github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/sahilm/fuzzy v0.1.1 h1:ceu5RHF8DGgoi+/dR5PsECjCDH1BE3Fnmpo7aVXOdRA= +github.com/sahilm/fuzzy v0.1.1/go.mod h1:VFvziUEIMCrT6A6tw2RFIXPXXmzXbOsSHF0DOI8ZK9Y= +github.com/spf13/cobra v1.10.2 h1:DMTTonx5m65Ic0GOoRY2c16WCbHxOOw6xxezuLaBpcU= +github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiTUUS4= +github.com/spf13/pflag v1.0.9 h1:9exaQaMOCwffKiiiYk6/BndUBv+iRViNW+4lEMi0PvY= +github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.0/go.mod 
h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no= +github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM= +github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= +github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= +go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= +golang.org/x/exp v0.0.0-20250128182459-e0ece0dbea4c h1:KL/ZBHXgKGVmuZBZ01Lt57yE5ws8ZPSkkihmEyq7FXc= +golang.org/x/exp v0.0.0-20250128182459-e0ece0dbea4c/go.mod h1:tujkw807nyEEAamNbDrEGzRav+ilXA7PCRAd6xsmwiU= +golang.org/x/mod v0.27.0 h1:kb+q2PyFnEADO2IEF935ehFUXlWiNjJWtRNgBLSfbxQ= +golang.org/x/mod v0.27.0/go.mod h1:rWI627Fq0DEoudcK+MBkNkCe0EetEaDSwJJkCcjpazc= +golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= +golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= +golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk= +golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4= +golang.org/x/tools v0.36.0 h1:kWS0uv/zsvHEle1LbV5LE8QujrxB3wfQyxHfhOk0Qkg= +golang.org/x/tools v0.36.0/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s= +golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da h1:noIWHXmPHxILtqtCOPIhSt0ABwskkZKjD3bXGnZGpNY= +golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod 
h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/db/client.go b/internal/db/client.go @@ -0,0 +1,67 @@ +package db + +import ( + "context" + "fmt" + "os" + "strconv" + + "github.com/jackc/pgx/v5/pgxpool" +) + +// Client wraps a pgx connection pool. +type Client struct { + Pool *pgxpool.Pool +} + +// DSNFromEnv builds a PostgreSQL DSN from standard PG environment variables. +func DSNFromEnv() string { + host := getenv("PGHOST", "wrds-pgdata.wharton.upenn.edu") + port := getenv("PGPORT", "9737") + user := getenv("PGUSER", "") + password := getenv("PGPASSWORD", "") + database := getenv("PGDATABASE", user) // WRDS default db = username + + if password != "" { + return fmt.Sprintf("host=%s port=%s user=%s password=%s dbname=%s sslmode=require", + host, port, user, password, database) + } + return fmt.Sprintf("host=%s port=%s user=%s dbname=%s sslmode=require", + host, port, user, database) +} + +// PortFromEnv returns the port as an integer (for DuckDB attach). +func PortFromEnv() int { + p := getenv("PGPORT", "9737") + n, _ := strconv.Atoi(p) + if n == 0 { + n = 9737 + } + return n +} + +func getenv(key, fallback string) string { + if v := os.Getenv(key); v != "" { + return v + } + return fallback +} + +// New creates and pings a pgx pool using DSNFromEnv. +func New(ctx context.Context) (*Client, error) { + dsn := DSNFromEnv() + pool, err := pgxpool.New(ctx, dsn) + if err != nil { + return nil, fmt.Errorf("pgxpool.New: %w", err) + } + if err := pool.Ping(ctx); err != nil { + pool.Close() + return nil, fmt.Errorf("ping: %w", err) + } + return &Client{Pool: pool}, nil +} + +// Close releases the pool. +func (c *Client) Close() { + c.Pool.Close() +} diff --git a/internal/db/meta.go b/internal/db/meta.go @@ -0,0 +1,162 @@ +package db + +import ( + "context" + "fmt" + "strings" +) + +// Schema represents a PostgreSQL schema. 
+type Schema struct { + Name string +} + +// Table represents a PostgreSQL table within a schema. +type Table struct { + Schema string + Name string +} + +// Column represents a column in a table. +type Column struct { + Name string + DataType string +} + +// PreviewResult holds sample rows and row count for a table. +type PreviewResult struct { + Columns []string + Rows [][]string + Total int64 // estimated row count +} + +// Schemas returns all non-system schemas sorted by name. +func (c *Client) Schemas(ctx context.Context) ([]Schema, error) { + rows, err := c.Pool.Query(ctx, ` + SELECT schema_name + FROM information_schema.schemata + WHERE schema_name NOT IN ('pg_catalog', 'information_schema', 'pg_toast', + 'pg_temp_1', 'pg_toast_temp_1') + AND schema_name NOT LIKE 'pg_%' + ORDER BY schema_name + `) + if err != nil { + return nil, fmt.Errorf("schemas query: %w", err) + } + defer rows.Close() + + var schemas []Schema + for rows.Next() { + var s Schema + if err := rows.Scan(&s.Name); err != nil { + return nil, err + } + schemas = append(schemas, s) + } + return schemas, rows.Err() +} + +// Tables returns all tables in the given schema. +func (c *Client) Tables(ctx context.Context, schema string) ([]Table, error) { + rows, err := c.Pool.Query(ctx, ` + SELECT table_name + FROM information_schema.tables + WHERE table_schema = $1 + AND table_type IN ('BASE TABLE', 'VIEW') + ORDER BY table_name + `, schema) + if err != nil { + return nil, fmt.Errorf("tables query: %w", err) + } + defer rows.Close() + + var tables []Table + for rows.Next() { + var t Table + t.Schema = schema + if err := rows.Scan(&t.Name); err != nil { + return nil, err + } + tables = append(tables, t) + } + return tables, rows.Err() +} + +// Columns returns column metadata for the given table. 
+func (c *Client) Columns(ctx context.Context, schema, table string) ([]Column, error) { + rows, err := c.Pool.Query(ctx, ` + SELECT column_name, data_type + FROM information_schema.columns + WHERE table_schema = $1 AND table_name = $2 + ORDER BY ordinal_position + `, schema, table) + if err != nil { + return nil, fmt.Errorf("columns query: %w", err) + } + defer rows.Close() + + var cols []Column + for rows.Next() { + var col Column + if err := rows.Scan(&col.Name, &col.DataType); err != nil { + return nil, err + } + cols = append(cols, col) + } + return cols, rows.Err() +} + +// Preview fetches the first `limit` rows and an estimated row count. +func (c *Client) Preview(ctx context.Context, schema, table string, limit int) (*PreviewResult, error) { + if limit <= 0 { + limit = 50 + } + + qualified := fmt.Sprintf("%s.%s", quoteIdent(schema), quoteIdent(table)) + + // Estimated count via pg stats (fast). + var total int64 + _ = c.Pool.QueryRow(ctx, ` + SELECT reltuples::bigint + FROM pg_class c + JOIN pg_namespace n ON n.oid = c.relnamespace + WHERE n.nspname = $1 AND c.relname = $2 + `, schema, table).Scan(&total) + + rows, err := c.Pool.Query(ctx, fmt.Sprintf("SELECT * FROM %s LIMIT %d", qualified, limit)) + if err != nil { + return nil, fmt.Errorf("preview query: %w", err) + } + defer rows.Close() + + fieldDescs := rows.FieldDescriptions() + cols := make([]string, len(fieldDescs)) + for i, fd := range fieldDescs { + cols[i] = string(fd.Name) + } + + var result PreviewResult + result.Columns = cols + result.Total = total + + for rows.Next() { + vals, err := rows.Values() + if err != nil { + return nil, err + } + row := make([]string, len(vals)) + for i, v := range vals { + if v == nil { + row[i] = "NULL" + } else { + row[i] = fmt.Sprintf("%v", v) + } + } + result.Rows = append(result.Rows, row) + } + return &result, rows.Err() +} + +func quoteIdent(s string) string { + return `"` + strings.ReplaceAll(s, `"`, `""`) + `"` +} diff --git a/internal/export/duckdb.go 
b/internal/export/duckdb.go @@ -0,0 +1,99 @@ +package export + +import ( + "database/sql" + "fmt" + "os" + "strings" + + _ "github.com/marcboeker/go-duckdb" +) + +// Options controls the export behaviour. +type Options struct { + Format string // "parquet" or "csv" +} + +// Export runs query against the WRDS PostgreSQL instance and writes output to outPath. +// Format is determined by opts.Format (default: parquet). +func Export(query, outPath string, opts Options) error { + format := strings.ToLower(opts.Format) + if format == "" { + if strings.HasSuffix(strings.ToLower(outPath), ".csv") { + format = "csv" + } else { + format = "parquet" + } + } + + db, err := sql.Open("duckdb", "") + if err != nil { + return fmt.Errorf("open duckdb: %w", err) + } + defer db.Close() + + // Install and load postgres extension. + for _, stmt := range []string{ + "INSTALL postgres;", + "LOAD postgres;", + } { + if _, err := db.Exec(stmt); err != nil { + // Ignore "already installed" errors. + if !strings.Contains(err.Error(), "already") { + return fmt.Errorf("%s: %w", stmt, err) + } + } + } + + // Build the ATTACH string from env. + attachDSN := buildAttachDSN() + attach := fmt.Sprintf("ATTACH '%s' AS wrds (TYPE POSTGRES, READ_ONLY);", attachDSN) + if _, err := db.Exec(attach); err != nil { + return fmt.Errorf("attach: %w", err) + } + + // Wrap query in a COPY statement. + var copySQL string + switch format { + case "csv": + copySQL = fmt.Sprintf("COPY (%s) TO '%s' (FORMAT CSV, HEADER true);", query, outPath) + default: + copySQL = fmt.Sprintf("COPY (%s) TO '%s' (FORMAT PARQUET, COMPRESSION ZSTD);", query, outPath) + } + + if _, err := db.Exec(copySQL); err != nil { + return fmt.Errorf("copy: %w", err) + } + return nil +} + +// buildAttachDSN builds the postgres attach DSN string from standard PG env vars. 
+func buildAttachDSN() string { + host := getenv("PGHOST", "wrds-pgdata.wharton.upenn.edu") + port := getenv("PGPORT", "9737") + user := getenv("PGUSER", "") + password := getenv("PGPASSWORD", "") + dbname := getenv("PGDATABASE", user) + + // DuckDB postgres attach DSN format. + parts := []string{ + "host=" + host, + "port=" + port, + "dbname=" + dbname, + "sslmode=require", + } + if user != "" { + parts = append(parts, "user="+user) + } + if password != "" { + parts = append(parts, "password="+password) + } + return strings.Join(parts, " ") +} + +func getenv(key, fallback string) string { + if v := os.Getenv(key); v != "" { + return v + } + return fallback +} diff --git a/internal/tui/app.go b/internal/tui/app.go @@ -0,0 +1,479 @@ +package tui + +import ( + "context" + "fmt" + "strings" + "time" + + "github.com/charmbracelet/bubbles/list" + "github.com/charmbracelet/bubbles/spinner" + "github.com/charmbracelet/bubbles/table" + tea "github.com/charmbracelet/bubbletea" + "github.com/charmbracelet/lipgloss" + "github.com/eloualiche/wrds-download/internal/db" + "github.com/eloualiche/wrds-download/internal/export" +) + +// pane identifies which panel is focused. +type pane int + +const ( + paneSchema pane = iota + paneTable + panePreview +) + +// appState represents the TUI state machine. +type appState int + +const ( + stateBrowse appState = iota + stateDownloadForm + stateDownloading + stateDone +) + +// -- Tea messages -- + +type schemasLoadedMsg struct{ schemas []db.Schema } +type tablesLoadedMsg struct{ tables []db.Table } +type previewLoadedMsg struct{ result *db.PreviewResult } +type errMsg struct{ err error } +type downloadDoneMsg struct{ path string } +type tickMsg time.Time + +func errCmd(err error) tea.Cmd { + return func() tea.Msg { return errMsg{err} } +} + +// item wraps a string to satisfy the bubbles list.Item interface. 
+type item struct{ title string } + +func (i item) FilterValue() string { return i.title } +func (i item) Title() string { return i.title } +func (i item) Description() string { return "" } + +// App is the root Bubble Tea model. +type App struct { + client *db.Client + + width, height int + focus pane + state appState + + schemaList list.Model + tableList list.Model + previewTbl table.Model + previewInfo string // "~2.1M rows" etc. + + dlForm DlForm + spinner spinner.Model + statusOK string + statusErr string + + selectedSchema string + selectedTable string +} + +// NewApp constructs the root model. +func NewApp(client *db.Client) *App { + del := list.NewDefaultDelegate() + del.ShowDescription = false + + schemaList := list.New(nil, del, 0, 0) + schemaList.Title = "Schemas" + schemaList.SetShowStatusBar(false) + schemaList.SetFilteringEnabled(true) + + tableList := list.New(nil, del, 0, 0) + tableList.Title = "Tables" + tableList.SetShowStatusBar(false) + tableList.SetFilteringEnabled(true) + + sp := spinner.New() + sp.Spinner = spinner.Dot + + return &App{ + client: client, + schemaList: schemaList, + tableList: tableList, + spinner: sp, + focus: paneSchema, + state: stateBrowse, + } +} + +// Init loads schemas on startup. 
+func (a *App) Init() tea.Cmd { + return tea.Batch( + a.loadSchemas(), + a.spinner.Tick, + ) +} + +func (a *App) loadSchemas() tea.Cmd { + return func() tea.Msg { + schemas, err := a.client.Schemas(context.Background()) + if err != nil { + return errMsg{err} + } + return schemasLoadedMsg{schemas} + } +} + +func (a *App) loadTables(schema string) tea.Cmd { + return func() tea.Msg { + tables, err := a.client.Tables(context.Background(), schema) + if err != nil { + return errMsg{err} + } + return tablesLoadedMsg{tables} + } +} + +func (a *App) loadPreview(schema, tbl string) tea.Cmd { + return func() tea.Msg { + result, err := a.client.Preview(context.Background(), schema, tbl, 50) + if err != nil { + return errMsg{err} + } + return previewLoadedMsg{result} + } +} + +func (a *App) startDownload(msg DlSubmitMsg) tea.Cmd { + return func() tea.Msg { + var query string + if msg.Where != "" { + query = fmt.Sprintf("SELECT * FROM wrds.%s.%s WHERE %s", msg.Schema, msg.Table, msg.Where) + } else { + query = fmt.Sprintf("SELECT * FROM wrds.%s.%s", msg.Schema, msg.Table) + } + err := export.Export(query, msg.Out, export.Options{Format: msg.Format}) + if err != nil { + return errMsg{err} + } + return downloadDoneMsg{msg.Out} + } +} + +// Update handles all messages. 
+func (a *App) Update(msg tea.Msg) (tea.Model, tea.Cmd) { + switch msg := msg.(type) { + + case tea.WindowSizeMsg: + a.width = msg.Width + a.height = msg.Height + a.resizePanels() + return a, nil + + case spinner.TickMsg: + var cmd tea.Cmd + a.spinner, cmd = a.spinner.Update(msg) + return a, cmd + + case schemasLoadedMsg: + items := make([]list.Item, len(msg.schemas)) + for i, s := range msg.schemas { + items[i] = item{s.Name} + } + a.schemaList.SetItems(items) + return a, nil + + case tablesLoadedMsg: + items := make([]list.Item, len(msg.tables)) + for i, t := range msg.tables { + items[i] = item{t.Name} + } + a.tableList.SetItems(items) + a.previewTbl = table.Model{} // clear preview + a.previewInfo = "" + return a, nil + + case previewLoadedMsg: + r := msg.result + cols := make([]table.Column, len(r.Columns)) + for i, c := range r.Columns { + w := maxWidth(c, r.Rows, i, 20) + cols[i] = table.Column{Title: c, Width: w} + } + rows := make([]table.Row, len(r.Rows)) + for i, row := range r.Rows { + rows[i] = table.Row(row) + } + t := table.New( + table.WithColumns(cols), + table.WithRows(rows), + table.WithFocused(false), + table.WithHeight(a.previewHeight()-4), + ) + ts := table.DefaultStyles() + ts.Header = ts.Header.BorderStyle(lipgloss.NormalBorder()).BorderForeground(colorMuted).BorderBottom(true).Bold(true) + ts.Selected = ts.Selected.Foreground(colorPrimary).Bold(false) + t.SetStyles(ts) + + a.previewTbl = t + if r.Total > 0 { + a.previewInfo = fmt.Sprintf("~%s rows", formatCount(r.Total)) + } + return a, nil + + case errMsg: + a.statusErr = msg.err.Error() + a.state = stateBrowse + return a, nil + + case downloadDoneMsg: + a.statusOK = fmt.Sprintf("Saved: %s", msg.path) + a.state = stateDone + return a, nil + + case DlCancelMsg: + a.state = stateBrowse + return a, nil + + case DlSubmitMsg: + a.state = stateDownloading + a.statusErr = "" + a.statusOK = "" + return a, tea.Batch(a.startDownload(msg), a.spinner.Tick) + + case tea.KeyMsg: + if a.state == 
stateDownloadForm { + var cmd tea.Cmd + a.dlForm, cmd = a.dlForm.Update(msg) + return a, cmd + } + + switch msg.String() { + case "q", "ctrl+c": + return a, tea.Quit + + case "tab": + a.focus = (a.focus + 1) % 3 + return a, nil + + case "shift+tab": + a.focus = (a.focus + 2) % 3 + return a, nil + + case "enter": + switch a.focus { + case paneSchema: + if sel := selectedItemTitle(a.schemaList); sel != "" { + a.selectedSchema = sel + a.selectedTable = "" + a.focus = paneTable + return a, a.loadTables(sel) + } + case paneTable: + if sel := selectedItemTitle(a.tableList); sel != "" { + a.selectedTable = sel + a.focus = panePreview + return a, a.loadPreview(a.selectedSchema, sel) + } + } + + case "d": + if a.selectedSchema != "" && a.selectedTable != "" { + a.dlForm = newDlForm(a.selectedSchema, a.selectedTable) + a.state = stateDownloadForm + return a, nil + } + + case "esc": + if a.state == stateDone { + a.state = stateBrowse + a.statusOK = "" + } + return a, nil + } + + // Delegate keyboard events to the focused list. + var cmd tea.Cmd + switch a.focus { + case paneSchema: + a.schemaList, cmd = a.schemaList.Update(msg) + case paneTable: + a.tableList, cmd = a.tableList.Update(msg) + case panePreview: + a.previewTbl, cmd = a.previewTbl.Update(msg) + } + return a, cmd + } + + // Forward spinner ticks when downloading. + if a.state == stateDownloading { + var cmd tea.Cmd + a.spinner, cmd = a.spinner.Update(msg) + return a, cmd + } + + return a, nil +} + +// View renders the full TUI. +func (a *App) View() string { + if a.width == 0 { + return "Loading…" + } + + header := styleTitle.Render(" WRDS") + styleStatusBar.Render(" Wharton Research Data Services") + footer := a.footerView() + + // Content area height. 
+ contentH := a.height - lipgloss.Height(header) - lipgloss.Height(footer) - 2 + + schemaPanelW, tablePanelW, previewPanelW := a.panelWidths() + + schemaPanel := a.renderListPanel(a.schemaList, "Schemas", paneSchema, schemaPanelW, contentH) + tablePanel := a.renderListPanel(a.tableList, fmt.Sprintf("Tables (%s)", a.selectedSchema), paneTable, tablePanelW, contentH) + previewPanel := a.renderPreviewPanel(previewPanelW, contentH) + + body := lipgloss.JoinHorizontal(lipgloss.Top, schemaPanel, tablePanel, previewPanel) + full := lipgloss.JoinVertical(lipgloss.Left, header, body, footer) + + if a.state == stateDownloadForm { + overlay := a.dlForm.View(a.width) + return overlayCenter(full, overlay, a.width, a.height) + } + if a.state == stateDownloading { + msg := a.spinner.View() + " Downloading…" + return overlayCenter(full, stylePanelFocused.Padding(1, 3).Render(msg), a.width, a.height) + } + if a.state == stateDone { + msg := styleSuccess.Render("✓ ") + a.statusOK + "\n\n" + styleStatusBar.Render("[esc] dismiss") + return overlayCenter(full, stylePanelFocused.Padding(1, 3).Render(msg), a.width, a.height) + } + + return full +} + +func (a *App) footerView() string { + keys := "[tab] switch pane [enter] select [d] download [/] filter [q] quit" + status := "" + if a.statusErr != "" { + status = " " + styleError.Render("Error: "+a.statusErr) + } + return styleStatusBar.Render(keys + status) +} + +func (a *App) renderListPanel(l list.Model, title string, p pane, w, h int) string { + l.SetSize(w-4, h-2) + content := l.View() + style := stylePanelBlurred + if a.focus == p { + style = stylePanelFocused + } + return style.Width(w - 2).Height(h).Render(content) +} + +func (a *App) renderPreviewPanel(w, h int) string { + var sb strings.Builder + label := "Preview" + if a.selectedSchema != "" && a.selectedTable != "" { + label = fmt.Sprintf("Preview: %s.%s", a.selectedSchema, a.selectedTable) + } + sb.WriteString(stylePanelHeader.Render(label) + "\n") + + if 
// maxWidth returns the display width for column col: the length in characters
// of the longest value among header and the col-th cell of every row, plus two
// columns of padding, capped at max. Rows that have no cell at index col are
// ignored.
//
// The cap is applied after the padding, so the result never exceeds max and
// never shrinks as the content grows.
func maxWidth(header string, rows [][]string, col, max int) int {
	// Count characters rather than bytes so accented or non-Latin text is not
	// over-measured.
	runeLen := func(s string) int {
		n := 0
		for range s {
			n++
		}
		return n
	}

	w := runeLen(header)
	for _, row := range rows {
		if col < len(row) {
			if l := runeLen(row[col]); l > w {
				w = l
			}
		}
	}

	if w += 2; w > max {
		return max
	}
	return w
}

// formatCount renders n compactly: values below 1000 (including negatives) are
// printed as plain integers, larger ones with one decimal and a K, M or B
// suffix.
//
// The unit is chosen after rounding, so a value that rounds up to 1000 of one
// unit is shown as 1.0 of the next (999,960 becomes "1.0M", not "1000.0K").
// Billions are the largest unit and may therefore exceed 999.9.
func formatCount(n int64) string {
	if n < 1_000 {
		return fmt.Sprintf("%d", n)
	}

	units := []struct {
		div    float64
		suffix string
	}{
		{1e3, "K"},
		{1e6, "M"},
		{1e9, "B"},
	}
	for i, u := range units {
		s := fmt.Sprintf("%.1f", float64(n)/u.div)
		// "1000.0" or longer means the value belongs to the next unit up.
		if len(s) >= len("1000.0") && i < len(units)-1 {
			continue
		}
		return s + u.suffix
	}
	return fmt.Sprintf("%d", n) // unreachable: the last unit always returns
}
+ lines := strings.Split(base, "\n") + overlayLines := strings.Split(overlay, "\n") + + startRow := (len(lines) - len(overlayLines)) / 2 + if startRow < 0 { + startRow = 0 + } + + for i, ol := range overlayLines { + row := startRow + i + if row < len(lines) { + lineRunes := []rune(lines[row]) + olRunes := []rune(ol) + startCol := (w - lipgloss.Width(ol)) / 2 + if startCol < 0 { + startCol = 0 + } + _ = lineRunes + _ = olRunes + _ = startCol + lines[row] = ol + } + } + return strings.Join(lines, "\n") +} diff --git a/internal/tui/dlform.go b/internal/tui/dlform.go @@ -0,0 +1,145 @@ +package tui + +import ( + "fmt" + "strings" + + "github.com/charmbracelet/bubbles/textinput" + tea "github.com/charmbracelet/bubbletea" + "github.com/charmbracelet/lipgloss" +) + +type dlFormField int + +const ( + fieldWhere dlFormField = iota + fieldOut + fieldFormat + fieldCount +) + +// DlForm is the download dialog overlay. +type DlForm struct { + schema string + table string + inputs [fieldCount]textinput.Model + focused dlFormField + err string +} + +// DlSubmitMsg is sent when the user confirms the download form. +type DlSubmitMsg struct { + Schema string + Table string + Where string + Out string + Format string +} + +// DlCancelMsg is sent when the user cancels. +type DlCancelMsg struct{} + +func newDlForm(schema, table string) DlForm { + f := DlForm{schema: schema, table: table} + + f.inputs[fieldWhere] = textinput.New() + f.inputs[fieldWhere].Placeholder = "e.g. 
date >= '2020-01-01'" + f.inputs[fieldWhere].CharLimit = 512 + + f.inputs[fieldOut] = textinput.New() + f.inputs[fieldOut].Placeholder = fmt.Sprintf("./%s_%s.parquet", schema, table) + f.inputs[fieldOut].CharLimit = 256 + f.inputs[fieldOut].SetValue(fmt.Sprintf("./%s_%s.parquet", schema, table)) + + f.inputs[fieldFormat] = textinput.New() + f.inputs[fieldFormat].Placeholder = "parquet" + f.inputs[fieldFormat].CharLimit = 10 + f.inputs[fieldFormat].SetValue("parquet") + + f.inputs[fieldWhere].Focus() + return f +} + +func (f DlForm) Update(msg tea.Msg) (DlForm, tea.Cmd) { + switch msg := msg.(type) { + case tea.KeyMsg: + switch msg.String() { + case "esc": + return f, func() tea.Msg { return DlCancelMsg{} } + case "enter": + if f.focused < fieldCount-1 { + f.inputs[f.focused].Blur() + f.focused++ + f.inputs[f.focused].Focus() + return f, textinput.Blink + } + // Submit + out := f.inputs[fieldOut].Value() + if out == "" { + out = fmt.Sprintf("./%s_%s.parquet", f.schema, f.table) + } + format := strings.ToLower(f.inputs[fieldFormat].Value()) + if format == "" { + format = "parquet" + } + return f, func() tea.Msg { + return DlSubmitMsg{ + Schema: f.schema, + Table: f.table, + Where: f.inputs[fieldWhere].Value(), + Out: out, + Format: format, + } + } + case "tab", "down": + f.inputs[f.focused].Blur() + f.focused = (f.focused + 1) % fieldCount + f.inputs[f.focused].Focus() + return f, textinput.Blink + case "shift+tab", "up": + f.inputs[f.focused].Blur() + f.focused = (f.focused + fieldCount - 1) % fieldCount + f.inputs[f.focused].Focus() + return f, textinput.Blink + } + } + + var cmd tea.Cmd + f.inputs[f.focused], cmd = f.inputs[f.focused].Update(msg) + return f, cmd +} + +func (f DlForm) View(width int) string { + var sb strings.Builder + + title := stylePanelHeader.Render(fmt.Sprintf("Download %s.%s", f.schema, f.table)) + sb.WriteString(title + "\n\n") + + labels := []string{"WHERE clause", "Output path", "Format (parquet/csv)"} + for i, label := range labels { + 
style := lipgloss.NewStyle().Foreground(colorMuted) + if dlFormField(i) == f.focused { + style = lipgloss.NewStyle().Foreground(colorFocus) + } + sb.WriteString(style.Render(label+" ") + "\n") + sb.WriteString(f.inputs[i].View() + "\n\n") + } + + hint := styleStatusBar.Render("[tab] next field [enter] confirm [esc] cancel") + sb.WriteString(hint) + + content := sb.String() + boxWidth := width - 8 + if boxWidth < 40 { + boxWidth = 40 + } + + box := lipgloss.NewStyle(). + Border(lipgloss.RoundedBorder()). + BorderForeground(colorFocus). + Padding(1, 2). + Width(boxWidth). + Render(content) + + return lipgloss.Place(width, 20, lipgloss.Center, lipgloss.Center, box) +} diff --git a/internal/tui/styles.go b/internal/tui/styles.go @@ -0,0 +1,47 @@ +package tui + +import "github.com/charmbracelet/lipgloss" + +var ( + colorPrimary = lipgloss.Color("#7C3AED") // purple + colorSecondary = lipgloss.Color("#06B6D4") // cyan + colorMuted = lipgloss.Color("#6B7280") + colorSuccess = lipgloss.Color("#10B981") + colorError = lipgloss.Color("#EF4444") + colorFocus = lipgloss.Color("#F59E0B") // amber border when focused + + styleTitle = lipgloss.NewStyle(). + Bold(true). + Foreground(colorPrimary) + + stylePanelFocused = lipgloss.NewStyle(). + Border(lipgloss.RoundedBorder()). + BorderForeground(colorFocus) + + stylePanelBlurred = lipgloss.NewStyle(). + Border(lipgloss.RoundedBorder()). + BorderForeground(colorMuted) + + stylePanelHeader = lipgloss.NewStyle(). + Bold(true). + Foreground(colorSecondary). + Padding(0, 1) + + styleStatusBar = lipgloss.NewStyle(). + Foreground(colorMuted). + Padding(0, 1) + + styleSuccess = lipgloss.NewStyle().Foreground(colorSuccess) + styleError = lipgloss.NewStyle().Foreground(colorError) + + styleCellHeader = lipgloss.NewStyle(). + Bold(true). + Foreground(colorPrimary). + Padding(0, 1) + + styleCellNormal = lipgloss.NewStyle().Padding(0, 1) + + styleRowCount = lipgloss.NewStyle(). + Foreground(colorMuted). 
+ Italic(true) +) diff --git a/main.go b/main.go @@ -0,0 +1,7 @@ +package main + +import "github.com/eloualiche/wrds-download/cmd" + +func main() { + cmd.Execute() +}