wrds-download

TUI/CLI tool for browsing and downloading WRDS data
Log | Files | Refs | README

meta.go (6051B)


      1 package db
      2 
      3 import (
      4 	"context"
      5 	"fmt"
      6 	"strings"
      7 )
      8 
      9 // Schema represents a PostgreSQL schema.
     10 type Schema struct {
     11 	Name string
     12 }
     13 
     14 // Table represents a PostgreSQL table within a schema.
     15 type Table struct {
     16 	Schema string
     17 	Name   string
     18 }
     19 
     20 // Column represents a column in a table.
     21 type Column struct {
     22 	Name     string
     23 	DataType string
     24 }
     25 
     26 // ColumnMeta holds catalog metadata about a single column.
     27 type ColumnMeta struct {
     28 	Name        string
     29 	DataType    string
     30 	Nullable    bool
     31 	Description string // from pg_description (WRDS variable label)
     32 }
     33 
     34 // TableMeta holds catalog metadata for a table (no data scan required).
     35 type TableMeta struct {
     36 	Schema   string
     37 	Table    string
     38 	Comment  string // table-level comment from pg_description
     39 	RowCount int64  // estimated from pg_class.reltuples
     40 	Size     string // human-readable, from pg_size_pretty
     41 	Columns  []ColumnMeta
     42 }
     43 
     44 // PreviewResult holds sample rows and row count for a table.
     45 type PreviewResult struct {
     46 	Columns []string
     47 	Rows    [][]string
     48 	Total   int64 // estimated row count
     49 }
     50 
     51 // Schemas returns all non-system schemas sorted by name.
     52 func (c *Client) Schemas(ctx context.Context) ([]Schema, error) {
     53 	rows, err := c.Pool.Query(ctx, `
     54 		SELECT schema_name
     55 		FROM information_schema.schemata
     56 		WHERE schema_name NOT IN ('pg_catalog', 'information_schema', 'pg_toast',
     57 		                          'pg_temp_1', 'pg_toast_temp_1')
     58 		  AND schema_name NOT LIKE 'pg_%'
     59 		ORDER BY schema_name
     60 	`)
     61 	if err != nil {
     62 		return nil, fmt.Errorf("schemas query: %w", err)
     63 	}
     64 	defer rows.Close()
     65 
     66 	var schemas []Schema
     67 	for rows.Next() {
     68 		var s Schema
     69 		if err := rows.Scan(&s.Name); err != nil {
     70 			return nil, err
     71 		}
     72 		schemas = append(schemas, s)
     73 	}
     74 	return schemas, rows.Err()
     75 }
     76 
     77 // Tables returns all tables in the given schema.
     78 func (c *Client) Tables(ctx context.Context, schema string) ([]Table, error) {
     79 	rows, err := c.Pool.Query(ctx, `
     80 		SELECT table_name
     81 		FROM information_schema.tables
     82 		WHERE table_schema = $1
     83 		  AND table_type IN ('BASE TABLE', 'VIEW')
     84 		ORDER BY table_name
     85 	`, schema)
     86 	if err != nil {
     87 		return nil, fmt.Errorf("tables query: %w", err)
     88 	}
     89 	defer rows.Close()
     90 
     91 	var tables []Table
     92 	for rows.Next() {
     93 		var t Table
     94 		t.Schema = schema
     95 		if err := rows.Scan(&t.Name); err != nil {
     96 			return nil, err
     97 		}
     98 		tables = append(tables, t)
     99 	}
    100 	return tables, rows.Err()
    101 }
    102 
    103 // Columns returns column metadata for the given table.
    104 func (c *Client) Columns(ctx context.Context, schema, table string) ([]Column, error) {
    105 	rows, err := c.Pool.Query(ctx, `
    106 		SELECT column_name, data_type
    107 		FROM information_schema.columns
    108 		WHERE table_schema = $1 AND table_name = $2
    109 		ORDER BY ordinal_position
    110 	`, schema, table)
    111 	if err != nil {
    112 		return nil, fmt.Errorf("columns query: %w", err)
    113 	}
    114 	defer rows.Close()
    115 
    116 	var cols []Column
    117 	for rows.Next() {
    118 		var col Column
    119 		if err := rows.Scan(&col.Name, &col.DataType); err != nil {
    120 			return nil, err
    121 		}
    122 		cols = append(cols, col)
    123 	}
    124 	return cols, rows.Err()
    125 }
    126 
    127 // TableMeta fetches catalog metadata for a table: column info with
    128 // descriptions, estimated row count, and table size. All queries hit
    129 // pg_catalog only — no table data is scanned.
    130 func (c *Client) TableMeta(ctx context.Context, schema, table string) (*TableMeta, error) {
    131 	meta := &TableMeta{Schema: schema, Table: table}
    132 
    133 	// Table-level stats (best effort — some may require permissions).
    134 	_ = c.Pool.QueryRow(ctx, `
    135 		SELECT c.reltuples::bigint,
    136 		       COALESCE(pg_size_pretty(pg_total_relation_size(c.oid)), ''),
    137 		       COALESCE(obj_description(c.oid), '')
    138 		FROM pg_class c
    139 		JOIN pg_namespace n ON n.oid = c.relnamespace
    140 		WHERE n.nspname = $1 AND c.relname = $2
    141 	`, schema, table).Scan(&meta.RowCount, &meta.Size, &meta.Comment)
    142 
    143 	// Column metadata with descriptions from pg_description.
    144 	rows, err := c.Pool.Query(ctx, `
    145 		SELECT a.attname,
    146 		       pg_catalog.format_type(a.atttypid, a.atttypmod),
    147 		       NOT a.attnotnull,
    148 		       COALESCE(d.description, '')
    149 		FROM pg_attribute a
    150 		JOIN pg_class c ON a.attrelid = c.oid
    151 		JOIN pg_namespace n ON c.relnamespace = n.oid
    152 		LEFT JOIN pg_description d ON d.objoid = c.oid AND d.objsubid = a.attnum
    153 		WHERE n.nspname = $1 AND c.relname = $2
    154 		  AND a.attnum > 0 AND NOT a.attisdropped
    155 		ORDER BY a.attnum
    156 	`, schema, table)
    157 	if err != nil {
    158 		return nil, fmt.Errorf("table meta: %w", err)
    159 	}
    160 	defer rows.Close()
    161 
    162 	for rows.Next() {
    163 		var col ColumnMeta
    164 		if err := rows.Scan(&col.Name, &col.DataType, &col.Nullable, &col.Description); err != nil {
    165 			return nil, err
    166 		}
    167 		meta.Columns = append(meta.Columns, col)
    168 	}
    169 	return meta, rows.Err()
    170 }
    171 
    172 // Preview fetches the first `limit` rows and an estimated row count.
    173 func (c *Client) Preview(ctx context.Context, schema, table string, limit int) (*PreviewResult, error) {
    174 	if limit <= 0 {
    175 		limit = 50
    176 	}
    177 
    178 	qualified := fmt.Sprintf("%s.%s", QuoteIdent(schema), QuoteIdent(table))
    179 
    180 	// Estimated count via pg stats (fast).
    181 	var total int64
    182 	_ = c.Pool.QueryRow(ctx, `
    183 		SELECT reltuples::bigint
    184 		FROM pg_class c
    185 		JOIN pg_namespace n ON n.oid = c.relnamespace
    186 		WHERE n.nspname = $1 AND c.relname = $2
    187 	`, schema, table).Scan(&total)
    188 
    189 	rows, err := c.Pool.Query(ctx, fmt.Sprintf("SELECT * FROM %s LIMIT %d", qualified, limit))
    190 	if err != nil {
    191 		return nil, fmt.Errorf("preview query: %w", err)
    192 	}
    193 	defer rows.Close()
    194 
    195 	fieldDescs := rows.FieldDescriptions()
    196 	cols := make([]string, len(fieldDescs))
    197 	for i, fd := range fieldDescs {
    198 		cols[i] = string(fd.Name)
    199 	}
    200 
    201 	var result PreviewResult
    202 	result.Columns = cols
    203 	result.Total = total
    204 
    205 	for rows.Next() {
    206 		vals, err := rows.Values()
    207 		if err != nil {
    208 			return nil, err
    209 		}
    210 		row := make([]string, len(vals))
    211 		for i, v := range vals {
    212 			if v == nil {
    213 				row[i] = "NULL"
    214 			} else {
    215 				row[i] = fmt.Sprintf("%v", v)
    216 			}
    217 		}
    218 		result.Rows = append(result.Rows, row)
    219 	}
    220 	return &result, rows.Err()
    221 }
    222 
    223 // QuoteIdent quotes a PostgreSQL identifier (schema, table, column name)
    224 // to prevent SQL injection.
    225 func QuoteIdent(s string) string {
    226 	return `"` + strings.ReplaceAll(s, `"`, `""`) + `"`
    227 }