Merge pull request #1 from LouLouLibs/feat/v0.2.0-sample-convert - dt-cli-tools - CLI tools for viewing, filtering, and comparing tabular data files

commit 601fbfe88df565049c4894f1024cb016beab9d67
parent b2789b7aff3b7b6cb1c326fac1049494051df3ce
Author: Erik Loualiche <eloualiche@users.noreply.github.com>
Date:   Sat,  4 Apr 2026 13:55:34 -0500

Merge pull request #1 from LouLouLibs/feat/v0.2.0-sample-convert

feat: v0.2.0 — add --sample and --convert to dtcat
Diffstat:
M Cargo.toml  | 3 ++-
M README.md  | 13 +++++++++++++
A docs/superpowers/plans/2026-04-04-v0.2.0-sample-convert.md  | 692 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/bin/dtcat.rs  | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M src/lib.rs  | 2 ++
A src/writer.rs  | 17 +++++++++++++++++
A src/writers/arrow.rs  | 35 +++++++++++++++++++++++++++++++++++
A src/writers/csv.rs  | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/writers/json.rs  | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A src/writers/mod.rs  | 4 ++++
A src/writers/parquet.rs  | 36 ++++++++++++++++++++++++++++++++++++
M tests/dtcat.rs  | 132 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

12 files changed, 1116 insertions(+), 2 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "dt-cli-tools"
-version = "0.1.0"
+version = "0.2.0"
 edition = "2024"
 description = "CLI tools for viewing, filtering, and comparing tabular data files"
 license = "MIT"
@@ -32,6 +32,7 @@ polars = { version = "0.46", default-features = false, features = [
     "parquet",
     "ipc",
     "json",
+    "random",
 ] }
 calamine = "0.26"
 clap = { version = "4", features = ["derive"] }
diff --git a/README.md b/README.md
@@ -102,6 +102,15 @@ dtcat data.csv --tail 5
 # CSV output for piping
 dtcat data.parquet --csv
 
+# Random sample of rows
+dtcat huge.parquet --sample 20
+dtcat huge.parquet --sample 50 --csv
+
+# Convert between formats
+dtcat data.csv --convert parquet -o data.parquet
+dtcat report.xlsx --sheet Revenue --convert csv -o revenue.csv
+dtcat data.parquet --convert ndjson              # text formats go to stdout
+
 # Override format detection
 dtcat data.txt --format csv
 
@@ -141,6 +150,10 @@ dtcat data.csv --skip 2
 
 Modes `--schema`, `--describe`, `--info`, and data (default) are mutually exclusive.
 
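+`--sample N` randomly selects N rows (all rows are returned when N is at least the row count); mutually exclusive with `--head`/`--tail`/`--all` and with the `--schema`/`--describe`/`--info` modes.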
+
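+`--convert FORMAT` writes the data in a different format instead of displaying it, so it cannot be combined with the display flags (`--schema`, `--describe`, `--info`, `--csv`, `--head`, `--tail`, `--all`, `--sample`). Use `-o PATH` to write to a file (required for the binary formats Parquet and Arrow; optional for text formats, which default to stdout). Supported targets: csv, tsv, parquet, arrow, json, ndjson.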
+
 ## dtfilter — Query and Filter
 
 <img src="demo/dtfilter.gif" alt="dtfilter demo" width="80%" />
diff --git a/docs/superpowers/plans/2026-04-04-v0.2.0-sample-convert.md b/docs/superpowers/plans/2026-04-04-v0.2.0-sample-convert.md
@@ -0,0 +1,692 @@
+# v0.2.0: --sample and --convert Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Add random row sampling (`--sample N`) and format conversion (`--convert FORMAT -o PATH`) to dtcat.
+
+**Architecture:** Both features extend the existing dtcat binary. `--sample` uses Polars `DataFrame::sample_n_literal` after reading, before display. `--convert` requires new writer functions in a `src/writers/` module mirroring `src/readers/`, then a write path in dtcat that short-circuits display.
+
+**Tech Stack:** Polars (ParquetWriter, IpcWriter, JsonWriter, CsvWriter), clap, anyhow.
+
+---
+
+### Task 1: Add `--sample N` flag and validation
+
+**Files:**
+- Modify: `src/bin/dtcat.rs`
+- Test: `tests/dtcat.rs`
+
+- [ ] **Step 1: Write the failing tests**
+
+Add to `tests/dtcat.rs`:
+
+```rust
+#[test]
+fn sample_returns_n_rows() {
+    // 18-row fixture, sample 5
+    let out = dtcat().arg("demo/sales.csv").arg("--sample").arg("5").arg("--csv")
+        .assert().success();
+    let stdout = String::from_utf8(out.get_output().stdout.clone()).unwrap();
+    // CSV header + 5 data rows = 6 lines (last line may be empty)
+    let lines: Vec<&str> = stdout.trim().lines().collect();
+    assert_eq!(lines.len(), 6, "expected header + 5 rows, got {}", lines.len());
+}
+
+#[test]
+fn sample_ge_total_returns_all() {
+    let f = csv_file("x\n1\n2\n3\n");
+    dtcat().arg(f.path()).arg("--sample").arg("100").arg("--csv")
+        .assert().success();
+}
+
+#[test]
+fn sample_conflicts_with_head() {
+    let f = csv_file("x\n1\n");
+    dtcat().arg(f.path()).arg("--sample").arg("1").arg("--head").arg("1")
+        .assert().code(2);
+}
+
+#[test]
+fn sample_conflicts_with_tail() {
+    let f = csv_file("x\n1\n");
+    dtcat().arg(f.path()).arg("--sample").arg("1").arg("--tail").arg("1")
+        .assert().code(2);
+}
+
+#[test]
+fn sample_conflicts_with_all() {
+    let f = csv_file("x\n1\n");
+    dtcat().arg(f.path()).arg("--sample").arg("1").arg("--all")
+        .assert().code(2);
+}
+```
+
+- [ ] **Step 2: Run tests to verify they fail**
+
+Run: `cargo test --test dtcat sample`
+Expected: FAIL — unknown arg `--sample`
+
+- [ ] **Step 3: Add `--sample` arg and validation to dtcat**
+
+In `src/bin/dtcat.rs`, add to the `Args` struct after the `all` field:
+
+```rust
+    /// Randomly sample N rows
+    #[arg(long, value_name = "N")]
+    sample: Option<usize>,
+```
+
+Update `validate_args`:
+
+```rust
+fn validate_args(args: &Args) -> Result<()> {
+    if args.schema && args.describe {
+        bail!("--schema and --describe are mutually exclusive");
+    }
+    if args.sample.is_some() {
+        if args.head.is_some() {
+            bail!("--sample and --head are mutually exclusive");
+        }
+        if args.tail.is_some() {
+            bail!("--sample and --tail are mutually exclusive");
+        }
+        if args.all {
+            bail!("--sample and --all are mutually exclusive");
+        }
+    }
+    Ok(())
+}
+```
+
+- [ ] **Step 4: Implement sampling logic in the display section**
+
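+A first draft replaces the display match block (the `let output = match ...` section) in `src/bin/dtcat.rs` with the code below. It is superseded later in this step, because the `--csv` branch exits before this block is reached and would therefore ignore `--sample`: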
+
+```rust
+    // Determine what to display
+    let output = if let Some(n) = args.sample {
+        let sampled = if n >= df.height() {
+            df
+        } else {
+            df.sample_n_literal(n, false, false, None)?
+        };
+        format_data_table(&sampled)
+    } else {
+        match (args.head, args.tail) {
+            (Some(h), Some(t)) => {
+                format_head_tail(&df, h, t)
+            }
+            (Some(h), None) => {
+                let sliced = df.head(Some(h));
+                format_data_table(&sliced)
+            }
+            (None, Some(t)) => {
+                let sliced = df.tail(Some(t));
+                format_data_table(&sliced)
+            }
+            (None, None) => {
+                if args.all || df.height() <= DEFAULT_THRESHOLD {
+                    format_data_table(&df)
+                } else {
+                    format_head_tail(&df, DEFAULT_HEAD_TAIL, DEFAULT_HEAD_TAIL)
+                }
+            }
+        }
+    };
+```
+
+Also handle `--sample` with `--csv` output. The current `--csv` branch exits early before the display match. Move sampling before the csv check, or handle it inline. The simplest approach: apply sampling before the `--csv` check. After the line `let df = read_file(&path, fmt, &opts)?;`, add:
+
+```rust
+    // Apply sampling if requested (before any display mode)
+    let df = if let Some(n) = args.sample {
+        if n >= df.height() {
+            df
+        } else {
+            df.sample_n_literal(n, false, false, None)?
+        }
+    } else {
+        df
+    };
+```
+
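+Then revert the display match block to its original form, dropping the `if let Some(n) = args.sample` wrapper introduced earlier in this step. Because sampling now happens right after the file is read, `--sample` works with `--csv` and with every other display path without special handling.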
+
+- [ ] **Step 5: Run tests to verify they pass**
+
+Run: `cargo test --test dtcat sample`
+Expected: all 5 sample tests PASS
+
+- [ ] **Step 6: Commit**
+
+```bash
+git add src/bin/dtcat.rs tests/dtcat.rs
+git commit -m "feat: add --sample N flag to dtcat"
+```
+
+---
+
+### Task 2: Create writers module
+
+**Files:**
+- Create: `src/writers/mod.rs`
+- Create: `src/writers/csv.rs`
+- Create: `src/writers/parquet.rs`
+- Create: `src/writers/arrow.rs`
+- Create: `src/writers/json.rs`
+- Modify: `src/lib.rs`
+
+- [ ] **Step 1: Create `src/writers/mod.rs`**
+
+```rust
+pub mod arrow;
+pub mod csv;
+pub mod json;
+pub mod parquet;
+```
+
+- [ ] **Step 2: Create `src/writers/csv.rs`**
+
+```rust
+use anyhow::Result;
+use polars::prelude::*;
+use std::io::Write;
+use std::path::Path;
+
+use crate::format::Format;
+
+pub fn write(df: &mut DataFrame, path: Option<&Path>, format: Format) -> Result<()> {
+    let separator = match format {
+        Format::Tsv => b'\t',
+        _ => b',',
+    };
+
+    match path {
+        Some(p) => {
+            let file = std::fs::File::create(p)?;
+            CsvWriter::new(file)
+                .with_separator(separator)
+                .finish(df)?;
+        }
+        None => {
+            let mut buf = Vec::new();
+            CsvWriter::new(&mut buf)
+                .with_separator(separator)
+                .finish(df)?;
+            std::io::stdout().write_all(&buf)?;
+        }
+    }
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::NamedTempFile;
+
+    #[test]
+    fn write_csv_roundtrip() {
+        let s1 = Series::new("name".into(), &["Alice", "Bob"]);
+        let s2 = Series::new("value".into(), &[100i64, 200]);
+        let mut df = DataFrame::new(vec![s1.into_column(), s2.into_column()]).unwrap();
+
+        let f = NamedTempFile::with_suffix(".csv").unwrap();
+        write(&mut df, Some(f.path()), Format::Csv).unwrap();
+
+        let result = crate::readers::csv::read(f.path(), &crate::reader::ReadOptions::default()).unwrap();
+        assert_eq!(result.height(), 2);
+        assert_eq!(result.get_column_names(), df.get_column_names());
+    }
+
+    #[test]
+    fn write_tsv_uses_tab() {
+        let s = Series::new("x".into(), &[1i64]);
+        let mut df = DataFrame::new(vec![s.into_column()]).unwrap();
+
+        let f = NamedTempFile::with_suffix(".tsv").unwrap();
+        write(&mut df, Some(f.path()), Format::Tsv).unwrap();
+
+        let content = std::fs::read_to_string(f.path()).unwrap();
+        assert!(!content.contains(','));
+    }
+}
+```
+
+- [ ] **Step 3: Create `src/writers/parquet.rs`**
+
+```rust
+use anyhow::{bail, Result};
+use polars::prelude::*;
+use std::path::Path;
+
+pub fn write(df: &mut DataFrame, path: Option<&Path>) -> Result<()> {
+    let path = path.ok_or_else(|| anyhow::anyhow!("--convert parquet requires -o PATH"))?;
+    let file = std::fs::File::create(path)?;
+    ParquetWriter::new(file).finish(df)?;
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::NamedTempFile;
+
+    #[test]
+    fn write_parquet_roundtrip() {
+        let s1 = Series::new("name".into(), &["Alice", "Bob"]);
+        let s2 = Series::new("value".into(), &[100i64, 200]);
+        let mut df = DataFrame::new(vec![s1.into_column(), s2.into_column()]).unwrap();
+
+        let f = NamedTempFile::with_suffix(".parquet").unwrap();
+        write(&mut df, Some(f.path())).unwrap();
+
+        let result = crate::readers::parquet::read(f.path(), &crate::reader::ReadOptions::default()).unwrap();
+        assert_eq!(result.height(), 2);
+    }
+
+    #[test]
+    fn write_parquet_no_path_errors() {
+        let s = Series::new("x".into(), &[1i64]);
+        let mut df = DataFrame::new(vec![s.into_column()]).unwrap();
+        assert!(write(&mut df, None).is_err());
+    }
+}
+```
+
+- [ ] **Step 4: Create `src/writers/arrow.rs`**
+
+```rust
+use anyhow::Result;
+use polars::prelude::*;
+use std::path::Path;
+
+pub fn write(df: &mut DataFrame, path: Option<&Path>) -> Result<()> {
+    let path = path.ok_or_else(|| anyhow::anyhow!("--convert arrow requires -o PATH"))?;
+    let file = std::fs::File::create(path)?;
+    IpcWriter::new(file).finish(df)?;
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::NamedTempFile;
+
+    #[test]
+    fn write_arrow_roundtrip() {
+        let s = Series::new("x".into(), &[1i64, 2, 3]);
+        let mut df = DataFrame::new(vec![s.into_column()]).unwrap();
+
+        let f = NamedTempFile::with_suffix(".arrow").unwrap();
+        write(&mut df, Some(f.path())).unwrap();
+
+        let result = crate::readers::arrow::read(f.path(), &crate::reader::ReadOptions::default()).unwrap();
+        assert_eq!(result.height(), 3);
+    }
+
+    #[test]
+    fn write_arrow_no_path_errors() {
+        let s = Series::new("x".into(), &[1i64]);
+        let mut df = DataFrame::new(vec![s.into_column()]).unwrap();
+        assert!(write(&mut df, None).is_err());
+    }
+}
+```
+
+- [ ] **Step 5: Create `src/writers/json.rs`**
+
+```rust
+use anyhow::Result;
+use polars::prelude::*;
+use std::io::Write as IoWrite;
+use std::path::Path;
+
+use crate::format::Format;
+
+pub fn write(df: &mut DataFrame, path: Option<&Path>, format: Format) -> Result<()> {
+    match format {
+        Format::Ndjson => write_ndjson(df, path),
+        _ => write_json(df, path),
+    }
+}
+
+fn write_json(df: &mut DataFrame, path: Option<&Path>) -> Result<()> {
+    match path {
+        Some(p) => {
+            let file = std::fs::File::create(p)?;
+            JsonWriter::new(file).finish(df)?;
+        }
+        None => {
+            let mut buf = Vec::new();
+            JsonWriter::new(&mut buf).finish(df)?;
+            std::io::stdout().write_all(&buf)?;
+        }
+    }
+    Ok(())
+}
+
+fn write_ndjson(df: &mut DataFrame, path: Option<&Path>) -> Result<()> {
+    match path {
+        Some(p) => {
+            let file = std::fs::File::create(p)?;
+            JsonLineWriter::new(file).finish(df)?;
+        }
+        None => {
+            let mut buf = Vec::new();
+            JsonLineWriter::new(&mut buf).finish(df)?;
+            std::io::stdout().write_all(&buf)?;
+        }
+    }
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::NamedTempFile;
+
+    #[test]
+    fn write_json_roundtrip() {
+        let s = Series::new("x".into(), &[1i64, 2]);
+        let mut df = DataFrame::new(vec![s.into_column()]).unwrap();
+
+        let f = NamedTempFile::with_suffix(".json").unwrap();
+        write(&mut df, Some(f.path()), Format::Json).unwrap();
+
+        let result = crate::readers::json::read(f.path(), Format::Json, &crate::reader::ReadOptions::default()).unwrap();
+        assert_eq!(result.height(), 2);
+    }
+
+    #[test]
+    fn write_ndjson_roundtrip() {
+        let s = Series::new("x".into(), &[1i64, 2]);
+        let mut df = DataFrame::new(vec![s.into_column()]).unwrap();
+
+        let f = NamedTempFile::with_suffix(".ndjson").unwrap();
+        write(&mut df, Some(f.path()), Format::Ndjson).unwrap();
+
+        let result = crate::readers::json::read(f.path(), Format::Ndjson, &crate::reader::ReadOptions::default()).unwrap();
+        assert_eq!(result.height(), 2);
+    }
+}
+```
+
+- [ ] **Step 6: Add `writers` to `src/lib.rs`**
+
+Replace the contents of `src/lib.rs` with:
+
+```rust
+pub mod diff;
+pub mod filter;
+pub mod format;
+pub mod formatter;
+pub mod metadata;
+pub mod reader;
+pub mod readers;
+pub mod writers;
+```
+
+- [ ] **Step 7: Run unit tests**
+
+Run: `cargo test --lib`
+Expected: all unit tests pass including new writer tests
+
+- [ ] **Step 8: Commit**
+
+```bash
+git add src/writers/ src/lib.rs
+git commit -m "feat: add writers module (csv, tsv, parquet, arrow, json, ndjson)"
+```
+
+---
+
+### Task 3: Add write_file dispatch function
+
+**Files:**
+- Create: `src/writer.rs`
+- Modify: `src/lib.rs`
+
+- [ ] **Step 1: Create `src/writer.rs`**
+
+```rust
+use anyhow::{bail, Result};
+use polars::prelude::*;
+use std::path::Path;
+
+use crate::format::Format;
+use crate::writers;
+
+/// Write a DataFrame to a file or stdout, dispatching to the appropriate writer.
+///
+/// For binary formats (Parquet, Arrow), `path` is required.
+/// For text formats (CSV, TSV, JSON, NDJSON), `path` is optional (None = stdout).
+/// Excel writing is not supported.
+pub fn write_file(df: &mut DataFrame, path: Option<&Path>, format: Format) -> Result<()> {
+    match format {
+        Format::Csv | Format::Tsv => writers::csv::write(df, path, format),
+        Format::Parquet => writers::parquet::write(df, path),
+        Format::Arrow => writers::arrow::write(df, path),
+        Format::Json | Format::Ndjson => writers::json::write(df, path, format),
+        Format::Excel => bail!("writing Excel format is not supported; use csv or parquet"),
+    }
+}
+```
+
+- [ ] **Step 2: Add `writer` to `src/lib.rs`**
+
+```rust
+pub mod diff;
+pub mod filter;
+pub mod format;
+pub mod formatter;
+pub mod metadata;
+pub mod reader;
+pub mod readers;
+pub mod writer;
+pub mod writers;
+```
+
+- [ ] **Step 3: Run tests**
+
+Run: `cargo test --lib`
+Expected: PASS
+
+- [ ] **Step 4: Commit**
+
+```bash
+git add src/writer.rs src/lib.rs
+git commit -m "feat: add write_file dispatch function"
+```
+
+---
+
+### Task 4: Add `--convert` and `-o` flags to dtcat
+
+**Files:**
+- Modify: `src/bin/dtcat.rs`
+- Test: `tests/dtcat.rs`
+
+- [ ] **Step 1: Write the failing tests**
+
+Add to `tests/dtcat.rs`:
+
+```rust
+#[test]
+fn convert_csv_to_parquet() {
+    let out = NamedTempFile::with_suffix(".parquet").unwrap();
+    dtcat().arg("tests/fixtures/data.csv")
+        .arg("--convert").arg("parquet")
+        .arg("-o").arg(out.path())
+        .assert().success();
+    // Read back and verify
+    dtcat().arg(out.path()).arg("--csv")
+        .assert().success()
+        .stdout(predicate::str::contains("Alice"))
+        .stdout(predicate::str::contains("Charlie"));
+}
+
+#[test]
+fn convert_parquet_to_csv_file() {
+    let out = NamedTempFile::with_suffix(".csv").unwrap();
+    dtcat().arg("tests/fixtures/data.parquet")
+        .arg("--convert").arg("csv")
+        .arg("-o").arg(out.path())
+        .assert().success();
+    dtcat().arg(out.path())
+        .assert().success()
+        .stdout(predicate::str::contains("Alice"));
+}
+
+#[test]
+fn convert_csv_to_json_stdout() {
+    dtcat().arg("tests/fixtures/data.csv")
+        .arg("--convert").arg("json")
+        .assert().success()
+        .stdout(predicate::str::contains("Alice"));
+}
+
+#[test]
+fn convert_csv_to_ndjson_stdout() {
+    dtcat().arg("tests/fixtures/data.csv")
+        .arg("--convert").arg("ndjson")
+        .assert().success()
+        .stdout(predicate::str::contains("Alice"));
+}
+
+#[test]
+fn convert_parquet_no_output_errors() {
+    dtcat().arg("tests/fixtures/data.csv")
+        .arg("--convert").arg("parquet")
+        .assert().failure();
+}
+
+#[test]
+fn convert_arrow_no_output_errors() {
+    dtcat().arg("tests/fixtures/data.csv")
+        .arg("--convert").arg("arrow")
+        .assert().failure();
+}
+
+#[test]
+fn convert_conflicts_with_schema() {
+    let f = csv_file("x\n1\n");
+    dtcat().arg(f.path()).arg("--convert").arg("csv").arg("--schema")
+        .assert().code(2);
+}
+
+#[test]
+fn convert_with_skip() {
+    let f = csv_file("meta\nname,value\nAlice,100\n");
+    dtcat().arg(f.path()).arg("--skip").arg("1").arg("--convert").arg("csv")
+        .assert().success()
+        .stdout(predicate::str::contains("Alice"));
+}
+```
+
+- [ ] **Step 2: Run tests to verify they fail**
+
+Run: `cargo test --test dtcat convert`
+Expected: FAIL — unknown arg `--convert`
+
+- [ ] **Step 3: Add `--convert` and `-o` args and validation**
+
+In `src/bin/dtcat.rs`, add to the `Args` struct:
+
+```rust
+    /// Convert to format (csv, tsv, parquet, arrow, json, ndjson)
+    #[arg(long, value_name = "FORMAT")]
+    convert: Option<String>,
+
+    /// Output file path (required for binary formats with --convert)
+    #[arg(short = 'o', value_name = "PATH")]
+    output: Option<String>,
+```
+
+Add to imports at the top of the file:
+
+```rust
+use dtcore::format::parse_format_str;
+use dtcore::writer::write_file;
+```
+
+Update `validate_args` to add after the sample checks:
+
+```rust
+    if args.convert.is_some() {
+        if args.schema || args.describe || args.info || args.csv
+            || args.head.is_some() || args.tail.is_some()
+            || args.all || args.sample.is_some()
+        {
+            bail!("--convert is mutually exclusive with display flags");
+        }
+    }
+```
+
+- [ ] **Step 4: Add convert logic to the run function**
+
+In `src/bin/dtcat.rs`, insert after the sampling block and before the empty DataFrame check:
+
+```rust
+    // --convert: write to a different format and exit
+    if let Some(ref convert_str) = args.convert {
+        let target_fmt = parse_format_str(convert_str)?;
+        let out_path = args.output.as_deref().map(std::path::Path::new);
+        let mut df = df;
+        write_file(&mut df, out_path, target_fmt)?;
+        return Ok(());
+    }
+```
+
+- [ ] **Step 5: Run tests to verify they pass**
+
+Run: `cargo test --test dtcat convert`
+Expected: all 8 convert tests PASS
+
+- [ ] **Step 6: Run all tests**
+
+Run: `cargo test`
+Expected: all tests PASS
+
+- [ ] **Step 7: Commit**
+
+```bash
+git add src/bin/dtcat.rs tests/dtcat.rs
+git commit -m "feat: add --convert FORMAT and -o PATH to dtcat"
+```
+
+---
+
+### Task 5: Bump version and final verification
+
+**Files:**
+- Modify: `Cargo.toml`
+
+- [ ] **Step 1: Bump version**
+
+In `Cargo.toml`, change the `version` field from `"0.1.0"` to:
+
+```toml
+version = "0.2.0"
+```
+
+- [ ] **Step 2: Run full test suite**
+
+Run: `cargo test`
+Expected: all tests PASS
+
+- [ ] **Step 3: Run clippy**
+
+Run: `cargo clippy --release`
+Expected: no warnings
+
+- [ ] **Step 4: Verify CLI help**
+
+Run: `cargo run --release --bin dtcat -- --help`
+Expected: output includes `--sample`, `--convert`, `-o`
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add Cargo.toml
+git commit -m "chore: bump version to 0.2.0"
+```
diff --git a/src/bin/dtcat.rs b/src/bin/dtcat.rs
@@ -4,7 +4,8 @@ use std::process;
 use anyhow::{bail, Result};
 use clap::Parser;
 
-use dtcore::format::{detect_format, Format};
+use dtcore::format::{detect_format, parse_format_str, Format};
+use dtcore::writer::write_file;
 use dtcore::formatter::{
     format_csv, format_data_table, format_describe, format_empty_sheet, format_head_tail,
     format_header, format_schema, format_sheet_listing,
@@ -63,15 +64,54 @@ struct Args {
     #[arg(long)]
     all: bool,
 
+    /// Randomly sample N rows
+    #[arg(long, value_name = "N")]
+    sample: Option<usize>,
+
     /// Show file metadata only
     #[arg(long)]
     info: bool,
+
+    /// Convert to format (csv, tsv, parquet, arrow, json, ndjson)
+    #[arg(long, value_name = "FORMAT")]
+    convert: Option<String>,
+
+    /// Output file path (required for binary formats with --convert)
+    #[arg(short = 'o', value_name = "PATH")]
+    output: Option<String>,
 }
 
 fn validate_args(args: &Args) -> Result<()> {
     if args.schema && args.describe {
         bail!("--schema and --describe are mutually exclusive");
     }
+    if args.sample.is_some() {
+        if args.schema {
+            bail!("--sample and --schema are mutually exclusive");
+        }
+        if args.describe {
+            bail!("--sample and --describe are mutually exclusive");
+        }
+        if args.info {
+            bail!("--sample and --info are mutually exclusive");
+        }
+        if args.head.is_some() {
+            bail!("--sample and --head are mutually exclusive");
+        }
+        if args.tail.is_some() {
+            bail!("--sample and --tail are mutually exclusive");
+        }
+        if args.all {
+            bail!("--sample and --all are mutually exclusive");
+        }
+    }
+    if args.convert.is_some()
+        && (args.schema || args.describe || args.info || args.csv
+            || args.head.is_some() || args.tail.is_some()
+            || args.all || args.sample.is_some())
+    {
+        bail!("--convert is mutually exclusive with display flags");
+    }
     Ok(())
 }
 
@@ -202,6 +242,26 @@ fn run(args: Args) -> Result<()> {
         sheet_info_from_df(&file_name, &df)
     };
 
+    // Apply sampling if requested (before any display mode)
+    let df = if let Some(n) = args.sample {
+        if n >= df.height() {
+            df
+        } else {
+            df.sample_n_literal(n, false, false, None)?
+        }
+    } else {
+        df
+    };
+
+    // --convert: write to a different format and exit
+    if let Some(ref convert_str) = args.convert {
+        let target_fmt = parse_format_str(convert_str)?;
+        let out_path = args.output.as_deref().map(std::path::Path::new);
+        let mut df = df;
+        write_file(&mut df, out_path, target_fmt)?;
+        return Ok(());
+    }
+
     // Handle empty DataFrame
     if df.is_empty() {
         print!("{}", format_empty_sheet(&sheet));
diff --git a/src/lib.rs b/src/lib.rs
@@ -5,3 +5,5 @@ pub mod formatter;
 pub mod metadata;
 pub mod reader;
 pub mod readers;
+pub mod writer;
+pub mod writers;
diff --git a/src/writer.rs b/src/writer.rs
@@ -0,0 +1,17 @@
+use anyhow::{bail, Result};
+use polars::prelude::*;
+use std::path::Path;
+
+use crate::format::Format;
+use crate::writers;
+
+/// Write a DataFrame to a file or stdout, dispatching to the appropriate writer.
+pub fn write_file(df: &mut DataFrame, path: Option<&Path>, format: Format) -> Result<()> {
+    match format {
+        Format::Csv | Format::Tsv => writers::csv::write(df, path, format),
+        Format::Parquet => writers::parquet::write(df, path),
+        Format::Arrow => writers::arrow::write(df, path),
+        Format::Json | Format::Ndjson => writers::json::write(df, path, format),
+        Format::Excel => bail!("writing Excel format is not supported; use csv or parquet"),
+    }
+}
diff --git a/src/writers/arrow.rs b/src/writers/arrow.rs
@@ -0,0 +1,35 @@
+use anyhow::Result;
+use polars::prelude::*;
+use std::path::Path;
+
+pub fn write(df: &mut DataFrame, path: Option<&Path>) -> Result<()> {
+    let path = path.ok_or_else(|| anyhow::anyhow!("--convert arrow requires -o PATH"))?;
+    let file = std::fs::File::create(path)?;
+    IpcWriter::new(file).finish(df)?;
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::NamedTempFile;
+
+    #[test]
+    fn write_arrow_roundtrip() {
+        let s = Series::new("x".into(), &[1i64, 2, 3]);
+        let mut df = DataFrame::new(vec![s.into_column()]).unwrap();
+
+        let f = NamedTempFile::with_suffix(".arrow").unwrap();
+        write(&mut df, Some(f.path())).unwrap();
+
+        let result = crate::readers::arrow::read(f.path(), &crate::reader::ReadOptions::default()).unwrap();
+        assert_eq!(result.height(), 3);
+    }
+
+    #[test]
+    fn write_arrow_no_path_errors() {
+        let s = Series::new("x".into(), &[1i64]);
+        let mut df = DataFrame::new(vec![s.into_column()]).unwrap();
+        assert!(write(&mut df, None).is_err());
+    }
+}
diff --git a/src/writers/csv.rs b/src/writers/csv.rs
@@ -0,0 +1,62 @@
+use anyhow::Result;
+use polars::prelude::*;
+use std::io::Write;
+use std::path::Path;
+
+use crate::format::Format;
+
+pub fn write(df: &mut DataFrame, path: Option<&Path>, format: Format) -> Result<()> {
+    let separator = match format {
+        Format::Tsv => b'\t',
+        _ => b',',
+    };
+
+    match path {
+        Some(p) => {
+            let file = std::fs::File::create(p)?;
+            CsvWriter::new(file)
+                .with_separator(separator)
+                .finish(df)?;
+        }
+        None => {
+            let mut buf = Vec::new();
+            CsvWriter::new(&mut buf)
+                .with_separator(separator)
+                .finish(df)?;
+            std::io::stdout().write_all(&buf)?;
+        }
+    }
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::NamedTempFile;
+
+    #[test]
+    fn write_csv_roundtrip() {
+        let s1 = Series::new("name".into(), &["Alice", "Bob"]);
+        let s2 = Series::new("value".into(), &[100i64, 200]);
+        let mut df = DataFrame::new(vec![s1.into_column(), s2.into_column()]).unwrap();
+
+        let f = NamedTempFile::with_suffix(".csv").unwrap();
+        write(&mut df, Some(f.path()), Format::Csv).unwrap();
+
+        let result = crate::readers::csv::read(f.path(), &crate::reader::ReadOptions::default()).unwrap();
+        assert_eq!(result.height(), 2);
+        assert_eq!(result.get_column_names(), df.get_column_names());
+    }
+
+    #[test]
+    fn write_tsv_uses_tab() {
+        let s = Series::new("x".into(), &[1i64]);
+        let mut df = DataFrame::new(vec![s.into_column()]).unwrap();
+
+        let f = NamedTempFile::with_suffix(".tsv").unwrap();
+        write(&mut df, Some(f.path()), Format::Tsv).unwrap();
+
+        let content = std::fs::read_to_string(f.path()).unwrap();
+        assert!(!content.contains(','));
+    }
+}
diff --git a/src/writers/json.rs b/src/writers/json.rs
@@ -0,0 +1,60 @@
+use anyhow::Result;
+use polars::prelude::*;
+use std::io::Write as IoWrite;
+use std::path::Path;
+
+use crate::format::Format;
+
+pub fn write(df: &mut DataFrame, path: Option<&Path>, format: Format) -> Result<()> {
+    let json_format = match format {
+        Format::Ndjson => JsonFormat::JsonLines,
+        _ => JsonFormat::Json,
+    };
+
+    match path {
+        Some(p) => {
+            let file = std::fs::File::create(p)?;
+            JsonWriter::new(file)
+                .with_json_format(json_format)
+                .finish(df)?;
+        }
+        None => {
+            let mut buf = Vec::new();
+            JsonWriter::new(&mut buf)
+                .with_json_format(json_format)
+                .finish(df)?;
+            std::io::stdout().write_all(&buf)?;
+        }
+    }
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::NamedTempFile;
+
+    #[test]
+    fn write_json_roundtrip() {
+        let s = Series::new("x".into(), &[1i64, 2]);
+        let mut df = DataFrame::new(vec![s.into_column()]).unwrap();
+
+        let f = NamedTempFile::with_suffix(".json").unwrap();
+        write(&mut df, Some(f.path()), Format::Json).unwrap();
+
+        let result = crate::readers::json::read(f.path(), Format::Json, &crate::reader::ReadOptions::default()).unwrap();
+        assert_eq!(result.height(), 2);
+    }
+
+    #[test]
+    fn write_ndjson_roundtrip() {
+        let s = Series::new("x".into(), &[1i64, 2]);
+        let mut df = DataFrame::new(vec![s.into_column()]).unwrap();
+
+        let f = NamedTempFile::with_suffix(".ndjson").unwrap();
+        write(&mut df, Some(f.path()), Format::Ndjson).unwrap();
+
+        let result = crate::readers::json::read(f.path(), Format::Ndjson, &crate::reader::ReadOptions::default()).unwrap();
+        assert_eq!(result.height(), 2);
+    }
+}
diff --git a/src/writers/mod.rs b/src/writers/mod.rs
@@ -0,0 +1,4 @@
+pub mod arrow;
+pub mod csv;
+pub mod json;
+pub mod parquet;
diff --git a/src/writers/parquet.rs b/src/writers/parquet.rs
@@ -0,0 +1,36 @@
+use anyhow::Result;
+use polars::prelude::*;
+use std::path::Path;
+
+pub fn write(df: &mut DataFrame, path: Option<&Path>) -> Result<()> {
+    let path = path.ok_or_else(|| anyhow::anyhow!("--convert parquet requires -o PATH"))?;
+    let file = std::fs::File::create(path)?;
+    ParquetWriter::new(file).finish(df)?;
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile::NamedTempFile;
+
+    #[test]
+    fn write_parquet_roundtrip() {
+        let s1 = Series::new("name".into(), &["Alice", "Bob"]);
+        let s2 = Series::new("value".into(), &[100i64, 200]);
+        let mut df = DataFrame::new(vec![s1.into_column(), s2.into_column()]).unwrap();
+
+        let f = NamedTempFile::with_suffix(".parquet").unwrap();
+        write(&mut df, Some(f.path())).unwrap();
+
+        let result = crate::readers::parquet::read(f.path(), &crate::reader::ReadOptions::default()).unwrap();
+        assert_eq!(result.height(), 2);
+    }
+
+    #[test]
+    fn write_parquet_no_path_errors() {
+        let s = Series::new("x".into(), &[1i64]);
+        let mut df = DataFrame::new(vec![s.into_column()]).unwrap();
+        assert!(write(&mut df, None).is_err());
+    }
+}
diff --git a/tests/dtcat.rs b/tests/dtcat.rs
@@ -145,6 +145,66 @@ fn all_flag_shows_every_row() {
         .stdout(predicate::str::contains("| 30 "));
 }
 
+// ─── Sample ───
+
+/// `--sample 5 --csv` on the demo file prints six lines: a header plus five rows.
+#[test]
+fn sample_returns_n_rows() {
+    let run = dtcat().args(["demo/sales.csv", "--sample", "5", "--csv"]).assert().success();
+    let text = String::from_utf8(run.get_output().stdout.clone()).unwrap();
+    let line_count = text.trim().lines().count();
+    assert_eq!(line_count, 6, "expected header + 5 rows, got {}", line_count);
+}
+
+/// A sample size above the row count must still succeed (only the exit status is checked).
+#[test]
+fn sample_ge_total_returns_all() {
+    let input = csv_file("x\n1\n2\n3\n");
+    dtcat().arg(input.path()).args(["--sample", "100", "--csv"]).assert().success();
+}
+
+/// `--sample` combined with `--head` is rejected with exit code 2.
+#[test]
+fn sample_conflicts_with_head() {
+    let input = csv_file("x\n1\n");
+    dtcat().arg(input.path()).args(["--sample", "1", "--head", "1"]).assert().code(2);
+}
+
+/// `--sample` combined with `--tail` is rejected with exit code 2.
+#[test]
+fn sample_conflicts_with_tail() {
+    let input = csv_file("x\n1\n");
+    dtcat().arg(input.path()).args(["--sample", "1", "--tail", "1"]).assert().code(2);
+}
+
+/// `--sample` combined with `--all` is rejected with exit code 2.
+#[test]
+fn sample_conflicts_with_all() {
+    let input = csv_file("x\n1\n");
+    dtcat().arg(input.path()).args(["--sample", "1", "--all"]).assert().code(2);
+}
+
+/// `--sample` combined with `--schema` is rejected with exit code 2.
+#[test]
+fn sample_conflicts_with_schema() {
+    let input = csv_file("x\n1\n");
+    dtcat().arg(input.path()).args(["--sample", "1", "--schema"]).assert().code(2);
+}
+
+/// `--sample` combined with `--describe` is rejected with exit code 2.
+#[test]
+fn sample_conflicts_with_describe() {
+    let input = csv_file("x\n1\n");
+    dtcat().arg(input.path()).args(["--sample", "1", "--describe"]).assert().code(2);
+}
+
+/// `--sample` combined with `--info` is rejected with exit code 2.
+#[test]
+fn sample_conflicts_with_info() {
+    let input = csv_file("x\n1\n");
+    dtcat().arg(input.path()).args(["--sample", "1", "--info"]).assert().code(2);
+}
+
 // ─── Parquet ───
 
 #[test]
@@ -216,3 +276,75 @@ fn excel_info() {
         .stdout(predicate::str::contains("Excel"))
         .stdout(predicate::str::contains("Sheet1"));
 }
+
+// ─── Convert ───
+
+/// Converts the CSV fixture to Parquet; reading it back must show "Alice" and "Charlie".
+#[test]
+fn convert_csv_to_parquet() {
+    let target = NamedTempFile::with_suffix(".parquet").unwrap();
+    let convert_args = ["tests/fixtures/data.csv", "--convert", "parquet", "-o"];
+    dtcat().args(convert_args).arg(target.path()).assert().success();
+
+    let readback = dtcat().arg(target.path()).arg("--csv").assert().success();
+    readback
+        .stdout(predicate::str::contains("Alice"))
+        .stdout(predicate::str::contains("Charlie"));
+}
+
+/// Converts the Parquet fixture to a CSV file, then displays that file and
+/// expects "Alice" in the output.
+#[test]
+fn convert_parquet_to_csv_file() {
+    let target = NamedTempFile::with_suffix(".csv").unwrap();
+    let convert_args = ["tests/fixtures/data.parquet", "--convert", "csv", "-o"];
+    dtcat().args(convert_args).arg(target.path()).assert().success();
+
+    let shown = dtcat().arg(target.path()).assert().success();
+    shown.stdout(predicate::str::contains("Alice"));
+}
+
+/// Without `-o`, `--convert json` succeeds and its stdout contains "Alice".
+#[test]
+fn convert_csv_to_json_stdout() {
+    let run = dtcat().args(["tests/fixtures/data.csv", "--convert", "json"]).assert();
+    let expected = predicate::str::contains("Alice");
+    run.success().stdout(expected);
+}
+
+/// Without `-o`, `--convert ndjson` succeeds and its stdout contains "Alice".
+#[test]
+fn convert_csv_to_ndjson_stdout() {
+    let run = dtcat().args(["tests/fixtures/data.csv", "--convert", "ndjson"]).assert();
+    let expected = predicate::str::contains("Alice");
+    run.success().stdout(expected);
+}
+
+/// `--convert parquet` without `-o` must exit with a failure status.
+#[test]
+fn convert_parquet_no_output_errors() {
+    let cli_args = ["tests/fixtures/data.csv", "--convert", "parquet"];
+    dtcat().args(cli_args).assert().failure();
+}
+
+/// `--convert arrow` without `-o` must exit with a failure status.
+#[test]
+fn convert_arrow_no_output_errors() {
+    let cli_args = ["tests/fixtures/data.csv", "--convert", "arrow"];
+    dtcat().args(cli_args).assert().failure();
+}
+
+/// `--convert` combined with `--schema` is rejected with exit code 2.
+#[test]
+fn convert_conflicts_with_schema() {
+    let input = csv_file("x\n1\n");
+    dtcat().arg(input.path()).args(["--convert", "csv", "--schema"]).assert().code(2);
+}
+
+/// `--skip 1` plus `--convert csv` on a file with one extra leading line prints "Alice".
+#[test]
+fn convert_with_skip() {
+    let input = csv_file("meta\nname,value\nAlice,100\n");
+    let run = dtcat().arg(input.path()).args(["--skip", "1", "--convert", "csv"]).assert();
+    run.success().stdout(predicate::str::contains("Alice"));
+}

	dt-cli-tools CLI tools for viewing, filtering, and comparing tabular data files
	Log \| Files \| Refs \| README \| LICENSE

M	Cargo.toml	\|	3	++-
M	README.md	\|	13	+++++++++++++
A	docs/superpowers/plans/2026-04-04-v0.2.0-sample-convert.md	\|	692	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	src/bin/dtcat.rs	\|	62	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M	src/lib.rs	\|	2	++
A	src/writer.rs	\|	17	+++++++++++++++++
A	src/writers/arrow.rs	\|	35	+++++++++++++++++++++++++++++++++++
A	src/writers/csv.rs	\|	62	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	src/writers/json.rs	\|	60	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	src/writers/mod.rs	\|	4	++++
A	src/writers/parquet.rs	\|	36	++++++++++++++++++++++++++++++++++++
M	tests/dtcat.rs	\|	132	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++