dt-cli-tools

CLI tools for viewing, filtering, and comparing tabular data files
Log | Files | Refs | README | LICENSE

commit 8f3a14e14fd5eb2b61c31c855004cf94e55425f8
parent 279df986b6657b229bdcb695e99294081a7a3fe4
Author: Erik Loualiche <eloualic@umn.edu>
Date:   Tue, 31 Mar 2026 10:51:15 -0500

feat: add dtcat, dtfilter, and dtdiff binaries

- dtcat: view/inspect tabular data files with schema, describe, head/tail
- dtfilter: filter/query with expressions, sort, column selection
- dtdiff: compare two files with positional or key-based diff, multiple output formats

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
M src/bin/dtcat.rs    | 275 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M src/bin/dtdiff.rs   | 420 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M src/bin/dtfilter.rs | 173 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M src/reader.rs       |   2 +-
4 files changed, 866 insertions(+), 4 deletions(-)

diff --git a/src/bin/dtcat.rs b/src/bin/dtcat.rs @@ -1 +1,274 @@ -fn main() {} +use std::path::PathBuf; +use std::process; + +use anyhow::{bail, Result}; +use clap::Parser; + +use dtcore::format::{detect_format, Format}; +use dtcore::formatter::{ + format_csv, format_data_table, format_describe, format_empty_sheet, format_head_tail, + format_header, format_schema, format_sheet_listing, +}; +use dtcore::metadata::SheetInfo; +use dtcore::reader::{read_file, read_file_info, ReadOptions}; + +/// Default row threshold: show all rows if <= this many, otherwise head+tail +const DEFAULT_THRESHOLD: usize = 50; +/// Default head/tail row count when splitting +const DEFAULT_HEAD_TAIL: usize = 25; + +#[derive(Parser)] +#[command( + name = "dtcat", + about = "View tabular data files in the terminal", + version +)] +struct Args { + /// File to view + file: String, + + /// Override format detection (csv, tsv, parquet, arrow, json, ndjson, excel) + #[arg(long, value_name = "FMT")] + format: Option<String>, + + /// Select sheet by name or 0-based index (Excel only) + #[arg(long, value_name = "NAME|INDEX")] + sheet: Option<String>, + + /// Skip first N rows + #[arg(long, value_name = "N")] + skip: Option<usize>, + + /// Show column names and types only + #[arg(long)] + schema: bool, + + /// Show summary statistics + #[arg(long)] + describe: bool, + + /// Show first N rows + #[arg(long, value_name = "N")] + head: Option<usize>, + + /// Show last N rows + #[arg(long, value_name = "N")] + tail: Option<usize>, + + /// Output as CSV instead of markdown table + #[arg(long)] + csv: bool, + + /// Show file metadata only + #[arg(long)] + info: bool, +} + +fn validate_args(args: &Args) -> Result<()> { + if args.schema && args.describe { + bail!("--schema and --describe are mutually exclusive"); + } + Ok(()) +} + +/// Build a synthetic SheetInfo for non-Excel formats from a loaded DataFrame. 
+fn sheet_info_from_df(file_name: &str, df: &polars::prelude::DataFrame) -> SheetInfo { + SheetInfo { + name: file_name.to_string(), + // rows includes the header row conceptually; formatter subtracts 1 + rows: df.height() + 1, + cols: df.width(), + } +} + +fn run(args: Args) -> Result<()> { + validate_args(&args)?; + + let path = PathBuf::from(&args.file); + if !path.exists() { + bail!("file not found: {}", path.display()); + } + + let fmt = detect_format(&path, args.format.as_deref())?; + + let file_name = path + .file_name() + .map(|s| s.to_string_lossy().to_string()) + .unwrap_or_else(|| args.file.clone()); + + // --info: show metadata and exit + if args.info { + let info = read_file_info(&path, fmt)?; + print!("{}", format_header(&file_name, &info)); + + // For Excel, also list sheet names and dimensions + if fmt == Format::Excel && !info.sheets.is_empty() { + println!(); + for sheet in &info.sheets { + let data_rows = if sheet.rows == 0 { 0 } else { sheet.rows - 1 }; + println!(" {} ({} rows x {} cols)", sheet.name, data_rows, sheet.cols); + } + } + return Ok(()); + } + + // Excel with multiple sheets and no --sheet: show sheet listing + if fmt == Format::Excel && args.sheet.is_none() { + let info = read_file_info(&path, fmt)?; + if info.sheets.len() > 1 { + // Load a small sample of each sheet to display schemas + let mut schemas: Vec<(SheetInfo, polars::prelude::DataFrame)> = Vec::new(); + for sheet in &info.sheets { + let opts = ReadOptions { + sheet: Some(sheet.name.clone()), + skip_rows: args.skip, + separator: None, + }; + match read_file(&path, fmt, &opts) { + Ok(df) => schemas.push((sheet.clone(), df)), + Err(_) => { + // Empty or unreadable sheet + schemas.push(( + SheetInfo { + name: sheet.name.clone(), + rows: 0, + cols: 0, + }, + polars::prelude::DataFrame::default(), + )); + } + } + } + let schema_refs: Vec<(&SheetInfo, polars::prelude::DataFrame)> = schemas + .iter() + .map(|(s, df)| (s, df.clone())) + .collect(); + print!( + "{}", + 
format_sheet_listing(&file_name, &info, &schema_refs) + ); + return Ok(()); + } + } + + // Build read options + let opts = ReadOptions { + sheet: args.sheet.clone(), + skip_rows: args.skip, + separator: None, + }; + + let df = read_file(&path, fmt, &opts)?; + + // Determine sheet info for display + let sheet = if fmt == Format::Excel { + // Try to get the sheet name we actually read + let info = read_file_info(&path, fmt)?; + if let Some(sheet_arg) = &args.sheet { + // Find the matching sheet in info + let matched = info.sheets.iter().find(|s| { + &s.name == sheet_arg + || sheet_arg + .parse::<usize>() + .map(|idx| { + info.sheets + .iter() + .position(|x| x.name == s.name) + .map(|i| i == idx) + .unwrap_or(false) + }) + .unwrap_or(false) + }); + if let Some(s) = matched { + s.clone() + } else { + // Fallback: build from df + SheetInfo { + name: sheet_arg.clone(), + rows: df.height() + 1, + cols: df.width(), + } + } + } else if let Some(first) = info.sheets.first() { + first.clone() + } else { + sheet_info_from_df(&file_name, &df) + } + } else { + sheet_info_from_df(&file_name, &df) + }; + + // Handle empty DataFrame + if df.is_empty() { + print!("{}", format_empty_sheet(&sheet)); + return Ok(()); + } + + // --schema + if args.schema { + print!("{}", format_schema(&sheet, &df)); + return Ok(()); + } + + // --describe + if args.describe { + print!("{}", format_describe(&df)); + return Ok(()); + } + + // --csv output mode + if args.csv { + print!("{}", format_csv(&df)); + return Ok(()); + } + + // Determine what to display + let output = match (args.head, args.tail) { + (Some(h), Some(t)) => { + // Both specified: show head + tail with omission line + format_head_tail(&df, h, t) + } + (Some(h), None) => { + // Only --head: slice the DataFrame and show all + let sliced = df.head(Some(h)); + format_data_table(&sliced) + } + (None, Some(t)) => { + // Only --tail: slice and show all + let sliced = df.tail(Some(t)); + format_data_table(&sliced) + } + (None, None) => { + 
// Default: show all if <= threshold, otherwise head+tail + if df.height() <= DEFAULT_THRESHOLD { + format_data_table(&df) + } else { + format_head_tail(&df, DEFAULT_HEAD_TAIL, DEFAULT_HEAD_TAIL) + } + } + }; + + print!("{}", output); + Ok(()) +} + +fn main() { + let args = Args::parse(); + match run(args) { + Ok(()) => {} + Err(err) => { + // Check if this is an arg validation error (exit 2) vs runtime error (exit 1) + let msg = err.to_string(); + if msg.contains("mutually exclusive") + || msg.contains("invalid") + || msg.contains("unknown format") + { + eprintln!("dtcat: {err}"); + process::exit(2); + } else { + eprintln!("dtcat: {err}"); + process::exit(1); + } + } + } +} diff --git a/src/bin/dtdiff.rs b/src/bin/dtdiff.rs @@ -1 +1,419 @@ -fn main() {} +use std::io::IsTerminal; +use std::path::PathBuf; +use std::process; + +use anyhow::{Result, bail}; +use clap::Parser; +use serde_json::{Map, Value, json}; + +use dtcore::diff::{DiffOptions, DiffResult, SheetSource}; +use dtcore::format::{detect_format, Format}; +use dtcore::reader::{ReadOptions, read_file}; + +#[derive(Parser)] +#[command( + name = "dtdiff", + about = "Compare two tabular data files and show differences", + version +)] +struct Args { + /// First file to compare + file_a: String, + + /// Second file to compare + file_b: String, + + /// Override format detection (applies to both files) + #[arg(long, value_name = "FMT")] + format: Option<String>, + + /// Select sheet by name or index (Excel only) + #[arg(long, value_name = "NAME|INDEX")] + sheet: Option<String>, + + /// Key column(s) for matched comparison (comma-separated) + #[arg(long, value_name = "COL")] + key: Option<String>, + + /// Float comparison tolerance (default: 1e-10) + #[arg(long)] + tolerance: Option<f64>, + + /// Output as JSON + #[arg(long)] + json: bool, + + /// Output as CSV + #[arg(long)] + csv: bool, + + /// Disable colored output + #[arg(long)] + no_color: bool, +} + +// 
--------------------------------------------------------------------------- +// Output formatters (ported from xldiff.rs) +// --------------------------------------------------------------------------- + +/// Format a row's values inline: `Name: "Alice" Score: "90"` +fn format_row_inline(headers: &[String], values: &[String]) -> String { + headers + .iter() + .zip(values.iter()) + .map(|(h, v)| format!("{}: \"{}\"", h, v)) + .collect::<Vec<_>>() + .join(" ") +} + +/// Format diff result as colored (or plain) text output. +fn format_text(result: &DiffResult, color: bool) -> String { + if !result.has_differences() { + return "No differences found.\n".to_string(); + } + + let (red, green, yellow, reset) = if color { + ("\x1b[31m", "\x1b[32m", "\x1b[33m", "\x1b[0m") + } else { + ("", "", "", "") + }; + + let mut out = String::new(); + + // Header + out.push_str(&format!( + "--- {} ({})\n+++ {} ({})\n\n", + result.source_a.sheet_name, + result.source_a.file_name, + result.source_b.sheet_name, + result.source_b.file_name, + )); + + // Summary + out.push_str(&format!( + "Added: {} | Removed: {} | Modified: {}\n\n", + result.added.len(), + result.removed.len(), + result.modified.len(), + )); + + // Removed rows + for row in &result.removed { + out.push_str(&format!( + "{}- {}{}", + red, + format_row_inline(&result.headers, &row.values), + reset, + )); + out.push('\n'); + } + + // Added rows + for row in &result.added { + out.push_str(&format!( + "{}+ {}{}", + green, + format_row_inline(&result.headers, &row.values), + reset, + )); + out.push('\n'); + } + + // Modified rows + for m in &result.modified { + let key_display: Vec<String> = result + .key_columns + .iter() + .zip(m.key.iter()) + .map(|(col, val)| format!("{}: \"{}\"", col, val)) + .collect(); + out.push_str(&format!( + "{}~ {}{}", + yellow, + key_display.join(" "), + reset, + )); + out.push('\n'); + for change in &m.changes { + out.push_str(&format!( + " {}: \"{}\" \u{2192} \"{}\"\n", + change.column, 
change.old_value, change.new_value, + )); + } + } + + out +} + +/// Format diff result as JSON. +fn format_json(result: &DiffResult) -> String { + let added: Vec<Value> = result + .added + .iter() + .map(|row| { + let mut map = Map::new(); + for (h, v) in result.headers.iter().zip(row.values.iter()) { + map.insert(h.clone(), Value::String(v.clone())); + } + Value::Object(map) + }) + .collect(); + + let removed: Vec<Value> = result + .removed + .iter() + .map(|row| { + let mut map = Map::new(); + for (h, v) in result.headers.iter().zip(row.values.iter()) { + map.insert(h.clone(), Value::String(v.clone())); + } + Value::Object(map) + }) + .collect(); + + let modified: Vec<Value> = result + .modified + .iter() + .map(|m| { + let mut key_map = Map::new(); + for (col, val) in result.key_columns.iter().zip(m.key.iter()) { + key_map.insert(col.clone(), Value::String(val.clone())); + } + let changes: Vec<Value> = m + .changes + .iter() + .map(|c| { + json!({ + "column": c.column, + "old": c.old_value, + "new": c.new_value, + }) + }) + .collect(); + json!({ + "key": Value::Object(key_map), + "changes": changes, + }) + }) + .collect(); + + let output = json!({ + "added": added, + "removed": removed, + "modified": modified, + }); + + serde_json::to_string_pretty(&output).unwrap() + "\n" +} + +/// Quote a value per RFC 4180: if it contains comma, quote, or newline, wrap +/// in double quotes and escape any internal quotes by doubling them. +fn csv_quote(value: &str) -> String { + if value.contains(',') || value.contains('"') || value.contains('\n') { + format!("\"{}\"", value.replace('"', "\"\"")) + } else { + value.to_string() + } +} + +/// Build a CSV row from a slice of values. +fn csv_row(values: &[String]) -> String { + values.iter().map(|v| csv_quote(v)).collect::<Vec<_>>().join(",") +} + +/// Format diff result as CSV. +/// +/// Header: _status, col1, col2, ..., _old_col1, _old_col2, ... 
+/// Added rows: "added" + values + empty _old_ columns +/// Removed rows: "removed" + values + empty _old_ columns +/// Modified rows: "modified" + new values + old values in _old_ columns +fn format_csv_output(result: &DiffResult) -> String { + let mut out = String::new(); + + // Build header + let mut header_parts: Vec<String> = vec!["_status".to_string()]; + for h in &result.headers { + header_parts.push(h.clone()); + } + for h in &result.headers { + header_parts.push(format!("_old_{}", h)); + } + out.push_str(&csv_row(&header_parts)); + out.push('\n'); + + let empty_cols: Vec<String> = result.headers.iter().map(|_| String::new()).collect(); + + // Removed rows + for row in &result.removed { + let mut parts: Vec<String> = vec!["removed".to_string()]; + parts.extend(row.values.iter().cloned()); + while parts.len() < 1 + result.headers.len() { + parts.push(String::new()); + } + parts.extend(empty_cols.iter().cloned()); + out.push_str(&csv_row(&parts)); + out.push('\n'); + } + + // Added rows + for row in &result.added { + let mut parts: Vec<String> = vec!["added".to_string()]; + parts.extend(row.values.iter().cloned()); + while parts.len() < 1 + result.headers.len() { + parts.push(String::new()); + } + parts.extend(empty_cols.iter().cloned()); + out.push_str(&csv_row(&parts)); + out.push('\n'); + } + + // Modified rows + for m in &result.modified { + let mut main_cols: Vec<String> = Vec::new(); + let mut old_cols: Vec<String> = Vec::new(); + + for h in &result.headers { + if let Some(key_idx) = result.key_columns.iter().position(|k| k == h) { + main_cols.push(m.key.get(key_idx).cloned().unwrap_or_default()); + old_cols.push(String::new()); + } else if let Some(change) = m.changes.iter().find(|c| c.column == *h) { + main_cols.push(change.new_value.clone()); + old_cols.push(change.old_value.clone()); + } else { + // Unchanged non-key column — leave empty in both + main_cols.push(String::new()); + old_cols.push(String::new()); + } + } + + let mut parts: Vec<String> 
= vec!["modified".to_string()]; + parts.extend(main_cols); + parts.extend(old_cols); + out.push_str(&csv_row(&parts)); + out.push('\n'); + } + + out +} + +// --------------------------------------------------------------------------- +// run / main +// --------------------------------------------------------------------------- + +fn run(args: Args) -> Result<()> { + let path_a = PathBuf::from(&args.file_a); + let path_b = PathBuf::from(&args.file_b); + + // Validate files exist + if !path_a.exists() { + bail!("file not found: {}", path_a.display()); + } + if !path_b.exists() { + bail!("file not found: {}", path_b.display()); + } + + // Detect formats + let fmt_a = detect_format(&path_a, args.format.as_deref())?; + let fmt_b = detect_format(&path_b, args.format.as_deref())?; + + // Enforce same-format constraint + if !fmt_a.same_family(fmt_b) { + bail!( + "files have incompatible formats: {:?} vs {:?}. Both files must use the same format family.", + fmt_a, + fmt_b + ); + } + + // Build read options + let opts_a = ReadOptions { + sheet: args.sheet.clone(), + skip_rows: None, + separator: None, + }; + let opts_b = ReadOptions { + sheet: args.sheet.clone(), + skip_rows: None, + separator: None, + }; + + // Read DataFrames + let df_a = read_file(&path_a, fmt_a, &opts_a)?; + let df_b = read_file(&path_b, fmt_b, &opts_b)?; + + // Resolve key columns + let key_columns: Vec<String> = if let Some(ref key_str) = args.key { + key_str.split(',').map(|s| s.trim().to_string()).collect() + } else { + vec![] + }; + + // Build source labels + let file_name_a = path_a + .file_name() + .map(|s| s.to_string_lossy().to_string()) + .unwrap_or_else(|| args.file_a.clone()); + let file_name_b = path_b + .file_name() + .map(|s| s.to_string_lossy().to_string()) + .unwrap_or_else(|| args.file_b.clone()); + + // Use file name as "sheet name" for non-Excel formats; for Excel use the + // sheet name from opts (or a placeholder if none was specified). 
+ let sheet_name_a = if fmt_a == Format::Excel { + args.sheet.clone().unwrap_or_else(|| file_name_a.clone()) + } else { + file_name_a.clone() + }; + let sheet_name_b = if fmt_b == Format::Excel { + args.sheet.clone().unwrap_or_else(|| file_name_b.clone()) + } else { + file_name_b.clone() + }; + + let source_a = SheetSource { + file_name: file_name_a, + sheet_name: sheet_name_a, + }; + let source_b = SheetSource { + file_name: file_name_b, + sheet_name: sheet_name_b, + }; + + let diff_opts = DiffOptions { + key_columns, + tolerance: args.tolerance, + }; + + // Run diff + let result = dtcore::diff::diff_sheets(&df_a, &df_b, &diff_opts, source_a, source_b)?; + + // TTY detection for color + let use_color = !args.no_color && std::io::stdout().is_terminal(); + + // Format output: --json and --csv are mutually exclusive flags; default is text + let output = if args.json { + format_json(&result) + } else if args.csv { + format_csv_output(&result) + } else { + format_text(&result, use_color) + }; + + print!("{}", output); + + // Exit 1 if differences found (diff convention), 0 if identical + if result.has_differences() { + process::exit(1); + } + + Ok(()) +} + +fn main() { + let args = Args::parse(); + if let Err(err) = run(args) { + eprintln!("dtdiff: {err}"); + process::exit(2); + } +} diff --git a/src/bin/dtfilter.rs b/src/bin/dtfilter.rs @@ -1 +1,172 @@ -fn main() {} +use std::io::Write; +use std::path::PathBuf; +use std::process; + +use anyhow::{Result, bail}; +use clap::Parser; + +use dtcore::filter::{FilterOptions, parse_filter_expr, parse_sort_spec, filter_pipeline}; +use dtcore::format::detect_format; +use dtcore::formatter::{format_data_table, format_csv}; +use dtcore::reader::{ReadOptions, read_file}; + +// --------------------------------------------------------------------------- +// Argument parsing +// --------------------------------------------------------------------------- + +#[derive(Parser)] +#[command( + name = "dtfilter", + about = "Filter, sort, and 
select columns from tabular data files", + version +)] +struct Args { + /// Input file + file: String, + + /// Override format detection + #[arg(long, value_name = "FMT")] + format: Option<String>, + + /// Select sheet by name or index (Excel only) + #[arg(long, value_name = "NAME|INDEX")] + sheet: Option<String>, + + /// Skip first N rows after the header + #[arg(long, value_name = "N")] + skip: Option<usize>, + + /// Filter expression(s), e.g. "State=CA", "Amount>1000" (repeatable, ANDed) + #[arg(long = "filter", value_name = "EXPR", action = clap::ArgAction::Append)] + filters: Vec<String>, + + /// Sort spec, e.g. "Amount:desc" or "Name" + #[arg(long, value_name = "SPEC")] + sort: Option<String>, + + /// Select columns by name (comma-separated) + #[arg(long, value_name = "COLS")] + columns: Option<String>, + + /// First N rows (before filter) + #[arg(long, value_name = "N")] + head: Option<usize>, + + /// Last N rows (before filter) + #[arg(long, value_name = "N")] + tail: Option<usize>, + + /// Max output rows (after filter) + #[arg(long, value_name = "N")] + limit: Option<usize>, + + /// Output as CSV + #[arg(long)] + csv: bool, +} + +// --------------------------------------------------------------------------- +// Validation helpers +// --------------------------------------------------------------------------- + +/// Validate args and return an error message for invalid combinations. +/// Returns exit-code 2 on any argument error. 
+fn validate_args(args: &Args) -> Result<(), ArgError> { + if args.head.is_some() && args.tail.is_some() { + return Err(ArgError("--head and --tail are mutually exclusive".to_string())); + } + Ok(()) +} + +struct ArgError(String); + +// --------------------------------------------------------------------------- +// Core logic +// --------------------------------------------------------------------------- + +fn run(args: Args) -> Result<()> { + let path = PathBuf::from(&args.file); + + if !path.exists() { + bail!("file not found: {}", path.display()); + } + + // Detect format + let fmt = detect_format(&path, args.format.as_deref())?; + + // Build read options + let read_opts = ReadOptions { + sheet: args.sheet.clone(), + skip_rows: args.skip, + separator: None, + }; + + // Read the DataFrame + let df = read_file(&path, fmt, &read_opts)?; + + // Parse filter expressions + let filters = args + .filters + .iter() + .map(|s| parse_filter_expr(s).map_err(|e| anyhow::anyhow!("{}", e))) + .collect::<Result<Vec<_>>>()?; + + // Parse sort spec + let sort = args + .sort + .as_deref() + .map(|s| parse_sort_spec(s).map_err(|e| anyhow::anyhow!("{}", e))) + .transpose()?; + + // Parse column selection + let cols: Option<Vec<String>> = args.columns.as_deref().map(|s| { + s.split(',') + .map(|c| c.trim().to_string()) + .filter(|c| !c.is_empty()) + .collect() + }); + + // Build filter options + let filter_opts = FilterOptions { + filters, + cols, + sort, + limit: args.limit, + head: args.head, + tail: args.tail, + }; + + // Run the pipeline + let result = filter_pipeline(df, &filter_opts)?; + + // Report row count to stderr + let row_count = result.height(); + eprintln!("{} row{}", row_count, if row_count == 1 { "" } else { "s" }); + + // Output + let output = if args.csv { + format_csv(&result) + } else { + format_data_table(&result) + }; + + let stdout = std::io::stdout(); + let mut out = stdout.lock(); + out.write_all(output.as_bytes())?; + + Ok(()) +} + +fn main() { + let args = 
Args::parse(); + + if let Err(e) = validate_args(&args) { + eprintln!("dtfilter: {}", e.0); + process::exit(2); + } + + if let Err(err) = run(args) { + eprintln!("dtfilter: {err}"); + process::exit(1); + } +} diff --git a/src/reader.rs b/src/reader.rs @@ -3,7 +3,7 @@ use polars::prelude::*; use std::path::Path; use crate::format::Format; -use crate::metadata::{FileInfo, SheetInfo}; +use crate::metadata::FileInfo; use crate::readers; /// Options that control how a file is read.