commit 8f3a14e14fd5eb2b61c31c855004cf94e55425f8
parent 279df986b6657b229bdcb695e99294081a7a3fe4
Author: Erik Loualiche <eloualic@umn.edu>
Date: Tue, 31 Mar 2026 10:51:15 -0500
feat: add dtcat, dtfilter, and dtdiff binaries
- dtcat: view/inspect tabular data files with schema, describe, head/tail
- dtfilter: filter/query with expressions, sort, column selection
- dtdiff: compare two files with positional or key-based diff, multiple output formats
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat:
| M | src/bin/dtcat.rs | | | 275 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- |
| M | src/bin/dtdiff.rs | | | 420 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- |
| M | src/bin/dtfilter.rs | | | 173 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- |
| M | src/reader.rs | | | 2 | +- |
4 files changed, 866 insertions(+), 4 deletions(-)
diff --git a/src/bin/dtcat.rs b/src/bin/dtcat.rs
@@ -1 +1,274 @@
-fn main() {}
+use std::path::PathBuf;
+use std::process;
+
+use anyhow::{bail, Result};
+use clap::Parser;
+
+use dtcore::format::{detect_format, Format};
+use dtcore::formatter::{
+ format_csv, format_data_table, format_describe, format_empty_sheet, format_head_tail,
+ format_header, format_schema, format_sheet_listing,
+};
+use dtcore::metadata::SheetInfo;
+use dtcore::reader::{read_file, read_file_info, ReadOptions};
+
+/// Default row threshold: show all rows if <= this many, otherwise head+tail
+const DEFAULT_THRESHOLD: usize = 50;
+/// Default head/tail row count when splitting
+const DEFAULT_HEAD_TAIL: usize = 25;
+
/// Command-line arguments for dtcat.
///
/// NOTE: the `///` comments on the fields below are rendered by clap as the
/// `--help` text, so they are part of the user-visible interface.
#[derive(Parser)]
#[command(
    name = "dtcat",
    about = "View tabular data files in the terminal",
    version
)]
struct Args {
    /// File to view
    file: String,

    /// Override format detection (csv, tsv, parquet, arrow, json, ndjson, excel)
    #[arg(long, value_name = "FMT")]
    format: Option<String>,

    /// Select sheet by name or 0-based index (Excel only)
    #[arg(long, value_name = "NAME|INDEX")]
    sheet: Option<String>,

    /// Skip first N rows
    #[arg(long, value_name = "N")]
    skip: Option<usize>,

    // --schema and --describe are mutually exclusive; enforced in validate_args().
    /// Show column names and types only
    #[arg(long)]
    schema: bool,

    /// Show summary statistics
    #[arg(long)]
    describe: bool,

    /// Show first N rows
    #[arg(long, value_name = "N")]
    head: Option<usize>,

    /// Show last N rows
    #[arg(long, value_name = "N")]
    tail: Option<usize>,

    /// Output as CSV instead of markdown table
    #[arg(long)]
    csv: bool,

    /// Show file metadata only
    #[arg(long)]
    info: bool,
}
+
+fn validate_args(args: &Args) -> Result<()> {
+ if args.schema && args.describe {
+ bail!("--schema and --describe are mutually exclusive");
+ }
+ Ok(())
+}
+
+/// Build a synthetic SheetInfo for non-Excel formats from a loaded DataFrame.
+fn sheet_info_from_df(file_name: &str, df: &polars::prelude::DataFrame) -> SheetInfo {
+ SheetInfo {
+ name: file_name.to_string(),
+ // rows includes the header row conceptually; formatter subtracts 1
+ rows: df.height() + 1,
+ cols: df.width(),
+ }
+}
+
+fn run(args: Args) -> Result<()> {
+ validate_args(&args)?;
+
+ let path = PathBuf::from(&args.file);
+ if !path.exists() {
+ bail!("file not found: {}", path.display());
+ }
+
+ let fmt = detect_format(&path, args.format.as_deref())?;
+
+ let file_name = path
+ .file_name()
+ .map(|s| s.to_string_lossy().to_string())
+ .unwrap_or_else(|| args.file.clone());
+
+ // --info: show metadata and exit
+ if args.info {
+ let info = read_file_info(&path, fmt)?;
+ print!("{}", format_header(&file_name, &info));
+
+ // For Excel, also list sheet names and dimensions
+ if fmt == Format::Excel && !info.sheets.is_empty() {
+ println!();
+ for sheet in &info.sheets {
+ let data_rows = if sheet.rows == 0 { 0 } else { sheet.rows - 1 };
+ println!(" {} ({} rows x {} cols)", sheet.name, data_rows, sheet.cols);
+ }
+ }
+ return Ok(());
+ }
+
+ // Excel with multiple sheets and no --sheet: show sheet listing
+ if fmt == Format::Excel && args.sheet.is_none() {
+ let info = read_file_info(&path, fmt)?;
+ if info.sheets.len() > 1 {
+ // Load a small sample of each sheet to display schemas
+ let mut schemas: Vec<(SheetInfo, polars::prelude::DataFrame)> = Vec::new();
+ for sheet in &info.sheets {
+ let opts = ReadOptions {
+ sheet: Some(sheet.name.clone()),
+ skip_rows: args.skip,
+ separator: None,
+ };
+ match read_file(&path, fmt, &opts) {
+ Ok(df) => schemas.push((sheet.clone(), df)),
+ Err(_) => {
+ // Empty or unreadable sheet
+ schemas.push((
+ SheetInfo {
+ name: sheet.name.clone(),
+ rows: 0,
+ cols: 0,
+ },
+ polars::prelude::DataFrame::default(),
+ ));
+ }
+ }
+ }
+ let schema_refs: Vec<(&SheetInfo, polars::prelude::DataFrame)> = schemas
+ .iter()
+ .map(|(s, df)| (s, df.clone()))
+ .collect();
+ print!(
+ "{}",
+ format_sheet_listing(&file_name, &info, &schema_refs)
+ );
+ return Ok(());
+ }
+ }
+
+ // Build read options
+ let opts = ReadOptions {
+ sheet: args.sheet.clone(),
+ skip_rows: args.skip,
+ separator: None,
+ };
+
+ let df = read_file(&path, fmt, &opts)?;
+
+ // Determine sheet info for display
+ let sheet = if fmt == Format::Excel {
+ // Try to get the sheet name we actually read
+ let info = read_file_info(&path, fmt)?;
+ if let Some(sheet_arg) = &args.sheet {
+ // Find the matching sheet in info
+ let matched = info.sheets.iter().find(|s| {
+ &s.name == sheet_arg
+ || sheet_arg
+ .parse::<usize>()
+ .map(|idx| {
+ info.sheets
+ .iter()
+ .position(|x| x.name == s.name)
+ .map(|i| i == idx)
+ .unwrap_or(false)
+ })
+ .unwrap_or(false)
+ });
+ if let Some(s) = matched {
+ s.clone()
+ } else {
+ // Fallback: build from df
+ SheetInfo {
+ name: sheet_arg.clone(),
+ rows: df.height() + 1,
+ cols: df.width(),
+ }
+ }
+ } else if let Some(first) = info.sheets.first() {
+ first.clone()
+ } else {
+ sheet_info_from_df(&file_name, &df)
+ }
+ } else {
+ sheet_info_from_df(&file_name, &df)
+ };
+
+ // Handle empty DataFrame
+ if df.is_empty() {
+ print!("{}", format_empty_sheet(&sheet));
+ return Ok(());
+ }
+
+ // --schema
+ if args.schema {
+ print!("{}", format_schema(&sheet, &df));
+ return Ok(());
+ }
+
+ // --describe
+ if args.describe {
+ print!("{}", format_describe(&df));
+ return Ok(());
+ }
+
+ // --csv output mode
+ if args.csv {
+ print!("{}", format_csv(&df));
+ return Ok(());
+ }
+
+ // Determine what to display
+ let output = match (args.head, args.tail) {
+ (Some(h), Some(t)) => {
+ // Both specified: show head + tail with omission line
+ format_head_tail(&df, h, t)
+ }
+ (Some(h), None) => {
+ // Only --head: slice the DataFrame and show all
+ let sliced = df.head(Some(h));
+ format_data_table(&sliced)
+ }
+ (None, Some(t)) => {
+ // Only --tail: slice and show all
+ let sliced = df.tail(Some(t));
+ format_data_table(&sliced)
+ }
+ (None, None) => {
+ // Default: show all if <= threshold, otherwise head+tail
+ if df.height() <= DEFAULT_THRESHOLD {
+ format_data_table(&df)
+ } else {
+ format_head_tail(&df, DEFAULT_HEAD_TAIL, DEFAULT_HEAD_TAIL)
+ }
+ }
+ };
+
+ print!("{}", output);
+ Ok(())
+}
+
+fn main() {
+ let args = Args::parse();
+ match run(args) {
+ Ok(()) => {}
+ Err(err) => {
+ // Check if this is an arg validation error (exit 2) vs runtime error (exit 1)
+ let msg = err.to_string();
+ if msg.contains("mutually exclusive")
+ || msg.contains("invalid")
+ || msg.contains("unknown format")
+ {
+ eprintln!("dtcat: {err}");
+ process::exit(2);
+ } else {
+ eprintln!("dtcat: {err}");
+ process::exit(1);
+ }
+ }
+ }
+}
diff --git a/src/bin/dtdiff.rs b/src/bin/dtdiff.rs
@@ -1 +1,419 @@
-fn main() {}
+use std::io::IsTerminal;
+use std::path::PathBuf;
+use std::process;
+
+use anyhow::{Result, bail};
+use clap::Parser;
+use serde_json::{Map, Value, json};
+
+use dtcore::diff::{DiffOptions, DiffResult, SheetSource};
+use dtcore::format::{detect_format, Format};
+use dtcore::reader::{ReadOptions, read_file};
+
/// Command-line arguments for dtdiff.
///
/// NOTE: the `///` comments on the fields below are rendered by clap as the
/// `--help` text, so they are part of the user-visible interface.
#[derive(Parser)]
#[command(
    name = "dtdiff",
    about = "Compare two tabular data files and show differences",
    version
)]
struct Args {
    /// First file to compare
    file_a: String,

    /// Second file to compare
    file_b: String,

    /// Override format detection (applies to both files)
    #[arg(long, value_name = "FMT")]
    format: Option<String>,

    /// Select sheet by name or index (Excel only)
    #[arg(long, value_name = "NAME|INDEX")]
    sheet: Option<String>,

    /// Key column(s) for matched comparison (comma-separated)
    #[arg(long, value_name = "COL")]
    key: Option<String>,

    /// Float comparison tolerance (default: 1e-10)
    #[arg(long)]
    tolerance: Option<f64>,

    // If both --json and --csv are passed, --json takes precedence (see run()).
    /// Output as JSON
    #[arg(long)]
    json: bool,

    /// Output as CSV
    #[arg(long)]
    csv: bool,

    /// Disable colored output
    #[arg(long)]
    no_color: bool,
}
+
+// ---------------------------------------------------------------------------
+// Output formatters (ported from xldiff.rs)
+// ---------------------------------------------------------------------------
+
/// Render a row as `Header: "value"` pairs separated by single spaces,
/// e.g. `Name: "Alice" Score: "90"`. Headers and values are paired
/// positionally; any surplus on either side is ignored.
fn format_row_inline(headers: &[String], values: &[String]) -> String {
    let mut parts = Vec::with_capacity(headers.len());
    for (header, value) in headers.iter().zip(values) {
        parts.push(format!("{}: \"{}\"", header, value));
    }
    parts.join(" ")
}
+
/// Format diff result as colored (or plain) text output.
///
/// Layout: a unified-diff-style header (`--- a` / `+++ b` with sheet and
/// file names), a one-line summary of counts, then removed rows (`-`, red),
/// added rows (`+`, green), and modified rows (`~`, yellow). Each modified
/// row lists its changed columns on indented lines as `col: "old" → "new"`.
/// When `color` is false all ANSI escape codes become empty strings.
fn format_text(result: &DiffResult, color: bool) -> String {
    if !result.has_differences() {
        return "No differences found.\n".to_string();
    }

    // ANSI SGR codes; empty strings disable coloring entirely.
    let (red, green, yellow, reset) = if color {
        ("\x1b[31m", "\x1b[32m", "\x1b[33m", "\x1b[0m")
    } else {
        ("", "", "", "")
    };

    let mut out = String::new();

    // Header: sheet name plus file name for each side.
    out.push_str(&format!(
        "--- {} ({})\n+++ {} ({})\n\n",
        result.source_a.sheet_name,
        result.source_a.file_name,
        result.source_b.sheet_name,
        result.source_b.file_name,
    ));

    // Summary line with per-category counts.
    out.push_str(&format!(
        "Added: {} | Removed: {} | Modified: {}\n\n",
        result.added.len(),
        result.removed.len(),
        result.modified.len(),
    ));

    // Removed rows (present in A, absent in B).
    for row in &result.removed {
        out.push_str(&format!(
            "{}- {}{}",
            red,
            format_row_inline(&result.headers, &row.values),
            reset,
        ));
        out.push('\n');
    }

    // Added rows (present in B, absent in A).
    for row in &result.added {
        out.push_str(&format!(
            "{}+ {}{}",
            green,
            format_row_inline(&result.headers, &row.values),
            reset,
        ));
        out.push('\n');
    }

    // Modified rows: identify each row by its key columns, then list every
    // changed column as `col: "old" → "new"` (U+2192 rightwards arrow).
    for m in &result.modified {
        let key_display: Vec<String> = result
            .key_columns
            .iter()
            .zip(m.key.iter())
            .map(|(col, val)| format!("{}: \"{}\"", col, val))
            .collect();
        out.push_str(&format!(
            "{}~ {}{}",
            yellow,
            key_display.join(" "),
            reset,
        ));
        out.push('\n');
        for change in &m.changes {
            out.push_str(&format!(
                " {}: \"{}\" \u{2192} \"{}\"\n",
                change.column, change.old_value, change.new_value,
            ));
        }
    }

    out
}
+
+/// Format diff result as JSON.
+fn format_json(result: &DiffResult) -> String {
+ let added: Vec<Value> = result
+ .added
+ .iter()
+ .map(|row| {
+ let mut map = Map::new();
+ for (h, v) in result.headers.iter().zip(row.values.iter()) {
+ map.insert(h.clone(), Value::String(v.clone()));
+ }
+ Value::Object(map)
+ })
+ .collect();
+
+ let removed: Vec<Value> = result
+ .removed
+ .iter()
+ .map(|row| {
+ let mut map = Map::new();
+ for (h, v) in result.headers.iter().zip(row.values.iter()) {
+ map.insert(h.clone(), Value::String(v.clone()));
+ }
+ Value::Object(map)
+ })
+ .collect();
+
+ let modified: Vec<Value> = result
+ .modified
+ .iter()
+ .map(|m| {
+ let mut key_map = Map::new();
+ for (col, val) in result.key_columns.iter().zip(m.key.iter()) {
+ key_map.insert(col.clone(), Value::String(val.clone()));
+ }
+ let changes: Vec<Value> = m
+ .changes
+ .iter()
+ .map(|c| {
+ json!({
+ "column": c.column,
+ "old": c.old_value,
+ "new": c.new_value,
+ })
+ })
+ .collect();
+ json!({
+ "key": Value::Object(key_map),
+ "changes": changes,
+ })
+ })
+ .collect();
+
+ let output = json!({
+ "added": added,
+ "removed": removed,
+ "modified": modified,
+ });
+
+ serde_json::to_string_pretty(&output).unwrap() + "\n"
+}
+
/// Quote a value per RFC 4180: if it contains a comma, a double quote, or a
/// CR/LF character, wrap it in double quotes and escape internal quotes by
/// doubling them; otherwise return it unchanged.
fn csv_quote(value: &str) -> String {
    // RFC 4180 §2.6–2.7: fields containing line breaks (CR or LF), commas,
    // or double quotes must be enclosed in double quotes, with embedded
    // quotes doubled. The original missed '\r'.
    if value.contains(|c| matches!(c, ',' | '"' | '\n' | '\r')) {
        format!("\"{}\"", value.replace('"', "\"\""))
    } else {
        value.to_string()
    }
}
+
+/// Build a CSV row from a slice of values.
+fn csv_row(values: &[String]) -> String {
+ values.iter().map(|v| csv_quote(v)).collect::<Vec<_>>().join(",")
+}
+
+/// Format diff result as CSV.
+///
+/// Header: _status, col1, col2, ..., _old_col1, _old_col2, ...
+/// Added rows: "added" + values + empty _old_ columns
+/// Removed rows: "removed" + values + empty _old_ columns
+/// Modified rows: "modified" + new values + old values in _old_ columns
+fn format_csv_output(result: &DiffResult) -> String {
+ let mut out = String::new();
+
+ // Build header
+ let mut header_parts: Vec<String> = vec!["_status".to_string()];
+ for h in &result.headers {
+ header_parts.push(h.clone());
+ }
+ for h in &result.headers {
+ header_parts.push(format!("_old_{}", h));
+ }
+ out.push_str(&csv_row(&header_parts));
+ out.push('\n');
+
+ let empty_cols: Vec<String> = result.headers.iter().map(|_| String::new()).collect();
+
+ // Removed rows
+ for row in &result.removed {
+ let mut parts: Vec<String> = vec!["removed".to_string()];
+ parts.extend(row.values.iter().cloned());
+ while parts.len() < 1 + result.headers.len() {
+ parts.push(String::new());
+ }
+ parts.extend(empty_cols.iter().cloned());
+ out.push_str(&csv_row(&parts));
+ out.push('\n');
+ }
+
+ // Added rows
+ for row in &result.added {
+ let mut parts: Vec<String> = vec!["added".to_string()];
+ parts.extend(row.values.iter().cloned());
+ while parts.len() < 1 + result.headers.len() {
+ parts.push(String::new());
+ }
+ parts.extend(empty_cols.iter().cloned());
+ out.push_str(&csv_row(&parts));
+ out.push('\n');
+ }
+
+ // Modified rows
+ for m in &result.modified {
+ let mut main_cols: Vec<String> = Vec::new();
+ let mut old_cols: Vec<String> = Vec::new();
+
+ for h in &result.headers {
+ if let Some(key_idx) = result.key_columns.iter().position(|k| k == h) {
+ main_cols.push(m.key.get(key_idx).cloned().unwrap_or_default());
+ old_cols.push(String::new());
+ } else if let Some(change) = m.changes.iter().find(|c| c.column == *h) {
+ main_cols.push(change.new_value.clone());
+ old_cols.push(change.old_value.clone());
+ } else {
+ // Unchanged non-key column — leave empty in both
+ main_cols.push(String::new());
+ old_cols.push(String::new());
+ }
+ }
+
+ let mut parts: Vec<String> = vec!["modified".to_string()];
+ parts.extend(main_cols);
+ parts.extend(old_cols);
+ out.push_str(&csv_row(&parts));
+ out.push('\n');
+ }
+
+ out
+}
+
+// ---------------------------------------------------------------------------
+// run / main
+// ---------------------------------------------------------------------------
+
/// Execute the diff: load both files, compare them, and print the result in
/// the selected format.
///
/// Exit behavior follows the classic `diff` convention: this function calls
/// `process::exit(1)` directly when differences are found (note: destructors
/// after that point do not run), returns `Ok(())` when the inputs are
/// identical, and returns `Err` — mapped to exit 2 by `main` — for usage or
/// I/O failures.
fn run(args: Args) -> Result<()> {
    let path_a = PathBuf::from(&args.file_a);
    let path_b = PathBuf::from(&args.file_b);

    // Validate files exist
    if !path_a.exists() {
        bail!("file not found: {}", path_a.display());
    }
    if !path_b.exists() {
        bail!("file not found: {}", path_b.display());
    }

    // Detect formats; a --format override applies to both sides.
    let fmt_a = detect_format(&path_a, args.format.as_deref())?;
    let fmt_b = detect_format(&path_b, args.format.as_deref())?;

    // Enforce same-format constraint. (The exact grouping of formats into
    // families is defined by dtcore's `same_family` — presumably e.g.
    // csv/tsv together; confirm in dtcore::format.)
    if !fmt_a.same_family(fmt_b) {
        bail!(
            "files have incompatible formats: {:?} vs {:?}. Both files must use the same format family.",
            fmt_a,
            fmt_b
        );
    }

    // Build read options — one per side; both use the same --sheet selector.
    let opts_a = ReadOptions {
        sheet: args.sheet.clone(),
        skip_rows: None,
        separator: None,
    };
    let opts_b = ReadOptions {
        sheet: args.sheet.clone(),
        skip_rows: None,
        separator: None,
    };

    // Read DataFrames
    let df_a = read_file(&path_a, fmt_a, &opts_a)?;
    let df_b = read_file(&path_b, fmt_b, &opts_b)?;

    // Resolve key columns: comma-separated --key list, trimmed. An empty
    // list selects positional (row-order) comparison downstream.
    let key_columns: Vec<String> = if let Some(ref key_str) = args.key {
        key_str.split(',').map(|s| s.trim().to_string()).collect()
    } else {
        vec![]
    };

    // Build source labels shown in the diff header.
    let file_name_a = path_a
        .file_name()
        .map(|s| s.to_string_lossy().to_string())
        .unwrap_or_else(|| args.file_a.clone());
    let file_name_b = path_b
        .file_name()
        .map(|s| s.to_string_lossy().to_string())
        .unwrap_or_else(|| args.file_b.clone());

    // Use file name as "sheet name" for non-Excel formats; for Excel use the
    // sheet name from opts (or a placeholder if none was specified).
    let sheet_name_a = if fmt_a == Format::Excel {
        args.sheet.clone().unwrap_or_else(|| file_name_a.clone())
    } else {
        file_name_a.clone()
    };
    let sheet_name_b = if fmt_b == Format::Excel {
        args.sheet.clone().unwrap_or_else(|| file_name_b.clone())
    } else {
        file_name_b.clone()
    };

    let source_a = SheetSource {
        file_name: file_name_a,
        sheet_name: sheet_name_a,
    };
    let source_b = SheetSource {
        file_name: file_name_b,
        sheet_name: sheet_name_b,
    };

    let diff_opts = DiffOptions {
        key_columns,
        tolerance: args.tolerance,
    };

    // Run diff
    let result = dtcore::diff::diff_sheets(&df_a, &df_b, &diff_opts, source_a, source_b)?;

    // Color only when writing to a terminal and --no-color was not given.
    let use_color = !args.no_color && std::io::stdout().is_terminal();

    // Format output: --json wins over --csv if both are given; default is text.
    let output = if args.json {
        format_json(&result)
    } else if args.csv {
        format_csv_output(&result)
    } else {
        format_text(&result, use_color)
    };

    print!("{}", output);

    // Exit 1 if differences found (diff convention), 0 if identical
    if result.has_differences() {
        process::exit(1);
    }

    Ok(())
}
+
+fn main() {
+ let args = Args::parse();
+ if let Err(err) = run(args) {
+ eprintln!("dtdiff: {err}");
+ process::exit(2);
+ }
+}
diff --git a/src/bin/dtfilter.rs b/src/bin/dtfilter.rs
@@ -1 +1,172 @@
-fn main() {}
+use std::io::Write;
+use std::path::PathBuf;
+use std::process;
+
+use anyhow::{Result, bail};
+use clap::Parser;
+
+use dtcore::filter::{FilterOptions, parse_filter_expr, parse_sort_spec, filter_pipeline};
+use dtcore::format::detect_format;
+use dtcore::formatter::{format_data_table, format_csv};
+use dtcore::reader::{ReadOptions, read_file};
+
+// ---------------------------------------------------------------------------
+// Argument parsing
+// ---------------------------------------------------------------------------
+
/// Command-line arguments for dtfilter.
///
/// NOTE: the `///` comments on the fields below are rendered by clap as the
/// `--help` text, so they are part of the user-visible interface.
#[derive(Parser)]
#[command(
    name = "dtfilter",
    about = "Filter, sort, and select columns from tabular data files",
    version
)]
struct Args {
    /// Input file
    file: String,

    /// Override format detection
    #[arg(long, value_name = "FMT")]
    format: Option<String>,

    /// Select sheet by name or index (Excel only)
    #[arg(long, value_name = "NAME|INDEX")]
    sheet: Option<String>,

    /// Skip first N rows after the header
    #[arg(long, value_name = "N")]
    skip: Option<usize>,

    /// Filter expression(s), e.g. "State=CA", "Amount>1000" (repeatable, ANDed)
    #[arg(long = "filter", value_name = "EXPR", action = clap::ArgAction::Append)]
    filters: Vec<String>,

    /// Sort spec, e.g. "Amount:desc" or "Name"
    #[arg(long, value_name = "SPEC")]
    sort: Option<String>,

    /// Select columns by name (comma-separated)
    #[arg(long, value_name = "COLS")]
    columns: Option<String>,

    // --head and --tail are mutually exclusive; enforced in validate_args().
    /// First N rows (before filter)
    #[arg(long, value_name = "N")]
    head: Option<usize>,

    /// Last N rows (before filter)
    #[arg(long, value_name = "N")]
    tail: Option<usize>,

    /// Max output rows (after filter)
    #[arg(long, value_name = "N")]
    limit: Option<usize>,

    /// Output as CSV
    #[arg(long)]
    csv: bool,
}
+
+// ---------------------------------------------------------------------------
+// Validation helpers
+// ---------------------------------------------------------------------------
+
+/// Validate args and return an error message for invalid combinations.
+/// Returns exit-code 2 on any argument error.
+fn validate_args(args: &Args) -> Result<(), ArgError> {
+ if args.head.is_some() && args.tail.is_some() {
+ return Err(ArgError("--head and --tail are mutually exclusive".to_string()));
+ }
+ Ok(())
+}
+
+struct ArgError(String);
+
+// ---------------------------------------------------------------------------
+// Core logic
+// ---------------------------------------------------------------------------
+
+fn run(args: Args) -> Result<()> {
+ let path = PathBuf::from(&args.file);
+
+ if !path.exists() {
+ bail!("file not found: {}", path.display());
+ }
+
+ // Detect format
+ let fmt = detect_format(&path, args.format.as_deref())?;
+
+ // Build read options
+ let read_opts = ReadOptions {
+ sheet: args.sheet.clone(),
+ skip_rows: args.skip,
+ separator: None,
+ };
+
+ // Read the DataFrame
+ let df = read_file(&path, fmt, &read_opts)?;
+
+ // Parse filter expressions
+ let filters = args
+ .filters
+ .iter()
+ .map(|s| parse_filter_expr(s).map_err(|e| anyhow::anyhow!("{}", e)))
+ .collect::<Result<Vec<_>>>()?;
+
+ // Parse sort spec
+ let sort = args
+ .sort
+ .as_deref()
+ .map(|s| parse_sort_spec(s).map_err(|e| anyhow::anyhow!("{}", e)))
+ .transpose()?;
+
+ // Parse column selection
+ let cols: Option<Vec<String>> = args.columns.as_deref().map(|s| {
+ s.split(',')
+ .map(|c| c.trim().to_string())
+ .filter(|c| !c.is_empty())
+ .collect()
+ });
+
+ // Build filter options
+ let filter_opts = FilterOptions {
+ filters,
+ cols,
+ sort,
+ limit: args.limit,
+ head: args.head,
+ tail: args.tail,
+ };
+
+ // Run the pipeline
+ let result = filter_pipeline(df, &filter_opts)?;
+
+ // Report row count to stderr
+ let row_count = result.height();
+ eprintln!("{} row{}", row_count, if row_count == 1 { "" } else { "s" });
+
+ // Output
+ let output = if args.csv {
+ format_csv(&result)
+ } else {
+ format_data_table(&result)
+ };
+
+ let stdout = std::io::stdout();
+ let mut out = stdout.lock();
+ out.write_all(output.as_bytes())?;
+
+ Ok(())
+}
+
+fn main() {
+ let args = Args::parse();
+
+ if let Err(e) = validate_args(&args) {
+ eprintln!("dtfilter: {}", e.0);
+ process::exit(2);
+ }
+
+ if let Err(err) = run(args) {
+ eprintln!("dtfilter: {err}");
+ process::exit(1);
+ }
+}
diff --git a/src/reader.rs b/src/reader.rs
@@ -3,7 +3,7 @@ use polars::prelude::*;
use std::path::Path;
use crate::format::Format;
-use crate::metadata::{FileInfo, SheetInfo};
+use crate::metadata::FileInfo;
use crate::readers;
/// Options that control how a file is read.