dt-cli-tools

CLI tools for viewing, filtering, and comparing tabular data files
Log | Files | Refs | README | LICENSE

commit 1a1d85d4018c7ae27a2ddd6ebea1afc2e35dba70
parent 8f3a14e14fd5eb2b61c31c855004cf94e55425f8
Author: Erik Loualiche <eloualic@umn.edu>
Date:   Tue, 31 Mar 2026 11:11:51 -0500

feat: add integration tests and fix clippy warnings

- 18 integration tests across dtcat, dtfilter, dtdiff
- Clippy auto-fixes for collapsible ifs and byte string literals
- 143 total tests, all passing

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
M src/filter.rs | 10 ++++------
M src/format.rs | 2 +-
M src/readers/arrow.rs | 5 ++---
M src/readers/json.rs | 5 ++---
M src/readers/parquet.rs | 5 ++---
A tests/dtcat.rs | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A tests/dtdiff.rs | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A tests/dtfilter.rs | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
8 files changed, 192 insertions(+), 16 deletions(-)

diff --git a/src/filter.rs b/src/filter.rs @@ -142,22 +142,20 @@ fn build_filter_mask(df: &DataFrame, expr: &FilterExpr) -> Result<BooleanChunked match &expr.op { FilterOp::Eq => { - if is_numeric_dtype(dtype) { - if let Ok(n) = expr.value.parse::<f64>() { + if is_numeric_dtype(dtype) + && let Ok(n) = expr.value.parse::<f64>() { let s = series.cast(&DataType::Float64)?; return Ok(s.f64()?.equal(n)); } - } let s = series.cast(&DataType::String)?; Ok(s.str()?.equal(expr.value.as_str())) } FilterOp::NotEq => { - if is_numeric_dtype(dtype) { - if let Ok(n) = expr.value.parse::<f64>() { + if is_numeric_dtype(dtype) + && let Ok(n) = expr.value.parse::<f64>() { let s = series.cast(&DataType::Float64)?; return Ok(s.f64()?.not_equal(n)); } - } let s = series.cast(&DataType::String)?; Ok(s.str()?.not_equal(expr.value.as_str())) } diff --git a/src/format.rs b/src/format.rs @@ -130,7 +130,7 @@ pub fn detect_csv_delimiter(path: &Path) -> Result<u8> { let n = file.read(&mut buf)?; buf.truncate(n); - let candidates: &[u8] = &[b',', b'\t', b';']; + let candidates: &[u8] = b",\t;"; // min count per delimiter across lines; start at usize::MAX so we can take min let mut min_counts = [usize::MAX; 3]; let mut line_count = 0usize; diff --git a/src/readers/arrow.rs b/src/readers/arrow.rs @@ -8,11 +8,10 @@ pub fn read(path: &Path, opts: &ReadOptions) -> Result<DataFrame> { let file = std::fs::File::open(path)?; let mut df = IpcReader::new(file).finish()?; - if let Some(skip) = opts.skip_rows { - if skip > 0 && skip < df.height() { + if let Some(skip) = opts.skip_rows + && skip > 0 && skip < df.height() { df = df.slice(skip as i64, df.height() - skip); } - } Ok(df) } diff --git a/src/readers/json.rs b/src/readers/json.rs @@ -19,11 +19,10 @@ pub fn read(path: &Path, format: Format, opts: &ReadOptions) -> Result<DataFrame } }; - if let Some(skip) = opts.skip_rows { - if skip > 0 && skip < df.height() { + if let Some(skip) = opts.skip_rows + && skip > 0 && skip < df.height() { df = 
df.slice(skip as i64, df.height() - skip); } - } Ok(df) } diff --git a/src/readers/parquet.rs b/src/readers/parquet.rs @@ -8,11 +8,10 @@ pub fn read(path: &Path, opts: &ReadOptions) -> Result<DataFrame> { let file = std::fs::File::open(path)?; let mut df = ParquetReader::new(file).finish()?; - if let Some(skip) = opts.skip_rows { - if skip > 0 && skip < df.height() { + if let Some(skip) = opts.skip_rows + && skip > 0 && skip < df.height() { df = df.slice(skip as i64, df.height() - skip); } - } Ok(df) } diff --git a/tests/dtcat.rs b/tests/dtcat.rs @@ -0,0 +1,66 @@ +use assert_cmd::Command; +use predicates::prelude::*; +use std::io::Write; +use tempfile::NamedTempFile; + +fn dtcat() -> Command { + Command::cargo_bin("dtcat").unwrap() +} + +fn csv_file(content: &str) -> NamedTempFile { + let mut f = NamedTempFile::with_suffix(".csv").unwrap(); + write!(f, "{}", content).unwrap(); + f.flush().unwrap(); + f +} + +#[test] +fn shows_csv_data() { + let f = csv_file("name,value\nAlice,100\nBob,200\n"); + dtcat().arg(f.path()).assert().success() + .stdout(predicate::str::contains("Alice")) + .stdout(predicate::str::contains("Bob")); +} + +#[test] +fn schema_flag() { + let f = csv_file("name,value\nAlice,100\n"); + dtcat().arg(f.path()).arg("--schema").assert().success() + .stdout(predicate::str::contains("Column")) + .stdout(predicate::str::contains("Type")); +} + +#[test] +fn csv_output_flag() { + let f = csv_file("name,value\nAlice,100\n"); + dtcat().arg(f.path()).arg("--csv").assert().success() + .stdout(predicate::str::contains("name,value")); +} + +#[test] +fn head_flag() { + let f = csv_file("x\n1\n2\n3\n4\n5\n"); + dtcat().arg(f.path()).arg("--head").arg("2").assert().success(); +} + +#[test] +fn nonexistent_file_exits_1() { + dtcat().arg("/tmp/does_not_exist_12345.csv").assert().failure(); +} + +#[test] +fn format_override() { + let mut f = NamedTempFile::with_suffix(".txt").unwrap(); + write!(f, "a,b\n1,2\n").unwrap(); + f.flush().unwrap(); + 
dtcat().arg(f.path()).arg("--format").arg("csv").assert().success() + .stdout(predicate::str::contains("1")); +} + +#[test] +fn describe_flag() { + let f = csv_file("name,value\nAlice,100\nBob,200\n"); + dtcat().arg(f.path()).arg("--describe").assert().success() + .stdout(predicate::str::contains("count")) + .stdout(predicate::str::contains("mean")); +} diff --git a/tests/dtdiff.rs b/tests/dtdiff.rs @@ -0,0 +1,56 @@ +use assert_cmd::Command; +use predicates::prelude::*; +use std::io::Write; +use tempfile::NamedTempFile; + +fn dtdiff() -> Command { + Command::cargo_bin("dtdiff").unwrap() +} + +fn csv_file(content: &str) -> NamedTempFile { + let mut f = NamedTempFile::with_suffix(".csv").unwrap(); + write!(f, "{}", content).unwrap(); + f.flush().unwrap(); + f +} + +#[test] +fn no_diff_exits_0() { + let a = csv_file("name,value\nAlice,100\n"); + let b = csv_file("name,value\nAlice,100\n"); + dtdiff().arg(a.path()).arg(b.path()).assert().success() + .stdout(predicate::str::contains("No differences")); +} + +#[test] +fn diff_exits_1() { + let a = csv_file("name,value\nAlice,100\n"); + let b = csv_file("name,value\nBob,200\n"); + dtdiff().arg(a.path()).arg(b.path()).assert().code(1); +} + +#[test] +fn keyed_diff() { + let a = csv_file("id,name\n1,Alice\n2,Bob\n"); + let b = csv_file("id,name\n1,Alice\n2,Robert\n"); + dtdiff().arg(a.path()).arg(b.path()).arg("--key").arg("id") + .assert().code(1); +} + +#[test] +fn json_output() { + let a = csv_file("id,val\n1,a\n"); + let b = csv_file("id,val\n1,b\n"); + dtdiff().arg(a.path()).arg(b.path()).arg("--key").arg("id").arg("--json") + .assert().code(1) + .stdout(predicate::str::contains("\"modified\"")); +} + +#[test] +fn csv_output() { + let a = csv_file("id,val\n1,a\n"); + let b = csv_file("id,val\n1,b\n"); + dtdiff().arg(a.path()).arg(b.path()).arg("--key").arg("id").arg("--csv") + .assert().code(1) + .stdout(predicate::str::contains("_status")); +} diff --git a/tests/dtfilter.rs b/tests/dtfilter.rs @@ -0,0 +1,59 @@ +use 
assert_cmd::Command; +use predicates::prelude::*; +use std::io::Write; +use tempfile::NamedTempFile; + +fn dtfilter() -> Command { + Command::cargo_bin("dtfilter").unwrap() +} + +fn csv_file(content: &str) -> NamedTempFile { + let mut f = NamedTempFile::with_suffix(".csv").unwrap(); + write!(f, "{}", content).unwrap(); + f.flush().unwrap(); + f +} + +#[test] +fn filter_eq() { + let f = csv_file("name,value\nAlice,100\nBob,200\n"); + dtfilter().arg(f.path()).arg("--filter").arg("name=Alice").assert().success() + .stdout(predicate::str::contains("Alice")) + .stdout(predicate::str::contains("Bob").not()); +} + +#[test] +fn filter_gt() { + let f = csv_file("name,value\nAlice,100\nBob,200\nCharlie,300\n"); + dtfilter().arg(f.path()).arg("--filter").arg("value>150").assert().success() + .stdout(predicate::str::contains("Bob")) + .stdout(predicate::str::contains("Charlie")); +} + +#[test] +fn sort_desc() { + let f = csv_file("name,value\nAlice,100\nBob,200\n"); + dtfilter().arg(f.path()).arg("--sort").arg("value:desc").assert().success(); +} + +#[test] +fn columns_select() { + let f = csv_file("name,value,extra\nAlice,100,x\n"); + dtfilter().arg(f.path()).arg("--columns").arg("name,value").assert().success() + .stdout(predicate::str::contains("name")) + .stdout(predicate::str::contains("extra").not()); +} + +#[test] +fn csv_output() { + let f = csv_file("name,value\nAlice,100\n"); + dtfilter().arg(f.path()).arg("--csv").assert().success() + .stdout(predicate::str::contains("name,value")); +} + +#[test] +fn head_tail_exclusive() { + let f = csv_file("x\n1\n2\n"); + dtfilter().arg(f.path()).arg("--head").arg("1").arg("--tail").arg("1") + .assert().code(2); +}