commit 1a1d85d4018c7ae27a2ddd6ebea1afc2e35dba70
parent 8f3a14e14fd5eb2b61c31c855004cf94e55425f8
Author: Erik Loualiche <eloualic@umn.edu>
Date: Tue, 31 Mar 2026 11:11:51 -0500
feat: add integration tests and fix clippy warnings
- 18 integration tests across dtcat, dtfilter, dtdiff
- Clippy auto-fixes for collapsible ifs and byte string literals
- 143 total tests, all passing
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat:
8 files changed, 192 insertions(+), 16 deletions(-)
diff --git a/src/filter.rs b/src/filter.rs
@@ -142,22 +142,20 @@ fn build_filter_mask(df: &DataFrame, expr: &FilterExpr) -> Result<BooleanChunked
match &expr.op {
FilterOp::Eq => {
- if is_numeric_dtype(dtype) {
- if let Ok(n) = expr.value.parse::<f64>() {
+ if is_numeric_dtype(dtype)
+ && let Ok(n) = expr.value.parse::<f64>() {
let s = series.cast(&DataType::Float64)?;
return Ok(s.f64()?.equal(n));
}
- }
let s = series.cast(&DataType::String)?;
Ok(s.str()?.equal(expr.value.as_str()))
}
FilterOp::NotEq => {
- if is_numeric_dtype(dtype) {
- if let Ok(n) = expr.value.parse::<f64>() {
+ if is_numeric_dtype(dtype)
+ && let Ok(n) = expr.value.parse::<f64>() {
let s = series.cast(&DataType::Float64)?;
return Ok(s.f64()?.not_equal(n));
}
- }
let s = series.cast(&DataType::String)?;
Ok(s.str()?.not_equal(expr.value.as_str()))
}
diff --git a/src/format.rs b/src/format.rs
@@ -130,7 +130,7 @@ pub fn detect_csv_delimiter(path: &Path) -> Result<u8> {
let n = file.read(&mut buf)?;
buf.truncate(n);
- let candidates: &[u8] = &[b',', b'\t', b';'];
+ let candidates: &[u8] = b",\t;";
// min count per delimiter across lines; start at usize::MAX so we can take min
let mut min_counts = [usize::MAX; 3];
let mut line_count = 0usize;
diff --git a/src/readers/arrow.rs b/src/readers/arrow.rs
@@ -8,11 +8,10 @@ pub fn read(path: &Path, opts: &ReadOptions) -> Result<DataFrame> {
let file = std::fs::File::open(path)?;
let mut df = IpcReader::new(file).finish()?;
- if let Some(skip) = opts.skip_rows {
- if skip > 0 && skip < df.height() {
+ if let Some(skip) = opts.skip_rows
+ && skip > 0 && skip < df.height() {
df = df.slice(skip as i64, df.height() - skip);
}
- }
Ok(df)
}
diff --git a/src/readers/json.rs b/src/readers/json.rs
@@ -19,11 +19,10 @@ pub fn read(path: &Path, format: Format, opts: &ReadOptions) -> Result<DataFrame
}
};
- if let Some(skip) = opts.skip_rows {
- if skip > 0 && skip < df.height() {
+ if let Some(skip) = opts.skip_rows
+ && skip > 0 && skip < df.height() {
df = df.slice(skip as i64, df.height() - skip);
}
- }
Ok(df)
}
diff --git a/src/readers/parquet.rs b/src/readers/parquet.rs
@@ -8,11 +8,10 @@ pub fn read(path: &Path, opts: &ReadOptions) -> Result<DataFrame> {
let file = std::fs::File::open(path)?;
let mut df = ParquetReader::new(file).finish()?;
- if let Some(skip) = opts.skip_rows {
- if skip > 0 && skip < df.height() {
+ if let Some(skip) = opts.skip_rows
+ && skip > 0 && skip < df.height() {
df = df.slice(skip as i64, df.height() - skip);
}
- }
Ok(df)
}
diff --git a/tests/dtcat.rs b/tests/dtcat.rs
@@ -0,0 +1,66 @@
+use assert_cmd::Command;
+use predicates::prelude::*;
+use std::io::Write;
+use tempfile::NamedTempFile;
+
+fn dtcat() -> Command { // command handle for this crate's `dtcat` binary; panics if it cannot be located
+ Command::cargo_bin("dtcat").unwrap()
+}
+
+fn csv_file(content: &str) -> NamedTempFile { // temp file with a `.csv` suffix holding `content`
+    let mut tmp = NamedTempFile::with_suffix(".csv").unwrap();
+    tmp.write_all(content.as_bytes()).unwrap();
+    tmp.flush().unwrap();
+    tmp
+}
+
+#[test]
+fn shows_csv_data() { // with no flags, both rows' names must appear on stdout
+    let input = csv_file("name,value\nAlice,100\nBob,200\n");
+    let run = dtcat().arg(input.path()).assert().success();
+    run.stdout(predicate::str::contains("Alice"))
+        .stdout(predicate::str::contains("Bob"));
+}
+
+#[test]
+fn schema_flag() { // `--schema` output must mention "Column" and "Type"
+    let input = csv_file("name,value\nAlice,100\n");
+    let run = dtcat().arg(input.path()).arg("--schema").assert().success();
+    run.stdout(predicate::str::contains("Column"))
+        .stdout(predicate::str::contains("Type"));
+}
+
+#[test]
+fn csv_output_flag() { // `--csv` output must contain the `name,value` header text
+    let input = csv_file("name,value\nAlice,100\n");
+    let run = dtcat().arg(input.path()).arg("--csv").assert();
+    run.success().stdout(predicate::str::contains("name,value"));
+}
+
+#[test]
+fn head_flag() { // only checks that `--head 2` is accepted; the output is not inspected
+    let input = csv_file("x\n1\n2\n3\n4\n5\n");
+    dtcat().arg(input.path()).args(["--head", "2"]).assert().success();
+}
+
+#[test]
+fn nonexistent_file_exits_1() { // path inside a fresh temp dir: portable and guaranteed absent, unlike a fixed /tmp name
+    dtcat().arg(tempfile::tempdir().unwrap().path().join("does_not_exist.csv")).assert().failure();
+}
+
+#[test]
+fn format_override() { // CSV content behind a `.txt` suffix, read with `--format csv`
+    let mut txt = NamedTempFile::with_suffix(".txt").unwrap();
+    txt.write_all(b"a,b\n1,2\n").unwrap();
+    txt.flush().unwrap();
+    let run = dtcat().arg(txt.path()).args(["--format", "csv"]).assert();
+    run.success().stdout(predicate::str::contains("1"));
+}
+
+#[test]
+fn describe_flag() { // `--describe` output must mention "count" and "mean"
+    let input = csv_file("name,value\nAlice,100\nBob,200\n");
+    let run = dtcat().arg(input.path()).arg("--describe").assert().success();
+    run.stdout(predicate::str::contains("count"))
+        .stdout(predicate::str::contains("mean"));
+}
diff --git a/tests/dtdiff.rs b/tests/dtdiff.rs
@@ -0,0 +1,56 @@
+use assert_cmd::Command;
+use predicates::prelude::*;
+use std::io::Write;
+use tempfile::NamedTempFile;
+
+fn dtdiff() -> Command { // command handle for this crate's `dtdiff` binary; panics if it cannot be located
+ Command::cargo_bin("dtdiff").unwrap()
+}
+
+fn csv_file(content: &str) -> NamedTempFile { // temp file with a `.csv` suffix holding `content`
+    let mut tmp = NamedTempFile::with_suffix(".csv").unwrap();
+    tmp.write_all(content.as_bytes()).unwrap();
+    tmp.flush().unwrap();
+    tmp
+}
+
+#[test]
+fn no_diff_exits_0() { // identical inputs: success exit plus a "No differences" message
+    let left = csv_file("name,value\nAlice,100\n");
+    let right = csv_file("name,value\nAlice,100\n");
+    let run = dtdiff().args([left.path(), right.path()]).assert();
+    run.success().stdout(predicate::str::contains("No differences"));
+}
+
+#[test]
+fn diff_exits_1() { // inputs with different rows must produce exit code 1
+    let left = csv_file("name,value\nAlice,100\n");
+    let right = csv_file("name,value\nBob,200\n");
+    dtdiff().args([left.path(), right.path()]).assert().code(1);
+}
+
+#[test]
+fn keyed_diff() { // with `--key id`, one changed name must still produce exit code 1
+    let left = csv_file("id,name\n1,Alice\n2,Bob\n");
+    let right = csv_file("id,name\n1,Alice\n2,Robert\n");
+    let run = dtdiff().args([left.path(), right.path()]).args(["--key", "id"]).assert();
+    run.code(1);
+}
+
+#[test]
+fn json_output() { // keyed diff with `--json`: exit code 1 and the quoted text "modified" on stdout
+    let left = csv_file("id,val\n1,a\n");
+    let right = csv_file("id,val\n1,b\n");
+    let run = dtdiff().args([left.path(), right.path()]).args(["--key", "id", "--json"]).assert();
+    run.code(1)
+        .stdout(predicate::str::contains("\"modified\""));
+}
+
+#[test]
+fn csv_output() { // keyed diff with `--csv`: exit code 1 and the text "_status" on stdout
+    let left = csv_file("id,val\n1,a\n");
+    let right = csv_file("id,val\n1,b\n");
+    let run = dtdiff().args([left.path(), right.path()]).args(["--key", "id", "--csv"]).assert();
+    run.code(1)
+        .stdout(predicate::str::contains("_status"));
+}
diff --git a/tests/dtfilter.rs b/tests/dtfilter.rs
@@ -0,0 +1,59 @@
+use assert_cmd::Command;
+use predicates::prelude::*;
+use std::io::Write;
+use tempfile::NamedTempFile;
+
+fn dtfilter() -> Command { // command handle for this crate's `dtfilter` binary; panics if it cannot be located
+ Command::cargo_bin("dtfilter").unwrap()
+}
+
+fn csv_file(content: &str) -> NamedTempFile { // temp file with a `.csv` suffix holding `content`
+    let mut tmp = NamedTempFile::with_suffix(".csv").unwrap();
+    tmp.write_all(content.as_bytes()).unwrap();
+    tmp.flush().unwrap();
+    tmp
+}
+
+#[test]
+fn filter_eq() { // `name=Alice` must keep Alice and leave Bob out of stdout
+    let input = csv_file("name,value\nAlice,100\nBob,200\n");
+    let run = dtfilter().arg(input.path()).args(["--filter", "name=Alice"]).assert().success();
+    run.stdout(predicate::str::contains("Alice"))
+        .stdout(predicate::str::contains("Bob").not());
+}
+
+#[test]
+fn filter_gt() { // rows above the threshold are kept AND the row below it is dropped
+    let f = csv_file("name,value\nAlice,100\nBob,200\nCharlie,300\n");
+    dtfilter().arg(f.path()).arg("--filter").arg("value>150").assert().success()
+        .stdout(predicate::str::contains("Bob").and(predicate::str::contains("Charlie")))
+        .stdout(predicate::str::contains("Alice").not()); // without this a no-op filter would pass
+}
+
+#[test]
+fn sort_desc() { // only checks that `--sort value:desc` is accepted; row order is not inspected
+    let input = csv_file("name,value\nAlice,100\nBob,200\n");
+    dtfilter().arg(input.path()).args(["--sort", "value:desc"]).assert().success();
+}
+
+#[test]
+fn columns_select() { // `--columns name,value` must show "name" and leave "extra" out of stdout
+    let input = csv_file("name,value,extra\nAlice,100,x\n");
+    let run = dtfilter().arg(input.path()).args(["--columns", "name,value"]).assert().success();
+    run.stdout(predicate::str::contains("name"))
+        .stdout(predicate::str::contains("extra").not());
+}
+
+#[test]
+fn csv_output() { // `--csv` output must contain the `name,value` header text
+    let input = csv_file("name,value\nAlice,100\n");
+    let run = dtfilter().arg(input.path()).arg("--csv").assert();
+    run.success().stdout(predicate::str::contains("name,value"));
+}
+
+#[test]
+fn head_tail_exclusive() { // `--head` together with `--tail` must exit with code 2
+    let input = csv_file("x\n1\n2\n");
+    let run = dtfilter().arg(input.path()).args(["--head", "1", "--tail", "1"]).assert();
+    run.code(2);
+}