commit 1228da956b1940425e10a130d7f829382cd593d3
parent d63e6dd2fe430319758f09c653a81637dd24308c
Author: Erik Loualiche <eloualic@umn.edu>
Date: Fri, 13 Mar 2026 16:00:42 -0500
feat: wire up main orchestration — data, schema, multi-sheet modes
Full CLI orchestration: single/multi-sheet routing, row selection
(--head/--tail/--all), adaptive display (<=50 all, >50 head+tail),
large-file gate, --csv output, --schema/--describe modes, and
proper error handling with exit code 2 for arg errors.
12 integration tests covering all modes and error paths.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Diffstat:
2 files changed, 472 insertions(+), 11 deletions(-)
diff --git a/xlcat/src/main.rs b/xlcat/src/main.rs
@@ -4,7 +4,11 @@ mod reader;
use anyhow::Result;
use clap::Parser;
+use polars::prelude::*;
use std::path::PathBuf;
+use std::process;
+
+use metadata::{FileInfo, SheetInfo};
#[derive(Parser, Debug)]
#[command(name = "xlcat", about = "View Excel files in the terminal")]
@@ -66,21 +70,293 @@ fn parse_size(s: &str) -> Result<u64, String> {
Ok((num * multiplier as f64) as u64)
}
-fn main() -> Result<()> {
- let cli = Cli::parse();
+// ---------------------------------------------------------------------------
+// ArgError — used for user-facing flag/argument errors (exit code 2)
+// ---------------------------------------------------------------------------
+
+/// Error raised for invalid flags or arguments supplied by the user.
+///
+/// `main` recognises this type via a downcast and exits with status 2
+/// instead of the generic status 1.
+#[derive(Debug)]
+struct ArgError(String);
- // Validate flag combinations
- let mode_count = cli.schema as u8 + cli.describe as u8;
- if mode_count > 1 {
- anyhow::bail!("--schema and --describe are mutually exclusive");
+impl std::fmt::Display for ArgError {
+    /// Emits the wrapped message with no extra decoration.
+    fn fmt(&self, out: &mut std::fmt::Formatter) -> std::fmt::Result {
+        out.write_str(&self.0)
}
- if (cli.schema || cli.describe) && (cli.head.is_some() || cli.tail.is_some() || cli.all) {
- anyhow::bail!("--schema and --describe cannot be combined with --head, --tail, or --all");
+}
+
+// Empty impl: the default `Error` methods are sufficient for this type.
+impl std::error::Error for ArgError {}
+
+// ---------------------------------------------------------------------------
+// Sheet resolution
+// ---------------------------------------------------------------------------
+
+/// Outcome of sheet resolution: what `run` should operate on.
+enum SheetTarget {
+ /// One sheet, identified by its position in the file's `sheets` list.
+ Single(usize),
+ /// No single sheet selected; `run` iterates over every sheet.
+ ListAll,
+}
+
+// ---------------------------------------------------------------------------
+// run() — main orchestration
+// ---------------------------------------------------------------------------
+
+/// Orchestrates one invocation: validates flags, loads workbook metadata,
+/// resolves which sheet(s) to show, and prints the result to stdout.
+///
+/// Flag conflicts are reported as `ArgError`; failures from the metadata,
+/// reader, sheet-resolution and rendering calls are propagated with `?`.
+fn run(cli: &Cli) -> Result<()> {
+    // Step 1: refuse contradictory flag combinations before opening the file.
+    if cli.schema && cli.describe {
+        let msg = "--schema and --describe are mutually exclusive";
+        return Err(ArgError(msg.into()).into());
+    }
+    let wants_rows = cli.head.is_some() || cli.tail.is_some() || cli.all;
+    if (cli.schema || cli.describe) && wants_rows {
+        let msg = "--schema/--describe cannot be combined with --head, --tail, or --all";
+        return Err(ArgError(msg.into()).into());
}
if (cli.schema || cli.describe) && cli.csv {
- anyhow::bail!("--csv can only be used in data mode (not with --schema or --describe)");
+        let msg = "--csv cannot be combined with --schema or --describe";
+        return Err(ArgError(msg.into()).into());
+    }
+
+    // Step 2: workbook-level metadata plus a short display name
+    // (final path component, or the whole path when there is none).
+    let info = metadata::read_file_info(&cli.file)?;
+    let file_name = match cli.file.file_name() {
+        Some(base) => base.to_string_lossy().to_string(),
+        None => cli.file.display().to_string(),
+    };
+
+    // Step 3: dispatch on the resolved sheet target.
+    match resolve_sheet_target(cli, &info)? {
+        SheetTarget::Single(idx) => {
+            let sheet = &info.sheets[idx];
+            let df = reader::read_sheet(&cli.file, &sheet.name)?;
+            render_single_sheet(cli, &file_name, &info, sheet, &df)?;
+        }
+        // --describe with no sheet chosen: one section per sheet.
+        SheetTarget::ListAll if cli.describe => {
+            let mut report = formatter::format_header(&file_name, &info);
+            report.push('\n');
+            for sheet in info.sheets.iter() {
+                let df = reader::read_sheet(&cli.file, &sheet.name)?;
+                let is_blank = sheet.rows == 0 && sheet.cols == 0;
+                if is_blank {
+                    report.push_str(&formatter::format_empty_sheet(sheet));
+                } else {
+                    report.push_str(&formatter::format_schema(sheet, &df));
+                    report.push_str(&formatter::format_describe(&df));
+                }
+                report.push('\n');
+            }
+            print!("{report}");
+        }
+        // Default with no sheet chosen: listing built from every sheet.
+        SheetTarget::ListAll => {
+            let mut pairs: Vec<(&SheetInfo, DataFrame)> = Vec::with_capacity(info.sheets.len());
+            for sheet in info.sheets.iter() {
+                pairs.push((sheet, reader::read_sheet(&cli.file, &sheet.name)?));
+            }
+            print!("{}", formatter::format_sheet_listing(&file_name, &info, &pairs));
+        }
}
- eprintln!("xlcat: not yet implemented");
- std::process::exit(1);
+    Ok(())
+}
+
+/// Decides which sheet(s) the invocation refers to.
+///
+/// With `--sheet`, the argument is tried as an exact sheet name first and
+/// then as a 0-based index. Without it, a one-sheet file resolves to that
+/// sheet; otherwise all sheets are listed, unless `--all`, `--head`,
+/// `--tail` or `--csv` was given, which requires an explicit sheet.
+///
+/// # Errors
+/// Returns an `ArgError` for an unknown name, an out-of-range index, or a
+/// row/CSV flag used on a multi-sheet file without `--sheet`.
+fn resolve_sheet_target(cli: &Cli, info: &FileInfo) -> Result<SheetTarget> {
+    let sheet_count = info.sheets.len();
+
+    match &cli.sheet {
+        Some(wanted) => {
+            // An exact name match wins over a numeric interpretation.
+            if let Some(pos) = info.sheets.iter().position(|s| s.name == *wanted) {
+                return Ok(SheetTarget::Single(pos));
+            }
+            match wanted.parse::<usize>() {
+                Ok(idx) if idx < sheet_count => Ok(SheetTarget::Single(idx)),
+                Ok(idx) => Err(ArgError(format!(
+                    "Sheet index {idx} out of range (file has {} sheets)",
+                    sheet_count
+                ))
+                .into()),
+                Err(_) => Err(ArgError(format!("Sheet not found: {wanted}")).into()),
+            }
+        }
+        None if sheet_count == 1 => Ok(SheetTarget::Single(0)),
+        None => {
+            // These flags only make sense for one concrete sheet.
+            let needs_explicit_sheet =
+                cli.all || cli.head.is_some() || cli.tail.is_some() || cli.csv;
+            if needs_explicit_sheet {
+                Err(ArgError(
+                    "Multiple sheets found. Use --sheet <name> to select one before using --all, --head, --tail, or --csv.".into(),
+                )
+                .into())
+            } else {
+                Ok(SheetTarget::ListAll)
+            }
+        }
+    }
+}
+
+/// Prints one sheet to stdout in the mode selected by the flags.
+///
+/// `--csv` prints only the row-selected data as CSV. Otherwise the output is
+/// the file header followed by: the empty-sheet notice (0 rows and 0 cols),
+/// or the schema plus a "(no data rows)" note, the schema alone
+/// (`--schema`), schema + describe (`--describe`), or schema + data rows.
+fn render_single_sheet(
+    cli: &Cli,
+    file_name: &str,
+    info: &FileInfo,
+    sheet: &SheetInfo,
+    df: &DataFrame,
+) -> Result<()> {
+    // CSV output bypasses the header and schema entirely.
+    if cli.csv {
+        let rows = apply_row_selection(cli, info, df);
+        print!("{}", formatter::format_csv(&rows));
+        return Ok(());
+    }
+
+    let mut page = formatter::format_header(file_name, info);
+    page.push('\n');
+
+    let is_blank = sheet.rows == 0 && sheet.cols == 0;
+    if is_blank {
+        page.push_str(&formatter::format_empty_sheet(sheet));
+    } else {
+        // Every non-blank rendering starts with the schema.
+        page.push_str(&formatter::format_schema(sheet, df));
+        if df.height() == 0 {
+            // Columns exist but there is nothing below the header row.
+            page.push_str("\n(no data rows)\n");
+        } else if !cli.schema {
+            if cli.describe {
+                page.push_str(&formatter::format_describe(df));
+            } else {
+                page.push('\n');
+                page.push_str(&format_data_with_selection(cli, info, df));
+            }
+        }
+    }
+
+    print!("{page}");
+    Ok(())
+}
+
+/// Renders the data rows of `df` according to the row-selection flags.
+///
+/// Precedence, highest first:
+/// 1. `--all`: the full table.
+/// 2. `--head` and/or `--tail`: the requested slice(s); the full table when
+///    the request covers every row or both counts are zero.
+/// 3. Large-file gate (`info.file_size > cli.max_size`): first 25 rows plus
+///    a notice, applied only when the sheet really has more than 25 rows.
+/// 4. Adaptive default: everything for <= 50 rows, else head 25 + tail 25.
+fn format_data_with_selection(cli: &Cli, info: &FileInfo, df: &DataFrame) -> String {
+    let total = df.height();
+
+    // --all: show everything
+    if cli.all {
+        return formatter::format_data_table(df);
+    }
+
+    // Explicit --head and/or --tail
+    if cli.head.is_some() || cli.tail.is_some() {
+        let head_n = cli.head.unwrap_or(0);
+        let tail_n = cli.tail.unwrap_or(0);
+        // saturating_add: user-supplied counts may be huge; a plain `+`
+        // would panic in debug builds and wrap around in release builds.
+        if head_n.saturating_add(tail_n) >= total || (head_n == 0 && tail_n == 0) {
+            return formatter::format_data_table(df);
+        }
+        // If only --head, show first N
+        if cli.tail.is_none() {
+            let head_df = df.head(Some(head_n));
+            return formatter::format_data_table(&head_df);
+        }
+        // If only --tail, show last N
+        if cli.head.is_none() {
+            let tail_df = df.tail(Some(tail_n));
+            return formatter::format_data_table(&tail_df);
+        }
+        // Both specified
+        return formatter::format_head_tail(df, head_n, tail_n);
+    }
+
+    // Large file gate: file_size > max_size and no explicit flags.
+    // Skipped when the sheet has at most 25 rows, so the notice never
+    // claims a truncation that did not happen; such sheets fall through
+    // to the adaptive default, which shows them in full.
+    if info.file_size > cli.max_size && total > 25 {
+        let mut out = formatter::format_head_tail(df, 25, 0);
+        out.push_str(&format!(
+            "\nLarge file ({}) — showing first 25 of {total} rows. Use --all to see everything.\n",
+            metadata::format_file_size(info.file_size)
+        ));
+        return out;
+    }
+
+    // Adaptive default: <=50 rows show all, >50 show head 25 + tail 25
+    if total <= 50 {
+        formatter::format_data_table(df)
+    } else {
+        formatter::format_head_tail(df, 25, 25)
+    }
+}
+
+/// Apply row selection for CSV mode — returns a (possibly sliced) DataFrame.
+///
+/// Uses the same precedence as the table renderer: `--all`, then explicit
+/// `--head`/`--tail`, then the large-file gate (first 25 rows), then the
+/// adaptive default (all rows for <= 50, else first 25 + last 25).
+/// If stacking the head and tail slices fails, the whole frame is returned.
+fn apply_row_selection(cli: &Cli, info: &FileInfo, df: &DataFrame) -> DataFrame {
+    let total = df.height();
+
+    // First `h` rows followed by last `t` rows; callers only use this when
+    // h + t < total, so the two slices never overlap.
+    let head_then_tail = |h: usize, t: usize| -> DataFrame {
+        let head_df = df.head(Some(h));
+        let tail_df = df.tail(Some(t));
+        head_df.vstack(&tail_df).unwrap_or_else(|_| df.clone())
+    };
+
+    if cli.all {
+        return df.clone();
+    }
+
+    if cli.head.is_some() || cli.tail.is_some() {
+        let head_n = cli.head.unwrap_or(0);
+        let tail_n = cli.tail.unwrap_or(0);
+
+        // saturating_add: user-supplied counts may be huge; a plain `+`
+        // would panic in debug builds and wrap around in release builds.
+        if head_n.saturating_add(tail_n) >= total || (head_n == 0 && tail_n == 0) {
+            return df.clone();
+        }
+
+        if cli.tail.is_none() {
+            return df.head(Some(head_n));
+        }
+        if cli.head.is_none() {
+            return df.tail(Some(tail_n));
+        }
+
+        // Both head and tail: combine
+        return head_then_tail(head_n, tail_n);
+    }
+
+    // Large file gate
+    if info.file_size > cli.max_size {
+        return df.head(Some(25));
+    }
+
+    // Adaptive default
+    if total <= 50 {
+        df.clone()
+    } else {
+        head_then_tail(25, 25)
+    }
+}
+
+// ---------------------------------------------------------------------------
+// main()
+// ---------------------------------------------------------------------------
+
+/// Entry point: parses the CLI, runs the orchestration, and on failure
+/// prints `xlcat: <error>` to stderr before exiting non-zero.
+fn main() {
+    let cli = Cli::parse();
+    if let Err(err) = run(&cli) {
+        // Argument errors exit with 2; every other failure exits with 1.
+        let code = if err.downcast_ref::<ArgError>().is_some() {
+            2
+        } else {
+            1
+        };
+        eprintln!("xlcat: {err}");
+        process::exit(code);
+    }
}
diff --git a/xlcat/tests/test_integration.rs b/xlcat/tests/test_integration.rs
@@ -0,0 +1,185 @@
+mod common;
+
+use assert_cmd::Command;
+use predicates::prelude::*;
+use tempfile::TempDir;
+
+/// Builds a `Command` for the `xlcat` binary of this Cargo package.
+/// Panics (via `unwrap`) if the binary cannot be located.
+fn xlcat() -> Command {
+ Command::cargo_bin("xlcat").unwrap()
+}
+
+/// Default invocation on the simple fixture: expects the file/sheet header
+/// lines plus the `name` column and an `Alice` cell in the output.
+#[test]
+fn test_simple_file_default() {
+    let tmp = TempDir::new().unwrap();
+    let workbook = tmp.path().join("simple.xlsx");
+    common::create_simple(&workbook);
+    let file_arg = workbook.to_str().unwrap();
+
+    let outcome = xlcat().arg(file_arg).assert().success();
+    outcome
+        .stdout(predicate::str::contains("# File:"))
+        .stdout(predicate::str::contains("# Sheets: 1"))
+        .stdout(predicate::str::contains("## Sheet: Data"))
+        .stdout(predicate::str::contains("| name |"))
+        .stdout(predicate::str::contains("| Alice |"));
+}
+
+/// `--schema` prints the column/type table and the `name` column, but no
+/// data cell such as `Alice`.
+#[test]
+fn test_schema_mode() {
+    let tmp = TempDir::new().unwrap();
+    let workbook = tmp.path().join("simple.xlsx");
+    common::create_simple(&workbook);
+    let file_arg = workbook.to_str().unwrap();
+
+    let outcome = xlcat().args(&["--schema", file_arg]).assert().success();
+    outcome
+        .stdout(predicate::str::contains("| Column | Type |"))
+        .stdout(predicate::str::contains("| name |"))
+        .stdout(predicate::str::contains("| Alice |").not());
+}
+
+/// Default invocation on the multi-sheet fixture lists all three sheets and
+/// includes the `Use --sheet` hint.
+#[test]
+fn test_multi_sheet_default_lists_schemas() {
+    let tmp = TempDir::new().unwrap();
+    let workbook = tmp.path().join("multi.xlsx");
+    common::create_multi_sheet(&workbook);
+    let file_arg = workbook.to_str().unwrap();
+
+    let outcome = xlcat().arg(file_arg).assert().success();
+    outcome
+        .stdout(predicate::str::contains("# Sheets: 3"))
+        .stdout(predicate::str::contains("## Sheet: Revenue"))
+        .stdout(predicate::str::contains("## Sheet: Expenses"))
+        .stdout(predicate::str::contains("## Sheet: Summary"))
+        .stdout(predicate::str::contains("Use --sheet"));
+}
+
+/// `--sheet Revenue` selects that sheet by name and shows its data.
+#[test]
+fn test_multi_sheet_select_by_name() {
+    let tmp = TempDir::new().unwrap();
+    let workbook = tmp.path().join("multi.xlsx");
+    common::create_multi_sheet(&workbook);
+    let file_arg = workbook.to_str().unwrap();
+
+    let outcome = xlcat()
+        .args(&["--sheet", "Revenue", file_arg])
+        .assert()
+        .success();
+    outcome
+        .stdout(predicate::str::contains("| region |"))
+        .stdout(predicate::str::contains("| Region 1 |"));
+}
+
+/// `--sheet 1` is treated as a 0-based index and selects `Expenses`.
+#[test]
+fn test_multi_sheet_select_by_index() {
+    let tmp = TempDir::new().unwrap();
+    let workbook = tmp.path().join("multi.xlsx");
+    common::create_multi_sheet(&workbook);
+    let file_arg = workbook.to_str().unwrap();
+
+    let outcome = xlcat().args(&["--sheet", "1", file_arg]).assert().success();
+    outcome.stdout(predicate::str::contains("## Sheet: Expenses"));
+}
+
+/// Default invocation on the many-rows fixture reports omitted rows.
+#[test]
+fn test_head_tail_adaptive() {
+    let tmp = TempDir::new().unwrap();
+    let workbook = tmp.path().join("many.xlsx");
+    common::create_many_rows(&workbook);
+    let file_arg = workbook.to_str().unwrap();
+
+    let outcome = xlcat().arg(file_arg).assert().success();
+    outcome.stdout(predicate::str::contains("rows omitted"));
+}
+
+/// `--head 3` on the many-rows fixture prints no "omitted" marker.
+#[test]
+fn test_head_flag() {
+    let tmp = TempDir::new().unwrap();
+    let workbook = tmp.path().join("many.xlsx");
+    common::create_many_rows(&workbook);
+    let file_arg = workbook.to_str().unwrap();
+
+    let outcome = xlcat().args(&["--head", "3", file_arg]).assert().success();
+    outcome.stdout(predicate::str::contains("omitted").not());
+}
+
+/// `--csv` prints comma-separated data without the `# File:` header.
+#[test]
+fn test_csv_mode() {
+    let tmp = TempDir::new().unwrap();
+    let workbook = tmp.path().join("simple.xlsx");
+    common::create_simple(&workbook);
+    let file_arg = workbook.to_str().unwrap();
+
+    let outcome = xlcat().args(&["--csv", file_arg]).assert().success();
+    outcome
+        .stdout(predicate::str::contains("# File:").not())
+        .stdout(predicate::str::contains("name,"));
+}
+
+/// `--schema` together with `--head` exits with code 2 and a
+/// "cannot be combined" message on stderr.
+#[test]
+fn test_invalid_flag_combo() {
+    let tmp = TempDir::new().unwrap();
+    let workbook = tmp.path().join("simple.xlsx");
+    common::create_simple(&workbook);
+    let file_arg = workbook.to_str().unwrap();
+
+    let outcome = xlcat()
+        .args(&["--schema", "--head", "10", file_arg])
+        .assert();
+    outcome
+        .code(2)
+        .stderr(predicate::str::contains("cannot be combined"));
+}
+
+/// A path that does not exist makes the command fail with a message
+/// containing "Cannot" on stderr.
+#[test]
+fn test_file_not_found() {
+    let missing = "/nonexistent.xlsx";
+
+    let outcome = xlcat().arg(missing).assert().failure();
+    outcome.stderr(predicate::str::contains("Cannot"));
+}
+
+/// The empty-sheet fixture succeeds and mentions "empty" in the output.
+#[test]
+fn test_empty_sheet() {
+    let tmp = TempDir::new().unwrap();
+    let workbook = tmp.path().join("empty.xlsx");
+    common::create_empty_sheet(&workbook);
+    let file_arg = workbook.to_str().unwrap();
+
+    let outcome = xlcat().arg(file_arg).assert().success();
+    outcome.stdout(predicate::str::contains("empty"));
+}
+
+/// `--all` on a multi-sheet file without `--sheet` fails with a
+/// "Multiple sheets" message on stderr.
+#[test]
+fn test_all_without_sheet_on_multi() {
+    let tmp = TempDir::new().unwrap();
+    let workbook = tmp.path().join("multi.xlsx");
+    common::create_multi_sheet(&workbook);
+    let file_arg = workbook.to_str().unwrap();
+
+    let outcome = xlcat().args(&["--all", file_arg]).assert().failure();
+    outcome.stderr(predicate::str::contains("Multiple sheets"));
+}