xl-cli-tools

CLI tools for viewing and editing Excel files
Log | Files | Refs | README | LICENSE

commit 1228da956b1940425e10a130d7f829382cd593d3
parent d63e6dd2fe430319758f09c653a81637dd24308c
Author: Erik Loualiche <eloualic@umn.edu>
Date:   Fri, 13 Mar 2026 16:00:42 -0500

feat: wire up main orchestration — data, schema, multi-sheet modes

Full CLI orchestration: single/multi-sheet routing, row selection
(--head/--tail/--all), adaptive display (<=50 all, >50 head+tail),
large-file gate, --csv output, --schema/--describe modes, and
proper error handling with exit code 2 for arg errors.

12 integration tests covering all modes and error paths.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
M xlcat/src/main.rs | 298 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
A xlcat/tests/test_integration.rs | 185 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 472 insertions(+), 11 deletions(-)

diff --git a/xlcat/src/main.rs b/xlcat/src/main.rs @@ -4,7 +4,11 @@ mod reader; use anyhow::Result; use clap::Parser; +use polars::prelude::*; use std::path::PathBuf; +use std::process; + +use metadata::{FileInfo, SheetInfo}; #[derive(Parser, Debug)] #[command(name = "xlcat", about = "View Excel files in the terminal")] @@ -66,21 +70,293 @@ fn parse_size(s: &str) -> Result<u64, String> { Ok((num * multiplier as f64) as u64) } -fn main() -> Result<()> { - let cli = Cli::parse(); +// --------------------------------------------------------------------------- +// ArgError — used for user-facing flag/argument errors (exit code 2) +// --------------------------------------------------------------------------- + +#[derive(Debug)] +struct ArgError(String); - // Validate flag combinations - let mode_count = cli.schema as u8 + cli.describe as u8; - if mode_count > 1 { - anyhow::bail!("--schema and --describe are mutually exclusive"); +impl std::fmt::Display for ArgError { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{}", self.0) } - if (cli.schema || cli.describe) && (cli.head.is_some() || cli.tail.is_some() || cli.all) { - anyhow::bail!("--schema and --describe cannot be combined with --head, --tail, or --all"); +} + +impl std::error::Error for ArgError {} + +// --------------------------------------------------------------------------- +// Sheet resolution +// --------------------------------------------------------------------------- + +enum SheetTarget { + Single(usize), + ListAll, +} + +// --------------------------------------------------------------------------- +// run() — main orchestration +// --------------------------------------------------------------------------- + +fn run(cli: &Cli) -> Result<()> { + // 1. 
Validate flag combinations + if cli.schema && cli.describe { + return Err(ArgError("--schema and --describe are mutually exclusive".into()).into()); + } + if (cli.schema || cli.describe) + && (cli.head.is_some() || cli.tail.is_some() || cli.all) + { + return Err(ArgError( + "--schema/--describe cannot be combined with --head, --tail, or --all".into(), + ) + .into()); } if (cli.schema || cli.describe) && cli.csv { - anyhow::bail!("--csv can only be used in data mode (not with --schema or --describe)"); + return Err(ArgError( + "--csv cannot be combined with --schema or --describe".into(), + ) + .into()); + } + + // 2. Read file metadata + let info = metadata::read_file_info(&cli.file)?; + let file_name = cli + .file + .file_name() + .map(|s| s.to_string_lossy().to_string()) + .unwrap_or_else(|| cli.file.display().to_string()); + + // 3. Resolve sheet target + let target = resolve_sheet_target(cli, &info)?; + + match target { + SheetTarget::Single(idx) => { + let sheet = &info.sheets[idx]; + let df = reader::read_sheet(&cli.file, &sheet.name)?; + render_single_sheet(cli, &file_name, &info, sheet, &df)?; + } + SheetTarget::ListAll => { + if cli.describe { + // --describe on multi-sheet: iterate all sheets + let mut out = formatter::format_header(&file_name, &info); + out.push('\n'); + for sheet in &info.sheets { + let df = reader::read_sheet(&cli.file, &sheet.name)?; + if sheet.rows == 0 && sheet.cols == 0 { + out.push_str(&formatter::format_empty_sheet(sheet)); + } else { + out.push_str(&formatter::format_schema(sheet, &df)); + out.push_str(&formatter::format_describe(&df)); + } + out.push('\n'); + } + print!("{out}"); + } else { + // Default multi-sheet: list schemas + let mut pairs: Vec<(&SheetInfo, DataFrame)> = Vec::new(); + for sheet in &info.sheets { + let df = reader::read_sheet(&cli.file, &sheet.name)?; + pairs.push((sheet, df)); + } + let out = formatter::format_sheet_listing(&file_name, &info, &pairs); + print!("{out}"); + } + } } - eprintln!("xlcat: not 
yet implemented"); - std::process::exit(1); + Ok(()) +} + +fn resolve_sheet_target(cli: &Cli, info: &FileInfo) -> Result<SheetTarget> { + if let Some(ref sheet_arg) = cli.sheet { + // Try name match first + if let Some(idx) = info.sheets.iter().position(|s| s.name == *sheet_arg) { + return Ok(SheetTarget::Single(idx)); + } + // Try 0-based index + if let Ok(idx) = sheet_arg.parse::<usize>() { + if idx < info.sheets.len() { + return Ok(SheetTarget::Single(idx)); + } + return Err(ArgError(format!( + "Sheet index {idx} out of range (file has {} sheets)", + info.sheets.len() + )) + .into()); + } + return Err(ArgError(format!("Sheet not found: {sheet_arg}")).into()); + } + + if info.sheets.len() == 1 { + return Ok(SheetTarget::Single(0)); + } + + // Multi-sheet, no --sheet specified + let has_row_flags = cli.all || cli.head.is_some() || cli.tail.is_some() || cli.csv; + if has_row_flags { + return Err(ArgError( + "Multiple sheets found. Use --sheet <name> to select one before using --all, --head, --tail, or --csv.".into(), + ) + .into()); + } + + Ok(SheetTarget::ListAll) +} + +fn render_single_sheet( + cli: &Cli, + file_name: &str, + info: &FileInfo, + sheet: &SheetInfo, + df: &DataFrame, +) -> Result<()> { + // CSV mode: apply row selection, output CSV, done + if cli.csv { + let selected = apply_row_selection(cli, info, df); + let csv_out = formatter::format_csv(&selected); + print!("{csv_out}"); + return Ok(()); + } + + let mut out = formatter::format_header(file_name, info); + out.push('\n'); + + // Completely empty sheet (0 rows, 0 cols) + if sheet.rows == 0 && sheet.cols == 0 { + out.push_str(&formatter::format_empty_sheet(sheet)); + print!("{out}"); + return Ok(()); + } + + // Header-only sheet (has columns but 0 data rows) + if df.height() == 0 { + out.push_str(&formatter::format_schema(sheet, df)); + out.push_str("\n(no data rows)\n"); + print!("{out}"); + return Ok(()); + } + + if cli.schema { + out.push_str(&formatter::format_schema(sheet, df)); + } else if 
cli.describe { + out.push_str(&formatter::format_schema(sheet, df)); + out.push_str(&formatter::format_describe(df)); + } else { + // Data mode + out.push_str(&formatter::format_schema(sheet, df)); + out.push('\n'); + out.push_str(&format_data_with_selection(cli, info, df)); + } + + print!("{out}"); + Ok(()) +} + +/// Format data output with row selection logic. +fn format_data_with_selection(cli: &Cli, info: &FileInfo, df: &DataFrame) -> String { + let total = df.height(); + + // --all: show everything + if cli.all { + return formatter::format_data_table(df); + } + + // Explicit --head and/or --tail + if cli.head.is_some() || cli.tail.is_some() { + let head_n = cli.head.unwrap_or(0); + let tail_n = cli.tail.unwrap_or(0); + if head_n + tail_n >= total || (head_n == 0 && tail_n == 0) { + return formatter::format_data_table(df); + } + // If only --head, show first N + if cli.tail.is_none() { + let head_df = df.head(Some(head_n)); + return formatter::format_data_table(&head_df); + } + // If only --tail, show last N + if cli.head.is_none() { + let tail_df = df.tail(Some(tail_n)); + return formatter::format_data_table(&tail_df); + } + // Both specified + return formatter::format_head_tail(df, head_n, tail_n); + } + + // Large file gate: file_size > max_size and no explicit flags + if info.file_size > cli.max_size { + let mut out = formatter::format_head_tail(df, 25, 0); + out.push_str(&format!( + "\nLarge file ({}) — showing first 25 of {total} rows. Use --all to see everything.\n", + metadata::format_file_size(info.file_size) + )); + return out; + } + + // Adaptive default: <=50 rows show all, >50 show head 25 + tail 25 + if total <= 50 { + formatter::format_data_table(df) + } else { + formatter::format_head_tail(df, 25, 25) + } +} + +/// Apply row selection for CSV mode — returns a (possibly sliced) DataFrame. 
+fn apply_row_selection(cli: &Cli, info: &FileInfo, df: &DataFrame) -> DataFrame { + let total = df.height(); + + if cli.all { + return df.clone(); + } + + if cli.head.is_some() || cli.tail.is_some() { + let head_n = cli.head.unwrap_or(0); + let tail_n = cli.tail.unwrap_or(0); + + if head_n + tail_n >= total || (head_n == 0 && tail_n == 0) { + return df.clone(); + } + + if cli.tail.is_none() { + return df.head(Some(head_n)); + } + if cli.head.is_none() { + return df.tail(Some(tail_n)); + } + + // Both head and tail: combine + let head_df = df.head(Some(head_n)); + let tail_df = df.tail(Some(tail_n)); + return head_df.vstack(&tail_df).unwrap_or_else(|_| df.clone()); + } + + // Large file gate + if info.file_size > cli.max_size { + return df.head(Some(25)); + } + + // Adaptive default + if total <= 50 { + df.clone() + } else { + let head_df = df.head(Some(25)); + let tail_df = df.tail(Some(25)); + head_df.vstack(&tail_df).unwrap_or_else(|_| df.clone()) + } +} + +// --------------------------------------------------------------------------- +// main() +// --------------------------------------------------------------------------- + +fn main() { + let cli = Cli::parse(); + if let Err(err) = run(&cli) { + // Check if the root cause is an ArgError + if err.downcast_ref::<ArgError>().is_some() { + eprintln!("xlcat: {err}"); + process::exit(2); + } + eprintln!("xlcat: {err}"); + process::exit(1); + } } diff --git a/xlcat/tests/test_integration.rs b/xlcat/tests/test_integration.rs @@ -0,0 +1,185 @@ +mod common; + +use assert_cmd::Command; +use predicates::prelude::*; +use tempfile::TempDir; + +fn xlcat() -> Command { + Command::cargo_bin("xlcat").unwrap() +} + +#[test] +fn test_simple_file_default() { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("simple.xlsx"); + common::create_simple(&path); + + xlcat() + .arg(path.to_str().unwrap()) + .assert() + .success() + .stdout(predicate::str::contains("# File:")) + .stdout(predicate::str::contains("# Sheets: 
1")) + .stdout(predicate::str::contains("## Sheet: Data")) + .stdout(predicate::str::contains("| name |")) + .stdout(predicate::str::contains("| Alice |")); +} + +#[test] +fn test_schema_mode() { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("simple.xlsx"); + common::create_simple(&path); + + xlcat() + .arg("--schema") + .arg(path.to_str().unwrap()) + .assert() + .success() + .stdout(predicate::str::contains("| Column | Type |")) + .stdout(predicate::str::contains("| name |")) + .stdout(predicate::str::contains("| Alice |").not()); +} + +#[test] +fn test_multi_sheet_default_lists_schemas() { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("multi.xlsx"); + common::create_multi_sheet(&path); + + xlcat() + .arg(path.to_str().unwrap()) + .assert() + .success() + .stdout(predicate::str::contains("# Sheets: 3")) + .stdout(predicate::str::contains("## Sheet: Revenue")) + .stdout(predicate::str::contains("## Sheet: Expenses")) + .stdout(predicate::str::contains("## Sheet: Summary")) + .stdout(predicate::str::contains("Use --sheet")); +} + +#[test] +fn test_multi_sheet_select_by_name() { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("multi.xlsx"); + common::create_multi_sheet(&path); + + xlcat() + .arg("--sheet") + .arg("Revenue") + .arg(path.to_str().unwrap()) + .assert() + .success() + .stdout(predicate::str::contains("| region |")) + .stdout(predicate::str::contains("| Region 1 |")); +} + +#[test] +fn test_multi_sheet_select_by_index() { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("multi.xlsx"); + common::create_multi_sheet(&path); + + xlcat() + .arg("--sheet") + .arg("1") + .arg(path.to_str().unwrap()) + .assert() + .success() + .stdout(predicate::str::contains("## Sheet: Expenses")); +} + +#[test] +fn test_head_tail_adaptive() { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("many.xlsx"); + common::create_many_rows(&path); + + xlcat() + .arg(path.to_str().unwrap()) + 
.assert() + .success() + .stdout(predicate::str::contains("rows omitted")); +} + +#[test] +fn test_head_flag() { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("many.xlsx"); + common::create_many_rows(&path); + + xlcat() + .arg("--head") + .arg("3") + .arg(path.to_str().unwrap()) + .assert() + .success() + .stdout(predicate::str::contains("omitted").not()); +} + +#[test] +fn test_csv_mode() { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("simple.xlsx"); + common::create_simple(&path); + + xlcat() + .arg("--csv") + .arg(path.to_str().unwrap()) + .assert() + .success() + .stdout(predicate::str::contains("# File:").not()) + .stdout(predicate::str::contains("name,")); +} + +#[test] +fn test_invalid_flag_combo() { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("simple.xlsx"); + common::create_simple(&path); + + xlcat() + .arg("--schema") + .arg("--head") + .arg("10") + .arg(path.to_str().unwrap()) + .assert() + .code(2) + .stderr(predicate::str::contains("cannot be combined")); +} + +#[test] +fn test_file_not_found() { + xlcat() + .arg("/nonexistent.xlsx") + .assert() + .failure() + .stderr(predicate::str::contains("Cannot")); +} + +#[test] +fn test_empty_sheet() { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("empty.xlsx"); + common::create_empty_sheet(&path); + + xlcat() + .arg(path.to_str().unwrap()) + .assert() + .success() + .stdout(predicate::str::contains("empty")); +} + +#[test] +fn test_all_without_sheet_on_multi() { + let dir = TempDir::new().unwrap(); + let path = dir.path().join("multi.xlsx"); + common::create_multi_sheet(&path); + + xlcat() + .arg("--all") + .arg(path.to_str().unwrap()) + .assert() + .failure() + .stderr(predicate::str::contains("Multiple sheets")); +}