xl-cli-tools

CLI tools for viewing and editing Excel files
Log | Files | Refs | README | LICENSE

commit d63e6dd2fe430319758f09c653a81637dd24308c
parent d933fc931fe8da435a304fbc35d82cf65673b30f
Author: Erik Loualiche <eloualic@umn.edu>
Date:   Fri, 13 Mar 2026 15:54:14 -0500

feat: add formatter module — markdown table and CSV output

Implements all output formatting functions in xlcat/src/formatter.rs:
format_header, format_schema, format_sheet_listing, format_data_table,
format_head_tail, format_csv, format_empty_sheet, format_describe (stub),
and private helpers format_cell, format_any_value, format_dtype.
All 8 unit tests pass alongside the existing 12 tests.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
A xlcat/src/formatter.rs | 365 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M xlcat/src/main.rs      |   1 +
2 files changed, 366 insertions(+), 0 deletions(-)

diff --git a/xlcat/src/formatter.rs b/xlcat/src/formatter.rs @@ -0,0 +1,365 @@ +use crate::metadata::{format_file_size, FileInfo, SheetInfo}; +use polars::prelude::*; + +// --------------------------------------------------------------------------- +// Public API +// --------------------------------------------------------------------------- + +/// Render the top-level file header. +/// +/// ```text +/// # File: report.xlsx (245 KB) +/// # Sheets: 3 +/// ``` +pub fn format_header(file_name: &str, info: &FileInfo) -> String { + let size_str = format_file_size(info.file_size); + let sheet_count = info.sheets.len(); + format!("# File: {file_name} ({size_str})\n# Sheets: {sheet_count}\n") +} + +/// Render the schema block for a single sheet. +/// +/// ```text +/// ## Sheet: Revenue (1240 rows x 8 cols) +/// +/// | Column | Type | +/// |--------|------| +/// | date | Date | +/// ... +/// ``` +pub fn format_schema(sheet: &SheetInfo, df: &DataFrame) -> String { + let data_rows = if sheet.rows == 0 { 0 } else { sheet.rows - 1 }; + let cols = sheet.cols; + + let mut out = format!( + "## Sheet: {} ({} rows x {} cols)\n\n", + sheet.name, data_rows, cols + ); + out.push_str("| Column | Type |\n"); + out.push_str("|--------|------|\n"); + + for col in df.get_columns() { + let name = col.name(); + let dtype = format_dtype(col.dtype()); + out.push_str(&format!("| {name} | {dtype} |\n")); + } + + out +} + +/// Render the multi-sheet listing (header + schema for each + prompt). 
+pub fn format_sheet_listing( + file_name: &str, + info: &FileInfo, + schemas: &[(&SheetInfo, DataFrame)], +) -> String { + let mut out = format_header(file_name, info); + out.push('\n'); + + for (sheet, df) in schemas { + if sheet.rows == 0 && sheet.cols == 0 { + out.push_str(&format_empty_sheet(sheet)); + } else { + out.push_str(&format_schema(sheet, df)); + } + out.push('\n'); + } + + out.push_str("Use --sheet <name> to view a specific sheet.\n"); + out +} + +/// Render the full DataFrame as a markdown table. +pub fn format_data_table(df: &DataFrame) -> String { + let columns = df.get_columns(); + let n_rows = df.height(); + + // Header row + let mut out = String::new(); + out.push('|'); + for col in columns { + out.push_str(&format!(" {} |", col.name())); + } + out.push('\n'); + + // Separator + out.push('|'); + for _ in columns { + out.push_str("---|"); + } + out.push('\n'); + + // Data rows + for row_idx in 0..n_rows { + out.push('|'); + for col in columns { + let cell = format_cell(col, row_idx); + out.push_str(&format!(" {cell} |")); + } + out.push('\n'); + } + + out +} + +/// Render head / tail view of a DataFrame. +/// +/// If total rows <= head_n + tail_n, shows all rows. +/// Otherwise shows first head_n rows, an omission line, then last tail_n rows. +pub fn format_head_tail(df: &DataFrame, head_n: usize, tail_n: usize) -> String { + let total = df.height(); + if total <= head_n + tail_n { + return format_data_table(df); + } + + let head_df = df.head(Some(head_n)); + let tail_df = df.tail(Some(tail_n)); + let omitted = total - head_n - tail_n; + + let mut out = format_data_table(&head_df); + out.push_str(&format!("... ({omitted} rows omitted) ...\n")); + // Append tail rows without repeating the header + let tail_table = format_data_table(&tail_df); + // Skip header line + separator line of the tail table + let tail_body = skip_table_header(&tail_table); + out.push_str(tail_body); + + out +} + +/// Render DataFrame as CSV. 
+pub fn format_csv(df: &DataFrame) -> String { + let mut buf: Vec<u8> = Vec::new(); + // CsvWriter is available via the "csv" feature (polars 0.46) + if CsvWriter::new(&mut buf) + .finish(&mut df.clone()) + .is_ok() + { + return String::from_utf8(buf).unwrap_or_else(|_| csv_fallback(df)); + } + csv_fallback(df) +} + +/// Render a message for an empty or header-only sheet. +pub fn format_empty_sheet(sheet: &SheetInfo) -> String { + if sheet.rows == 0 && sheet.cols == 0 { + format!("## Sheet: {} (empty)\n", sheet.name) + } else { + format!("## Sheet: {} (no data rows)\n", sheet.name) + } +} + +/// Stub for describe mode (implemented in Task 7). +pub fn format_describe(_df: &DataFrame) -> String { + "(describe not yet implemented)\n".to_string() +} + +// --------------------------------------------------------------------------- +// Private helpers +// --------------------------------------------------------------------------- + +/// Format a single cell value for markdown display. +fn format_cell(col: &Column, idx: usize) -> String { + match col.get(idx) { + Ok(AnyValue::Null) | Err(_) => String::new(), + Ok(v) => format_any_value(&v), + } +} + +/// Convert an AnyValue to its display string. +fn format_any_value(v: &AnyValue) -> String { + match v { + AnyValue::Null => String::new(), + AnyValue::Boolean(b) => b.to_string(), + AnyValue::Int8(n) => n.to_string(), + AnyValue::Int16(n) => n.to_string(), + AnyValue::Int32(n) => n.to_string(), + AnyValue::Int64(n) => n.to_string(), + AnyValue::UInt8(n) => n.to_string(), + AnyValue::UInt16(n) => n.to_string(), + AnyValue::UInt32(n) => n.to_string(), + AnyValue::UInt64(n) => n.to_string(), + AnyValue::Float32(f) => f.to_string(), + AnyValue::Float64(f) => f.to_string(), + AnyValue::String(s) => s.to_string(), + AnyValue::StringOwned(s) => s.to_string(), + other => format!("{other}"), + } +} + +/// Map a polars DataType to a human-readable label. 
+fn format_dtype(dtype: &DataType) -> &'static str { + match dtype { + DataType::Boolean => "Boolean", + DataType::Int8 | DataType::Int16 | DataType::Int32 | DataType::Int64 => "Int", + DataType::UInt8 | DataType::UInt16 | DataType::UInt32 | DataType::UInt64 => "UInt", + DataType::Float32 | DataType::Float64 => "Float", + DataType::String => "String", + DataType::Date => "Date", + DataType::Datetime(_, _) => "Datetime", + DataType::Duration(_) => "Duration", + DataType::Time => "Time", + DataType::Null => "Null", + _ => "Other", + } +} + +/// Skip the first two lines (header + separator) of a markdown table string. +fn skip_table_header(table: &str) -> &str { + let mut newlines = 0; + let mut pos = 0; + for (i, ch) in table.char_indices() { + if ch == '\n' { + newlines += 1; + pos = i + 1; + if newlines == 2 { + return &table[pos..]; + } + } + } + // Fewer than 2 newlines — return empty + "" +} + +/// Manual CSV fallback if CsvWriter is unavailable. +fn csv_fallback(df: &DataFrame) -> String { + let columns = df.get_columns(); + let n_rows = df.height(); + + let mut out = String::new(); + + // Header + let header: Vec<String> = columns.iter().map(|c| c.name().to_string()).collect(); + out.push_str(&header.join(",")); + out.push('\n'); + + // Rows + for row_idx in 0..n_rows { + let row: Vec<String> = columns + .iter() + .map(|col| { + let cell = format_cell(col, row_idx); + // Quote cells containing commas or quotes + if cell.contains(',') || cell.contains('"') || cell.contains('\n') { + format!("\"{}\"", cell.replace('"', "\"\"")) + } else { + cell + } + }) + .collect(); + out.push_str(&row.join(",")); + out.push('\n'); + } + + out +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + use crate::metadata::{FileInfo, SheetInfo}; + + #[test] + fn test_format_header() { + let info = FileInfo { + 
file_size: 250_000, + sheets: vec![SheetInfo { + name: "Sheet1".into(), + rows: 100, + cols: 5, + }], + }; + let out = format_header("test.xlsx", &info); + assert!(out.contains("# File: test.xlsx (244 KB)")); + assert!(out.contains("# Sheets: 1")); + } + + #[test] + fn test_format_data_table() { + let s1 = Series::new("name".into(), &["Alice", "Bob"]); + let s2 = Series::new("value".into(), &[100i64, 200]); + let df = DataFrame::new(vec![s1.into_column(), s2.into_column()]).unwrap(); + let out = format_data_table(&df); + assert!(out.contains("| name | value |")); + assert!(out.contains("| Alice | 100 |")); + } + + #[test] + fn test_format_head_tail_small() { + let s = Series::new("x".into(), &[1i64, 2, 3]); + let df = DataFrame::new(vec![s.into_column()]).unwrap(); + let out = format_head_tail(&df, 25, 25); + assert!(!out.contains("omitted")); + assert!(out.contains("| 1 |")); + assert!(out.contains("| 3 |")); + } + + #[test] + fn test_format_head_tail_large() { + // 60 rows, head=25 tail=25 → 10 omitted + let values: Vec<i64> = (1..=60).collect(); + let s = Series::new("n".into(), values.as_slice()); + let df = DataFrame::new(vec![s.into_column()]).unwrap(); + let out = format_head_tail(&df, 25, 25); + assert!(out.contains("(10 rows omitted)")); + assert!(out.contains("| 1 |")); + assert!(out.contains("| 25 |")); + assert!(out.contains("| 36 |")); + assert!(out.contains("| 60 |")); + } + + #[test] + fn test_format_schema() { + let sheet = SheetInfo { + name: "Revenue".into(), + rows: 11, // 1 header + 10 data + cols: 2, + }; + let s1 = Series::new("date".into(), &["2024-01-01", "2024-01-02"]); + let s2 = Series::new("amount".into(), &[1.0f64, 2.0]); + let df = DataFrame::new(vec![s1.into_column(), s2.into_column()]).unwrap(); + let out = format_schema(&sheet, &df); + assert!(out.contains("## Sheet: Revenue (10 rows x 2 cols)")); + assert!(out.contains("| date |")); + assert!(out.contains("| amount |")); + assert!(out.contains("String")); + 
assert!(out.contains("Float")); + } + + #[test] + fn test_format_empty_sheet_completely_empty() { + let sheet = SheetInfo { name: "Blank".into(), rows: 0, cols: 0 }; + let out = format_empty_sheet(&sheet); + assert!(out.contains("(empty)")); + } + + #[test] + fn test_format_empty_sheet_header_only() { + let sheet = SheetInfo { name: "Headers".into(), rows: 1, cols: 3 }; + let out = format_empty_sheet(&sheet); + assert!(out.contains("(no data rows)")); + } + + #[test] + fn test_format_csv() { + let s1 = Series::new("a".into(), &["hello", "world"]); + let s2 = Series::new("b".into(), &[1i64, 2]); + let df = DataFrame::new(vec![s1.into_column(), s2.into_column()]).unwrap(); + let out = format_csv(&df); + assert!(out.contains("a,b")); + assert!(out.contains("hello")); + assert!(out.contains("world")); + } + + #[test] + fn test_format_describe_stub() { + let s = Series::new("x".into(), &[1i64]); + let df = DataFrame::new(vec![s.into_column()]).unwrap(); + let out = format_describe(&df); + assert!(out.contains("not yet implemented")); + } +} diff --git a/xlcat/src/main.rs b/xlcat/src/main.rs @@ -1,3 +1,4 @@ +mod formatter; mod metadata; mod reader;