feat: add formatter module — markdown table and CSV output - xl-cli-tools - CLI tools for viewing and editing Excel files

commit d63e6dd2fe430319758f09c653a81637dd24308c
parent d933fc931fe8da435a304fbc35d82cf65673b30f
Author: Erik Loualiche <eloualic@umn.edu>
Date:   Fri, 13 Mar 2026 15:54:14 -0500

feat: add formatter module — markdown table and CSV output

Implements all output formatting functions in xlcat/src/formatter.rs:
format_header, format_schema, format_sheet_listing, format_data_table,
format_head_tail, format_csv, format_empty_sheet, format_describe (stub),
and private helpers format_cell, format_any_value, format_dtype.
All 9 unit tests pass alongside the existing 12 tests.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Diffstat:
A xlcat/src/formatter.rs  | 365 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M xlcat/src/main.rs  | 1 +

2 files changed, 366 insertions(+), 0 deletions(-)
diff --git a/xlcat/src/formatter.rs b/xlcat/src/formatter.rs
@@ -0,0 +1,365 @@
+use crate::metadata::{format_file_size, FileInfo, SheetInfo};
+use polars::prelude::*;
+
+// ---------------------------------------------------------------------------
+// Public API
+// ---------------------------------------------------------------------------
+
+/// Build the two-line banner printed at the top of the output.
+///
+/// The first line carries `file_name` and the size string produced by
+/// `format_file_size(info.file_size)`; the second line carries the number
+/// of entries in `info.sheets`.
+///
+/// ```text
+/// # File: report.xlsx (245 KB)
+/// # Sheets: 3
+/// ```
+pub fn format_header(file_name: &str, info: &FileInfo) -> String {
+    format!(
+        "# File: {file_name} ({size_str})\n# Sheets: {sheet_count}\n",
+        size_str = format_file_size(info.file_size),
+        sheet_count = info.sheets.len(),
+    )
+}
+
+/// Produce the schema section for one sheet: a heading line followed by a
+/// two-column markdown table listing each DataFrame column and its type label.
+///
+/// The row count in the heading is `sheet.rows` minus one (the unit test
+/// treats the first row as the header), or zero when the sheet reports no
+/// rows. The column count is taken from `sheet.cols`; the table rows come
+/// from `df`.
+///
+/// ```text
+/// ## Sheet: Revenue (1240 rows x 8 cols)
+///
+/// | Column | Type |
+/// |--------|------|
+/// | date   | Date |
+/// ...
+/// ```
+pub fn format_schema(sheet: &SheetInfo, df: &DataFrame) -> String {
+    // Do not count the header row as data; a sheet with no rows stays at zero.
+    let data_rows = match sheet.rows {
+        0 => 0,
+        n => n - 1,
+    };
+
+    let mut out = format!(
+        "## Sheet: {} ({} rows x {} cols)\n\n",
+        sheet.name, data_rows, sheet.cols
+    );
+    out += "| Column | Type |\n";
+    out += "|--------|------|\n";
+
+    // One table line per DataFrame column.
+    out.extend(df.get_columns().iter().map(|column| {
+        let name = column.name();
+        let dtype = format_dtype(column.dtype());
+        format!("| {name} | {dtype} |\n")
+    }));
+
+    out
+}
+
+/// Produce the overview shown for a multi-sheet workbook.
+///
+/// The output is the file header, a blank line, then one section per entry
+/// in `schemas` (each followed by a blank line), and finally a hint telling
+/// the user how to select a single sheet. A sheet reporting zero rows and
+/// zero columns gets the empty-sheet message; every other sheet gets its
+/// schema table.
+pub fn format_sheet_listing(
+    file_name: &str,
+    info: &FileInfo,
+    schemas: &[(&SheetInfo, DataFrame)],
+) -> String {
+    let mut listing = format_header(file_name, info);
+    listing.push('\n');
+
+    for (sheet, df) in schemas {
+        let is_blank = sheet.rows == 0 && sheet.cols == 0;
+        let section = if is_blank {
+            format_empty_sheet(sheet)
+        } else {
+            format_schema(sheet, df)
+        };
+        listing.push_str(&section);
+        listing.push('\n');
+    }
+
+    listing + "Use --sheet <name> to view a specific sheet.\n"
+}
+
+/// Render the full DataFrame as a markdown table.
+///
+/// Output is a header row of column names, a `|---|` separator row, then one
+/// line per DataFrame row. Null cells (and cells that cannot be read) render
+/// as empty strings.
+///
+/// Column names and cell text are sanitised so the table structure survives
+/// arbitrary spreadsheet content: a literal `|` is written as `\|`, and line
+/// breaks are replaced by a single space. Left raw, either one would split a
+/// cell or a row, and a line break in a column name would also push the
+/// header past the first two lines of the output.
+pub fn format_data_table(df: &DataFrame) -> String {
+    let columns = df.get_columns();
+
+    let mut out = String::new();
+
+    // Header row
+    out.push('|');
+    for col in columns {
+        out.push(' ');
+        out.push_str(&escape_table_cell(col.name()));
+        out.push_str(" |");
+    }
+    out.push('\n');
+
+    // Separator
+    out.push('|');
+    for _ in columns {
+        out.push_str("---|");
+    }
+    out.push('\n');
+
+    // Data rows
+    for row_idx in 0..df.height() {
+        out.push('|');
+        for col in columns {
+            out.push(' ');
+            out.push_str(&escape_table_cell(&format_cell(col, row_idx)));
+            out.push_str(" |");
+        }
+        out.push('\n');
+    }
+
+    out
+}
+
+/// Make `text` safe to place inside a single markdown table cell.
+///
+/// `|` becomes `\|`; each `\r\n`, `\n` or `\r` becomes one space.
+fn escape_table_cell(text: &str) -> String {
+    let mut escaped = String::with_capacity(text.len());
+    let mut chars = text.chars().peekable();
+    while let Some(ch) = chars.next() {
+        match ch {
+            '|' => escaped.push_str("\\|"),
+            '\r' => {
+                // Collapse CRLF into a single line break before flattening it.
+                if chars.peek() == Some(&'\n') {
+                    chars.next();
+                }
+                escaped.push(' ');
+            }
+            '\n' => escaped.push(' '),
+            other => escaped.push(other),
+        }
+    }
+    escaped
+}
+
+/// Render head / tail view of a DataFrame.
+///
+/// If total rows <= head_n + tail_n, shows all rows.
+/// Otherwise shows first head_n rows, an omission line of the form
+/// `... (N rows omitted) ...`, then last tail_n rows without repeating the
+/// table header.
+///
+/// `head_n + tail_n` is computed with saturation, so extremely large values
+/// simply mean "show everything" instead of wrapping around.
+pub fn format_head_tail(df: &DataFrame, head_n: usize, tail_n: usize) -> String {
+    let total = df.height();
+
+    // A wrapped sum could slip past the guard below and then underflow the
+    // `omitted` subtraction; a saturated sum always takes the early return.
+    let shown = head_n.saturating_add(tail_n);
+    if total <= shown {
+        return format_data_table(df);
+    }
+
+    // Past the guard, `shown < total`, so it is the exact (unsaturated) sum.
+    let omitted = total - shown;
+    let head_df = df.head(Some(head_n));
+    let tail_df = df.tail(Some(tail_n));
+
+    let mut out = format_data_table(&head_df);
+    out.push_str(&format!("... ({omitted} rows omitted) ...\n"));
+
+    // Append tail rows without repeating the header: render the tail as its
+    // own table, then drop its header line and separator line.
+    let tail_table = format_data_table(&tail_df);
+    out.push_str(skip_table_header(&tail_table));
+
+    out
+}
+
+/// Serialize `df` to CSV text.
+///
+/// The frame is written with `CsvWriter` into an in-memory buffer. If the
+/// writer reports an error, or the bytes it produced are not valid UTF-8,
+/// the output of `csv_fallback` is returned instead.
+pub fn format_csv(df: &DataFrame) -> String {
+    // `finish` wants `&mut DataFrame`, so write from a private copy.
+    let mut frame = df.clone();
+    let mut bytes: Vec<u8> = Vec::new();
+
+    // CsvWriter is available via the "csv" feature (polars 0.46)
+    let written = CsvWriter::new(&mut bytes).finish(&mut frame);
+
+    match written {
+        Ok(_) => String::from_utf8(bytes).unwrap_or_else(|_| csv_fallback(df)),
+        Err(_) => csv_fallback(df),
+    }
+}
+
+/// Produce the one-line heading used for a sheet that has nothing to show.
+///
+/// A sheet with zero rows and zero columns is labelled `(empty)`; any other
+/// sheet passed here is labelled `(no data rows)`.
+pub fn format_empty_sheet(sheet: &SheetInfo) -> String {
+    match (sheet.rows, sheet.cols) {
+        (0, 0) => format!("## Sheet: {} (empty)\n", sheet.name),
+        _ => format!("## Sheet: {} (no data rows)\n", sheet.name),
+    }
+}
+
+/// Placeholder for describe mode (to be implemented in Task 7).
+///
+/// Ignores its argument and returns a fixed "not yet implemented" line.
+pub fn format_describe(_df: &DataFrame) -> String {
+    String::from("(describe not yet implemented)\n")
+}
+
+// ---------------------------------------------------------------------------
+// Private helpers
+// ---------------------------------------------------------------------------
+
+/// Turn the value at row `idx` of `col` into display text.
+///
+/// Returns an empty string when the lookup fails or the value is null;
+/// every other value is rendered by `format_any_value`.
+fn format_cell(col: &Column, idx: usize) -> String {
+    let Ok(value) = col.get(idx) else {
+        return String::new();
+    };
+
+    if matches!(value, AnyValue::Null) {
+        String::new()
+    } else {
+        format_any_value(&value)
+    }
+}
+
+/// Render an `AnyValue` as plain display text.
+///
+/// Nulls become the empty string. Booleans, integers, floats and strings are
+/// printed from their inner value; every remaining variant falls back to the
+/// `Display` output of the `AnyValue` itself.
+fn format_any_value(v: &AnyValue) -> String {
+    match v {
+        // Text: copy the inner string.
+        AnyValue::String(text) => (*text).to_owned(),
+        AnyValue::StringOwned(text) => text.to_string(),
+
+        AnyValue::Null => String::new(),
+        AnyValue::Boolean(flag) => format!("{flag}"),
+
+        // Floating point
+        AnyValue::Float64(x) => format!("{x}"),
+        AnyValue::Float32(x) => format!("{x}"),
+
+        // Signed integers
+        AnyValue::Int64(i) => format!("{i}"),
+        AnyValue::Int32(i) => format!("{i}"),
+        AnyValue::Int16(i) => format!("{i}"),
+        AnyValue::Int8(i) => format!("{i}"),
+
+        // Unsigned integers
+        AnyValue::UInt64(u) => format!("{u}"),
+        AnyValue::UInt32(u) => format!("{u}"),
+        AnyValue::UInt16(u) => format!("{u}"),
+        AnyValue::UInt8(u) => format!("{u}"),
+
+        // Anything else: defer to the value's own Display implementation.
+        fallback => fallback.to_string(),
+    }
+}
+
+/// Translate a polars `DataType` into the short label shown in schema tables.
+///
+/// All signed integer widths share the label `Int`, all unsigned widths
+/// `UInt`, and both float widths `Float`. Types without a dedicated label
+/// are reported as `Other`.
+fn format_dtype(dtype: &DataType) -> &'static str {
+    match dtype {
+        DataType::Null => "Null",
+        DataType::Boolean => "Boolean",
+        DataType::String => "String",
+
+        // Numeric families collapse to a single label each.
+        DataType::Float64 | DataType::Float32 => "Float",
+        DataType::Int64 | DataType::Int32 | DataType::Int16 | DataType::Int8 => "Int",
+        DataType::UInt64 | DataType::UInt32 | DataType::UInt16 | DataType::UInt8 => "UInt",
+
+        // Temporal types
+        DataType::Time => "Time",
+        DataType::Date => "Date",
+        DataType::Datetime(_, _) => "Datetime",
+        DataType::Duration(_) => "Duration",
+
+        _ => "Other",
+    }
+}
+
+/// Return the part of `table` that follows its first two lines
+/// (the markdown header row and the separator row).
+///
+/// If `table` contains fewer than two `\n` characters the result is the
+/// empty string.
+fn skip_table_header(table: &str) -> &str {
+    table
+        .match_indices('\n')
+        // The second newline ends the separator row.
+        .nth(1)
+        .map_or("", |(newline_at, _)| &table[newline_at + 1..])
+}
+
+/// Manual CSV rendering, used when `CsvWriter` fails or yields invalid UTF-8.
+///
+/// Emits a header line of column names followed by one line per row. Fields
+/// are joined with `,` and every line ends with `\n`. Header names and cells
+/// go through the same quoting rule: a field containing a comma, a double
+/// quote, `\n` or `\r` is wrapped in double quotes, with embedded quotes
+/// doubled. Null cells are written as empty fields.
+fn csv_fallback(df: &DataFrame) -> String {
+    let columns = df.get_columns();
+
+    let mut out = String::new();
+
+    // Header: column names need the same quoting as data, otherwise a name
+    // containing a comma would shift every column after it.
+    let header: Vec<String> = columns
+        .iter()
+        .map(|col| csv_quote_field(col.name().to_string()))
+        .collect();
+    out.push_str(&header.join(","));
+    out.push('\n');
+
+    // Rows
+    for row_idx in 0..df.height() {
+        let row: Vec<String> = columns
+            .iter()
+            .map(|col| csv_quote_field(format_cell(col, row_idx)))
+            .collect();
+        out.push_str(&row.join(","));
+        out.push('\n');
+    }
+
+    out
+}
+
+/// Quote `field` for CSV output if it contains a delimiter, a double quote
+/// or a line-break character; otherwise return it unchanged.
+fn csv_quote_field(field: String) -> String {
+    if field.contains([',', '"', '\n', '\r']) {
+        format!("\"{}\"", field.replace('"', "\"\""))
+    } else {
+        field
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests { // Unit tests for the formatter's public functions.
+    use super::*;
+    use crate::metadata::{FileInfo, SheetInfo};
+
+    #[test]
+    fn test_format_header() { // A 250_000-byte file with one sheet must show "244 KB" and "Sheets: 1".
+        let info = FileInfo {
+            file_size: 250_000,
+            sheets: vec![SheetInfo {
+                name: "Sheet1".into(),
+                rows: 100,
+                cols: 5,
+            }],
+        };
+        let out = format_header("test.xlsx", &info);
+        assert!(out.contains("# File: test.xlsx (244 KB)"));
+        assert!(out.contains("# Sheets: 1"));
+    }
+
+    #[test]
+    fn test_format_data_table() { // Header row and first data row appear as markdown table lines.
+        let s1 = Series::new("name".into(), &["Alice", "Bob"]);
+        let s2 = Series::new("value".into(), &[100i64, 200]);
+        let df = DataFrame::new(vec![s1.into_column(), s2.into_column()]).unwrap();
+        let out = format_data_table(&df);
+        assert!(out.contains("| name | value |"));
+        assert!(out.contains("| Alice | 100 |"));
+    }
+
+    #[test]
+    fn test_format_head_tail_small() { // 3 rows with head=25 tail=25: everything is shown, no omission line.
+        let s = Series::new("x".into(), &[1i64, 2, 3]);
+        let df = DataFrame::new(vec![s.into_column()]).unwrap();
+        let out = format_head_tail(&df, 25, 25);
+        assert!(!out.contains("omitted"));
+        assert!(out.contains("| 1 |"));
+        assert!(out.contains("| 3 |"));
+    }
+
+    #[test]
+    fn test_format_head_tail_large() { // Checks the omission line and the rows on either side of the gap.
+        // 60 rows, head=25 tail=25 → 10 omitted
+        let values: Vec<i64> = (1..=60).collect();
+        let s = Series::new("n".into(), values.as_slice());
+        let df = DataFrame::new(vec![s.into_column()]).unwrap();
+        let out = format_head_tail(&df, 25, 25);
+        assert!(out.contains("(10 rows omitted)"));
+        assert!(out.contains("| 1 |"));
+        assert!(out.contains("| 25 |")); // last row of the head section
+        assert!(out.contains("| 36 |")); // first row of the tail section
+        assert!(out.contains("| 60 |"));
+    }
+
+    #[test]
+    fn test_format_schema() { // Heading reports rows minus the header; type labels come from the DataFrame.
+        let sheet = SheetInfo {
+            name: "Revenue".into(),
+            rows: 11, // 1 header + 10 data
+            cols: 2,
+        };
+        let s1 = Series::new("date".into(), &["2024-01-01", "2024-01-02"]);
+        let s2 = Series::new("amount".into(), &[1.0f64, 2.0]);
+        let df = DataFrame::new(vec![s1.into_column(), s2.into_column()]).unwrap();
+        let out = format_schema(&sheet, &df);
+        assert!(out.contains("## Sheet: Revenue (10 rows x 2 cols)"));
+        assert!(out.contains("| date |"));
+        assert!(out.contains("| amount |"));
+        assert!(out.contains("String")); // the "date" column holds string values here
+        assert!(out.contains("Float"));
+    }
+
+    #[test]
+    fn test_format_empty_sheet_completely_empty() { // rows == 0 and cols == 0 yields the "(empty)" label.
+        let sheet = SheetInfo { name: "Blank".into(), rows: 0, cols: 0 };
+        let out = format_empty_sheet(&sheet);
+        assert!(out.contains("(empty)"));
+    }
+
+    #[test]
+    fn test_format_empty_sheet_header_only() { // Any other shape yields the "(no data rows)" label.
+        let sheet = SheetInfo { name: "Headers".into(), rows: 1, cols: 3 };
+        let out = format_empty_sheet(&sheet);
+        assert!(out.contains("(no data rows)"));
+    }
+
+    #[test]
+    fn test_format_csv() { // CSV output contains the header line and the string values.
+        let s1 = Series::new("a".into(), &["hello", "world"]);
+        let s2 = Series::new("b".into(), &[1i64, 2]);
+        let df = DataFrame::new(vec![s1.into_column(), s2.into_column()]).unwrap();
+        let out = format_csv(&df);
+        assert!(out.contains("a,b"));
+        assert!(out.contains("hello"));
+        assert!(out.contains("world"));
+    }
+
+    #[test]
+    fn test_format_describe_stub() { // The stub returns its fixed placeholder text.
+        let s = Series::new("x".into(), &[1i64]);
+        let df = DataFrame::new(vec![s.into_column()]).unwrap();
+        let out = format_describe(&df);
+        assert!(out.contains("not yet implemented"));
+    }
+}
diff --git a/xlcat/src/main.rs b/xlcat/src/main.rs
@@ -1,3 +1,4 @@
+mod formatter;
 mod metadata;
 mod reader;

	xl-cli-tools CLI tools for viewing and editing Excel files
	Log \| Files \| Refs \| README \| LICENSE

A	xlcat/src/formatter.rs	\|	365	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	xlcat/src/main.rs	\|	1	+