xl-cli-tools

CLI tools for viewing and editing Excel files
Log | Files | Refs | README | LICENSE

formatter.rs (16916B)


      1 use crate::metadata::{format_file_size, FileInfo, SheetInfo};
      2 use polars::prelude::*;
      3 use std::fmt::Write as FmtWrite;
      4 
      5 // ---------------------------------------------------------------------------
      6 // Public API
      7 // ---------------------------------------------------------------------------
      8 
      9 /// Render the top-level file header.
     10 ///
     11 /// ```text
     12 /// # File: report.xlsx (245 KB)
     13 /// # Sheets: 3
     14 /// ```
     15 pub fn format_header(file_name: &str, info: &FileInfo) -> String {
     16     let size_str = format_file_size(info.file_size);
     17     let sheet_count = info.sheets.len();
     18     format!("# File: {file_name} ({size_str})\n# Sheets: {sheet_count}\n")
     19 }
     20 
     21 /// Render the schema block for a single sheet.
     22 ///
     23 /// ```text
     24 /// ## Sheet: Revenue (1240 rows x 8 cols)
     25 ///
     26 /// | Column | Type |
     27 /// |--------|------|
     28 /// | date   | Date |
     29 /// ...
     30 /// ```
     31 pub fn format_schema(sheet: &SheetInfo, df: &DataFrame) -> String {
     32     let data_rows = if sheet.rows == 0 { 0 } else { sheet.rows - 1 };
     33     let cols = sheet.cols;
     34 
     35     let mut out = format!(
     36         "## Sheet: {} ({} rows x {} cols)\n\n",
     37         sheet.name, data_rows, cols
     38     );
     39     let headers = vec!["Column".to_string(), "Type".to_string()];
     40     let rows: Vec<Vec<String>> = df
     41         .get_columns()
     42         .iter()
     43         .map(|col| vec![col.name().to_string(), format_dtype(col.dtype()).to_string()])
     44         .collect();
     45     out.push_str(&render_table(&headers, &rows));
     46     out
     47 }
     48 
     49 /// Render the multi-sheet listing (header + schema for each + prompt).
     50 pub fn format_sheet_listing(
     51     file_name: &str,
     52     info: &FileInfo,
     53     schemas: &[(&SheetInfo, DataFrame)],
     54 ) -> String {
     55     let mut out = format_header(file_name, info);
     56     out.push('\n');
     57 
     58     for (sheet, df) in schemas {
     59         if sheet.rows == 0 && sheet.cols == 0 {
     60             out.push_str(&format_empty_sheet(sheet));
     61         } else {
     62             out.push_str(&format_schema(sheet, df));
     63         }
     64         out.push('\n');
     65     }
     66 
     67     out.push_str("Use --sheet <name> to view a specific sheet.\n");
     68     out
     69 }
     70 
     71 /// Render the full DataFrame as a markdown table with aligned columns.
     72 pub fn format_data_table(df: &DataFrame) -> String {
     73     let (headers, rows) = df_to_strings(df);
     74     render_table(&headers, &rows)
     75 }
     76 
     77 /// Render head / tail view of a DataFrame with aligned columns.
     78 ///
     79 /// If total rows <= head_n + tail_n, shows all rows.
     80 /// Otherwise shows first head_n rows, an omission line, then last tail_n rows.
     81 /// Column widths are computed from both head and tail so pipes stay aligned.
     82 pub fn format_head_tail(df: &DataFrame, head_n: usize, tail_n: usize) -> String {
     83     let total = df.height();
     84     if total <= head_n + tail_n {
     85         return format_data_table(df);
     86     }
     87 
     88     let head_df = df.head(Some(head_n));
     89     let tail_df = df.tail(Some(tail_n));
     90     let omitted = total - head_n - tail_n;
     91 
     92     let (headers, head_rows) = df_to_strings(&head_df);
     93     let (_, tail_rows) = df_to_strings(&tail_df);
     94 
     95     // Compute widths from both head and tail rows
     96     let mut all_rows = head_rows.clone();
     97     all_rows.extend(tail_rows.clone());
     98     let widths = compute_col_widths(&headers, &all_rows);
     99 
    100     let mut out = render_table_header(&headers, &widths);
    101     out.push_str(&render_table_rows(&head_rows, &widths));
    102     out.push_str(&format!("... ({omitted} rows omitted) ...\n"));
    103     out.push_str(&render_table_rows(&tail_rows, &widths));
    104     out
    105 }
    106 
    107 /// Render DataFrame as CSV.
    108 pub fn format_csv(df: &DataFrame) -> String {
    109     let mut buf: Vec<u8> = Vec::new();
    110     // CsvWriter is available via the "csv" feature (polars 0.46)
    111     if CsvWriter::new(&mut buf)
    112         .finish(&mut df.clone())
    113         .is_ok()
    114     {
    115         return String::from_utf8(buf).unwrap_or_else(|_| csv_fallback(df));
    116     }
    117     csv_fallback(df)
    118 }
    119 
    120 /// Render a message for an empty or header-only sheet.
    121 pub fn format_empty_sheet(sheet: &SheetInfo) -> String {
    122     if sheet.rows == 0 && sheet.cols == 0 {
    123         format!("## Sheet: {} (empty)\n", sheet.name)
    124     } else {
    125         format!("## Sheet: {} (no data rows)\n", sheet.name)
    126     }
    127 }
    128 
    129 /// Render summary statistics for each column as a markdown table.
    130 ///
    131 /// Stats are rows, columns are DataFrame columns:
    132 /// | stat | col1 | col2 | ... |
    133 /// |------|------|------|-----|
    134 /// | count | ... | ... | ... |
    135 /// ...
    136 pub fn format_describe(df: &DataFrame) -> String {
    137     let columns = df.get_columns();
    138     let stats = ["count", "null_count", "mean", "std", "min", "max", "median", "unique"];
    139 
    140     let mut headers = vec!["stat".to_string()];
    141     headers.extend(columns.iter().map(|c| c.name().to_string()));
    142 
    143     let rows: Vec<Vec<String>> = stats
    144         .iter()
    145         .map(|stat| {
    146             let mut row = vec![stat.to_string()];
    147             row.extend(columns.iter().map(|col| compute_stat(col, stat)));
    148             row
    149         })
    150         .collect();
    151 
    152     render_table(&headers, &rows)
    153 }
    154 
    155 fn compute_stat(col: &Column, stat: &str) -> String {
    156     let series = col.as_materialized_series();
    157     match stat {
    158         "count" => series.len().to_string(),
    159         "null_count" => series.null_count().to_string(),
    160         "mean" => {
    161             if is_numeric(series.dtype()) {
    162                 series.mean().map(|v| format!("{v:.4}")).unwrap_or_else(|| "-".into())
    163             } else {
    164                 "-".into()
    165             }
    166         }
    167         "std" => {
    168             if is_numeric(series.dtype()) {
    169                 series.std(1).map(|v| format!("{v:.4}")).unwrap_or_else(|| "-".into())
    170             } else {
    171                 "-".into()
    172             }
    173         }
    174         "min" => {
    175             if is_numeric(series.dtype()) {
    176                 match series.min_reduce() {
    177                     Ok(v) => v.value().to_string(),
    178                     Err(_) => "-".into(),
    179                 }
    180             } else {
    181                 "-".into()
    182             }
    183         }
    184         "max" => {
    185             if is_numeric(series.dtype()) {
    186                 match series.max_reduce() {
    187                     Ok(v) => v.value().to_string(),
    188                     Err(_) => "-".into(),
    189                 }
    190             } else {
    191                 "-".into()
    192             }
    193         }
    194         "median" => {
    195             if is_numeric(series.dtype()) {
    196                 series.median().map(|v| format!("{v:.4}")).unwrap_or_else(|| "-".into())
    197             } else {
    198                 "-".into()
    199             }
    200         }
    201         "unique" => match series.n_unique() {
    202             Ok(n) => n.to_string(),
    203             Err(_) => "-".into(),
    204         },
    205         _ => "-".into(),
    206     }
    207 }
    208 
    209 fn is_numeric(dtype: &DataType) -> bool {
    210     matches!(
    211         dtype,
    212         DataType::Int8
    213             | DataType::Int16
    214             | DataType::Int32
    215             | DataType::Int64
    216             | DataType::UInt8
    217             | DataType::UInt16
    218             | DataType::UInt32
    219             | DataType::UInt64
    220             | DataType::Float32
    221             | DataType::Float64
    222     )
    223 }
    224 
    225 // ---------------------------------------------------------------------------
    226 // Private helpers
    227 // ---------------------------------------------------------------------------
    228 
    229 /// Extract headers and row data as strings from a DataFrame.
    230 fn df_to_strings(df: &DataFrame) -> (Vec<String>, Vec<Vec<String>>) {
    231     let columns = df.get_columns();
    232     let headers: Vec<String> = columns.iter().map(|c| c.name().to_string()).collect();
    233     let rows: Vec<Vec<String>> = (0..df.height())
    234         .map(|i| columns.iter().map(|c| format_cell(c, i)).collect())
    235         .collect();
    236     (headers, rows)
    237 }
    238 
    239 /// Compute the display width for each column.
    240 pub fn compute_col_widths(headers: &[String], rows: &[Vec<String>]) -> Vec<usize> {
    241     let mut widths: Vec<usize> = headers.iter().map(|h| h.len().max(3)).collect();
    242     for row in rows {
    243         for (i, cell) in row.iter().enumerate() {
    244             if i < widths.len() {
    245                 widths[i] = widths[i].max(cell.len());
    246             }
    247         }
    248     }
    249     widths
    250 }
    251 
    252 /// Render a markdown table header + separator line.
    253 pub fn render_table_header(headers: &[String], widths: &[usize]) -> String {
    254     let mut out = String::new();
    255     out.push('|');
    256     for (i, h) in headers.iter().enumerate() {
    257         let _ = write!(out, " {:<w$} |", h, w = widths[i]);
    258     }
    259     out.push('\n');
    260     out.push('|');
    261     for w in widths {
    262         out.push('-');
    263         for _ in 0..*w {
    264             out.push('-');
    265         }
    266         out.push_str("-|");
    267     }
    268     out.push('\n');
    269     out
    270 }
    271 
    272 /// Render markdown table data rows (no header).
    273 pub fn render_table_rows(rows: &[Vec<String>], widths: &[usize]) -> String {
    274     let mut out = String::new();
    275     for row in rows {
    276         out.push('|');
    277         for (i, cell) in row.iter().enumerate() {
    278             let w = if i < widths.len() { widths[i] } else { cell.len() };
    279             let _ = write!(out, " {:<w$} |", cell, w = w);
    280         }
    281         out.push('\n');
    282     }
    283     out
    284 }
    285 
    286 /// Render a complete aligned markdown table.
    287 pub fn render_table(headers: &[String], rows: &[Vec<String>]) -> String {
    288     let widths = compute_col_widths(headers, rows);
    289     let mut out = render_table_header(headers, &widths);
    290     out.push_str(&render_table_rows(rows, &widths));
    291     out
    292 }
    293 
    294 /// Format a single cell value for markdown display.
    295 fn format_cell(col: &Column, idx: usize) -> String {
    296     match col.get(idx) {
    297         Ok(AnyValue::Null) | Err(_) => String::new(),
    298         Ok(v) => format_any_value(&v),
    299     }
    300 }
    301 
    302 /// Convert an AnyValue to its display string.
    303 pub fn format_any_value(v: &AnyValue) -> String {
    304     match v {
    305         AnyValue::Null => String::new(),
    306         AnyValue::Boolean(b) => b.to_string(),
    307         AnyValue::Int8(n) => n.to_string(),
    308         AnyValue::Int16(n) => n.to_string(),
    309         AnyValue::Int32(n) => n.to_string(),
    310         AnyValue::Int64(n) => n.to_string(),
    311         AnyValue::UInt8(n) => n.to_string(),
    312         AnyValue::UInt16(n) => n.to_string(),
    313         AnyValue::UInt32(n) => n.to_string(),
    314         AnyValue::UInt64(n) => n.to_string(),
    315         AnyValue::Float32(f) => f.to_string(),
    316         AnyValue::Float64(f) => f.to_string(),
    317         AnyValue::String(s) => s.to_string(),
    318         AnyValue::StringOwned(s) => s.to_string(),
    319         other => format!("{other}"),
    320     }
    321 }
    322 
    323 /// Map a polars DataType to a human-readable label.
    324 fn format_dtype(dtype: &DataType) -> &'static str {
    325     match dtype {
    326         DataType::Boolean => "Boolean",
    327         DataType::Int8 | DataType::Int16 | DataType::Int32 | DataType::Int64 => "Int",
    328         DataType::UInt8 | DataType::UInt16 | DataType::UInt32 | DataType::UInt64 => "UInt",
    329         DataType::Float32 | DataType::Float64 => "Float",
    330         DataType::String => "String",
    331         DataType::Date => "Date",
    332         DataType::Datetime(_, _) => "Datetime",
    333         DataType::Duration(_) => "Duration",
    334         DataType::Time => "Time",
    335         DataType::Null => "Null",
    336         _ => "Other",
    337     }
    338 }
    339 
    340 /// Manual CSV fallback if CsvWriter is unavailable.
    341 fn csv_fallback(df: &DataFrame) -> String {
    342     let columns = df.get_columns();
    343     let n_rows = df.height();
    344 
    345     let mut out = String::new();
    346 
    347     // Header
    348     let header: Vec<String> = columns.iter().map(|c| c.name().to_string()).collect();
    349     out.push_str(&header.join(","));
    350     out.push('\n');
    351 
    352     // Rows
    353     for row_idx in 0..n_rows {
    354         let row: Vec<String> = columns
    355             .iter()
    356             .map(|col| {
    357                 let cell = format_cell(col, row_idx);
    358                 // Quote cells containing commas or quotes
    359                 if cell.contains(',') || cell.contains('"') || cell.contains('\n') {
    360                     format!("\"{}\"", cell.replace('"', "\"\""))
    361                 } else {
    362                     cell
    363                 }
    364             })
    365             .collect();
    366         out.push_str(&row.join(","));
    367         out.push('\n');
    368     }
    369 
    370     out
    371 }
    372 
    373 // ---------------------------------------------------------------------------
    374 // Tests
    375 // ---------------------------------------------------------------------------
    376 
    377 #[cfg(test)]
    378 mod tests {
    379     use super::*;
    380     use crate::metadata::{FileInfo, SheetInfo};
    381 
    382     #[test]
    383     fn test_format_header() {
    384         let info = FileInfo {
    385             file_size: 250_000,
    386             sheets: vec![SheetInfo {
    387                 name: "Sheet1".into(),
    388                 rows: 100,
    389                 cols: 5,
    390             }],
    391         };
    392         let out = format_header("test.xlsx", &info);
    393         assert!(out.contains("# File: test.xlsx (244 KB)"));
    394         assert!(out.contains("# Sheets: 1"));
    395     }
    396 
    397     #[test]
    398     fn test_format_data_table() {
    399         let s1 = Series::new("name".into(), &["Alice", "Bob"]);
    400         let s2 = Series::new("value".into(), &[100i64, 200]);
    401         let df = DataFrame::new(vec![s1.into_column(), s2.into_column()]).unwrap();
    402         let out = format_data_table(&df);
    403         assert!(out.contains("| name  | value |"));
    404         assert!(out.contains("| Alice | 100   |"));
    405         // Verify pipes are aligned: all lines have same length
    406         let lines: Vec<&str> = out.trim().lines().collect();
    407         assert!(lines.len() >= 3);
    408         let expected_len = lines[0].len();
    409         for line in &lines {
    410             assert_eq!(line.len(), expected_len, "Misaligned: {line}");
    411         }
    412     }
    413 
    414     #[test]
    415     fn test_format_head_tail_small() {
    416         let s = Series::new("x".into(), &[1i64, 2, 3]);
    417         let df = DataFrame::new(vec![s.into_column()]).unwrap();
    418         let out = format_head_tail(&df, 25, 25);
    419         assert!(!out.contains("omitted"));
    420         assert!(out.contains("| 1 "));
    421         assert!(out.contains("| 3 "));
    422     }
    423 
    424     #[test]
    425     fn test_format_head_tail_large() {
    426         // 60 rows, head=25 tail=25 → 10 omitted
    427         let values: Vec<i64> = (1..=60).collect();
    428         let s = Series::new("n".into(), values.as_slice());
    429         let df = DataFrame::new(vec![s.into_column()]).unwrap();
    430         let out = format_head_tail(&df, 25, 25);
    431         assert!(out.contains("(10 rows omitted)"));
    432         assert!(out.contains("| 1 "));
    433         assert!(out.contains("| 25 "));
    434         assert!(out.contains("| 36 "));
    435         assert!(out.contains("| 60 "));
    436     }
    437 
    438     #[test]
    439     fn test_format_schema() {
    440         let sheet = SheetInfo {
    441             name: "Revenue".into(),
    442             rows: 11, // 1 header + 10 data
    443             cols: 2,
    444         };
    445         let s1 = Series::new("date".into(), &["2024-01-01", "2024-01-02"]);
    446         let s2 = Series::new("amount".into(), &[1.0f64, 2.0]);
    447         let df = DataFrame::new(vec![s1.into_column(), s2.into_column()]).unwrap();
    448         let out = format_schema(&sheet, &df);
    449         assert!(out.contains("## Sheet: Revenue (10 rows x 2 cols)"));
    450         assert!(out.contains("| date"));
    451         assert!(out.contains("| amount"));
    452         assert!(out.contains("String"));
    453         assert!(out.contains("Float"));
    454     }
    455 
    456     #[test]
    457     fn test_format_empty_sheet_completely_empty() {
    458         let sheet = SheetInfo { name: "Blank".into(), rows: 0, cols: 0 };
    459         let out = format_empty_sheet(&sheet);
    460         assert!(out.contains("(empty)"));
    461     }
    462 
    463     #[test]
    464     fn test_format_empty_sheet_header_only() {
    465         let sheet = SheetInfo { name: "Headers".into(), rows: 1, cols: 3 };
    466         let out = format_empty_sheet(&sheet);
    467         assert!(out.contains("(no data rows)"));
    468     }
    469 
    470     #[test]
    471     fn test_format_csv() {
    472         let s1 = Series::new("a".into(), &["hello", "world"]);
    473         let s2 = Series::new("b".into(), &[1i64, 2]);
    474         let df = DataFrame::new(vec![s1.into_column(), s2.into_column()]).unwrap();
    475         let out = format_csv(&df);
    476         assert!(out.contains("a,b"));
    477         assert!(out.contains("hello"));
    478         assert!(out.contains("world"));
    479     }
    480 
    481     #[test]
    482     fn test_format_describe() {
    483         let s_name = Series::new("name".into(), &["Alice", "Bob", "Carol"]);
    484         let s_val = Series::new("value".into(), &[10i64, 20, 30]);
    485         let df = DataFrame::new(vec![s_name.into_column(), s_val.into_column()]).unwrap();
    486         let out = format_describe(&df);
    487         // Header row contains stat and column names
    488         assert!(out.contains("| stat"));
    489         assert!(out.contains("name"));
    490         assert!(out.contains("value"));
    491         // All stat rows are present
    492         assert!(out.contains("| count"));
    493         assert!(out.contains("| null_count"));
    494         assert!(out.contains("| mean"));
    495         assert!(out.contains("| std"));
    496         assert!(out.contains("| min"));
    497         assert!(out.contains("| max"));
    498         assert!(out.contains("| median"));
    499         assert!(out.contains("| unique"));
    500         // Non-numeric column shows "-" for mean
    501         assert!(out.contains("| -"));
    502         // Verify alignment: all table lines should have same length
    503         let table_lines: Vec<&str> = out.trim().lines().filter(|l| l.starts_with('|')).collect();
    504         let expected_len = table_lines[0].len();
    505         for line in &table_lines {
    506             assert_eq!(line.len(), expected_len, "Misaligned: {line}");
    507         }
    508     }
    509 }