diff.rs - xl-cli-tools - CLI tools for viewing and editing Excel files

diff.rs (24600B)
      1 // Diff engine for comparing two Excel sheets.
      2 
      3 use anyhow::{Result, bail};
      4 use polars::prelude::*;
      5 use std::collections::HashMap;
      6 
      7 use crate::formatter;
      8 
/// Source file and sheet metadata for display.
///
/// Carried through the diff so warnings and results can say which
/// file/sheet a row or column came from.
#[derive(Debug, Clone)]
pub struct SheetSource {
    /// Name of the file the sheet was read from (used in warning and error messages).
    pub file_name: String,
    /// Name of the sheet inside that file (used in duplicate-key warnings).
    pub sheet_name: String,
}
     15 
/// A single row from an added or removed set.
#[derive(Debug, Clone)]
pub struct DiffRow {
    /// Display strings for the row's cells, laid out in the order of the
    /// diff's output headers; cells with no value are empty strings.
    pub values: Vec<String>,
}
     21 
/// A change in a single cell.
#[derive(Debug, Clone)]
pub struct CellChange {
    /// Name of the column whose value differs.
    pub column: String,
    /// Display string of the value in the first (A) sheet.
    pub old_value: String,
    /// Display string of the value in the second (B) sheet.
    pub new_value: String,
}
     29 
/// A row present in both files with cell-level differences.
#[derive(Debug, Clone)]
pub struct ModifiedRow {
    /// Display strings of the key-column values identifying the row,
    /// in the order the key columns were specified.
    pub key: Vec<String>,
    /// The cells that differ between the two sheets (never empty when
    /// produced by the keyed diff).
    pub changes: Vec<CellChange>,
}
     36 
/// Result of comparing two sheets.
#[derive(Debug, Clone)]
pub struct DiffResult {
    /// Column names that the `values` of added/removed rows are aligned to.
    pub headers: Vec<String>,
    /// Key columns used for matching rows; empty for a positional diff.
    pub key_columns: Vec<String>,
    /// Rows found only in the second (B) sheet.
    pub added: Vec<DiffRow>,
    /// Rows found only in the first (A) sheet.
    pub removed: Vec<DiffRow>,
    /// Rows whose key exists in both sheets but whose cells differ;
    /// always empty for a positional diff.
    pub modified: Vec<ModifiedRow>,
    /// Where the first (A) sheet came from.
    pub source_a: SheetSource,
    /// Where the second (B) sheet came from.
    pub source_b: SheetSource,
}
     48 
     49 impl DiffResult {
     50     pub fn has_differences(&self) -> bool {
     51         !self.added.is_empty() || !self.removed.is_empty() || !self.modified.is_empty()
     52     }
     53 }
     54 
/// Options controlling how the diff is performed.
#[derive(Debug, Clone, Default)]
pub struct DiffOptions {
    /// Columns that identify a row. When empty, `diff_sheets` falls back
    /// to the positional (whole-row multiset) diff.
    pub key_columns: Vec<String>,
    /// Optional absolute tolerance for numeric comparison in the keyed
    /// diff; `None` means cell values are compared as exact strings.
    /// The positional diff does not consult this field.
    pub tolerance: Option<f64>,
}
     61 
     62 // ---------------------------------------------------------------------------
     63 // Helper functions
     64 // ---------------------------------------------------------------------------
     65 
     66 /// Format a cell value for display. Returns empty string for null.
     67 fn cell_to_string(col: &Column, idx: usize) -> String {
     68     match col.get(idx) {
     69         Ok(AnyValue::Null) | Err(_) => String::new(),
     70         Ok(v) => formatter::format_any_value(&v),
     71     }
     72 }
     73 
     74 /// Format a cell value for hashing. Uses a sentinel for null so that null
     75 /// and empty string produce different keys.
     76 fn cell_to_key_part(col: &Column, idx: usize) -> String {
     77     match col.get(idx) {
     78         Ok(AnyValue::Null) | Err(_) => "\x01NULL\x01".to_string(),
     79         Ok(v) => formatter::format_any_value(&v),
     80     }
     81 }
     82 
     83 /// Build a string key for an entire row by joining all column values.
     84 fn row_to_key(df: &DataFrame, row_idx: usize) -> String {
     85     df.get_columns()
     86         .iter()
     87         .map(|col| cell_to_key_part(col, row_idx))
     88         .collect::<Vec<_>>()
     89         .join("\0")
     90 }
     91 
     92 /// Collect display values for every column in a row.
     93 fn row_to_strings(df: &DataFrame, row_idx: usize) -> Vec<String> {
     94     df.get_columns()
     95         .iter()
     96         .map(|col| cell_to_string(col, row_idx))
     97         .collect()
     98 }
     99 
    100 // ---------------------------------------------------------------------------
    101 // Positional diff
    102 // ---------------------------------------------------------------------------
    103 
    104 /// Compare two DataFrames positionally (no key columns).
    105 ///
    106 /// Uses multiset comparison: each unique row is tracked by frequency.
    107 /// Rows present in A but not (or fewer times) in B are "removed";
    108 /// rows present in B but not (or fewer times) in A are "added".
    109 pub fn diff_positional(
    110     df_a: &DataFrame,
    111     df_b: &DataFrame,
    112     _opts: &DiffOptions,
    113     source_a: SheetSource,
    114     source_b: SheetSource,
    115 ) -> Result<DiffResult> {
    116     // Determine headers — use the longer header set.
    117     let headers_a: Vec<String> = df_a.get_column_names().iter().map(|s| s.to_string()).collect();
    118     let headers_b: Vec<String> = df_b.get_column_names().iter().map(|s| s.to_string()).collect();
    119 
    120     let headers = if headers_b.len() > headers_a.len() {
    121         if headers_a.len() != headers_b.len() {
    122             eprintln!(
    123                 "Warning: column count differs ({} vs {}), using wider header set",
    124                 headers_a.len(),
    125                 headers_b.len()
    126             );
    127         }
    128         headers_b.clone()
    129     } else {
    130         if headers_a.len() != headers_b.len() {
    131             eprintln!(
    132                 "Warning: column count differs ({} vs {}), using wider header set",
    133                 headers_a.len(),
    134                 headers_b.len()
    135             );
    136         }
    137         headers_a.clone()
    138     };
    139 
    140     let num_headers = headers.len();
    141 
    142     // Build frequency maps: key → list of row indices (so we can consume them).
    143     let mut freq_a: HashMap<String, Vec<usize>> = HashMap::new();
    144     for i in 0..df_a.height() {
    145         let key = row_to_key(df_a, i);
    146         freq_a.entry(key).or_default().push(i);
    147     }
    148 
    149     let mut freq_b: HashMap<String, Vec<usize>> = HashMap::new();
    150     for i in 0..df_b.height() {
    151         let key = row_to_key(df_b, i);
    152         freq_b.entry(key).or_default().push(i);
    153     }
    154 
    155     let mut removed = Vec::new();
    156     let mut added = Vec::new();
    157 
    158     // Walk A: for each row, try to consume a matching row from B.
    159     for i in 0..df_a.height() {
    160         let key = row_to_key(df_a, i);
    161         let consumed = freq_b
    162             .get_mut(&key)
    163             .and_then(|indices| indices.pop())
    164             .is_some();
    165         if !consumed {
    166             let mut vals = row_to_strings(df_a, i);
    167             vals.resize(num_headers, String::new());
    168             removed.push(DiffRow { values: vals });
    169         }
    170     }
    171 
    172     // Walk B: for each row, try to consume a matching row from A.
    173     for i in 0..df_b.height() {
    174         let key = row_to_key(df_b, i);
    175         let consumed = freq_a
    176             .get_mut(&key)
    177             .and_then(|indices| indices.pop())
    178             .is_some();
    179         if !consumed {
    180             let mut vals = row_to_strings(df_b, i);
    181             vals.resize(num_headers, String::new());
    182             added.push(DiffRow { values: vals });
    183         }
    184     }
    185 
    186     Ok(DiffResult {
    187         headers,
    188         key_columns: vec![],
    189         added,
    190         removed,
    191         modified: vec![],
    192         source_a,
    193         source_b,
    194     })
    195 }
    196 
    197 // ---------------------------------------------------------------------------
    198 // Key-based diff
    199 // ---------------------------------------------------------------------------
    200 
/// A row indexed by its key columns.
struct KeyedRow {
    /// Display strings of every column of the row, in the source frame's column order.
    values: Vec<String>,
    /// Display strings of just the key columns, in the order the key indices were given.
    key_values: Vec<String>,
}
    206 
    207 /// Build a map from composite key string to KeyedRow for every row in the DataFrame.
    208 fn build_key_map(
    209     df: &DataFrame,
    210     key_indices: &[usize],
    211     columns: &[Column],
    212 ) -> HashMap<String, KeyedRow> {
    213     let mut map = HashMap::new();
    214     for i in 0..df.height() {
    215         let key_values: Vec<String> = key_indices
    216             .iter()
    217             .map(|&ki| cell_to_string(&columns[ki], i))
    218             .collect();
    219         let composite_key = key_values.join("\0");
    220         let values: Vec<String> = columns.iter().map(|col| cell_to_string(col, i)).collect();
    221         map.insert(
    222             composite_key,
    223             KeyedRow {
    224                 values,
    225                 key_values,
    226             },
    227         );
    228     }
    229     map
    230 }
    231 
    232 /// Warn on stderr when duplicate keys are found.
    233 fn check_duplicate_keys(
    234     df: &DataFrame,
    235     key_indices: &[usize],
    236     columns: &[Column],
    237     source: &SheetSource,
    238 ) {
    239     let mut seen: HashMap<String, usize> = HashMap::new();
    240     for i in 0..df.height() {
    241         let key: String = key_indices
    242             .iter()
    243             .map(|&ki| cell_to_string(&columns[ki], i))
    244             .collect::<Vec<_>>()
    245             .join("\0");
    246         let count = seen.entry(key.clone()).or_insert(0);
    247         *count += 1;
    248         if *count == 2 {
    249             let display_key = key.replace('\0', ", ");
    250             eprintln!(
    251                 "Warning: duplicate key [{}] in {}:{}",
    252                 display_key, source.file_name, source.sheet_name
    253             );
    254         }
    255     }
    256 }
    257 
    258 /// Check whether a polars DataType is a float type.
    259 fn is_float_dtype(dt: &DataType) -> bool {
    260     matches!(dt, DataType::Float32 | DataType::Float64)
    261 }
    262 
    263 /// Check whether a polars DataType is an integer type.
    264 fn is_int_dtype(dt: &DataType) -> bool {
    265     matches!(
    266         dt,
    267         DataType::Int8
    268             | DataType::Int16
    269             | DataType::Int32
    270             | DataType::Int64
    271             | DataType::UInt8
    272             | DataType::UInt16
    273             | DataType::UInt32
    274             | DataType::UInt64
    275     )
    276 }
    277 
    278 /// Compare two string-rendered values with optional numeric tolerance.
    279 ///
    280 /// Rules:
    281 /// - NaN == NaN is true.
    282 /// - NaN vs non-NaN is false.
    283 /// - Pure int+int columns use exact comparison (no tolerance applied).
    284 /// - At least one float column applies tolerance.
    285 /// - Otherwise exact string comparison.
    286 fn values_equal_with_tolerance(
    287     val_a: &str,
    288     val_b: &str,
    289     tolerance: f64,
    290     df_a: &DataFrame,
    291     df_b: &DataFrame,
    292     col_name: &str,
    293 ) -> bool {
    294     let parsed_a = val_a.parse::<f64>();
    295     let parsed_b = val_b.parse::<f64>();
    296 
    297     match (parsed_a, parsed_b) {
    298         (Ok(a), Ok(b)) => {
    299             if a.is_nan() && b.is_nan() {
    300                 return true;
    301             }
    302             if a.is_nan() || b.is_nan() {
    303                 return false;
    304             }
    305 
    306             let dt_a = df_a
    307                 .column(col_name)
    308                 .map(|c| c.dtype().clone())
    309                 .unwrap_or(DataType::String);
    310             let dt_b = df_b
    311                 .column(col_name)
    312                 .map(|c| c.dtype().clone())
    313                 .unwrap_or(DataType::String);
    314 
    315             if is_int_dtype(&dt_a) && is_int_dtype(&dt_b) {
    316                 val_a == val_b
    317             } else if is_float_dtype(&dt_a) || is_float_dtype(&dt_b) {
    318                 (a - b).abs() <= tolerance
    319             } else {
    320                 val_a == val_b
    321             }
    322         }
    323         _ => val_a == val_b,
    324     }
    325 }
    326 
    327 /// Compare non-key columns of two keyed rows and return cell-level changes.
    328 #[allow(clippy::too_many_arguments)]
    329 fn compare_rows(
    330     df_a: &DataFrame,
    331     df_b: &DataFrame,
    332     headers_a: &[String],
    333     headers_b: &[String],
    334     row_a: &KeyedRow,
    335     row_b: &KeyedRow,
    336     common_columns: &[String],
    337     opts: &DiffOptions,
    338 ) -> Vec<CellChange> {
    339     let mut changes = Vec::new();
    340     for col_name in common_columns {
    341         let idx_a = headers_a.iter().position(|h| h == col_name);
    342         let idx_b = headers_b.iter().position(|h| h == col_name);
    343         let val_a = idx_a
    344             .map(|i| row_a.values.get(i).cloned().unwrap_or_default())
    345             .unwrap_or_default();
    346         let val_b = idx_b
    347             .map(|i| row_b.values.get(i).cloned().unwrap_or_default())
    348             .unwrap_or_default();
    349 
    350         let equal = if let Some(tol) = opts.tolerance {
    351             values_equal_with_tolerance(&val_a, &val_b, tol, df_a, df_b, col_name)
    352         } else {
    353             val_a == val_b
    354         };
    355 
    356         if !equal {
    357             changes.push(CellChange {
    358                 column: col_name.clone(),
    359                 old_value: val_a,
    360                 new_value: val_b,
    361             });
    362         }
    363     }
    364     changes
    365 }
    366 
    367 /// Compare two DataFrames using key columns.
    368 pub fn diff_keyed(
    369     df_a: &DataFrame,
    370     df_b: &DataFrame,
    371     opts: &DiffOptions,
    372     source_a: SheetSource,
    373     source_b: SheetSource,
    374 ) -> Result<DiffResult> {
    375     let columns_a = df_a.get_columns();
    376     let columns_b = df_b.get_columns();
    377     let headers_a: Vec<String> = df_a.get_column_names().iter().map(|s| s.to_string()).collect();
    378     let headers_b: Vec<String> = df_b.get_column_names().iter().map(|s| s.to_string()).collect();
    379 
    380     // Resolve key column indices in both frames.
    381     let mut key_indices_a = Vec::new();
    382     let mut key_indices_b = Vec::new();
    383     for key_col in &opts.key_columns {
    384         match headers_a.iter().position(|h| h == key_col) {
    385             Some(idx) => key_indices_a.push(idx),
    386             None => bail!("Key column '{}' not found in {}", key_col, source_a.file_name),
    387         }
    388         match headers_b.iter().position(|h| h == key_col) {
    389             Some(idx) => key_indices_b.push(idx),
    390             None => bail!("Key column '{}' not found in {}", key_col, source_b.file_name),
    391         }
    392     }
    393 
    394     // Find non-key columns.
    395     let non_key_a: Vec<String> = headers_a
    396         .iter()
    397         .filter(|h| !opts.key_columns.contains(h))
    398         .cloned()
    399         .collect();
    400     let non_key_b: Vec<String> = headers_b
    401         .iter()
    402         .filter(|h| !opts.key_columns.contains(h))
    403         .cloned()
    404         .collect();
    405 
    406     // Common non-key columns (for modification detection).
    407     let common_columns: Vec<String> = non_key_a
    408         .iter()
    409         .filter(|h| non_key_b.contains(h))
    410         .cloned()
    411         .collect();
    412 
    413     // Warn about columns only in one file.
    414     for col in &non_key_a {
    415         if !non_key_b.contains(col) {
    416             eprintln!("Warning: column '{}' only in {}", col, source_a.file_name);
    417         }
    418     }
    419     for col in &non_key_b {
    420         if !non_key_a.contains(col) {
    421             eprintln!("Warning: column '{}' only in {}", col, source_b.file_name);
    422         }
    423     }
    424 
    425     // Build output headers: key columns + all from A non-key + B-only non-key.
    426     let mut headers = opts.key_columns.clone();
    427     headers.extend(non_key_a.iter().cloned());
    428     for col in &non_key_b {
    429         if !non_key_a.contains(col) {
    430             headers.push(col.clone());
    431         }
    432     }
    433 
    434     // Check for duplicate keys.
    435     check_duplicate_keys(df_a, &key_indices_a, columns_a, &source_a);
    436     check_duplicate_keys(df_b, &key_indices_b, columns_b, &source_b);
    437 
    438     // Build key maps.
    439     let map_a = build_key_map(df_a, &key_indices_a, columns_a);
    440     let map_b = build_key_map(df_b, &key_indices_b, columns_b);
    441 
    442     let mut removed = Vec::new();
    443     let mut added = Vec::new();
    444     let mut modified = Vec::new();
    445 
    446     // Keys in A but not in B → removed.
    447     for (composite_key, row_a) in &map_a {
    448         if !map_b.contains_key(composite_key) {
    449             let mut vals = Vec::new();
    450             for h in &headers {
    451                 if let Some(idx) = headers_a.iter().position(|ha| ha == h) {
    452                     vals.push(row_a.values.get(idx).cloned().unwrap_or_default());
    453                 } else {
    454                     vals.push(String::new());
    455                 }
    456             }
    457             removed.push(DiffRow { values: vals });
    458         }
    459     }
    460 
    461     // Keys in B but not in A → added.
    462     for (composite_key, row_b) in &map_b {
    463         if !map_a.contains_key(composite_key) {
    464             let mut vals = Vec::new();
    465             for h in &headers {
    466                 if let Some(idx) = headers_b.iter().position(|hb| hb == h) {
    467                     vals.push(row_b.values.get(idx).cloned().unwrap_or_default());
    468                 } else {
    469                     vals.push(String::new());
    470                 }
    471             }
    472             added.push(DiffRow { values: vals });
    473         }
    474     }
    475 
    476     // Keys in both → compare for modifications.
    477     for (composite_key, row_a) in &map_a {
    478         if let Some(row_b) = map_b.get(composite_key) {
    479             let changes = compare_rows(
    480                 df_a,
    481                 df_b,
    482                 &headers_a,
    483                 &headers_b,
    484                 row_a,
    485                 row_b,
    486                 &common_columns,
    487                 opts,
    488             );
    489             if !changes.is_empty() {
    490                 modified.push(ModifiedRow {
    491                     key: row_a.key_values.clone(),
    492                     changes,
    493                 });
    494             }
    495         }
    496     }
    497 
    498     Ok(DiffResult {
    499         headers,
    500         key_columns: opts.key_columns.clone(),
    501         added,
    502         removed,
    503         modified,
    504         source_a,
    505         source_b,
    506     })
    507 }
    508 
    509 // ---------------------------------------------------------------------------
    510 // Entry point
    511 // ---------------------------------------------------------------------------
    512 
    513 /// Compare two DataFrames, dispatching to positional or key-based diff
    514 /// depending on whether key columns are specified.
    515 pub fn diff_sheets(
    516     df_a: &DataFrame,
    517     df_b: &DataFrame,
    518     opts: &DiffOptions,
    519     source_a: SheetSource,
    520     source_b: SheetSource,
    521 ) -> Result<DiffResult> {
    522     if opts.key_columns.is_empty() {
    523         diff_positional(df_a, df_b, opts, source_a, source_b)
    524     } else {
    525         diff_keyed(df_a, df_b, opts, source_a, source_b)
    526     }
    527 }
    528 
/// Unit tests for the positional diff, the keyed diff (including numeric
/// tolerance and NaN handling), and the `diff_sheets` dispatcher.
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture: source metadata standing in for the first (A) sheet.
    fn test_source_a() -> SheetSource {
        SheetSource {
            file_name: "a.xlsx".into(),
            sheet_name: "Sheet1".into(),
        }
    }

    /// Fixture: source metadata standing in for the second (B) sheet.
    fn test_source_b() -> SheetSource {
        SheetSource {
            file_name: "b.xlsx".into(),
            sheet_name: "Sheet1".into(),
        }
    }

    // ---- Positional diff tests ----

    /// Identical frames produce no added, removed, or modified rows.
    #[test]
    fn test_positional_no_diff() {
        let df_a = df! {
            "name" => &["Alice", "Bob"],
            "score" => &[100, 200],
        }
        .unwrap();
        let df_b = df_a.clone();
        let opts = DiffOptions::default();

        let result = diff_positional(&df_a, &df_b, &opts, test_source_a(), test_source_b()).unwrap();

        assert!(!result.has_differences());
        assert!(result.added.is_empty());
        assert!(result.removed.is_empty());
        assert!(result.modified.is_empty());
    }

    /// A row only in A is reported as removed and a row only in B as added.
    #[test]
    fn test_positional_added_removed() {
        let df_a = df! {
            "name" => &["Alice", "Bob"],
        }
        .unwrap();
        let df_b = df! {
            "name" => &["Alice", "Charlie"],
        }
        .unwrap();
        let opts = DiffOptions::default();

        let result = diff_positional(&df_a, &df_b, &opts, test_source_a(), test_source_b()).unwrap();

        assert!(result.has_differences());
        assert_eq!(result.removed.len(), 1);
        assert_eq!(result.removed[0].values, vec!["Bob"]);
        assert_eq!(result.added.len(), 1);
        assert_eq!(result.added[0].values, vec!["Charlie"]);
    }

    /// Multiset semantics: three identical rows in A against two in B
    /// leave exactly one row reported as removed.
    #[test]
    fn test_positional_duplicate_rows() {
        let df_a = df! {
            "val" => &["A", "A", "A"],
        }
        .unwrap();
        let df_b = df! {
            "val" => &["A", "A"],
        }
        .unwrap();
        let opts = DiffOptions::default();

        let result = diff_positional(&df_a, &df_b, &opts, test_source_a(), test_source_b()).unwrap();

        assert_eq!(result.removed.len(), 1);
        assert_eq!(result.removed[0].values, vec!["A"]);
        assert!(result.added.is_empty());
    }

    // ---- Key-based diff tests ----

    /// Identical frames keyed on `id` produce no differences.
    #[test]
    fn test_keyed_no_diff() {
        let df_a = df! {
            "id" => &[1, 2],
            "name" => &["Alice", "Bob"],
        }
        .unwrap();
        let df_b = df_a.clone();
        let opts = DiffOptions {
            key_columns: vec!["id".into()],
            tolerance: None,
        };

        let result = diff_keyed(&df_a, &df_b, &opts, test_source_a(), test_source_b()).unwrap();

        assert!(!result.has_differences());
    }

    /// A key only in A yields a removed row and a key only in B an added
    /// row; the shared key (id 2) is unchanged and not reported.
    #[test]
    fn test_keyed_added_removed() {
        let df_a = df! {
            "id" => &[1, 2],
            "name" => &["Alice", "Bob"],
        }
        .unwrap();
        let df_b = df! {
            "id" => &[2, 3],
            "name" => &["Bob", "Charlie"],
        }
        .unwrap();
        let opts = DiffOptions {
            key_columns: vec!["id".into()],
            tolerance: None,
        };

        let result = diff_keyed(&df_a, &df_b, &opts, test_source_a(), test_source_b()).unwrap();

        assert_eq!(result.removed.len(), 1);
        assert!(result.removed[0].values.contains(&"1".to_string()));
        assert!(result.removed[0].values.contains(&"Alice".to_string()));

        assert_eq!(result.added.len(), 1);
        assert!(result.added[0].values.contains(&"3".to_string()));
        assert!(result.added[0].values.contains(&"Charlie".to_string()));
    }

    /// A changed non-key cell on a shared key is reported as a single
    /// modification carrying the old and new values.
    #[test]
    fn test_keyed_modified() {
        let df_a = df! {
            "id" => &[1, 2],
            "score" => &[100, 200],
        }
        .unwrap();
        let df_b = df! {
            "id" => &[1, 2],
            "score" => &[100, 250],
        }
        .unwrap();
        let opts = DiffOptions {
            key_columns: vec!["id".into()],
            tolerance: None,
        };

        let result = diff_keyed(&df_a, &df_b, &opts, test_source_a(), test_source_b()).unwrap();

        assert!(result.added.is_empty());
        assert!(result.removed.is_empty());
        assert_eq!(result.modified.len(), 1);

        let m = &result.modified[0];
        assert_eq!(m.key, vec!["2"]);
        assert_eq!(m.changes.len(), 1);
        assert_eq!(m.changes[0].column, "score");
        assert_eq!(m.changes[0].old_value, "200");
        assert_eq!(m.changes[0].new_value, "250");
    }

    /// Rows are matched on the combination of two key columns; only the
    /// (date, ticker) pair whose price changed is reported. The expected
    /// values also pin that whole floats are rendered without a fraction.
    #[test]
    fn test_keyed_composite_key() {
        let df_a = df! {
            "date" => &["2024-01-01", "2024-01-01", "2024-01-02"],
            "ticker" => &["AAPL", "GOOG", "AAPL"],
            "price" => &[150.0, 140.0, 151.0],
        }
        .unwrap();
        let df_b = df! {
            "date" => &["2024-01-01", "2024-01-01", "2024-01-02"],
            "ticker" => &["AAPL", "GOOG", "AAPL"],
            "price" => &[150.0, 142.0, 151.0],
        }
        .unwrap();
        let opts = DiffOptions {
            key_columns: vec!["date".into(), "ticker".into()],
            tolerance: None,
        };

        let result = diff_keyed(&df_a, &df_b, &opts, test_source_a(), test_source_b()).unwrap();

        assert!(result.added.is_empty());
        assert!(result.removed.is_empty());
        assert_eq!(result.modified.len(), 1);

        let m = &result.modified[0];
        assert_eq!(m.key, vec!["2024-01-01", "GOOG"]);
        assert_eq!(m.changes[0].column, "price");
        assert_eq!(m.changes[0].old_value, "140");
        assert_eq!(m.changes[0].new_value, "142");
    }

    // ---- Tolerance tests ----

    /// Float values that differ by less than the tolerance are treated as equal.
    #[test]
    fn test_keyed_tolerance_within() {
        let df_a = df! {
            "id" => &[1],
            "price" => &[100.001_f64],
        }
        .unwrap();
        let df_b = df! {
            "id" => &[1],
            "price" => &[100.002_f64],
        }
        .unwrap();
        let opts = DiffOptions {
            key_columns: vec!["id".into()],
            tolerance: Some(0.01),
        };

        let result = diff_keyed(&df_a, &df_b, &opts, test_source_a(), test_source_b()).unwrap();

        assert!(!result.has_differences());
    }

    /// Float values that differ by more than the tolerance are reported as modified.
    #[test]
    fn test_keyed_tolerance_exceeded() {
        let df_a = df! {
            "id" => &[1],
            "price" => &[100.0_f64],
        }
        .unwrap();
        let df_b = df! {
            "id" => &[1],
            "price" => &[100.05_f64],
        }
        .unwrap();
        let opts = DiffOptions {
            key_columns: vec!["id".into()],
            tolerance: Some(0.01),
        };

        let result = diff_keyed(&df_a, &df_b, &opts, test_source_a(), test_source_b()).unwrap();

        assert_eq!(result.modified.len(), 1);
        assert_eq!(result.modified[0].changes[0].column, "price");
    }

    /// With a tolerance set, NaN on both sides counts as no change.
    #[test]
    fn test_keyed_nan_handling() {
        let df_a = df! {
            "id" => &[1],
            "value" => &[f64::NAN],
        }
        .unwrap();
        let df_b = df! {
            "id" => &[1],
            "value" => &[f64::NAN],
        }
        .unwrap();
        let opts = DiffOptions {
            key_columns: vec!["id".into()],
            tolerance: Some(0.01),
        };

        let result = diff_keyed(&df_a, &df_b, &opts, test_source_a(), test_source_b()).unwrap();

        assert!(!result.has_differences(), "NaN vs NaN should be treated as equal");
    }

    // ---- diff_sheets entry point tests ----

    /// Without key columns `diff_sheets` takes the positional path
    /// (the result carries no key columns).
    #[test]
    fn test_diff_sheets_positional() {
        let df_a = df! {
            "name" => &["Alice", "Bob"],
        }
        .unwrap();
        let df_b = df! {
            "name" => &["Alice", "Charlie"],
        }
        .unwrap();
        let opts = DiffOptions::default(); // No key columns → positional.

        let result = diff_sheets(&df_a, &df_b, &opts, test_source_a(), test_source_b()).unwrap();

        assert!(result.key_columns.is_empty());
        assert_eq!(result.removed.len(), 1);
        assert_eq!(result.added.len(), 1);
    }

    /// With key columns `diff_sheets` takes the keyed path
    /// (the result echoes the key columns and reports a modification).
    #[test]
    fn test_diff_sheets_keyed() {
        let df_a = df! {
            "id" => &[1, 2],
            "score" => &[100, 200],
        }
        .unwrap();
        let df_b = df! {
            "id" => &[1, 2],
            "score" => &[100, 250],
        }
        .unwrap();
        let opts = DiffOptions {
            key_columns: vec!["id".into()],
            tolerance: None,
        };

        let result = diff_sheets(&df_a, &df_b, &opts, test_source_a(), test_source_b()).unwrap();

        assert_eq!(result.key_columns, vec!["id"]);
        assert_eq!(result.modified.len(), 1);
    }
}
	xl-cli-tools CLI tools for viewing and editing Excel files
	Log \| Files \| Refs \| README \| LICENSE