dtdiff.rs (11954B)
1 use std::io::IsTerminal; 2 use std::path::PathBuf; 3 use std::process; 4 5 use anyhow::{Result, bail}; 6 use clap::Parser; 7 use serde_json::{Map, Value, json}; 8 9 use dtcore::diff::{DiffOptions, DiffResult, SheetSource}; 10 use dtcore::format::{detect_format, Format}; 11 use dtcore::reader::{ReadOptions, read_file}; 12 13 #[derive(Parser)] 14 #[command( 15 name = "dtdiff", 16 about = "Compare two tabular data files and show differences", 17 version 18 )] 19 struct Args { 20 /// First file to compare 21 file_a: String, 22 23 /// Second file to compare 24 file_b: String, 25 26 /// Override format detection (applies to both files) 27 #[arg(long, value_name = "FMT")] 28 format: Option<String>, 29 30 /// Select sheet by name or index (Excel only) 31 #[arg(long, value_name = "NAME|INDEX")] 32 sheet: Option<String>, 33 34 /// Key column(s) for matched comparison (comma-separated) 35 #[arg(long, value_name = "COL")] 36 key: Option<String>, 37 38 /// Float comparison tolerance (default: 1e-10) 39 #[arg(long)] 40 tolerance: Option<f64>, 41 42 /// Output as JSON 43 #[arg(long)] 44 json: bool, 45 46 /// Output as CSV 47 #[arg(long)] 48 csv: bool, 49 50 /// Disable colored output 51 #[arg(long)] 52 no_color: bool, 53 } 54 55 // --------------------------------------------------------------------------- 56 // Output formatters (ported from xldiff.rs) 57 // --------------------------------------------------------------------------- 58 59 /// Format a row's values inline: `Name: "Alice" Score: "90"` 60 fn format_row_inline(headers: &[String], values: &[String]) -> String { 61 headers 62 .iter() 63 .zip(values.iter()) 64 .map(|(h, v)| format!("{}: \"{}\"", h, v)) 65 .collect::<Vec<_>>() 66 .join(" ") 67 } 68 69 /// Format diff result as colored (or plain) text output. 70 fn format_text(result: &DiffResult, color: bool) -> String { 71 if !result.has_differences() { 72 return "No differences found.\n".to_string(); 73 } 74 75 let (red, green, yellow, reset) = if color { 76 ("\x1b[31m", "\x1b[32m", "\x1b[33m", "\x1b[0m") 77 } else { 78 ("", "", "", "") 79 }; 80 81 let mut out = String::new(); 82 83 // Header 84 out.push_str(&format!( 85 "--- {} ({})\n+++ {} ({})\n\n", 86 result.source_a.sheet_name, 87 result.source_a.file_name, 88 result.source_b.sheet_name, 89 result.source_b.file_name, 90 )); 91 92 // Summary 93 out.push_str(&format!( 94 "Added: {} | Removed: {} | Modified: {}\n\n", 95 result.added.len(), 96 result.removed.len(), 97 result.modified.len(), 98 )); 99 100 // Removed rows 101 for row in &result.removed { 102 out.push_str(&format!( 103 "{}- {}{}", 104 red, 105 format_row_inline(&result.headers, &row.values), 106 reset, 107 )); 108 out.push('\n'); 109 } 110 111 // Added rows 112 for row in &result.added { 113 out.push_str(&format!( 114 "{}+ {}{}", 115 green, 116 format_row_inline(&result.headers, &row.values), 117 reset, 118 )); 119 out.push('\n'); 120 } 121 122 // Modified rows 123 for m in &result.modified { 124 let key_display: Vec<String> = result 125 .key_columns 126 .iter() 127 .zip(m.key.iter()) 128 .map(|(col, val)| format!("{}: \"{}\"", col, val)) 129 .collect(); 130 out.push_str(&format!( 131 "{}~ {}{}", 132 yellow, 133 key_display.join(" "), 134 reset, 135 )); 136 out.push('\n'); 137 for change in &m.changes { 138 out.push_str(&format!( 139 " {}: \"{}\" \u{2192} \"{}\"\n", 140 change.column, change.old_value, change.new_value, 141 )); 142 } 143 } 144 145 out 146 } 147 148 /// Format diff result as JSON. 149 fn format_json(result: &DiffResult) -> String { 150 let added: Vec<Value> = result 151 .added 152 .iter() 153 .map(|row| { 154 let mut map = Map::new(); 155 for (h, v) in result.headers.iter().zip(row.values.iter()) { 156 map.insert(h.clone(), Value::String(v.clone())); 157 } 158 Value::Object(map) 159 }) 160 .collect(); 161 162 let removed: Vec<Value> = result 163 .removed 164 .iter() 165 .map(|row| { 166 let mut map = Map::new(); 167 for (h, v) in result.headers.iter().zip(row.values.iter()) { 168 map.insert(h.clone(), Value::String(v.clone())); 169 } 170 Value::Object(map) 171 }) 172 .collect(); 173 174 let modified: Vec<Value> = result 175 .modified 176 .iter() 177 .map(|m| { 178 let mut key_map = Map::new(); 179 for (col, val) in result.key_columns.iter().zip(m.key.iter()) { 180 key_map.insert(col.clone(), Value::String(val.clone())); 181 } 182 let changes: Vec<Value> = m 183 .changes 184 .iter() 185 .map(|c| { 186 json!({ 187 "column": c.column, 188 "old": c.old_value, 189 "new": c.new_value, 190 }) 191 }) 192 .collect(); 193 json!({ 194 "key": Value::Object(key_map), 195 "changes": changes, 196 }) 197 }) 198 .collect(); 199 200 let output = json!({ 201 "added": added, 202 "removed": removed, 203 "modified": modified, 204 }); 205 206 serde_json::to_string_pretty(&output).unwrap() + "\n" 207 } 208 209 /// Quote a value per RFC 4180: if it contains comma, quote, or newline, wrap 210 /// in double quotes and escape any internal quotes by doubling them. 211 fn csv_quote(value: &str) -> String { 212 if value.contains(',') || value.contains('"') || value.contains('\n') { 213 format!("\"{}\"", value.replace('"', "\"\"")) 214 } else { 215 value.to_string() 216 } 217 } 218 219 /// Build a CSV row from a slice of values. 220 fn csv_row(values: &[String]) -> String { 221 values.iter().map(|v| csv_quote(v)).collect::<Vec<_>>().join(",") 222 } 223 224 /// Format diff result as CSV. 225 /// 226 /// Header: _status, col1, col2, ..., _old_col1, _old_col2, ... 227 /// Added rows: "added" + values + empty _old_ columns 228 /// Removed rows: "removed" + values + empty _old_ columns 229 /// Modified rows: "modified" + new values + old values in _old_ columns 230 fn format_csv_output(result: &DiffResult) -> String { 231 let mut out = String::new(); 232 233 // Build header 234 let mut header_parts: Vec<String> = vec!["_status".to_string()]; 235 for h in &result.headers { 236 header_parts.push(h.clone()); 237 } 238 for h in &result.headers { 239 header_parts.push(format!("_old_{}", h)); 240 } 241 out.push_str(&csv_row(&header_parts)); 242 out.push('\n'); 243 244 let empty_cols: Vec<String> = result.headers.iter().map(|_| String::new()).collect(); 245 246 // Removed rows 247 for row in &result.removed { 248 let mut parts: Vec<String> = vec!["removed".to_string()]; 249 parts.extend(row.values.iter().cloned()); 250 while parts.len() < 1 + result.headers.len() { 251 parts.push(String::new()); 252 } 253 parts.extend(empty_cols.iter().cloned()); 254 out.push_str(&csv_row(&parts)); 255 out.push('\n'); 256 } 257 258 // Added rows 259 for row in &result.added { 260 let mut parts: Vec<String> = vec!["added".to_string()]; 261 parts.extend(row.values.iter().cloned()); 262 while parts.len() < 1 + result.headers.len() { 263 parts.push(String::new()); 264 } 265 parts.extend(empty_cols.iter().cloned()); 266 out.push_str(&csv_row(&parts)); 267 out.push('\n'); 268 } 269 270 // Modified rows 271 for m in &result.modified { 272 let mut main_cols: Vec<String> = Vec::new(); 273 let mut old_cols: Vec<String> = Vec::new(); 274 275 for h in &result.headers { 276 if let Some(key_idx) = result.key_columns.iter().position(|k| k == h) { 277 main_cols.push(m.key.get(key_idx).cloned().unwrap_or_default()); 278 old_cols.push(String::new()); 279 } else if let Some(change) = m.changes.iter().find(|c| c.column == *h) { 280 main_cols.push(change.new_value.clone()); 281 old_cols.push(change.old_value.clone()); 282 } else { 283 // Unchanged non-key column — leave empty in both 284 main_cols.push(String::new()); 285 old_cols.push(String::new()); 286 } 287 } 288 289 let mut parts: Vec<String> = vec!["modified".to_string()]; 290 parts.extend(main_cols); 291 parts.extend(old_cols); 292 out.push_str(&csv_row(&parts)); 293 out.push('\n'); 294 } 295 296 out 297 } 298 299 // --------------------------------------------------------------------------- 300 // run / main 301 // --------------------------------------------------------------------------- 302 303 fn run(args: Args) -> Result<()> { 304 let path_a = PathBuf::from(&args.file_a); 305 let path_b = PathBuf::from(&args.file_b); 306 307 // Validate files exist 308 if !path_a.exists() { 309 bail!("file not found: {}", path_a.display()); 310 } 311 if !path_b.exists() { 312 bail!("file not found: {}", path_b.display()); 313 } 314 315 // Detect formats 316 let fmt_a = detect_format(&path_a, args.format.as_deref())?; 317 let fmt_b = detect_format(&path_b, args.format.as_deref())?; 318 319 // Enforce same-format constraint 320 if !fmt_a.same_family(fmt_b) { 321 bail!( 322 "files have incompatible formats: {:?} vs {:?}. Both files must use the same format family.", 323 fmt_a, 324 fmt_b 325 ); 326 } 327 328 // Build read options 329 let opts_a = ReadOptions { 330 sheet: args.sheet.clone(), 331 skip_rows: None, 332 separator: None, 333 }; 334 let opts_b = ReadOptions { 335 sheet: args.sheet.clone(), 336 skip_rows: None, 337 separator: None, 338 }; 339 340 // Read DataFrames 341 let df_a = read_file(&path_a, fmt_a, &opts_a)?; 342 let df_b = read_file(&path_b, fmt_b, &opts_b)?; 343 344 // Resolve key columns 345 let key_columns: Vec<String> = if let Some(ref key_str) = args.key { 346 key_str.split(',').map(|s| s.trim().to_string()).collect() 347 } else { 348 vec![] 349 }; 350 351 // Build source labels 352 let file_name_a = path_a 353 .file_name() 354 .map(|s| s.to_string_lossy().to_string()) 355 .unwrap_or_else(|| args.file_a.clone()); 356 let file_name_b = path_b 357 .file_name() 358 .map(|s| s.to_string_lossy().to_string()) 359 .unwrap_or_else(|| args.file_b.clone()); 360 361 // Use file name as "sheet name" for non-Excel formats; for Excel use the 362 // sheet name from opts (or a placeholder if none was specified). 363 let sheet_name_a = if fmt_a == Format::Excel { 364 args.sheet.clone().unwrap_or_else(|| file_name_a.clone()) 365 } else { 366 file_name_a.clone() 367 }; 368 let sheet_name_b = if fmt_b == Format::Excel { 369 args.sheet.clone().unwrap_or_else(|| file_name_b.clone()) 370 } else { 371 file_name_b.clone() 372 }; 373 374 let source_a = SheetSource { 375 file_name: file_name_a, 376 sheet_name: sheet_name_a, 377 }; 378 let source_b = SheetSource { 379 file_name: file_name_b, 380 sheet_name: sheet_name_b, 381 }; 382 383 let diff_opts = DiffOptions { 384 key_columns, 385 tolerance: args.tolerance, 386 }; 387 388 // Run diff 389 let result = dtcore::diff::diff_sheets(&df_a, &df_b, &diff_opts, source_a, source_b)?; 390 391 // TTY detection for color 392 let use_color = !args.no_color && std::io::stdout().is_terminal(); 393 394 // Format output: --json and --csv are mutually exclusive flags; default is text 395 let output = if args.json { 396 format_json(&result) 397 } else if args.csv { 398 format_csv_output(&result) 399 } else { 400 format_text(&result, use_color) 401 }; 402 403 print!("{}", output); 404 405 // Exit 1 if differences found (diff convention), 0 if identical 406 if result.has_differences() { 407 process::exit(1); 408 } 409 410 Ok(()) 411 } 412 413 fn main() { 414 let args = Args::parse(); 415 if let Err(err) = run(args) { 416 eprintln!("dtdiff: {err}"); 417 process::exit(2); 418 } 419 }