cell.rs (13880B)
1 use std::fmt; 2 3 // ── Data types ──────────────────────────────────────────────────────────────── 4 5 /// A parsed Excel cell reference (e.g. "B10"). 6 /// `col` and `row` are 0-based internally. 7 #[derive(Debug, PartialEq)] 8 pub struct CellRef { 9 pub col: u32, 10 pub row: u32, 11 /// Canonical upper-case label, e.g. "B10" 12 pub label: String, 13 } 14 15 /// A typed cell value. 16 #[derive(Debug, PartialEq)] 17 pub enum CellValue { 18 String(String), 19 Integer(i64), 20 Float(f64), 21 Bool(bool), 22 Date { year: i32, month: u32, day: u32 }, 23 Empty, 24 } 25 26 /// A complete cell assignment: which cell gets which value. 27 #[derive(Debug, PartialEq)] 28 pub struct CellAssignment { 29 pub cell: CellRef, 30 pub value: CellValue, 31 } 32 33 // ── Display ─────────────────────────────────────────────────────────────────── 34 35 impl fmt::Display for CellRef { 36 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 37 write!(f, "{}", self.label) 38 } 39 } 40 41 // ── Column helpers ──────────────────────────────────────────────────────────── 42 43 /// Convert a 0-based column index to its Excel column letters (e.g. 0→"A", 25→"Z", 26→"AA"). 44 fn col_to_letters(mut col: u32) -> String { 45 let mut letters = Vec::new(); 46 loop { 47 letters.push((b'A' + (col % 26) as u8) as char); 48 if col < 26 { 49 break; 50 } 51 col = col / 26 - 1; 52 } 53 letters.iter().rev().collect() 54 } 55 56 /// Parse the alphabetic column prefix of an A1-style reference. 57 /// Returns `(0-based column index, remaining string slice)` or an error. 58 fn parse_col_part(s: &str) -> Result<(u32, &str), String> { 59 let upper = s.to_ascii_uppercase(); 60 let alpha_len = upper.chars().take_while(|c| c.is_ascii_alphabetic()).count(); 61 if alpha_len == 0 { 62 return Err(format!("no column letters found in '{}'", s)); 63 } 64 let col_str = &upper[..alpha_len]; 65 let rest = &s[alpha_len..]; 66 67 // Convert letters to 0-based index (Excel "bijective base-26") 68 let mut col: u32 = 0; 69 for ch in col_str.chars() { 70 col = col * 26 + (ch as u32 - 'A' as u32 + 1); 71 } 72 col -= 1; // convert to 0-based 73 74 // Max column is XFD (0-based index 16383) 75 if col > 16383 { 76 return Err(format!("column '{}' exceeds maximum XFD", col_str)); 77 } 78 79 Ok((col, rest)) 80 } 81 82 // ── Public parsing API ──────────────────────────────────────────────────────── 83 84 /// Parse an A1-style cell reference string into a [`CellRef`]. 85 /// 86 /// - Column letters are case-insensitive. 87 /// - Row numbers are 1-based in the input, stored 0-based. 88 /// - Maximum column is XFD (index 16383); maximum row is 1 048 576. 89 pub fn parse_cell_ref(s: &str) -> Result<CellRef, String> { 90 let s = s.trim(); 91 if s.is_empty() { 92 return Err("cell reference is empty".to_string()); 93 } 94 95 let (col, rest) = parse_col_part(s)?; 96 97 if rest.is_empty() { 98 return Err(format!("no row number found in '{}'", s)); 99 } 100 101 let row_1based: u32 = rest 102 .parse() 103 .map_err(|_| format!("invalid row number '{}' in '{}'", rest, s))?; 104 105 if row_1based == 0 { 106 return Err(format!("row number must be >= 1, got 0 in '{}'", s)); 107 } 108 if row_1based > 1_048_576 { 109 return Err(format!( 110 "row {} exceeds maximum 1048576 in '{}'", 111 row_1based, s 112 )); 113 } 114 115 let row = row_1based - 1; // convert to 0-based 116 let label = format!("{}{}", col_to_letters(col), row_1based); 117 118 Ok(CellRef { col, row, label }) 119 } 120 121 /// Infer a [`CellValue`] from a raw string, applying automatic type detection. 122 /// 123 /// Detection order: 124 /// 1. Empty string → [`CellValue::Empty`] 125 /// 2. `"true"` / `"false"` (case-insensitive) → [`CellValue::Bool`] 126 /// 3. Valid `i64` → [`CellValue::Integer`] 127 /// 4. Valid `f64` → [`CellValue::Float`] 128 /// 5. `YYYY-MM-DD` → [`CellValue::Date`] 129 /// 6. Everything else → [`CellValue::String`] 130 pub fn infer_value(s: &str) -> CellValue { 131 if s.is_empty() { 132 return CellValue::Empty; 133 } 134 135 // Bool 136 match s.to_ascii_lowercase().as_str() { 137 "true" => return CellValue::Bool(true), 138 "false" => return CellValue::Bool(false), 139 _ => {} 140 } 141 142 // Integer (must not contain a '.' to avoid "1.0" being parsed as integer) 143 if !s.contains('.') { 144 if let Ok(i) = s.parse::<i64>() { 145 return CellValue::Integer(i); 146 } 147 } 148 149 // Float 150 if let Ok(f) = s.parse::<f64>() { 151 return CellValue::Float(f); 152 } 153 154 // Date: YYYY-MM-DD 155 if let Some(date) = try_parse_date(s) { 156 return date; 157 } 158 159 CellValue::String(s.to_string()) 160 } 161 162 fn try_parse_date(s: &str) -> Option<CellValue> { 163 // Strict format: exactly YYYY-MM-DD 164 let parts: Vec<&str> = s.splitn(3, '-').collect(); 165 if parts.len() != 3 { 166 return None; 167 } 168 // Lengths: 4-2-2 169 if parts[0].len() != 4 || parts[1].len() != 2 || parts[2].len() != 2 { 170 return None; 171 } 172 // All must be ASCII digits 173 if !parts.iter().all(|p| p.chars().all(|c| c.is_ascii_digit())) { 174 return None; 175 } 176 let year: i32 = parts[0].parse().ok()?; 177 let month: u32 = parts[1].parse().ok()?; 178 let day: u32 = parts[2].parse().ok()?; 179 if month < 1 || month > 12 || day < 1 || day > 31 { 180 return None; 181 } 182 Some(CellValue::Date { year, month, day }) 183 } 184 185 /// Force a value to a specific type based on a tag. 186 /// 187 /// Supported tags: `str`, `num`, `bool`, `date`. 188 fn coerce_value(raw: &str, tag: &str) -> Result<CellValue, String> { 189 match tag { 190 "str" => Ok(CellValue::String(raw.to_string())), 191 "num" => { 192 // Try integer first, then float 193 if !raw.contains('.') { 194 if let Ok(i) = raw.parse::<i64>() { 195 return Ok(CellValue::Integer(i)); 196 } 197 } 198 raw.parse::<f64>() 199 .map(CellValue::Float) 200 .map_err(|_| format!("cannot coerce '{}' to num", raw)) 201 } 202 "bool" => match raw.to_ascii_lowercase().as_str() { 203 "true" | "1" | "yes" => Ok(CellValue::Bool(true)), 204 "false" | "0" | "no" => Ok(CellValue::Bool(false)), 205 _ => Err(format!("cannot coerce '{}' to bool", raw)), 206 }, 207 "date" => try_parse_date(raw) 208 .ok_or_else(|| format!("cannot coerce '{}' to date (expected YYYY-MM-DD)", raw)), 209 other => Err(format!("unknown type tag ':{}'", other)), 210 } 211 } 212 213 /// Parse an assignment string such as `"A1=42"` or `"B2:str=07401"`. 214 /// 215 /// Format: `<cell_ref>[:<tag>]=<value>` 216 /// - `<tag>` is optional; if absent, the value type is inferred automatically. 217 /// - The split is on the **first** `=` only, so values may contain `=`. 218 pub fn parse_assignment(s: &str) -> Result<CellAssignment, String> { 219 let eq_pos = s 220 .find('=') 221 .ok_or_else(|| format!("no '=' found in assignment '{}'", s))?; 222 223 let lhs = &s[..eq_pos]; 224 let raw_value = &s[eq_pos + 1..]; 225 226 // Check for optional :tag in LHS 227 let (cell_str, tag_opt) = if let Some(colon_pos) = lhs.rfind(':') { 228 let tag = &lhs[colon_pos + 1..]; 229 let cell = &lhs[..colon_pos]; 230 (cell, Some(tag)) 231 } else { 232 (lhs, None) 233 }; 234 235 let cell = parse_cell_ref(cell_str)?; 236 237 let value = match tag_opt { 238 Some(tag) => coerce_value(raw_value, tag)?, 239 None => infer_value(raw_value), 240 }; 241 242 Ok(CellAssignment { cell, value }) 243 } 244 245 // ── Tests ───────────────────────────────────────────────────────────────────── 246 247 #[cfg(test)] 248 mod tests { 249 use super::*; 250 251 // ── parse_cell_ref ──────────────────────────────────────────────────────── 252 253 #[test] 254 fn test_parse_a1() { 255 let r = parse_cell_ref("A1").unwrap(); 256 assert_eq!(r.col, 0); 257 assert_eq!(r.row, 0); 258 assert_eq!(r.label, "A1"); 259 } 260 261 #[test] 262 fn test_parse_z1_col25() { 263 let r = parse_cell_ref("Z1").unwrap(); 264 assert_eq!(r.col, 25); 265 assert_eq!(r.row, 0); 266 } 267 268 #[test] 269 fn test_parse_aa1_col26() { 270 let r = parse_cell_ref("AA1").unwrap(); 271 assert_eq!(r.col, 26); 272 assert_eq!(r.row, 0); 273 } 274 275 #[test] 276 fn test_parse_case_insensitive() { 277 let lower = parse_cell_ref("b5").unwrap(); 278 let upper = parse_cell_ref("B5").unwrap(); 279 assert_eq!(lower, upper); 280 assert_eq!(lower.label, "B5"); 281 } 282 283 #[test] 284 fn test_parse_b10_row9() { 285 let r = parse_cell_ref("B10").unwrap(); 286 assert_eq!(r.col, 1); 287 assert_eq!(r.row, 9); 288 } 289 290 #[test] 291 fn test_parse_invalid_no_row() { 292 assert!(parse_cell_ref("A").is_err()); 293 } 294 295 #[test] 296 fn test_parse_invalid_no_col() { 297 assert!(parse_cell_ref("123").is_err()); 298 } 299 300 #[test] 301 fn test_parse_invalid_empty() { 302 assert!(parse_cell_ref("").is_err()); 303 } 304 305 #[test] 306 fn test_parse_invalid_row_zero() { 307 assert!(parse_cell_ref("A0").is_err()); 308 } 309 310 #[test] 311 fn test_parse_invalid_row_too_large() { 312 assert!(parse_cell_ref("A1048577").is_err()); 313 } 314 315 #[test] 316 fn test_parse_max_row() { 317 let r = parse_cell_ref("A1048576").unwrap(); 318 assert_eq!(r.row, 1_048_575); 319 } 320 321 // ── infer_value ─────────────────────────────────────────────────────────── 322 323 #[test] 324 fn test_infer_integer() { 325 assert_eq!(infer_value("42"), CellValue::Integer(42)); 326 } 327 328 #[test] 329 fn test_infer_negative_integer() { 330 assert_eq!(infer_value("-7"), CellValue::Integer(-7)); 331 } 332 333 #[test] 334 fn test_infer_float() { 335 assert_eq!(infer_value("3.14"), CellValue::Float(3.14)); 336 } 337 338 #[test] 339 fn test_infer_bool_true() { 340 assert_eq!(infer_value("true"), CellValue::Bool(true)); 341 assert_eq!(infer_value("TRUE"), CellValue::Bool(true)); 342 } 343 344 #[test] 345 fn test_infer_bool_false() { 346 assert_eq!(infer_value("false"), CellValue::Bool(false)); 347 assert_eq!(infer_value("False"), CellValue::Bool(false)); 348 } 349 350 #[test] 351 fn test_infer_date() { 352 assert_eq!( 353 infer_value("2024-03-15"), 354 CellValue::Date { year: 2024, month: 3, day: 15 } 355 ); 356 } 357 358 #[test] 359 fn test_infer_string() { 360 assert_eq!( 361 infer_value("hello world"), 362 CellValue::String("hello world".to_string()) 363 ); 364 } 365 366 #[test] 367 fn test_infer_leading_zero_becomes_integer() { 368 // "07401" has no dot → parsed as i64 if it parses; but leading zeros parse fine as i64 369 // The spec says "leading-zero-becomes-integer" — 07401 → Integer(7401) 370 assert_eq!(infer_value("07401"), CellValue::Integer(7401)); 371 } 372 373 #[test] 374 fn test_infer_empty() { 375 assert_eq!(infer_value(""), CellValue::Empty); 376 } 377 378 // ── parse_assignment ────────────────────────────────────────────────────── 379 380 #[test] 381 fn test_assignment_basic() { 382 let a = parse_assignment("A1=42").unwrap(); 383 assert_eq!(a.cell, parse_cell_ref("A1").unwrap()); 384 assert_eq!(a.value, CellValue::Integer(42)); 385 } 386 387 #[test] 388 fn test_assignment_with_str_tag() { 389 let a = parse_assignment("B2:str=07401").unwrap(); 390 assert_eq!(a.cell, parse_cell_ref("B2").unwrap()); 391 assert_eq!(a.value, CellValue::String("07401".to_string())); 392 } 393 394 #[test] 395 fn test_assignment_no_equals_error() { 396 assert!(parse_assignment("A1").is_err()); 397 } 398 399 #[test] 400 fn test_assignment_empty_value() { 401 let a = parse_assignment("C3=").unwrap(); 402 assert_eq!(a.value, CellValue::Empty); 403 } 404 405 #[test] 406 fn test_assignment_string_with_spaces() { 407 let a = parse_assignment("D4=hello world").unwrap(); 408 assert_eq!(a.value, CellValue::String("hello world".to_string())); 409 } 410 411 #[test] 412 fn test_assignment_value_contains_equals() { 413 // Split on first '=' only — value may contain '=' 414 let a = parse_assignment("E5=a=b").unwrap(); 415 assert_eq!(a.value, CellValue::String("a=b".to_string())); 416 } 417 418 #[test] 419 fn test_assignment_num_tag() { 420 let a = parse_assignment("A1:num=3.14").unwrap(); 421 assert_eq!(a.value, CellValue::Float(3.14)); 422 } 423 424 #[test] 425 fn test_assignment_bool_tag() { 426 let a = parse_assignment("A1:bool=true").unwrap(); 427 assert_eq!(a.value, CellValue::Bool(true)); 428 } 429 430 #[test] 431 fn test_assignment_date_tag() { 432 let a = parse_assignment("A1:date=2025-01-01").unwrap(); 433 assert_eq!(a.value, CellValue::Date { year: 2025, month: 1, day: 1 }); 434 } 435 436 // ── Display ─────────────────────────────────────────────────────────────── 437 438 #[test] 439 fn test_display() { 440 let r = parse_cell_ref("C7").unwrap(); 441 assert_eq!(format!("{}", r), "C7"); 442 } 443 }