dtcat.rs (10301B)
1 use std::path::PathBuf; 2 use std::process; 3 4 use anyhow::{bail, Result}; 5 use clap::Parser; 6 7 use dtcore::format::{detect_format, parse_format_str, Format}; 8 use dtcore::writer::write_file; 9 use dtcore::formatter::{ 10 format_csv, format_data_table, format_describe, format_empty_sheet, format_head_tail, 11 format_header, format_schema, format_sheet_listing, 12 }; 13 use dtcore::metadata::SheetInfo; 14 use dtcore::reader::{read_file, read_file_info, ReadOptions}; 15 16 /// Default row threshold: show all rows if <= this many, otherwise head+tail 17 const DEFAULT_THRESHOLD: usize = 50; 18 /// Default head/tail row count when splitting 19 const DEFAULT_HEAD_TAIL: usize = 25; 20 21 #[derive(Parser)] 22 #[command( 23 name = "dtcat", 24 about = "View tabular data files in the terminal", 25 version 26 )] 27 struct Args { 28 /// File to view 29 file: String, 30 31 /// Override format detection (csv, tsv, parquet, arrow, json, ndjson, excel) 32 #[arg(long, value_name = "FMT")] 33 format: Option<String>, 34 35 /// Select sheet by name or 0-based index (Excel only) 36 #[arg(long, value_name = "NAME|INDEX")] 37 sheet: Option<String>, 38 39 /// Skip first N rows 40 #[arg(long, value_name = "N")] 41 skip: Option<usize>, 42 43 /// Show column names and types only 44 #[arg(long)] 45 schema: bool, 46 47 /// Show summary statistics 48 #[arg(long)] 49 describe: bool, 50 51 /// Show first N rows 52 #[arg(long, value_name = "N")] 53 head: Option<usize>, 54 55 /// Show last N rows 56 #[arg(long, value_name = "N")] 57 tail: Option<usize>, 58 59 /// Output as CSV instead of markdown table 60 #[arg(long)] 61 csv: bool, 62 63 /// Show all rows (override adaptive row limit) 64 #[arg(long)] 65 all: bool, 66 67 /// Randomly sample N rows 68 #[arg(long, value_name = "N")] 69 sample: Option<usize>, 70 71 /// Show file metadata only 72 #[arg(long)] 73 info: bool, 74 75 /// Convert to format (csv, tsv, parquet, arrow, json, ndjson) 76 #[arg(long, value_name = "FORMAT")] 77 convert: Option<String>, 78 79 /// Output file path (required for binary formats with --convert) 80 #[arg(short = 'o', value_name = "PATH")] 81 output: Option<String>, 82 } 83 84 fn validate_args(args: &Args) -> Result<()> { 85 if args.schema && args.describe { 86 bail!("--schema and --describe are mutually exclusive"); 87 } 88 if args.sample.is_some() { 89 if args.schema { 90 bail!("--sample and --schema are mutually exclusive"); 91 } 92 if args.describe { 93 bail!("--sample and --describe are mutually exclusive"); 94 } 95 if args.info { 96 bail!("--sample and --info are mutually exclusive"); 97 } 98 if args.head.is_some() { 99 bail!("--sample and --head are mutually exclusive"); 100 } 101 if args.tail.is_some() { 102 bail!("--sample and --tail are mutually exclusive"); 103 } 104 if args.all { 105 bail!("--sample and --all are mutually exclusive"); 106 } 107 } 108 if args.convert.is_some() 109 && (args.schema || args.describe || args.info || args.csv 110 || args.head.is_some() || args.tail.is_some() 111 || args.all || args.sample.is_some()) 112 { 113 bail!("--convert is mutually exclusive with display flags"); 114 } 115 Ok(()) 116 } 117 118 /// Build a synthetic SheetInfo for non-Excel formats from a loaded DataFrame. 119 fn sheet_info_from_df(file_name: &str, df: &polars::prelude::DataFrame) -> SheetInfo { 120 SheetInfo { 121 name: file_name.to_string(), 122 // rows includes the header row conceptually; formatter subtracts 1 123 rows: df.height() + 1, 124 cols: df.width(), 125 } 126 } 127 128 fn run(args: Args) -> Result<()> { 129 validate_args(&args)?; 130 131 let path = PathBuf::from(&args.file); 132 if !path.exists() { 133 bail!("file not found: {}", path.display()); 134 } 135 136 let fmt = detect_format(&path, args.format.as_deref())?; 137 138 let file_name = path 139 .file_name() 140 .map(|s| s.to_string_lossy().to_string()) 141 .unwrap_or_else(|| args.file.clone()); 142 143 // --info: show metadata and exit 144 if args.info { 145 let info = read_file_info(&path, fmt)?; 146 print!("{}", format_header(&file_name, &info)); 147 148 // For Excel, also list sheet names and dimensions 149 if fmt == Format::Excel && !info.sheets.is_empty() { 150 println!(); 151 for sheet in &info.sheets { 152 let data_rows = if sheet.rows == 0 { 0 } else { sheet.rows - 1 }; 153 println!(" {} ({} rows x {} cols)", sheet.name, data_rows, sheet.cols); 154 } 155 } 156 return Ok(()); 157 } 158 159 // Excel with multiple sheets and no --sheet: show sheet listing 160 if fmt == Format::Excel && args.sheet.is_none() { 161 let info = read_file_info(&path, fmt)?; 162 if info.sheets.len() > 1 { 163 // Load a small sample of each sheet to display schemas 164 let mut schemas: Vec<(SheetInfo, polars::prelude::DataFrame)> = Vec::new(); 165 for sheet in &info.sheets { 166 let opts = ReadOptions { 167 sheet: Some(sheet.name.clone()), 168 skip_rows: args.skip, 169 separator: None, 170 }; 171 match read_file(&path, fmt, &opts) { 172 Ok(df) => schemas.push((sheet.clone(), df)), 173 Err(_) => { 174 // Empty or unreadable sheet 175 schemas.push(( 176 SheetInfo { 177 name: sheet.name.clone(), 178 rows: 0, 179 cols: 0, 180 }, 181 polars::prelude::DataFrame::default(), 182 )); 183 } 184 } 185 } 186 let schema_refs: Vec<(&SheetInfo, polars::prelude::DataFrame)> = schemas 187 .iter() 188 .map(|(s, df)| (s, df.clone())) 189 .collect(); 190 print!( 191 "{}", 192 format_sheet_listing(&file_name, &info, &schema_refs) 193 ); 194 return Ok(()); 195 } 196 } 197 198 // Build read options 199 let opts = ReadOptions { 200 sheet: args.sheet.clone(), 201 skip_rows: args.skip, 202 separator: None, 203 }; 204 205 let df = read_file(&path, fmt, &opts)?; 206 207 // Determine sheet info for display 208 let sheet = if fmt == Format::Excel { 209 // Try to get the sheet name we actually read 210 let info = read_file_info(&path, fmt)?; 211 if let Some(sheet_arg) = &args.sheet { 212 // Find the matching sheet in info 213 let matched = info.sheets.iter().find(|s| { 214 &s.name == sheet_arg 215 || sheet_arg 216 .parse::<usize>() 217 .map(|idx| { 218 info.sheets 219 .iter() 220 .position(|x| x.name == s.name) 221 .map(|i| i == idx) 222 .unwrap_or(false) 223 }) 224 .unwrap_or(false) 225 }); 226 if let Some(s) = matched { 227 s.clone() 228 } else { 229 // Fallback: build from df 230 SheetInfo { 231 name: sheet_arg.clone(), 232 rows: df.height() + 1, 233 cols: df.width(), 234 } 235 } 236 } else if let Some(first) = info.sheets.first() { 237 first.clone() 238 } else { 239 sheet_info_from_df(&file_name, &df) 240 } 241 } else { 242 sheet_info_from_df(&file_name, &df) 243 }; 244 245 // Apply sampling if requested (before any display mode) 246 let df = if let Some(n) = args.sample { 247 if n >= df.height() { 248 df 249 } else { 250 df.sample_n_literal(n, false, false, None)? 251 } 252 } else { 253 df 254 }; 255 256 // --convert: write to a different format and exit 257 if let Some(ref convert_str) = args.convert { 258 let target_fmt = parse_format_str(convert_str)?; 259 let out_path = args.output.as_deref().map(std::path::Path::new); 260 let mut df = df; 261 write_file(&mut df, out_path, target_fmt)?; 262 return Ok(()); 263 } 264 265 // Handle empty DataFrame 266 if df.is_empty() { 267 print!("{}", format_empty_sheet(&sheet)); 268 return Ok(()); 269 } 270 271 // --schema 272 if args.schema { 273 print!("{}", format_schema(&sheet, &df)); 274 return Ok(()); 275 } 276 277 // --describe 278 if args.describe { 279 print!("{}", format_describe(&df)); 280 return Ok(()); 281 } 282 283 // --csv output mode 284 if args.csv { 285 print!("{}", format_csv(&df)); 286 return Ok(()); 287 } 288 289 // Determine what to display 290 let output = match (args.head, args.tail) { 291 (Some(h), Some(t)) => { 292 // Both specified: show head + tail with omission line 293 format_head_tail(&df, h, t) 294 } 295 (Some(h), None) => { 296 // Only --head: slice the DataFrame and show all 297 let sliced = df.head(Some(h)); 298 format_data_table(&sliced) 299 } 300 (None, Some(t)) => { 301 // Only --tail: slice and show all 302 let sliced = df.tail(Some(t)); 303 format_data_table(&sliced) 304 } 305 (None, None) => { 306 // Default: show all if <= threshold or --all, otherwise head+tail 307 if args.all || df.height() <= DEFAULT_THRESHOLD { 308 format_data_table(&df) 309 } else { 310 format_head_tail(&df, DEFAULT_HEAD_TAIL, DEFAULT_HEAD_TAIL) 311 } 312 } 313 }; 314 315 print!("{}", output); 316 Ok(()) 317 } 318 319 fn main() { 320 let args = Args::parse(); 321 match run(args) { 322 Ok(()) => {} 323 Err(err) => { 324 // Check if this is an arg validation error (exit 2) vs runtime error (exit 1) 325 let msg = err.to_string(); 326 if msg.contains("mutually exclusive") 327 || msg.contains("invalid") 328 || msg.contains("unknown format") 329 { 330 eprintln!("dtcat: {err}"); 331 process::exit(2); 332 } else { 333 eprintln!("dtcat: {err}"); 334 process::exit(1); 335 } 336 } 337 } 338 }