dt-cli-tools

CLI tools for viewing, filtering, and comparing tabular data files
Log | Files | Refs | README | LICENSE

dtcat.rs (10301B)


      1 use std::path::PathBuf;
      2 use std::process;
      3 
      4 use anyhow::{bail, Result};
      5 use clap::Parser;
      6 
      7 use dtcore::format::{detect_format, parse_format_str, Format};
      8 use dtcore::writer::write_file;
      9 use dtcore::formatter::{
     10     format_csv, format_data_table, format_describe, format_empty_sheet, format_head_tail,
     11     format_header, format_schema, format_sheet_listing,
     12 };
     13 use dtcore::metadata::SheetInfo;
     14 use dtcore::reader::{read_file, read_file_info, ReadOptions};
     15 
/// Default row threshold: show all rows if <= this many, otherwise head+tail.
///
/// Only consulted by `run` when neither `--head` nor `--tail` is given; `--all`
/// bypasses the comparison entirely.
const DEFAULT_THRESHOLD: usize = 50;
/// Default head/tail row count when splitting.
///
/// `run` passes this value as both the head and the tail size once the table is
/// taller than `DEFAULT_THRESHOLD`.
const DEFAULT_HEAD_TAIL: usize = 25;
     20 
// Command-line interface of `dtcat`, parsed by clap's derive API.
//
// NOTE: the `///` comments on the fields below are picked up by clap as the
// per-option help text, so editing them changes the `--help` output. Keep
// maintainer-only remarks in plain `//` comments such as this one.
//
// Flag combinations that clap itself does not reject are checked separately in
// `validate_args`.
#[derive(Parser)]
#[command(
    name = "dtcat",
    about = "View tabular data files in the terminal",
    version
)]
struct Args {
    /// File to view
    file: String,

    /// Override format detection (csv, tsv, parquet, arrow, json, ndjson, excel)
    // Forwarded to `detect_format` as an optional override string.
    #[arg(long, value_name = "FMT")]
    format: Option<String>,

    /// Select sheet by name or 0-based index (Excel only)
    // Kept as a raw string; `run` passes it to the reader unchanged and also
    // tries it both as a name and as an index when building the display header.
    #[arg(long, value_name = "NAME|INDEX")]
    sheet: Option<String>,

    /// Skip first N rows
    // Forwarded to the reader as `ReadOptions::skip_rows`.
    #[arg(long, value_name = "N")]
    skip: Option<usize>,

    /// Show column names and types only
    #[arg(long)]
    schema: bool,

    /// Show summary statistics
    #[arg(long)]
    describe: bool,

    /// Show first N rows
    #[arg(long, value_name = "N")]
    head: Option<usize>,

    /// Show last N rows
    #[arg(long, value_name = "N")]
    tail: Option<usize>,

    /// Output as CSV instead of markdown table
    // In `run` this mode prints the whole (possibly sampled) frame; the
    // `--head` / `--tail` values are not consulted on that path.
    #[arg(long)]
    csv: bool,

    /// Show all rows (override adaptive row limit)
    #[arg(long)]
    all: bool,

    /// Randomly sample N rows
    #[arg(long, value_name = "N")]
    sample: Option<usize>,

    /// Show file metadata only
    #[arg(long)]
    info: bool,

    /// Convert to format (csv, tsv, parquet, arrow, json, ndjson)
    #[arg(long, value_name = "FORMAT")]
    convert: Option<String>,

    /// Output file path (required for binary formats with --convert)
    // Short flag only (`-o`); `run` reads it solely on the `--convert` path.
    #[arg(short = 'o', value_name = "PATH")]
    output: Option<String>,
}
     83 
     84 fn validate_args(args: &Args) -> Result<()> {
     85     if args.schema && args.describe {
     86         bail!("--schema and --describe are mutually exclusive");
     87     }
     88     if args.sample.is_some() {
     89         if args.schema {
     90             bail!("--sample and --schema are mutually exclusive");
     91         }
     92         if args.describe {
     93             bail!("--sample and --describe are mutually exclusive");
     94         }
     95         if args.info {
     96             bail!("--sample and --info are mutually exclusive");
     97         }
     98         if args.head.is_some() {
     99             bail!("--sample and --head are mutually exclusive");
    100         }
    101         if args.tail.is_some() {
    102             bail!("--sample and --tail are mutually exclusive");
    103         }
    104         if args.all {
    105             bail!("--sample and --all are mutually exclusive");
    106         }
    107     }
    108     if args.convert.is_some()
    109         && (args.schema || args.describe || args.info || args.csv
    110             || args.head.is_some() || args.tail.is_some()
    111             || args.all || args.sample.is_some())
    112     {
    113         bail!("--convert is mutually exclusive with display flags");
    114     }
    115     Ok(())
    116 }
    117 
    118 /// Build a synthetic SheetInfo for non-Excel formats from a loaded DataFrame.
    119 fn sheet_info_from_df(file_name: &str, df: &polars::prelude::DataFrame) -> SheetInfo {
    120     SheetInfo {
    121         name: file_name.to_string(),
    122         // rows includes the header row conceptually; formatter subtracts 1
    123         rows: df.height() + 1,
    124         cols: df.width(),
    125     }
    126 }
    127 
    128 fn run(args: Args) -> Result<()> {
    129     validate_args(&args)?;
    130 
    131     let path = PathBuf::from(&args.file);
    132     if !path.exists() {
    133         bail!("file not found: {}", path.display());
    134     }
    135 
    136     let fmt = detect_format(&path, args.format.as_deref())?;
    137 
    138     let file_name = path
    139         .file_name()
    140         .map(|s| s.to_string_lossy().to_string())
    141         .unwrap_or_else(|| args.file.clone());
    142 
    143     // --info: show metadata and exit
    144     if args.info {
    145         let info = read_file_info(&path, fmt)?;
    146         print!("{}", format_header(&file_name, &info));
    147 
    148         // For Excel, also list sheet names and dimensions
    149         if fmt == Format::Excel && !info.sheets.is_empty() {
    150             println!();
    151             for sheet in &info.sheets {
    152                 let data_rows = if sheet.rows == 0 { 0 } else { sheet.rows - 1 };
    153                 println!("  {} ({} rows x {} cols)", sheet.name, data_rows, sheet.cols);
    154             }
    155         }
    156         return Ok(());
    157     }
    158 
    159     // Excel with multiple sheets and no --sheet: show sheet listing
    160     if fmt == Format::Excel && args.sheet.is_none() {
    161         let info = read_file_info(&path, fmt)?;
    162         if info.sheets.len() > 1 {
    163             // Load a small sample of each sheet to display schemas
    164             let mut schemas: Vec<(SheetInfo, polars::prelude::DataFrame)> = Vec::new();
    165             for sheet in &info.sheets {
    166                 let opts = ReadOptions {
    167                     sheet: Some(sheet.name.clone()),
    168                     skip_rows: args.skip,
    169                     separator: None,
    170                 };
    171                 match read_file(&path, fmt, &opts) {
    172                     Ok(df) => schemas.push((sheet.clone(), df)),
    173                     Err(_) => {
    174                         // Empty or unreadable sheet
    175                         schemas.push((
    176                             SheetInfo {
    177                                 name: sheet.name.clone(),
    178                                 rows: 0,
    179                                 cols: 0,
    180                             },
    181                             polars::prelude::DataFrame::default(),
    182                         ));
    183                     }
    184                 }
    185             }
    186             let schema_refs: Vec<(&SheetInfo, polars::prelude::DataFrame)> = schemas
    187                 .iter()
    188                 .map(|(s, df)| (s, df.clone()))
    189                 .collect();
    190             print!(
    191                 "{}",
    192                 format_sheet_listing(&file_name, &info, &schema_refs)
    193             );
    194             return Ok(());
    195         }
    196     }
    197 
    198     // Build read options
    199     let opts = ReadOptions {
    200         sheet: args.sheet.clone(),
    201         skip_rows: args.skip,
    202         separator: None,
    203     };
    204 
    205     let df = read_file(&path, fmt, &opts)?;
    206 
    207     // Determine sheet info for display
    208     let sheet = if fmt == Format::Excel {
    209         // Try to get the sheet name we actually read
    210         let info = read_file_info(&path, fmt)?;
    211         if let Some(sheet_arg) = &args.sheet {
    212             // Find the matching sheet in info
    213             let matched = info.sheets.iter().find(|s| {
    214                 &s.name == sheet_arg
    215                     || sheet_arg
    216                         .parse::<usize>()
    217                         .map(|idx| {
    218                             info.sheets
    219                                 .iter()
    220                                 .position(|x| x.name == s.name)
    221                                 .map(|i| i == idx)
    222                                 .unwrap_or(false)
    223                         })
    224                         .unwrap_or(false)
    225             });
    226             if let Some(s) = matched {
    227                 s.clone()
    228             } else {
    229                 // Fallback: build from df
    230                 SheetInfo {
    231                     name: sheet_arg.clone(),
    232                     rows: df.height() + 1,
    233                     cols: df.width(),
    234                 }
    235             }
    236         } else if let Some(first) = info.sheets.first() {
    237             first.clone()
    238         } else {
    239             sheet_info_from_df(&file_name, &df)
    240         }
    241     } else {
    242         sheet_info_from_df(&file_name, &df)
    243     };
    244 
    245     // Apply sampling if requested (before any display mode)
    246     let df = if let Some(n) = args.sample {
    247         if n >= df.height() {
    248             df
    249         } else {
    250             df.sample_n_literal(n, false, false, None)?
    251         }
    252     } else {
    253         df
    254     };
    255 
    256     // --convert: write to a different format and exit
    257     if let Some(ref convert_str) = args.convert {
    258         let target_fmt = parse_format_str(convert_str)?;
    259         let out_path = args.output.as_deref().map(std::path::Path::new);
    260         let mut df = df;
    261         write_file(&mut df, out_path, target_fmt)?;
    262         return Ok(());
    263     }
    264 
    265     // Handle empty DataFrame
    266     if df.is_empty() {
    267         print!("{}", format_empty_sheet(&sheet));
    268         return Ok(());
    269     }
    270 
    271     // --schema
    272     if args.schema {
    273         print!("{}", format_schema(&sheet, &df));
    274         return Ok(());
    275     }
    276 
    277     // --describe
    278     if args.describe {
    279         print!("{}", format_describe(&df));
    280         return Ok(());
    281     }
    282 
    283     // --csv output mode
    284     if args.csv {
    285         print!("{}", format_csv(&df));
    286         return Ok(());
    287     }
    288 
    289     // Determine what to display
    290     let output = match (args.head, args.tail) {
    291         (Some(h), Some(t)) => {
    292             // Both specified: show head + tail with omission line
    293             format_head_tail(&df, h, t)
    294         }
    295         (Some(h), None) => {
    296             // Only --head: slice the DataFrame and show all
    297             let sliced = df.head(Some(h));
    298             format_data_table(&sliced)
    299         }
    300         (None, Some(t)) => {
    301             // Only --tail: slice and show all
    302             let sliced = df.tail(Some(t));
    303             format_data_table(&sliced)
    304         }
    305         (None, None) => {
    306             // Default: show all if <= threshold or --all, otherwise head+tail
    307             if args.all || df.height() <= DEFAULT_THRESHOLD {
    308                 format_data_table(&df)
    309             } else {
    310                 format_head_tail(&df, DEFAULT_HEAD_TAIL, DEFAULT_HEAD_TAIL)
    311             }
    312         }
    313     };
    314 
    315     print!("{}", output);
    316     Ok(())
    317 }
    318 
    319 fn main() {
    320     let args = Args::parse();
    321     match run(args) {
    322         Ok(()) => {}
    323         Err(err) => {
    324             // Check if this is an arg validation error (exit 2) vs runtime error (exit 1)
    325             let msg = err.to_string();
    326             if msg.contains("mutually exclusive")
    327                 || msg.contains("invalid")
    328                 || msg.contains("unknown format")
    329             {
    330                 eprintln!("dtcat: {err}");
    331                 process::exit(2);
    332             } else {
    333                 eprintln!("dtcat: {err}");
    334                 process::exit(1);
    335             }
    336         }
    337     }
    338 }