dt-cli-tools

CLI tools for viewing, filtering, and comparing tabular data files
Log | Files | Refs | README | LICENSE

dtfilter.rs (4557B)


      1 use std::io::Write;
      2 use std::path::PathBuf;
      3 use std::process;
      4 
      5 use anyhow::{Result, bail};
      6 use clap::Parser;
      7 
      8 use dtcore::filter::{FilterOptions, parse_filter_expr, parse_sort_spec, filter_pipeline};
      9 use dtcore::format::detect_format;
     10 use dtcore::formatter::{format_data_table, format_csv};
     11 use dtcore::reader::{ReadOptions, read_file};
     12 
     13 // ---------------------------------------------------------------------------
     14 // Argument parsing
     15 // ---------------------------------------------------------------------------
     16 
     17 #[derive(Parser)]
     18 #[command(
     19     name = "dtfilter",
     20     about = "Filter, sort, and select columns from tabular data files",
     21     version
     22 )]
     23 struct Args {
     24     /// Input file
     25     file: String,
     26 
     27     /// Override format detection
     28     #[arg(long, value_name = "FMT")]
     29     format: Option<String>,
     30 
     31     /// Select sheet by name or index (Excel only)
     32     #[arg(long, value_name = "NAME|INDEX")]
     33     sheet: Option<String>,
     34 
     35     /// Skip first N rows after the header
     36     #[arg(long, value_name = "N")]
     37     skip: Option<usize>,
     38 
     39     /// Filter expression(s), e.g. "State=CA", "Amount>1000" (repeatable, ANDed)
     40     #[arg(long = "filter", value_name = "EXPR", action = clap::ArgAction::Append)]
     41     filters: Vec<String>,
     42 
     43     /// Sort spec, e.g. "Amount:desc" or "Name"
     44     #[arg(long, value_name = "SPEC")]
     45     sort: Option<String>,
     46 
     47     /// Select columns by name (comma-separated)
     48     #[arg(long, value_name = "COLS")]
     49     columns: Option<String>,
     50 
     51     /// First N rows (before filter)
     52     #[arg(long, value_name = "N")]
     53     head: Option<usize>,
     54 
     55     /// Last N rows (before filter)
     56     #[arg(long, value_name = "N")]
     57     tail: Option<usize>,
     58 
     59     /// Max output rows (after filter)
     60     #[arg(long, value_name = "N")]
     61     limit: Option<usize>,
     62 
     63     /// Output as CSV
     64     #[arg(long)]
     65     csv: bool,
     66 }
     67 
     68 // ---------------------------------------------------------------------------
     69 // Validation helpers
     70 // ---------------------------------------------------------------------------
     71 
     72 /// Validate args and return an error message for invalid combinations.
     73 /// Returns exit-code 2 on any argument error.
     74 fn validate_args(args: &Args) -> Result<(), ArgError> {
     75     if args.head.is_some() && args.tail.is_some() {
     76         return Err(ArgError("--head and --tail are mutually exclusive".to_string()));
     77     }
     78     Ok(())
     79 }
     80 
     81 struct ArgError(String);
     82 
     83 // ---------------------------------------------------------------------------
     84 // Core logic
     85 // ---------------------------------------------------------------------------
     86 
     87 fn run(args: Args) -> Result<()> {
     88     let path = PathBuf::from(&args.file);
     89 
     90     if !path.exists() {
     91         bail!("file not found: {}", path.display());
     92     }
     93 
     94     // Detect format
     95     let fmt = detect_format(&path, args.format.as_deref())?;
     96 
     97     // Build read options
     98     let read_opts = ReadOptions {
     99         sheet: args.sheet.clone(),
    100         skip_rows: args.skip,
    101         separator: None,
    102     };
    103 
    104     // Read the DataFrame
    105     let df = read_file(&path, fmt, &read_opts)?;
    106 
    107     // Parse filter expressions
    108     let filters = args
    109         .filters
    110         .iter()
    111         .map(|s| parse_filter_expr(s).map_err(|e| anyhow::anyhow!("{}", e)))
    112         .collect::<Result<Vec<_>>>()?;
    113 
    114     // Parse sort spec
    115     let sort = args
    116         .sort
    117         .as_deref()
    118         .map(|s| parse_sort_spec(s).map_err(|e| anyhow::anyhow!("{}", e)))
    119         .transpose()?;
    120 
    121     // Parse column selection
    122     let cols: Option<Vec<String>> = args.columns.as_deref().map(|s| {
    123         s.split(',')
    124             .map(|c| c.trim().to_string())
    125             .filter(|c| !c.is_empty())
    126             .collect()
    127     });
    128 
    129     // Build filter options
    130     let filter_opts = FilterOptions {
    131         filters,
    132         cols,
    133         sort,
    134         limit: args.limit,
    135         head: args.head,
    136         tail: args.tail,
    137     };
    138 
    139     // Run the pipeline
    140     let result = filter_pipeline(df, &filter_opts)?;
    141 
    142     // Report row count to stderr
    143     let row_count = result.height();
    144     eprintln!("{} row{}", row_count, if row_count == 1 { "" } else { "s" });
    145 
    146     // Output
    147     let output = if args.csv {
    148         format_csv(&result)
    149     } else {
    150         format_data_table(&result)
    151     };
    152 
    153     let stdout = std::io::stdout();
    154     let mut out = stdout.lock();
    155     out.write_all(output.as_bytes())?;
    156 
    157     Ok(())
    158 }
    159 
    160 fn main() {
    161     let args = Args::parse();
    162 
    163     if let Err(e) = validate_args(&args) {
    164         eprintln!("dtfilter: {}", e.0);
    165         process::exit(2);
    166     }
    167 
    168     if let Err(err) = run(args) {
    169         eprintln!("dtfilter: {err}");
    170         process::exit(1);
    171     }
    172 }