dtfilter.rs (4557B)
1 use std::io::Write; 2 use std::path::PathBuf; 3 use std::process; 4 5 use anyhow::{Result, bail}; 6 use clap::Parser; 7 8 use dtcore::filter::{FilterOptions, parse_filter_expr, parse_sort_spec, filter_pipeline}; 9 use dtcore::format::detect_format; 10 use dtcore::formatter::{format_data_table, format_csv}; 11 use dtcore::reader::{ReadOptions, read_file}; 12 13 // --------------------------------------------------------------------------- 14 // Argument parsing 15 // --------------------------------------------------------------------------- 16 17 #[derive(Parser)] 18 #[command( 19 name = "dtfilter", 20 about = "Filter, sort, and select columns from tabular data files", 21 version 22 )] 23 struct Args { 24 /// Input file 25 file: String, 26 27 /// Override format detection 28 #[arg(long, value_name = "FMT")] 29 format: Option<String>, 30 31 /// Select sheet by name or index (Excel only) 32 #[arg(long, value_name = "NAME|INDEX")] 33 sheet: Option<String>, 34 35 /// Skip first N rows after the header 36 #[arg(long, value_name = "N")] 37 skip: Option<usize>, 38 39 /// Filter expression(s), e.g. "State=CA", "Amount>1000" (repeatable, ANDed) 40 #[arg(long = "filter", value_name = "EXPR", action = clap::ArgAction::Append)] 41 filters: Vec<String>, 42 43 /// Sort spec, e.g. "Amount:desc" or "Name" 44 #[arg(long, value_name = "SPEC")] 45 sort: Option<String>, 46 47 /// Select columns by name (comma-separated) 48 #[arg(long, value_name = "COLS")] 49 columns: Option<String>, 50 51 /// First N rows (before filter) 52 #[arg(long, value_name = "N")] 53 head: Option<usize>, 54 55 /// Last N rows (before filter) 56 #[arg(long, value_name = "N")] 57 tail: Option<usize>, 58 59 /// Max output rows (after filter) 60 #[arg(long, value_name = "N")] 61 limit: Option<usize>, 62 63 /// Output as CSV 64 #[arg(long)] 65 csv: bool, 66 } 67 68 // --------------------------------------------------------------------------- 69 // Validation helpers 70 // --------------------------------------------------------------------------- 71 72 /// Validate args and return an error message for invalid combinations. 73 /// Returns exit-code 2 on any argument error. 74 fn validate_args(args: &Args) -> Result<(), ArgError> { 75 if args.head.is_some() && args.tail.is_some() { 76 return Err(ArgError("--head and --tail are mutually exclusive".to_string())); 77 } 78 Ok(()) 79 } 80 81 struct ArgError(String); 82 83 // --------------------------------------------------------------------------- 84 // Core logic 85 // --------------------------------------------------------------------------- 86 87 fn run(args: Args) -> Result<()> { 88 let path = PathBuf::from(&args.file); 89 90 if !path.exists() { 91 bail!("file not found: {}", path.display()); 92 } 93 94 // Detect format 95 let fmt = detect_format(&path, args.format.as_deref())?; 96 97 // Build read options 98 let read_opts = ReadOptions { 99 sheet: args.sheet.clone(), 100 skip_rows: args.skip, 101 separator: None, 102 }; 103 104 // Read the DataFrame 105 let df = read_file(&path, fmt, &read_opts)?; 106 107 // Parse filter expressions 108 let filters = args 109 .filters 110 .iter() 111 .map(|s| parse_filter_expr(s).map_err(|e| anyhow::anyhow!("{}", e))) 112 .collect::<Result<Vec<_>>>()?; 113 114 // Parse sort spec 115 let sort = args 116 .sort 117 .as_deref() 118 .map(|s| parse_sort_spec(s).map_err(|e| anyhow::anyhow!("{}", e))) 119 .transpose()?; 120 121 // Parse column selection 122 let cols: Option<Vec<String>> = args.columns.as_deref().map(|s| { 123 s.split(',') 124 .map(|c| c.trim().to_string()) 125 .filter(|c| !c.is_empty()) 126 .collect() 127 }); 128 129 // Build filter options 130 let filter_opts = FilterOptions { 131 filters, 132 cols, 133 sort, 134 limit: args.limit, 135 head: args.head, 136 tail: args.tail, 137 }; 138 139 // Run the pipeline 140 let result = filter_pipeline(df, &filter_opts)?; 141 142 // Report row count to stderr 143 let row_count = result.height(); 144 eprintln!("{} row{}", row_count, if row_count == 1 { "" } else { "s" }); 145 146 // Output 147 let output = if args.csv { 148 format_csv(&result) 149 } else { 150 format_data_table(&result) 151 }; 152 153 let stdout = std::io::stdout(); 154 let mut out = stdout.lock(); 155 out.write_all(output.as_bytes())?; 156 157 Ok(()) 158 } 159 160 fn main() { 161 let args = Args::parse(); 162 163 if let Err(e) = validate_args(&args) { 164 eprintln!("dtfilter: {}", e.0); 165 process::exit(2); 166 } 167 168 if let Err(err) = run(args) { 169 eprintln!("dtfilter: {err}"); 170 process::exit(1); 171 } 172 }