xl-cli-tools

CLI tools for viewing and editing Excel files
Log | Files | Refs | README | LICENSE

filter.rs (23352B)


      1 use anyhow::Result;
      2 use polars::prelude::*;
      3 
      4 #[derive(Debug, Clone, PartialEq)]
      5 pub enum FilterOp {
      6     Eq,
      7     NotEq,
      8     Gt,
      9     Lt,
     10     Gte,
     11     Lte,
     12     Contains,
     13     NotContains,
     14 }
     15 
     16 #[derive(Debug, Clone)]
     17 pub struct FilterExpr {
     18     pub column: String,
     19     pub op: FilterOp,
     20     pub value: String,
     21 }
     22 
     23 #[derive(Debug, Clone)]
     24 pub struct SortSpec {
     25     pub column: String,
     26     pub descending: bool,
     27 }
     28 
     29 /// Parse a filter expression like "State=CA", "Amount>1000", "Name~john".
     30 /// Scans left-to-right for the first operator character (= ! > < ~),
     31 /// then determines the full operator.
     32 pub fn parse_filter_expr(s: &str) -> Result<FilterExpr, String> {
     33     let op_chars = ['=', '!', '>', '<', '~'];
     34     let pos = s
     35         .find(|c: char| op_chars.contains(&c))
     36         .ok_or_else(|| {
     37             format!(
     38                 "no operator found in '{}'. Use =, !=, >, <, >=, <=, ~ or !~",
     39                 s
     40             )
     41         })?;
     42     if pos == 0 {
     43         return Err(format!("missing column name in '{}'", s));
     44     }
     45     let column = s[..pos].to_string();
     46     let rest = &s[pos..];
     47     let (op, op_len) = if rest.starts_with(">=") {
     48         (FilterOp::Gte, 2)
     49     } else if rest.starts_with("<=") {
     50         (FilterOp::Lte, 2)
     51     } else if rest.starts_with("!=") {
     52         (FilterOp::NotEq, 2)
     53     } else if rest.starts_with("!~") {
     54         (FilterOp::NotContains, 2)
     55     } else if rest.starts_with('>') {
     56         (FilterOp::Gt, 1)
     57     } else if rest.starts_with('<') {
     58         (FilterOp::Lt, 1)
     59     } else if rest.starts_with('=') {
     60         (FilterOp::Eq, 1)
     61     } else if rest.starts_with('~') {
     62         (FilterOp::Contains, 1)
     63     } else {
     64         return Err(format!("invalid operator in '{}'", s));
     65     };
     66     let value = rest[op_len..].to_string();
     67     Ok(FilterExpr { column, op, value })
     68 }
     69 
     70 /// Parse a sort spec like "Amount:desc" or "Name" (default asc).
     71 /// Splits on the last colon so column names containing colons are supported.
     72 pub fn parse_sort_spec(s: &str) -> Result<SortSpec, String> {
     73     if let Some(colon_pos) = s.rfind(':') {
     74         let col = &s[..colon_pos];
     75         let dir = &s[colon_pos + 1..];
     76         match dir.to_lowercase().as_str() {
     77             "asc" => Ok(SortSpec {
     78                 column: col.to_string(),
     79                 descending: false,
     80             }),
     81             "desc" => Ok(SortSpec {
     82                 column: col.to_string(),
     83                 descending: true,
     84             }),
     85             _ => Err(format!(
     86                 "invalid sort direction '{}'. Use 'asc' or 'desc'",
     87                 dir
     88             )),
     89         }
     90     } else {
     91         Ok(SortSpec {
     92             column: s.to_string(),
     93             descending: false,
     94         })
     95     }
     96 }
     97 
     98 /// Convert a column letter like "A", "B", "AA" to a 0-based index.
     99 /// Returns None if the string isn't purely alphabetic or is empty.
    100 fn col_letter_to_index(s: &str) -> Option<usize> {
    101     if s.is_empty() || !s.chars().all(|c| c.is_ascii_alphabetic()) {
    102         return None;
    103     }
    104     let mut idx: usize = 0;
    105     for c in s.to_uppercase().chars() {
    106         idx = idx * 26 + (c as usize - 'A' as usize + 1);
    107     }
    108     Some(idx - 1)
    109 }
    110 
    111 /// Resolve a column specifier to a DataFrame column name.
    112 /// Accepts either:
    113 /// - A header name (exact match first, then case-insensitive)
    114 /// - A column letter like "A", "B", "AA" (mapped by position)
    115 /// Header name match takes priority over column letter interpretation.
    116 pub fn resolve_column(spec: &str, df_columns: &[String]) -> Result<String, String> {
    117     // 1. Exact header name match
    118     if df_columns.contains(&spec.to_string()) {
    119         return Ok(spec.to_string());
    120     }
    121     // 2. Case-insensitive header name match
    122     let spec_lower = spec.to_lowercase();
    123     for col in df_columns {
    124         if col.to_lowercase() == spec_lower {
    125             return Ok(col.clone());
    126         }
    127     }
    128     // 3. Column letter (A=0, B=1, ...) — only if purely alphabetic
    129     if let Some(idx) = col_letter_to_index(spec) {
    130         if idx < df_columns.len() {
    131             return Ok(df_columns[idx].clone());
    132         }
    133     }
    134     let available = df_columns.join(", ");
    135     Err(format!("column '{}' not found. Available columns: {}", spec, available))
    136 }
    137 
    138 /// Resolve a list of column specifiers to DataFrame column names.
    139 pub fn resolve_columns(specs: &[String], df_columns: &[String]) -> Result<Vec<String>, String> {
    140     specs.iter().map(|s| resolve_column(s, df_columns)).collect()
    141 }
    142 
    143 /// Check if a polars DataType is numeric.
    144 fn is_numeric_dtype(dtype: &DataType) -> bool {
    145     matches!(
    146         dtype,
    147         DataType::Int8
    148             | DataType::Int16
    149             | DataType::Int32
    150             | DataType::Int64
    151             | DataType::UInt8
    152             | DataType::UInt16
    153             | DataType::UInt32
    154             | DataType::UInt64
    155             | DataType::Float32
    156             | DataType::Float64
    157     )
    158 }
    159 
    160 /// Build a boolean mask for a single filter expression against a DataFrame.
    161 fn build_filter_mask(df: &DataFrame, expr: &FilterExpr) -> Result<BooleanChunked> {
    162     let col = df.column(&expr.column).map_err(|e| anyhow::anyhow!("{}", e))?;
    163     let series = col.as_materialized_series();
    164     let dtype = series.dtype();
    165 
    166     match &expr.op {
    167         FilterOp::Eq => {
    168             if is_numeric_dtype(dtype) {
    169                 if let Ok(n) = expr.value.parse::<f64>() {
    170                     let s = series.cast(&DataType::Float64)?;
    171                     return Ok(s.f64()?.equal(n));
    172                 }
    173             }
    174             let s = series.cast(&DataType::String)?;
    175             Ok(s.str()?.equal(expr.value.as_str()))
    176         }
    177         FilterOp::NotEq => {
    178             if is_numeric_dtype(dtype) {
    179                 if let Ok(n) = expr.value.parse::<f64>() {
    180                     let s = series.cast(&DataType::Float64)?;
    181                     return Ok(s.f64()?.not_equal(n));
    182                 }
    183             }
    184             let s = series.cast(&DataType::String)?;
    185             Ok(s.str()?.not_equal(expr.value.as_str()))
    186         }
    187         FilterOp::Gt => {
    188             let n = parse_numeric_value(&expr.value, ">")?;
    189             let s = series.cast(&DataType::Float64)?;
    190             Ok(s.f64()?.gt(n))
    191         }
    192         FilterOp::Lt => {
    193             let n = parse_numeric_value(&expr.value, "<")?;
    194             let s = series.cast(&DataType::Float64)?;
    195             Ok(s.f64()?.lt(n))
    196         }
    197         FilterOp::Gte => {
    198             let n = parse_numeric_value(&expr.value, ">=")?;
    199             let s = series.cast(&DataType::Float64)?;
    200             Ok(s.f64()?.gt_eq(n))
    201         }
    202         FilterOp::Lte => {
    203             let n = parse_numeric_value(&expr.value, "<=")?;
    204             let s = series.cast(&DataType::Float64)?;
    205             Ok(s.f64()?.lt_eq(n))
    206         }
    207         FilterOp::Contains => {
    208             let s = series.cast(&DataType::String)?;
    209             let ca = s.str()?;
    210             let pat = expr.value.to_lowercase();
    211             let mask: BooleanChunked = ca.into_iter()
    212                 .map(|opt_s| opt_s.map(|s| s.to_lowercase().contains(&pat)).unwrap_or(false))
    213                 .collect();
    214             Ok(mask)
    215         }
    216         FilterOp::NotContains => {
    217             let s = series.cast(&DataType::String)?;
    218             let ca = s.str()?;
    219             let pat = expr.value.to_lowercase();
    220             let mask: BooleanChunked = ca.into_iter()
    221                 .map(|opt_s| opt_s.map(|s| !s.to_lowercase().contains(&pat)).unwrap_or(true))
    222                 .collect();
    223             Ok(mask)
    224         }
    225     }
    226 }
    227 
    228 fn parse_numeric_value(value: &str, op: &str) -> Result<f64> {
    229     value
    230         .parse::<f64>()
    231         .map_err(|_| anyhow::anyhow!("'{}' requires numeric value, got '{}'", op, value))
    232 }
    233 
    234 /// Apply a list of filter expressions to a DataFrame (AND logic).
    235 /// An empty list returns the DataFrame unchanged.
    236 pub fn apply_filters(df: &DataFrame, exprs: &[FilterExpr]) -> Result<DataFrame> {
    237     let mut result = df.clone();
    238     for expr in exprs {
    239         let mask = build_filter_mask(&result, expr)?;
    240         result = result.filter(&mask)?;
    241     }
    242     Ok(result)
    243 }
    244 
    245 /// Options for the filter pipeline.
    246 pub struct FilterOptions {
    247     pub filters: Vec<FilterExpr>,
    248     pub cols: Option<Vec<String>>,
    249     pub sort: Option<SortSpec>,
    250     pub limit: Option<usize>,
    251     pub head: Option<usize>,
    252     pub tail: Option<usize>,
    253 }
    254 
    255 /// Apply a sort specification to a DataFrame.
    256 pub fn apply_sort(df: &DataFrame, spec: &SortSpec) -> Result<DataFrame> {
    257     let opts = SortMultipleOptions::default()
    258         .with_order_descending(spec.descending);
    259     Ok(df.sort([&spec.column], opts)?)
    260 }
    261 
    262 /// Run the full filter pipeline: head/tail → resolve & filter → sort → limit → select columns.
    263 pub fn filter_pipeline(df: DataFrame, opts: &FilterOptions) -> Result<DataFrame> {
    264     let df_columns: Vec<String> = df
    265         .get_column_names()
    266         .iter()
    267         .map(|s| s.to_string())
    268         .collect();
    269 
    270     // 1. Pre-filter window: head or tail
    271     let df = if let Some(n) = opts.head {
    272         df.head(Some(n))
    273     } else if let Some(n) = opts.tail {
    274         df.tail(Some(n))
    275     } else {
    276         df
    277     };
    278 
    279     // 2. Resolve column names in filter expressions and apply filters
    280     let resolved_filters: Vec<FilterExpr> = opts
    281         .filters
    282         .iter()
    283         .map(|f| {
    284             let resolved_col = resolve_column(&f.column, &df_columns)?;
    285             Ok(FilterExpr {
    286                 column: resolved_col,
    287                 op: f.op.clone(),
    288                 value: f.value.clone(),
    289             })
    290         })
    291         .collect::<Result<Vec<_>, String>>()
    292         .map_err(|e| anyhow::anyhow!("{}", e))?;
    293 
    294     let df = apply_filters(&df, &resolved_filters)?;
    295 
    296     // 3. Sort
    297     let df = if let Some(ref spec) = opts.sort {
    298         let resolved_col = resolve_column(&spec.column, &df_columns)
    299             .map_err(|e| anyhow::anyhow!("{}", e))?;
    300         let resolved_spec = SortSpec {
    301             column: resolved_col,
    302             descending: spec.descending,
    303         };
    304         apply_sort(&df, &resolved_spec)?
    305     } else {
    306         df
    307     };
    308 
    309     // 4. Limit (after filtering and sorting)
    310     let df = if let Some(n) = opts.limit {
    311         df.head(Some(n))
    312     } else {
    313         df
    314     };
    315 
    316     // 5. Select columns
    317     let df = if let Some(ref col_specs) = opts.cols {
    318         let resolved_cols = resolve_columns(col_specs, &df_columns)
    319             .map_err(|e| anyhow::anyhow!("{}", e))?;
    320         let col_refs: Vec<&str> = resolved_cols.iter().map(|s| s.as_str()).collect();
    321         df.select(col_refs)?
    322     } else {
    323         df
    324     };
    325 
    326     Ok(df)
    327 }
    328 
    329 #[cfg(test)]
    330 mod tests {
    331     use super::*;
    332 
    333     fn make_test_df() -> DataFrame {
    334         DataFrame::new(vec![
    335             Column::new("State".into(), &["CA", "NY", "CA", "TX", "NY"]),
    336             Column::new("City".into(), &["LA", "NYC", "SF", "Houston", "Albany"]),
    337             Column::new("Amount".into(), &[1500i64, 2000, 800, 1200, 500]),
    338             Column::new("Year".into(), &[2023i64, 2023, 2024, 2024, 2023]),
    339             Column::new("Status".into(), &["Active", "Active", "Draft", "Active", "Draft"]),
    340         ])
    341         .unwrap()
    342     }
    343 
    344     #[test]
    345     fn filter_eq_string() {
    346         let df = make_test_df();
    347         let expr = parse_filter_expr("State=CA").unwrap();
    348         let result = apply_filters(&df, &[expr]).unwrap();
    349         assert_eq!(result.height(), 2);
    350     }
    351 
    352     #[test]
    353     fn filter_eq_numeric() {
    354         let df = make_test_df();
    355         let expr = parse_filter_expr("Amount=1500").unwrap();
    356         let result = apply_filters(&df, &[expr]).unwrap();
    357         assert_eq!(result.height(), 1);
    358     }
    359 
    360     #[test]
    361     fn filter_not_eq() {
    362         let df = make_test_df();
    363         let expr = parse_filter_expr("Status!=Draft").unwrap();
    364         let result = apply_filters(&df, &[expr]).unwrap();
    365         assert_eq!(result.height(), 3);
    366     }
    367 
    368     #[test]
    369     fn filter_gt() {
    370         let df = make_test_df();
    371         let expr = parse_filter_expr("Amount>1000").unwrap();
    372         let result = apply_filters(&df, &[expr]).unwrap();
    373         assert_eq!(result.height(), 3);
    374     }
    375 
    376     #[test]
    377     fn filter_lt() {
    378         let df = make_test_df();
    379         let expr = parse_filter_expr("Amount<1000").unwrap();
    380         let result = apply_filters(&df, &[expr]).unwrap();
    381         assert_eq!(result.height(), 2);
    382     }
    383 
    384     #[test]
    385     fn filter_gte() {
    386         let df = make_test_df();
    387         let expr = parse_filter_expr("Amount>=1500").unwrap();
    388         let result = apply_filters(&df, &[expr]).unwrap();
    389         assert_eq!(result.height(), 2);
    390     }
    391 
    392     #[test]
    393     fn filter_lte() {
    394         let df = make_test_df();
    395         let expr = parse_filter_expr("Amount<=800").unwrap();
    396         let result = apply_filters(&df, &[expr]).unwrap();
    397         assert_eq!(result.height(), 2);
    398     }
    399 
    400     #[test]
    401     fn filter_contains() {
    402         let df = make_test_df();
    403         let expr = parse_filter_expr("City~ou").unwrap();
    404         let result = apply_filters(&df, &[expr]).unwrap();
    405         assert_eq!(result.height(), 1);
    406     }
    407 
    408     #[test]
    409     fn filter_contains_case_insensitive() {
    410         let df = make_test_df();
    411         let expr = parse_filter_expr("City~HOUSTON").unwrap();
    412         let result = apply_filters(&df, &[expr]).unwrap();
    413         assert_eq!(result.height(), 1);
    414     }
    415 
    416     #[test]
    417     fn filter_not_contains() {
    418         let df = make_test_df();
    419         let expr = parse_filter_expr("Status!~raft").unwrap();
    420         let result = apply_filters(&df, &[expr]).unwrap();
    421         assert_eq!(result.height(), 3);
    422     }
    423 
    424     #[test]
    425     fn filter_multiple_and() {
    426         let df = make_test_df();
    427         let e1 = parse_filter_expr("State=CA").unwrap();
    428         let e2 = parse_filter_expr("Amount>1000").unwrap();
    429         let result = apply_filters(&df, &[e1, e2]).unwrap();
    430         assert_eq!(result.height(), 1);
    431     }
    432 
    433     #[test]
    434     fn filter_no_matches_returns_empty() {
    435         let df = make_test_df();
    436         let expr = parse_filter_expr("State=ZZ").unwrap();
    437         let result = apply_filters(&df, &[expr]).unwrap();
    438         assert_eq!(result.height(), 0);
    439     }
    440 
    441     #[test]
    442     fn filter_empty_exprs_returns_all() {
    443         let df = make_test_df();
    444         let result = apply_filters(&df, &[]).unwrap();
    445         assert_eq!(result.height(), 5);
    446     }
    447 
    448     #[test]
    449     fn parse_eq() {
    450         let expr = parse_filter_expr("State=CA").unwrap();
    451         assert_eq!(expr.column, "State");
    452         assert_eq!(expr.op, FilterOp::Eq);
    453         assert_eq!(expr.value, "CA");
    454     }
    455 
    456     #[test]
    457     fn parse_not_eq() {
    458         let expr = parse_filter_expr("Status!=Draft").unwrap();
    459         assert_eq!(expr.column, "Status");
    460         assert_eq!(expr.op, FilterOp::NotEq);
    461         assert_eq!(expr.value, "Draft");
    462     }
    463 
    464     #[test]
    465     fn parse_gt() {
    466         let expr = parse_filter_expr("Amount>1000").unwrap();
    467         assert_eq!(expr.column, "Amount");
    468         assert_eq!(expr.op, FilterOp::Gt);
    469         assert_eq!(expr.value, "1000");
    470     }
    471 
    472     #[test]
    473     fn parse_lt() {
    474         let expr = parse_filter_expr("Year<2024").unwrap();
    475         assert_eq!(expr.column, "Year");
    476         assert_eq!(expr.op, FilterOp::Lt);
    477         assert_eq!(expr.value, "2024");
    478     }
    479 
    480     #[test]
    481     fn parse_gte() {
    482         let expr = parse_filter_expr("Score>=90").unwrap();
    483         assert_eq!(expr.column, "Score");
    484         assert_eq!(expr.op, FilterOp::Gte);
    485         assert_eq!(expr.value, "90");
    486     }
    487 
    488     #[test]
    489     fn parse_lte() {
    490         let expr = parse_filter_expr("Price<=50.5").unwrap();
    491         assert_eq!(expr.column, "Price");
    492         assert_eq!(expr.op, FilterOp::Lte);
    493         assert_eq!(expr.value, "50.5");
    494     }
    495 
    496     #[test]
    497     fn parse_contains() {
    498         let expr = parse_filter_expr("Name~john").unwrap();
    499         assert_eq!(expr.column, "Name");
    500         assert_eq!(expr.op, FilterOp::Contains);
    501         assert_eq!(expr.value, "john");
    502     }
    503 
    504     #[test]
    505     fn parse_not_contains() {
    506         let expr = parse_filter_expr("Name!~draft").unwrap();
    507         assert_eq!(expr.column, "Name");
    508         assert_eq!(expr.op, FilterOp::NotContains);
    509         assert_eq!(expr.value, "draft");
    510     }
    511 
    512     #[test]
    513     fn parse_value_with_equals() {
    514         let expr = parse_filter_expr("Formula=A+B=C").unwrap();
    515         assert_eq!(expr.column, "Formula");
    516         assert_eq!(expr.op, FilterOp::Eq);
    517         assert_eq!(expr.value, "A+B=C");
    518     }
    519 
    520     #[test]
    521     fn parse_empty_value() {
    522         let expr = parse_filter_expr("Status=").unwrap();
    523         assert_eq!(expr.column, "Status");
    524         assert_eq!(expr.op, FilterOp::Eq);
    525         assert_eq!(expr.value, "");
    526     }
    527 
    528     #[test]
    529     fn parse_no_operator_is_err() {
    530         assert!(parse_filter_expr("JustAWord").is_err());
    531     }
    532 
    533     #[test]
    534     fn parse_no_column_is_err() {
    535         assert!(parse_filter_expr("=value").is_err());
    536     }
    537 
    538     #[test]
    539     fn parse_sort_desc() {
    540         let spec = parse_sort_spec("Amount:desc").unwrap();
    541         assert_eq!(spec.column, "Amount");
    542         assert!(spec.descending);
    543     }
    544 
    545     #[test]
    546     fn parse_sort_asc() {
    547         let spec = parse_sort_spec("Name:asc").unwrap();
    548         assert_eq!(spec.column, "Name");
    549         assert!(!spec.descending);
    550     }
    551 
    552     #[test]
    553     fn parse_sort_default_asc() {
    554         let spec = parse_sort_spec("Name").unwrap();
    555         assert_eq!(spec.column, "Name");
    556         assert!(!spec.descending);
    557     }
    558 
    559     #[test]
    560     fn parse_sort_bad_dir_is_err() {
    561         assert!(parse_sort_spec("Name:up").is_err());
    562     }
    563 
    564     #[test]
    565     fn resolve_by_header_name() {
    566         let cols = vec!["State".to_string(), "Amount".to_string(), "Year".to_string()];
    567         assert_eq!(resolve_column("Amount", &cols).unwrap(), "Amount");
    568     }
    569 
    570     #[test]
    571     fn resolve_by_letter() {
    572         let cols = vec!["State".to_string(), "Amount".to_string(), "Year".to_string()];
    573         assert_eq!(resolve_column("B", &cols).unwrap(), "Amount");
    574     }
    575 
    576     #[test]
    577     fn resolve_by_letter_lowercase() {
    578         let cols = vec!["State".to_string(), "Amount".to_string(), "Year".to_string()];
    579         assert_eq!(resolve_column("b", &cols).unwrap(), "Amount");
    580     }
    581 
    582     #[test]
    583     fn resolve_header_takes_priority_over_letter() {
    584         let cols = vec!["A".to_string(), "B".to_string()];
    585         assert_eq!(resolve_column("A", &cols).unwrap(), "A");
    586     }
    587 
    588     #[test]
    589     fn resolve_case_insensitive_header() {
    590         let cols = vec!["State".to_string(), "Amount".to_string()];
    591         assert_eq!(resolve_column("state", &cols).unwrap(), "State");
    592     }
    593 
    594     #[test]
    595     fn resolve_unknown_column_is_err() {
    596         let cols = vec!["State".to_string(), "Amount".to_string()];
    597         let err = resolve_column("Foo", &cols).unwrap_err();
    598         assert!(err.contains("not found"), "error was: {}", err);
    599     }
    600 
    601     #[test]
    602     fn resolve_letter_out_of_range_is_err() {
    603         let cols = vec!["State".to_string()];
    604         let err = resolve_column("C", &cols).unwrap_err();
    605         assert!(err.contains("not found"), "error was: {}", err);
    606     }
    607 
    608     #[test]
    609     fn resolve_multiple_columns() {
    610         let cols = vec!["State".to_string(), "Amount".to_string(), "Year".to_string()];
    611         let resolved = resolve_columns(&["A".to_string(), "Year".to_string()], &cols).unwrap();
    612         assert_eq!(resolved, vec!["State", "Year"]);
    613     }
    614 
    615     #[test]
    616     fn sort_ascending() {
    617         let df = make_test_df();
    618         let spec = parse_sort_spec("Amount:asc").unwrap();
    619         let result = apply_sort(&df, &spec).unwrap();
    620         let col = result.column("Amount").unwrap().as_materialized_series();
    621         let amounts = col.i64().unwrap();
    622         assert_eq!(amounts.get(0), Some(500));
    623         assert_eq!(amounts.get(4), Some(2000));
    624     }
    625 
    626     #[test]
    627     fn sort_descending() {
    628         let df = make_test_df();
    629         let spec = parse_sort_spec("Amount:desc").unwrap();
    630         let result = apply_sort(&df, &spec).unwrap();
    631         let col = result.column("Amount").unwrap().as_materialized_series();
    632         let amounts = col.i64().unwrap();
    633         assert_eq!(amounts.get(0), Some(2000));
    634         assert_eq!(amounts.get(4), Some(500));
    635     }
    636 
    637     #[test]
    638     fn pipeline_full() {
    639         let df = make_test_df();
    640         let opts = FilterOptions {
    641             filters: vec![parse_filter_expr("Amount>500").unwrap()],
    642             cols: Some(vec!["State".to_string(), "Amount".to_string()]),
    643             sort: Some(parse_sort_spec("Amount:desc").unwrap()),
    644             limit: Some(2),
    645             head: None,
    646             tail: None,
    647         };
    648         let result = filter_pipeline(df, &opts).unwrap();
    649         assert_eq!(result.height(), 2);
    650         assert_eq!(result.width(), 2);
    651         let col = result.column("Amount").unwrap().as_materialized_series();
    652         let amounts = col.i64().unwrap();
    653         assert_eq!(amounts.get(0), Some(2000));
    654         assert_eq!(amounts.get(1), Some(1500));
    655     }
    656 
    657     #[test]
    658     fn pipeline_head_before_filter() {
    659         let df = make_test_df(); // 5 rows: CA/LA, NY/NYC, CA/SF, TX/Houston, NY/Albany
    660         let opts = FilterOptions {
    661             filters: vec![parse_filter_expr("State=NY").unwrap()],
    662             cols: None,
    663             sort: None,
    664             limit: None,
    665             head: Some(3), // Take first 3 rows before filtering
    666             tail: None,
    667         };
    668         let result = filter_pipeline(df, &opts).unwrap();
    669         // First 3 rows: CA/LA, NY/NYC, CA/SF → only NY/NYC matches
    670         assert_eq!(result.height(), 1);
    671     }
    672 
    673     #[test]
    674     fn pipeline_tail_before_filter() {
    675         let df = make_test_df(); // 5 rows
    676         let opts = FilterOptions {
    677             filters: vec![parse_filter_expr("State=CA").unwrap()],
    678             cols: None,
    679             sort: None,
    680             limit: None,
    681             head: None,
    682             tail: Some(3), // Last 3 rows before filtering
    683         };
    684         let result = filter_pipeline(df, &opts).unwrap();
    685         // Last 3 rows: CA/SF, TX/Houston, NY/Albany → only CA/SF matches
    686         assert_eq!(result.height(), 1);
    687     }
    688 
    689     #[test]
    690     fn pipeline_no_options_returns_all() {
    691         let df = make_test_df();
    692         let opts = FilterOptions {
    693             filters: vec![],
    694             cols: None,
    695             sort: None,
    696             limit: None,
    697             head: None,
    698             tail: None,
    699         };
    700         let result = filter_pipeline(df, &opts).unwrap();
    701         assert_eq!(result.height(), 5);
    702         assert_eq!(result.width(), 5);
    703     }
    704 
    705     #[test]
    706     fn pipeline_cols_by_letter() {
    707         let df = make_test_df();
    708         let opts = FilterOptions {
    709             filters: vec![],
    710             cols: Some(vec!["A".to_string(), "C".to_string()]),
    711             sort: None,
    712             limit: None,
    713             head: None,
    714             tail: None,
    715         };
    716         let result = filter_pipeline(df, &opts).unwrap();
    717         assert_eq!(result.width(), 2);
    718         let names: Vec<String> = result.get_column_names().iter().map(|s| s.to_string()).collect();
    719         assert_eq!(names, vec!["State", "Amount"]);
    720     }
    721 
    722     #[test]
    723     fn pipeline_limit_after_filter() {
    724         let df = make_test_df();
    725         let opts = FilterOptions {
    726             filters: vec![parse_filter_expr("Status=Active").unwrap()],
    727             cols: None,
    728             sort: None,
    729             limit: Some(2),
    730             head: None,
    731             tail: None,
    732         };
    733         let result = filter_pipeline(df, &opts).unwrap();
    734         assert_eq!(result.height(), 2); // 3 Active rows, limited to 2
    735     }
    736 }