dt-cli-tools

CLI tools for viewing, filtering, and comparing tabular data files
Log | Files | Refs | README | LICENSE

filter.rs (2404B)


      1 use criterion::{criterion_group, criterion_main, Criterion};
      2 use polars::prelude::*;
      3 
      4 use dtcore::filter::{FilterExpr, FilterOp, FilterOptions, SortSpec, apply_filters, filter_pipeline};
      5 
      6 fn make_df(n: usize) -> DataFrame {
      7     let ids: Vec<i64> = (0..n as i64).collect();
      8     let regions: Vec<&str> = (0..n).map(|i| ["East", "West", "North", "South"][i % 4]).collect();
      9     let values: Vec<i64> = (0..n).map(|i| i as i64 * 100).collect();
     10     let names: Vec<String> = (0..n).map(|i| format!("name_{}", i)).collect();
     11 
     12     DataFrame::new(vec![
     13         Series::new("id".into(), &ids).into_column(),
     14         Series::new("region".into(), &regions).into_column(),
     15         Series::new("value".into(), &values).into_column(),
     16         Series::new("name".into(), &names).into_column(),
     17     ]).unwrap()
     18 }
     19 
     20 fn bench_filter(c: &mut Criterion) {
     21     for &size in &[1_000, 10_000, 100_000] {
     22         let df = make_df(size);
     23 
     24         let mut group = c.benchmark_group(format!("filter_{size}"));
     25 
     26         // Equality filter
     27         let eq_expr = vec![FilterExpr { column: "region".into(), op: FilterOp::Eq, value: "East".into() }];
     28         group.bench_function("eq", |b| {
     29             b.iter(|| apply_filters(&df, &eq_expr).unwrap())
     30         });
     31 
     32         // Numeric comparison
     33         let gt_expr = vec![FilterExpr { column: "value".into(), op: FilterOp::Gt, value: (size as i64 * 50).to_string() }];
     34         group.bench_function("gt", |b| {
     35             b.iter(|| apply_filters(&df, &gt_expr).unwrap())
     36         });
     37 
     38         // Contains (string scan)
     39         let contains_expr = vec![FilterExpr { column: "name".into(), op: FilterOp::Contains, value: "42".into() }];
     40         group.bench_function("contains", |b| {
     41             b.iter(|| apply_filters(&df, &contains_expr).unwrap())
     42         });
     43 
     44         // Full pipeline: filter + sort + limit
     45         let pipeline_opts = FilterOptions {
     46             filters: vec![FilterExpr { column: "region".into(), op: FilterOp::Eq, value: "East".into() }],
     47             sort: Some(SortSpec { column: "value".into(), descending: true }),
     48             limit: Some(10),
     49             cols: None,
     50             head: None,
     51             tail: None,
     52         };
     53         group.bench_function("pipeline", |b| {
     54             b.iter(|| filter_pipeline(df.clone(), &pipeline_opts).unwrap())
     55         });
     56 
     57         group.finish();
     58     }
     59 }
     60 
     61 criterion_group!(benches, bench_filter);
     62 criterion_main!(benches);