filter.rs (2404B)
1 use criterion::{criterion_group, criterion_main, Criterion}; 2 use polars::prelude::*; 3 4 use dtcore::filter::{FilterExpr, FilterOp, FilterOptions, SortSpec, apply_filters, filter_pipeline}; 5 6 fn make_df(n: usize) -> DataFrame { 7 let ids: Vec<i64> = (0..n as i64).collect(); 8 let regions: Vec<&str> = (0..n).map(|i| ["East", "West", "North", "South"][i % 4]).collect(); 9 let values: Vec<i64> = (0..n).map(|i| i as i64 * 100).collect(); 10 let names: Vec<String> = (0..n).map(|i| format!("name_{}", i)).collect(); 11 12 DataFrame::new(vec![ 13 Series::new("id".into(), &ids).into_column(), 14 Series::new("region".into(), ®ions).into_column(), 15 Series::new("value".into(), &values).into_column(), 16 Series::new("name".into(), &names).into_column(), 17 ]).unwrap() 18 } 19 20 fn bench_filter(c: &mut Criterion) { 21 for &size in &[1_000, 10_000, 100_000] { 22 let df = make_df(size); 23 24 let mut group = c.benchmark_group(format!("filter_{size}")); 25 26 // Equality filter 27 let eq_expr = vec![FilterExpr { column: "region".into(), op: FilterOp::Eq, value: "East".into() }]; 28 group.bench_function("eq", |b| { 29 b.iter(|| apply_filters(&df, &eq_expr).unwrap()) 30 }); 31 32 // Numeric comparison 33 let gt_expr = vec![FilterExpr { column: "value".into(), op: FilterOp::Gt, value: (size as i64 * 50).to_string() }]; 34 group.bench_function("gt", |b| { 35 b.iter(|| apply_filters(&df, >_expr).unwrap()) 36 }); 37 38 // Contains (string scan) 39 let contains_expr = vec![FilterExpr { column: "name".into(), op: FilterOp::Contains, value: "42".into() }]; 40 group.bench_function("contains", |b| { 41 b.iter(|| apply_filters(&df, &contains_expr).unwrap()) 42 }); 43 44 // Full pipeline: filter + sort + limit 45 let pipeline_opts = FilterOptions { 46 filters: vec![FilterExpr { column: "region".into(), op: FilterOp::Eq, value: "East".into() }], 47 sort: Some(SortSpec { column: "value".into(), descending: true }), 48 limit: Some(10), 49 cols: None, 50 head: None, 51 tail: None, 52 }; 53 group.bench_function("pipeline", |b| { 54 b.iter(|| filter_pipeline(df.clone(), &pipeline_opts).unwrap()) 55 }); 56 57 group.finish(); 58 } 59 } 60 61 criterion_group!(benches, bench_filter); 62 criterion_main!(benches);