dt-cli-tools

CLI tools for viewing, filtering, and comparing tabular data files
Log | Files | Refs | README | LICENSE

diff.rs (1631B)


      1 use criterion::{criterion_group, criterion_main, Criterion};
      2 use polars::prelude::*;
      3 
      4 use dtcore::diff::{DiffOptions, SheetSource, diff_positional, diff_keyed};
      5 
      6 fn source(name: &str) -> SheetSource {
      7     SheetSource { file_name: name.into(), sheet_name: "Sheet1".into() }
      8 }
      9 
     10 fn make_df(n: usize, offset: i64) -> DataFrame {
     11     let ids: Vec<i64> = (offset..offset + n as i64).collect();
     12     let names: Vec<String> = ids.iter().map(|i| format!("name_{}", i)).collect();
     13     let values: Vec<i64> = ids.iter().map(|i| i * 100).collect();
     14 
     15     DataFrame::new(vec![
     16         Series::new("id".into(), &ids).into_column(),
     17         Series::new("name".into(), &names).into_column(),
     18         Series::new("value".into(), &values).into_column(),
     19     ]).unwrap()
     20 }
     21 
     22 fn bench_diff(c: &mut Criterion) {
     23     let opts_positional = DiffOptions::default();
     24     let opts_keyed = DiffOptions { key_columns: vec!["id".into()], tolerance: None };
     25 
     26     for &size in &[1_000, 10_000, 100_000] {
     27         let df_a = make_df(size, 0);
     28         // 10% of rows differ (shifted by 10% of size)
     29         let shift = (size / 10) as i64;
     30         let df_b = make_df(size, shift);
     31 
     32         let mut group = c.benchmark_group(format!("diff_{size}"));
     33 
     34         group.bench_function("positional", |b| {
     35             b.iter(|| diff_positional(&df_a, &df_b, &opts_positional, source("a"), source("b")).unwrap())
     36         });
     37 
     38         group.bench_function("keyed", |b| {
     39             b.iter(|| diff_keyed(&df_a, &df_b, &opts_keyed, source("a"), source("b")).unwrap())
     40         });
     41 
     42         group.finish();
     43     }
     44 }
     45 
     46 criterion_group!(benches, bench_diff);
     47 criterion_main!(benches);