dt-cli-tools

CLI tools for viewing, filtering, and comparing tabular data files
Log | Files | Refs | README | LICENSE

parquet.rs (1114B)


      1 use anyhow::Result;
      2 use polars::prelude::*;
      3 use std::path::Path;
      4 
      5 use crate::reader::ReadOptions;
      6 
      7 pub fn read(path: &Path, opts: &ReadOptions) -> Result<DataFrame> {
      8     let file = std::fs::File::open(path)?;
      9     let mut df = ParquetReader::new(file).finish()?;
     10 
     11     if let Some(skip) = opts.skip_rows
     12         && skip > 0 && skip < df.height() {
     13             df = df.slice(skip as i64, df.height() - skip);
     14         }
     15 
     16     Ok(df)
     17 }
     18 
     19 #[cfg(test)]
     20 mod tests {
     21     use super::*;
     22     use tempfile::NamedTempFile;
     23 
     24     #[test]
     25     fn read_parquet_roundtrip() {
     26         let s1 = Series::new("name".into(), &["Alice", "Bob"]);
     27         let s2 = Series::new("value".into(), &[100i64, 200]);
     28         let mut df = DataFrame::new(vec![s1.into_column(), s2.into_column()]).unwrap();
     29 
     30         let f = NamedTempFile::with_suffix(".parquet").unwrap();
     31         let file = std::fs::File::create(f.path()).unwrap();
     32         ParquetWriter::new(file).finish(&mut df).unwrap();
     33 
     34         let result = read(f.path(), &ReadOptions::default()).unwrap();
     35         assert_eq!(result.height(), 2);
     36         assert_eq!(result.width(), 2);
     37     }
     38 }