dt-cli-tools

CLI tools for viewing, filtering, and comparing tabular data files
Log | Files | Refs | README | LICENSE

csv.rs (2049B)


      1 use anyhow::Result;
      2 use polars::prelude::*;
      3 use std::path::Path;
      4 
      5 use crate::reader::ReadOptions;
      6 
      7 pub fn read(path: &Path, opts: &ReadOptions) -> Result<DataFrame> {
      8     let separator = opts.separator.unwrap_or_else(|| {
      9         crate::format::detect_csv_delimiter(path).unwrap_or(b',')
     10     });
     11 
     12     let reader = CsvReadOptions::default()
     13         .with_has_header(true)
     14         .with_skip_rows(opts.skip_rows.unwrap_or(0))
     15         .with_parse_options(
     16             CsvParseOptions::default().with_separator(separator),
     17         )
     18         .try_into_reader_with_file_path(Some(path.into()))?;
     19 
     20     let df = reader.finish()?;
     21     Ok(df)
     22 }
     23 
     24 #[cfg(test)]
     25 mod tests {
     26     use super::*;
     27     use std::io::Write;
     28     use tempfile::NamedTempFile;
     29 
     30     fn default_opts() -> ReadOptions {
     31         ReadOptions::default()
     32     }
     33 
     34     #[test]
     35     fn read_basic_csv() {
     36         let mut f = NamedTempFile::with_suffix(".csv").unwrap();
     37         write!(f, "name,value\nAlice,100\nBob,200\n").unwrap();
     38         f.flush().unwrap();
     39 
     40         let df = read(f.path(), &default_opts()).unwrap();
     41         assert_eq!(df.height(), 2);
     42         assert_eq!(df.width(), 2);
     43     }
     44 
     45     #[test]
     46     fn read_tsv() {
     47         let mut f = NamedTempFile::with_suffix(".tsv").unwrap();
     48         write!(f, "a\tb\n1\t2\n3\t4\n").unwrap();
     49         f.flush().unwrap();
     50 
     51         let opts = ReadOptions { separator: Some(b'\t'), ..Default::default() };
     52         let df = read(f.path(), &opts).unwrap();
     53         assert_eq!(df.height(), 2);
     54         assert_eq!(df.width(), 2);
     55     }
     56 
     57     #[test]
     58     fn read_with_skip() {
     59         let mut f = NamedTempFile::with_suffix(".csv").unwrap();
     60         write!(f, "metadata line\nname,value\nAlice,100\n").unwrap();
     61         f.flush().unwrap();
     62 
     63         let opts = ReadOptions { skip_rows: Some(1), ..Default::default() };
     64         let df = read(f.path(), &opts).unwrap();
     65         assert_eq!(df.height(), 1);
     66         let names: Vec<String> = df.get_column_names().iter().map(|s| s.to_string()).collect();
     67         assert_eq!(names, vec!["name", "value"]);
     68     }
     69 }