csv.rs (2049B)
1 use anyhow::Result; 2 use polars::prelude::*; 3 use std::path::Path; 4 5 use crate::reader::ReadOptions; 6 7 pub fn read(path: &Path, opts: &ReadOptions) -> Result<DataFrame> { 8 let separator = opts.separator.unwrap_or_else(|| { 9 crate::format::detect_csv_delimiter(path).unwrap_or(b',') 10 }); 11 12 let reader = CsvReadOptions::default() 13 .with_has_header(true) 14 .with_skip_rows(opts.skip_rows.unwrap_or(0)) 15 .with_parse_options( 16 CsvParseOptions::default().with_separator(separator), 17 ) 18 .try_into_reader_with_file_path(Some(path.into()))?; 19 20 let df = reader.finish()?; 21 Ok(df) 22 } 23 24 #[cfg(test)] 25 mod tests { 26 use super::*; 27 use std::io::Write; 28 use tempfile::NamedTempFile; 29 30 fn default_opts() -> ReadOptions { 31 ReadOptions::default() 32 } 33 34 #[test] 35 fn read_basic_csv() { 36 let mut f = NamedTempFile::with_suffix(".csv").unwrap(); 37 write!(f, "name,value\nAlice,100\nBob,200\n").unwrap(); 38 f.flush().unwrap(); 39 40 let df = read(f.path(), &default_opts()).unwrap(); 41 assert_eq!(df.height(), 2); 42 assert_eq!(df.width(), 2); 43 } 44 45 #[test] 46 fn read_tsv() { 47 let mut f = NamedTempFile::with_suffix(".tsv").unwrap(); 48 write!(f, "a\tb\n1\t2\n3\t4\n").unwrap(); 49 f.flush().unwrap(); 50 51 let opts = ReadOptions { separator: Some(b'\t'), ..Default::default() }; 52 let df = read(f.path(), &opts).unwrap(); 53 assert_eq!(df.height(), 2); 54 assert_eq!(df.width(), 2); 55 } 56 57 #[test] 58 fn read_with_skip() { 59 let mut f = NamedTempFile::with_suffix(".csv").unwrap(); 60 write!(f, "metadata line\nname,value\nAlice,100\n").unwrap(); 61 f.flush().unwrap(); 62 63 let opts = ReadOptions { skip_rows: Some(1), ..Default::default() }; 64 let df = read(f.path(), &opts).unwrap(); 65 assert_eq!(df.height(), 1); 66 let names: Vec<String> = df.get_column_names().iter().map(|s| s.to_string()).collect(); 67 assert_eq!(names, vec!["name", "value"]); 68 } 69 }