reader.rs (1303B)
1 use anyhow::Result; 2 use polars::prelude::*; 3 use std::path::Path; 4 5 use crate::format::Format; 6 use crate::metadata::FileInfo; 7 use crate::readers; 8 9 /// Options that control how a file is read. 10 #[derive(Debug, Clone, Default)] 11 pub struct ReadOptions { 12 pub sheet: Option<String>, // Excel only 13 pub skip_rows: Option<usize>, 14 pub separator: Option<u8>, // CSV override 15 } 16 17 /// Read a file into a DataFrame, dispatching to the appropriate reader. 18 pub fn read_file(path: &Path, format: Format, opts: &ReadOptions) -> Result<DataFrame> { 19 match format { 20 Format::Csv | Format::Tsv => readers::csv::read(path, opts), 21 Format::Parquet => readers::parquet::read(path, opts), 22 Format::Arrow => readers::arrow::read(path, opts), 23 Format::Json | Format::Ndjson => readers::json::read(path, format, opts), 24 Format::Excel => readers::excel::read(path, opts), 25 } 26 } 27 28 /// Read file metadata: size, format, and sheet info (for Excel). 29 pub fn read_file_info(path: &Path, format: Format) -> Result<FileInfo> { 30 let file_size = std::fs::metadata(path)?.len(); 31 32 let sheets = match format { 33 Format::Excel => readers::excel::read_excel_info(path)?, 34 _ => vec![], 35 }; 36 37 Ok(FileInfo { 38 file_size, 39 format, 40 sheets, 41 }) 42 }