dt-cli-tools

CLI tools for viewing, filtering, and comparing tabular data files
Log | Files | Refs | README | LICENSE

reader.rs (1303B)


      1 use anyhow::Result;
      2 use polars::prelude::*;
      3 use std::path::Path;
      4 
      5 use crate::format::Format;
      6 use crate::metadata::FileInfo;
      7 use crate::readers;
      8 
      9 /// Options that control how a file is read.
     10 #[derive(Debug, Clone, Default)]
     11 pub struct ReadOptions {
     12     pub sheet: Option<String>,     // Excel only
     13     pub skip_rows: Option<usize>,
     14     pub separator: Option<u8>,     // CSV override
     15 }
     16 
     17 /// Read a file into a DataFrame, dispatching to the appropriate reader.
     18 pub fn read_file(path: &Path, format: Format, opts: &ReadOptions) -> Result<DataFrame> {
     19     match format {
     20         Format::Csv | Format::Tsv => readers::csv::read(path, opts),
     21         Format::Parquet => readers::parquet::read(path, opts),
     22         Format::Arrow => readers::arrow::read(path, opts),
     23         Format::Json | Format::Ndjson => readers::json::read(path, format, opts),
     24         Format::Excel => readers::excel::read(path, opts),
     25     }
     26 }
     27 
     28 /// Read file metadata: size, format, and sheet info (for Excel).
     29 pub fn read_file_info(path: &Path, format: Format) -> Result<FileInfo> {
     30     let file_size = std::fs::metadata(path)?.len();
     31 
     32     let sheets = match format {
     33         Format::Excel => readers::excel::read_excel_info(path)?,
     34         _ => vec![],
     35     };
     36 
     37     Ok(FileInfo {
     38         file_size,
     39         format,
     40         sheets,
     41     })
     42 }