dt-cli-tools

CLI tools for viewing, filtering, and comparing tabular data files
Log | Files | Refs | README | LICENSE

json.rs (5529B)


      1 use anyhow::Result;
      2 use polars::prelude::*;
      3 use std::path::Path;
      4 
      5 use crate::format::Format;
      6 use crate::reader::ReadOptions;
      7 
      8 pub fn read(path: &Path, format: Format, opts: &ReadOptions) -> Result<DataFrame> {
      9     let file = std::fs::File::open(path)?;
     10 
     11     let mut df = match format {
     12         Format::Ndjson => {
     13             // NDJSON: one JSON object per line — use JsonLineReader
     14             JsonLineReader::new(file).finish()?
     15         }
     16         _ => {
     17             // JSON array format — JsonReader defaults to JsonFormat::Json
     18             JsonReader::new(file).finish()?
     19         }
     20     };
     21 
     22     if let Some(skip) = opts.skip_rows
     23         && skip > 0 && skip < df.height() {
     24             df = df.slice(skip as i64, df.height() - skip);
     25         }
     26 
     27     Ok(df)
     28 }
     29 
     30 #[cfg(test)]
     31 mod tests {
     32     use super::*;
     33     use std::io::Write;
     34     use tempfile::NamedTempFile;
     35 
     36     fn default_opts() -> ReadOptions {
     37         ReadOptions::default()
     38     }
     39 
     40     // ── JSON array ────────────────────────────────────────────────────────────
     41 
     42     #[test]
     43     fn read_json_array_basic() {
     44         let mut f = NamedTempFile::with_suffix(".json").unwrap();
     45         write!(
     46             f,
     47             r#"[{{"name":"Alice","value":100}},{{"name":"Bob","value":200}},{{"name":"Carol","value":300}}]"#
     48         )
     49         .unwrap();
     50         f.flush().unwrap();
     51 
     52         let df = read(f.path(), Format::Json, &default_opts()).unwrap();
     53         assert_eq!(df.height(), 3);
     54         assert_eq!(df.width(), 2);
     55     }
     56 
     57     #[test]
     58     fn read_json_array_with_skip() {
     59         let mut f = NamedTempFile::with_suffix(".json").unwrap();
     60         write!(
     61             f,
     62             r#"[{{"id":1}},{{"id":2}},{{"id":3}},{{"id":4}},{{"id":5}}]"#
     63         )
     64         .unwrap();
     65         f.flush().unwrap();
     66 
     67         let opts = ReadOptions {
     68             skip_rows: Some(2),
     69             ..Default::default()
     70         };
     71         let df = read(f.path(), Format::Json, &opts).unwrap();
     72         // 5 rows total, skip 2 → 3 rows remain
     73         assert_eq!(df.height(), 3);
     74     }
     75 
     76     #[test]
     77     fn read_json_array_skip_zero_noop() {
     78         let mut f = NamedTempFile::with_suffix(".json").unwrap();
     79         write!(f, r#"[{{"x":1}},{{"x":2}}]"#).unwrap();
     80         f.flush().unwrap();
     81 
     82         let opts = ReadOptions {
     83             skip_rows: Some(0),
     84             ..Default::default()
     85         };
     86         let df = read(f.path(), Format::Json, &opts).unwrap();
     87         assert_eq!(df.height(), 2);
     88     }
     89 
     90     #[test]
     91     fn read_json_array_single_row() {
     92         let mut f = NamedTempFile::with_suffix(".json").unwrap();
     93         write!(f, r#"[{{"a":42,"b":"hello"}}]"#).unwrap();
     94         f.flush().unwrap();
     95 
     96         let df = read(f.path(), Format::Json, &default_opts()).unwrap();
     97         assert_eq!(df.height(), 1);
     98         assert_eq!(df.width(), 2);
     99     }
    100 
    101     // ── NDJSON ────────────────────────────────────────────────────────────────
    102 
    103     #[test]
    104     fn read_ndjson_basic() {
    105         let mut f = NamedTempFile::with_suffix(".ndjson").unwrap();
    106         write!(
    107             f,
    108             "{}\n{}\n{}\n",
    109             r#"{"name":"Alice","value":100}"#,
    110             r#"{"name":"Bob","value":200}"#,
    111             r#"{"name":"Carol","value":300}"#
    112         )
    113         .unwrap();
    114         f.flush().unwrap();
    115 
    116         let df = read(f.path(), Format::Ndjson, &default_opts()).unwrap();
    117         assert_eq!(df.height(), 3);
    118         assert_eq!(df.width(), 2);
    119     }
    120 
    121     #[test]
    122     fn read_ndjson_with_skip() {
    123         let mut f = NamedTempFile::with_suffix(".ndjson").unwrap();
    124         for i in 1..=5 {
    125             writeln!(f, r#"{{"id":{}}}"#, i).unwrap();
    126         }
    127         f.flush().unwrap();
    128 
    129         let opts = ReadOptions {
    130             skip_rows: Some(2),
    131             ..Default::default()
    132         };
    133         let df = read(f.path(), Format::Ndjson, &opts).unwrap();
    134         // 5 rows total, skip 2 → 3 rows remain
    135         assert_eq!(df.height(), 3);
    136     }
    137 
    138     #[test]
    139     fn read_ndjson_no_trailing_newline() {
    140         let mut f = NamedTempFile::with_suffix(".jsonl").unwrap();
    141         write!(f, "{}\n{}", r#"{"x":1}"#, r#"{"x":2}"#).unwrap();
    142         f.flush().unwrap();
    143 
    144         let df = read(f.path(), Format::Ndjson, &default_opts()).unwrap();
    145         assert_eq!(df.height(), 2);
    146     }
    147 
    148     #[test]
    149     fn read_ndjson_single_row() {
    150         let mut f = NamedTempFile::with_suffix(".ndjson").unwrap();
    151         writeln!(f, r#"{{"a":1,"b":"z"}}"#).unwrap();
    152         f.flush().unwrap();
    153 
    154         let df = read(f.path(), Format::Ndjson, &default_opts()).unwrap();
    155         assert_eq!(df.height(), 1);
    156         assert_eq!(df.width(), 2);
    157     }
    158 
    159     // ── skip_rows boundary ────────────────────────────────────────────────────
    160 
    161     #[test]
    162     fn skip_rows_ge_height_noop() {
    163         let mut f = NamedTempFile::with_suffix(".json").unwrap();
    164         write!(f, r#"[{{"v":1}},{{"v":2}}]"#).unwrap();
    165         f.flush().unwrap();
    166 
    167         let opts = ReadOptions {
    168             skip_rows: Some(10),
    169             ..Default::default()
    170         };
    171         let df = read(f.path(), Format::Json, &opts).unwrap();
    172         // skip >= height: condition `skip < df.height()` is false → no-op
    173         assert_eq!(df.height(), 2);
    174     }
    175 }