json.rs (5529B)
1 use anyhow::Result; 2 use polars::prelude::*; 3 use std::path::Path; 4 5 use crate::format::Format; 6 use crate::reader::ReadOptions; 7 8 pub fn read(path: &Path, format: Format, opts: &ReadOptions) -> Result<DataFrame> { 9 let file = std::fs::File::open(path)?; 10 11 let mut df = match format { 12 Format::Ndjson => { 13 // NDJSON: one JSON object per line — use JsonLineReader 14 JsonLineReader::new(file).finish()? 15 } 16 _ => { 17 // JSON array format — JsonReader defaults to JsonFormat::Json 18 JsonReader::new(file).finish()? 19 } 20 }; 21 22 if let Some(skip) = opts.skip_rows 23 && skip > 0 && skip < df.height() { 24 df = df.slice(skip as i64, df.height() - skip); 25 } 26 27 Ok(df) 28 } 29 30 #[cfg(test)] 31 mod tests { 32 use super::*; 33 use std::io::Write; 34 use tempfile::NamedTempFile; 35 36 fn default_opts() -> ReadOptions { 37 ReadOptions::default() 38 } 39 40 // ── JSON array ──────────────────────────────────────────────────────────── 41 42 #[test] 43 fn read_json_array_basic() { 44 let mut f = NamedTempFile::with_suffix(".json").unwrap(); 45 write!( 46 f, 47 r#"[{{"name":"Alice","value":100}},{{"name":"Bob","value":200}},{{"name":"Carol","value":300}}]"# 48 ) 49 .unwrap(); 50 f.flush().unwrap(); 51 52 let df = read(f.path(), Format::Json, &default_opts()).unwrap(); 53 assert_eq!(df.height(), 3); 54 assert_eq!(df.width(), 2); 55 } 56 57 #[test] 58 fn read_json_array_with_skip() { 59 let mut f = NamedTempFile::with_suffix(".json").unwrap(); 60 write!( 61 f, 62 r#"[{{"id":1}},{{"id":2}},{{"id":3}},{{"id":4}},{{"id":5}}]"# 63 ) 64 .unwrap(); 65 f.flush().unwrap(); 66 67 let opts = ReadOptions { 68 skip_rows: Some(2), 69 ..Default::default() 70 }; 71 let df = read(f.path(), Format::Json, &opts).unwrap(); 72 // 5 rows total, skip 2 → 3 rows remain 73 assert_eq!(df.height(), 3); 74 } 75 76 #[test] 77 fn read_json_array_skip_zero_noop() { 78 let mut f = NamedTempFile::with_suffix(".json").unwrap(); 79 write!(f, r#"[{{"x":1}},{{"x":2}}]"#).unwrap(); 80 f.flush().unwrap(); 81 82 let opts = ReadOptions { 83 skip_rows: Some(0), 84 ..Default::default() 85 }; 86 let df = read(f.path(), Format::Json, &opts).unwrap(); 87 assert_eq!(df.height(), 2); 88 } 89 90 #[test] 91 fn read_json_array_single_row() { 92 let mut f = NamedTempFile::with_suffix(".json").unwrap(); 93 write!(f, r#"[{{"a":42,"b":"hello"}}]"#).unwrap(); 94 f.flush().unwrap(); 95 96 let df = read(f.path(), Format::Json, &default_opts()).unwrap(); 97 assert_eq!(df.height(), 1); 98 assert_eq!(df.width(), 2); 99 } 100 101 // ── NDJSON ──────────────────────────────────────────────────────────────── 102 103 #[test] 104 fn read_ndjson_basic() { 105 let mut f = NamedTempFile::with_suffix(".ndjson").unwrap(); 106 write!( 107 f, 108 "{}\n{}\n{}\n", 109 r#"{"name":"Alice","value":100}"#, 110 r#"{"name":"Bob","value":200}"#, 111 r#"{"name":"Carol","value":300}"# 112 ) 113 .unwrap(); 114 f.flush().unwrap(); 115 116 let df = read(f.path(), Format::Ndjson, &default_opts()).unwrap(); 117 assert_eq!(df.height(), 3); 118 assert_eq!(df.width(), 2); 119 } 120 121 #[test] 122 fn read_ndjson_with_skip() { 123 let mut f = NamedTempFile::with_suffix(".ndjson").unwrap(); 124 for i in 1..=5 { 125 writeln!(f, r#"{{"id":{}}}"#, i).unwrap(); 126 } 127 f.flush().unwrap(); 128 129 let opts = ReadOptions { 130 skip_rows: Some(2), 131 ..Default::default() 132 }; 133 let df = read(f.path(), Format::Ndjson, &opts).unwrap(); 134 // 5 rows total, skip 2 → 3 rows remain 135 assert_eq!(df.height(), 3); 136 } 137 138 #[test] 139 fn read_ndjson_no_trailing_newline() { 140 let mut f = NamedTempFile::with_suffix(".jsonl").unwrap(); 141 write!(f, "{}\n{}", r#"{"x":1}"#, r#"{"x":2}"#).unwrap(); 142 f.flush().unwrap(); 143 144 let df = read(f.path(), Format::Ndjson, &default_opts()).unwrap(); 145 assert_eq!(df.height(), 2); 146 } 147 148 #[test] 149 fn read_ndjson_single_row() { 150 let mut f = NamedTempFile::with_suffix(".ndjson").unwrap(); 151 writeln!(f, r#"{{"a":1,"b":"z"}}"#).unwrap(); 152 f.flush().unwrap(); 153 154 let df = read(f.path(), Format::Ndjson, &default_opts()).unwrap(); 155 assert_eq!(df.height(), 1); 156 assert_eq!(df.width(), 2); 157 } 158 159 // ── skip_rows boundary ──────────────────────────────────────────────────── 160 161 #[test] 162 fn skip_rows_ge_height_noop() { 163 let mut f = NamedTempFile::with_suffix(".json").unwrap(); 164 write!(f, r#"[{{"v":1}},{{"v":2}}]"#).unwrap(); 165 f.flush().unwrap(); 166 167 let opts = ReadOptions { 168 skip_rows: Some(10), 169 ..Default::default() 170 }; 171 let df = read(f.path(), Format::Json, &opts).unwrap(); 172 // skip >= height: condition `skip < df.height()` is false → no-op 173 assert_eq!(df.height(), 2); 174 } 175 }