Skip to content

Commit 77d3de5

Browse files
committed
Add with_skip_validation flag to IPC StreamReader, FileReader and FileDecoder
1 parent 0c206c6 commit 77d3de5

File tree

3 files changed

+385
-87
lines changed

3 files changed

+385
-87
lines changed

arrow-ipc/benches/ipc_reader.rs

+123-38
Original file line numberDiff line numberDiff line change
@@ -24,25 +24,34 @@ use arrow_ipc::writer::{FileWriter, IpcWriteOptions, StreamWriter};
2424
use arrow_ipc::{root_as_footer, Block, CompressionType};
2525
use arrow_schema::{DataType, Field, Schema};
2626
use criterion::{criterion_group, criterion_main, Criterion};
27-
use std::io::Cursor;
27+
use std::io::{Cursor, Write};
2828
use std::sync::Arc;
2929
use tempfile::tempdir;
3030

3131
fn criterion_benchmark(c: &mut Criterion) {
3232
let mut group = c.benchmark_group("arrow_ipc_reader");
3333

3434
group.bench_function("StreamReader/read_10", |b| {
35-
let batch = create_batch(8192, true);
36-
let mut buffer = Vec::with_capacity(2 * 1024 * 1024);
37-
let mut writer = StreamWriter::try_new(&mut buffer, batch.schema().as_ref()).unwrap();
38-
for _ in 0..10 {
39-
writer.write(&batch).unwrap();
40-
}
41-
writer.finish().unwrap();
35+
let buffer = ipc_stream();
36+
b.iter(move || {
37+
let projection = None;
38+
let mut reader = StreamReader::try_new(buffer.as_slice(), projection).unwrap();
39+
for _ in 0..10 {
40+
reader.next().unwrap().unwrap();
41+
}
42+
assert!(reader.next().is_none());
43+
})
44+
});
4245

46+
group.bench_function("StreamReader/no_validation/read_10", |b| {
47+
let buffer = ipc_stream();
4348
b.iter(move || {
4449
let projection = None;
4550
let mut reader = StreamReader::try_new(buffer.as_slice(), projection).unwrap();
51+
unsafe {
52+
// SAFETY: `buffer` was produced by `ipc_stream()`, so it holds a valid IPC stream
53+
reader = reader.with_skip_validation(true);
54+
}
4655
for _ in 0..10 {
4756
reader.next().unwrap().unwrap();
4857
}
@@ -51,69 +60,100 @@ fn criterion_benchmark(c: &mut Criterion) {
5160
});
5261

5362
group.bench_function("StreamReader/read_10/zstd", |b| {
54-
let batch = create_batch(8192, true);
55-
let mut buffer = Vec::with_capacity(2 * 1024 * 1024);
56-
let options = IpcWriteOptions::default()
57-
.try_with_compression(Some(CompressionType::ZSTD))
58-
.unwrap();
59-
let mut writer =
60-
StreamWriter::try_new_with_options(&mut buffer, batch.schema().as_ref(), options)
61-
.unwrap();
62-
for _ in 0..10 {
63-
writer.write(&batch).unwrap();
64-
}
65-
writer.finish().unwrap();
63+
let buffer = ipc_stream_zstd();
64+
b.iter(move || {
65+
let projection = None;
66+
let mut reader = StreamReader::try_new(buffer.as_slice(), projection).unwrap();
67+
for _ in 0..10 {
68+
reader.next().unwrap().unwrap();
69+
}
70+
assert!(reader.next().is_none());
71+
})
72+
});
6673

74+
group.bench_function("StreamReader/no_validation/read_10/zstd", |b| {
75+
let buffer = ipc_stream_zstd();
6776
b.iter(move || {
6877
let projection = None;
6978
let mut reader = StreamReader::try_new(buffer.as_slice(), projection).unwrap();
79+
unsafe {
80+
// SAFETY: `buffer` was produced by `ipc_stream_zstd()`, so it holds a valid IPC stream
81+
reader = reader.with_skip_validation(true);
82+
}
7083
for _ in 0..10 {
7184
reader.next().unwrap().unwrap();
7285
}
7386
assert!(reader.next().is_none());
7487
})
7588
});
7689

90+
// --- Create IPC File ---
7791
group.bench_function("FileReader/read_10", |b| {
78-
let batch = create_batch(8192, true);
79-
let mut buffer = Vec::with_capacity(2 * 1024 * 1024);
80-
let mut writer = FileWriter::try_new(&mut buffer, batch.schema().as_ref()).unwrap();
81-
for _ in 0..10 {
82-
writer.write(&batch).unwrap();
83-
}
84-
writer.finish().unwrap();
92+
let buffer = ipc_file();
93+
b.iter(move || {
94+
let projection = None;
95+
let cursor = Cursor::new(buffer.as_slice());
96+
let mut reader = FileReader::try_new(cursor, projection).unwrap();
97+
for _ in 0..10 {
98+
reader.next().unwrap().unwrap();
99+
}
100+
assert!(reader.next().is_none());
101+
})
102+
});
85103

104+
group.bench_function("FileReader/no_validation/read_10", |b| {
105+
let buffer = ipc_file();
86106
b.iter(move || {
87107
let projection = None;
88108
let cursor = Cursor::new(buffer.as_slice());
89109
let mut reader = FileReader::try_new(cursor, projection).unwrap();
110+
unsafe {
111+
// safety: we created a valid IPC file
112+
reader = reader.with_skip_validation(true);
113+
}
90114
for _ in 0..10 {
91115
reader.next().unwrap().unwrap();
92116
}
93117
assert!(reader.next().is_none());
94118
})
95119
});
96120

121+
// write to an actual file
122+
let dir = tempdir().unwrap();
123+
let path = dir.path().join("test.arrow");
124+
let mut file = std::fs::File::create(&path).unwrap();
125+
file.write_all(&ipc_file()).unwrap();
126+
drop(file);
127+
97128
group.bench_function("FileReader/read_10/mmap", |b| {
98-
let batch = create_batch(8192, true);
99-
// write to an actual file
100-
let dir = tempdir().unwrap();
101-
let path = dir.path().join("test.arrow");
102-
let file = std::fs::File::create(&path).unwrap();
103-
let mut writer = FileWriter::try_new(file, batch.schema().as_ref()).unwrap();
104-
for _ in 0..10 {
105-
writer.write(&batch).unwrap();
106-
}
107-
writer.finish().unwrap();
129+
let path = &path;
130+
b.iter(move || {
131+
let ipc_file = std::fs::File::open(path).expect("failed to open file");
132+
let mmap = unsafe { memmap2::Mmap::map(&ipc_file).expect("failed to mmap file") };
133+
134+
// Convert the mmap region to an Arrow `Buffer` to back the arrow arrays.
135+
let bytes = bytes::Bytes::from_owner(mmap);
136+
let buffer = Buffer::from(bytes);
137+
let decoder = IPCBufferDecoder::new(buffer);
138+
assert_eq!(decoder.num_batches(), 10);
108139

140+
for i in 0..decoder.num_batches() {
141+
decoder.get_batch(i);
142+
}
143+
})
144+
});
145+
146+
group.bench_function("FileReader/no_validation/read_10/mmap", |b| {
147+
let path = &path;
109148
b.iter(move || {
110-
let ipc_file = std::fs::File::open(&path).expect("failed to open file");
149+
let ipc_file = std::fs::File::open(path).expect("failed to open file");
111150
let mmap = unsafe { memmap2::Mmap::map(&ipc_file).expect("failed to mmap file") };
112151

113152
// Convert the mmap region to an Arrow `Buffer` to back the arrow arrays.
114153
let bytes = bytes::Bytes::from_owner(mmap);
115154
let buffer = Buffer::from(bytes);
116155
let decoder = IPCBufferDecoder::new(buffer);
156+
let decoder = unsafe { decoder.with_skip_validation(true) };
117157
assert_eq!(decoder.num_batches(), 10);
118158

119159
for i in 0..decoder.num_batches() {
@@ -123,6 +163,46 @@ fn criterion_benchmark(c: &mut Criterion) {
123163
});
124164
}
125165

166+
/// Return an in-memory IPC stream containing 10 record batches.
///
/// A single batch is built with `create_batch(8192, true)` and written 10
/// times through `StreamWriter::try_new` into a `Vec<u8>` whose capacity is
/// pre-reserved to 2 MiB; the finished buffer is returned by value.
///
/// # Panics
///
/// Panics if creating the writer, writing a batch, or finishing the stream
/// returns an error (each call is `unwrap`ped).
167+
fn ipc_stream() -> Vec<u8> {
168+
let batch = create_batch(8192, true);
169+
let mut buffer = Vec::with_capacity(2 * 1024 * 1024);
170+
let mut writer = StreamWriter::try_new(&mut buffer, batch.schema().as_ref()).unwrap();
171+
for _ in 0..10 {
172+
writer.write(&batch).unwrap();
173+
}
174+
writer.finish().unwrap();
175+
buffer
176+
}
177+
178+
/// Return an in-memory IPC stream with ZSTD compression containing 10 record batches.
///
/// Same shape as the uncompressed stream helper, except the writer is created
/// with `IpcWriteOptions::default()` adjusted via
/// `try_with_compression(Some(CompressionType::ZSTD))`. One batch from
/// `create_batch(8192, true)` is written 10 times into a `Vec<u8>` whose
/// capacity is pre-reserved to 2 MiB.
///
/// # Panics
///
/// Panics if building the write options, creating the writer, writing a
/// batch, or finishing the stream returns an error (each call is `unwrap`ped).
179+
fn ipc_stream_zstd() -> Vec<u8> {
180+
let batch = create_batch(8192, true);
181+
let mut buffer = Vec::with_capacity(2 * 1024 * 1024);
182+
let options = IpcWriteOptions::default()
183+
.try_with_compression(Some(CompressionType::ZSTD))
184+
.unwrap();
185+
let mut writer =
186+
StreamWriter::try_new_with_options(&mut buffer, batch.schema().as_ref(), options).unwrap();
187+
for _ in 0..10 {
188+
writer.write(&batch).unwrap();
189+
}
190+
writer.finish().unwrap();
191+
buffer
192+
}
193+
194+
/// Return an in-memory IPC file containing 10 record batches.
///
/// A single batch is built with `create_batch(8192, true)` and written 10
/// times through `FileWriter::try_new` into a `Vec<u8>` whose capacity is
/// pre-reserved to 2 MiB; the finished buffer is returned by value.
///
/// # Panics
///
/// Panics if creating the writer, writing a batch, or finishing the file
/// returns an error (each call is `unwrap`ped).
195+
fn ipc_file() -> Vec<u8> {
196+
let batch = create_batch(8192, true);
197+
let mut buffer = Vec::with_capacity(2 * 1024 * 1024);
198+
let mut writer = FileWriter::try_new(&mut buffer, batch.schema().as_ref()).unwrap();
199+
for _ in 0..10 {
200+
writer.write(&batch).unwrap();
201+
}
202+
writer.finish().unwrap();
203+
buffer
204+
}
205+
126206
// copied from the zero_copy_ipc example.
127207
// should we move this to an actual API?
128208
/// Wrapper around the example in the `FileDecoder` which handles the
@@ -166,6 +246,11 @@ impl IPCBufferDecoder {
166246
}
167247
}
168248

249+
/// Forward `skip_validation` to the wrapped decoder's `with_skip_validation`
/// and return the updated wrapper (builder style: consumes and returns `self`).
///
/// # Safety
///
/// NOTE(review): presumably the caller must guarantee that the IPC data being
/// decoded is valid whenever `skip_validation` is `true` — confirm against the
/// safety contract documented on the wrapped decoder's `with_skip_validation`.
unsafe fn with_skip_validation(mut self, skip_validation: bool) -> Self {
250+
self.decoder = self.decoder.with_skip_validation(skip_validation);
251+
self
252+
}
253+
169254
fn num_batches(&self) -> usize {
170255
self.batches.len()
171256
}

0 commit comments

Comments
 (0)