@@ -24,25 +24,34 @@ use arrow_ipc::writer::{FileWriter, IpcWriteOptions, StreamWriter};
24
24
use arrow_ipc:: { root_as_footer, Block , CompressionType } ;
25
25
use arrow_schema:: { DataType , Field , Schema } ;
26
26
use criterion:: { criterion_group, criterion_main, Criterion } ;
27
- use std:: io:: Cursor ;
27
+ use std:: io:: { Cursor , Write } ;
28
28
use std:: sync:: Arc ;
29
29
use tempfile:: tempdir;
30
30
31
31
fn criterion_benchmark ( c : & mut Criterion ) {
32
32
let mut group = c. benchmark_group ( "arrow_ipc_reader" ) ;
33
33
34
34
group. bench_function ( "StreamReader/read_10" , |b| {
35
- let batch = create_batch ( 8192 , true ) ;
36
- let mut buffer = Vec :: with_capacity ( 2 * 1024 * 1024 ) ;
37
- let mut writer = StreamWriter :: try_new ( & mut buffer, batch. schema ( ) . as_ref ( ) ) . unwrap ( ) ;
38
- for _ in 0 ..10 {
39
- writer. write ( & batch) . unwrap ( ) ;
40
- }
41
- writer. finish ( ) . unwrap ( ) ;
35
+ let buffer = ipc_stream ( ) ;
36
+ b. iter ( move || {
37
+ let projection = None ;
38
+ let mut reader = StreamReader :: try_new ( buffer. as_slice ( ) , projection) . unwrap ( ) ;
39
+ for _ in 0 ..10 {
40
+ reader. next ( ) . unwrap ( ) . unwrap ( ) ;
41
+ }
42
+ assert ! ( reader. next( ) . is_none( ) ) ;
43
+ } )
44
+ } ) ;
42
45
46
+ group. bench_function ( "StreamReader/no_validation/read_10" , |b| {
47
+ let buffer = ipc_stream ( ) ;
43
48
b. iter ( move || {
44
49
let projection = None ;
45
50
let mut reader = StreamReader :: try_new ( buffer. as_slice ( ) , projection) . unwrap ( ) ;
51
+ unsafe {
52
+ // SAFETY: the buffer was produced by `ipc_stream`, so it holds a valid IPC stream
53
+ reader = reader. with_skip_validation ( true ) ;
54
+ }
46
55
for _ in 0 ..10 {
47
56
reader. next ( ) . unwrap ( ) . unwrap ( ) ;
48
57
}
@@ -51,69 +60,100 @@ fn criterion_benchmark(c: &mut Criterion) {
51
60
} ) ;
52
61
53
62
group. bench_function ( "StreamReader/read_10/zstd" , |b| {
54
- let batch = create_batch ( 8192 , true ) ;
55
- let mut buffer = Vec :: with_capacity ( 2 * 1024 * 1024 ) ;
56
- let options = IpcWriteOptions :: default ( )
57
- . try_with_compression ( Some ( CompressionType :: ZSTD ) )
58
- . unwrap ( ) ;
59
- let mut writer =
60
- StreamWriter :: try_new_with_options ( & mut buffer, batch. schema ( ) . as_ref ( ) , options)
61
- . unwrap ( ) ;
62
- for _ in 0 ..10 {
63
- writer. write ( & batch) . unwrap ( ) ;
64
- }
65
- writer. finish ( ) . unwrap ( ) ;
63
+ let buffer = ipc_stream_zstd ( ) ;
64
+ b. iter ( move || {
65
+ let projection = None ;
66
+ let mut reader = StreamReader :: try_new ( buffer. as_slice ( ) , projection) . unwrap ( ) ;
67
+ for _ in 0 ..10 {
68
+ reader. next ( ) . unwrap ( ) . unwrap ( ) ;
69
+ }
70
+ assert ! ( reader. next( ) . is_none( ) ) ;
71
+ } )
72
+ } ) ;
66
73
74
+ group. bench_function ( "StreamReader/no_validation/read_10/zstd" , |b| {
75
+ let buffer = ipc_stream_zstd ( ) ;
67
76
b. iter ( move || {
68
77
let projection = None ;
69
78
let mut reader = StreamReader :: try_new ( buffer. as_slice ( ) , projection) . unwrap ( ) ;
79
+ unsafe {
80
+ // SAFETY: the buffer was produced by `ipc_stream_zstd`, so it holds a valid IPC stream
81
+ reader = reader. with_skip_validation ( true ) ;
82
+ }
70
83
for _ in 0 ..10 {
71
84
reader. next ( ) . unwrap ( ) . unwrap ( ) ;
72
85
}
73
86
assert ! ( reader. next( ) . is_none( ) ) ;
74
87
} )
75
88
} ) ;
76
89
90
+ // --- Create IPC File ---
77
91
group. bench_function ( "FileReader/read_10" , |b| {
78
- let batch = create_batch ( 8192 , true ) ;
79
- let mut buffer = Vec :: with_capacity ( 2 * 1024 * 1024 ) ;
80
- let mut writer = FileWriter :: try_new ( & mut buffer, batch. schema ( ) . as_ref ( ) ) . unwrap ( ) ;
81
- for _ in 0 ..10 {
82
- writer. write ( & batch) . unwrap ( ) ;
83
- }
84
- writer. finish ( ) . unwrap ( ) ;
92
+ let buffer = ipc_file ( ) ;
93
+ b. iter ( move || {
94
+ let projection = None ;
95
+ let cursor = Cursor :: new ( buffer. as_slice ( ) ) ;
96
+ let mut reader = FileReader :: try_new ( cursor, projection) . unwrap ( ) ;
97
+ for _ in 0 ..10 {
98
+ reader. next ( ) . unwrap ( ) . unwrap ( ) ;
99
+ }
100
+ assert ! ( reader. next( ) . is_none( ) ) ;
101
+ } )
102
+ } ) ;
85
103
104
+ group. bench_function ( "FileReader/no_validation/read_10" , |b| {
105
+ let buffer = ipc_file ( ) ;
86
106
b. iter ( move || {
87
107
let projection = None ;
88
108
let cursor = Cursor :: new ( buffer. as_slice ( ) ) ;
89
109
let mut reader = FileReader :: try_new ( cursor, projection) . unwrap ( ) ;
110
+ unsafe {
111
+ // safety: we created a valid IPC file
112
+ reader = reader. with_skip_validation ( true ) ;
113
+ }
90
114
for _ in 0 ..10 {
91
115
reader. next ( ) . unwrap ( ) . unwrap ( ) ;
92
116
}
93
117
assert ! ( reader. next( ) . is_none( ) ) ;
94
118
} )
95
119
} ) ;
96
120
121
+ // write to an actual file
122
+ let dir = tempdir ( ) . unwrap ( ) ;
123
+ let path = dir. path ( ) . join ( "test.arrow" ) ;
124
+ let mut file = std:: fs:: File :: create ( & path) . unwrap ( ) ;
125
+ file. write_all ( & ipc_file ( ) ) . unwrap ( ) ;
126
+ drop ( file) ;
127
+
97
128
group. bench_function ( "FileReader/read_10/mmap" , |b| {
98
- let batch = create_batch ( 8192 , true ) ;
99
- // write to an actual file
100
- let dir = tempdir ( ) . unwrap ( ) ;
101
- let path = dir . path ( ) . join ( "test.arrow" ) ;
102
- let file = std :: fs :: File :: create ( & path ) . unwrap ( ) ;
103
- let mut writer = FileWriter :: try_new ( file , batch . schema ( ) . as_ref ( ) ) . unwrap ( ) ;
104
- for _ in 0 .. 10 {
105
- writer . write ( & batch ) . unwrap ( ) ;
106
- }
107
- writer . finish ( ) . unwrap ( ) ;
129
+ let path = & path ;
130
+ b . iter ( move || {
131
+ let ipc_file = std :: fs :: File :: open ( path ) . expect ( "failed to open file" ) ;
132
+ let mmap = unsafe { memmap2 :: Mmap :: map ( & ipc_file ) . expect ( "failed to mmap file" ) } ;
133
+
134
+ // Convert the mmap region to an Arrow `Buffer` to back the arrow arrays.
135
+ let bytes = bytes :: Bytes :: from_owner ( mmap ) ;
136
+ let buffer = Buffer :: from ( bytes ) ;
137
+ let decoder = IPCBufferDecoder :: new ( buffer ) ;
138
+ assert_eq ! ( decoder . num_batches ( ) , 10 ) ;
108
139
140
+ for i in 0 ..decoder. num_batches ( ) {
141
+ decoder. get_batch ( i) ;
142
+ }
143
+ } )
144
+ } ) ;
145
+
146
+ group. bench_function ( "FileReader/no_validation/read_10/mmap" , |b| {
147
+ let path = & path;
109
148
b. iter ( move || {
110
- let ipc_file = std:: fs:: File :: open ( & path) . expect ( "failed to open file" ) ;
149
+ let ipc_file = std:: fs:: File :: open ( path) . expect ( "failed to open file" ) ;
111
150
let mmap = unsafe { memmap2:: Mmap :: map ( & ipc_file) . expect ( "failed to mmap file" ) } ;
112
151
113
152
// Convert the mmap region to an Arrow `Buffer` to back the arrow arrays.
114
153
let bytes = bytes:: Bytes :: from_owner ( mmap) ;
115
154
let buffer = Buffer :: from ( bytes) ;
116
155
let decoder = IPCBufferDecoder :: new ( buffer) ;
156
+ let decoder = unsafe { decoder. with_skip_validation ( true ) } ;
117
157
assert_eq ! ( decoder. num_batches( ) , 10 ) ;
118
158
119
159
for i in 0 ..decoder. num_batches ( ) {
@@ -123,6 +163,46 @@ fn criterion_benchmark(c: &mut Criterion) {
123
163
} ) ;
124
164
}
125
165
166
+ /// Return an IPC stream with 10 record batches
167
+ fn ipc_stream ( ) -> Vec < u8 > {
168
+ let batch = create_batch ( 8192 , true ) ;
169
+ let mut buffer = Vec :: with_capacity ( 2 * 1024 * 1024 ) ;
170
+ let mut writer = StreamWriter :: try_new ( & mut buffer, batch. schema ( ) . as_ref ( ) ) . unwrap ( ) ;
171
+ for _ in 0 ..10 {
172
+ writer. write ( & batch) . unwrap ( ) ;
173
+ }
174
+ writer. finish ( ) . unwrap ( ) ;
175
+ buffer
176
+ }
177
+
178
+ /// Return an IPC stream with ZSTD compression with 10 record batches
179
+ fn ipc_stream_zstd ( ) -> Vec < u8 > {
180
+ let batch = create_batch ( 8192 , true ) ;
181
+ let mut buffer = Vec :: with_capacity ( 2 * 1024 * 1024 ) ;
182
+ let options = IpcWriteOptions :: default ( )
183
+ . try_with_compression ( Some ( CompressionType :: ZSTD ) )
184
+ . unwrap ( ) ;
185
+ let mut writer =
186
+ StreamWriter :: try_new_with_options ( & mut buffer, batch. schema ( ) . as_ref ( ) , options) . unwrap ( ) ;
187
+ for _ in 0 ..10 {
188
+ writer. write ( & batch) . unwrap ( ) ;
189
+ }
190
+ writer. finish ( ) . unwrap ( ) ;
191
+ buffer
192
+ }
193
+
194
+ /// Return an IPC file with 10 record batches
195
+ fn ipc_file ( ) -> Vec < u8 > {
196
+ let batch = create_batch ( 8192 , true ) ;
197
+ let mut buffer = Vec :: with_capacity ( 2 * 1024 * 1024 ) ;
198
+ let mut writer = FileWriter :: try_new ( & mut buffer, batch. schema ( ) . as_ref ( ) ) . unwrap ( ) ;
199
+ for _ in 0 ..10 {
200
+ writer. write ( & batch) . unwrap ( ) ;
201
+ }
202
+ writer. finish ( ) . unwrap ( ) ;
203
+ buffer
204
+ }
205
+
126
206
// copied from the zero_copy_ipc example.
127
207
// should we move this to an actual API?
128
208
/// Wrapper around the example in the `FileDecoder` which handles the
@@ -166,6 +246,11 @@ impl IPCBufferDecoder {
166
246
}
167
247
}
168
248
249
+ unsafe fn with_skip_validation ( mut self , skip_validation : bool ) -> Self {
250
+ self . decoder = self . decoder . with_skip_validation ( skip_validation) ;
251
+ self
252
+ }
253
+
169
254
fn num_batches ( & self ) -> usize {
170
255
self . batches . len ( )
171
256
}
0 commit comments