17
17
18
18
use crate :: reader:: serializer:: TapeSerializer ;
19
19
use arrow_schema:: ArrowError ;
20
+ use memchr:: memchr2;
20
21
use serde:: Serialize ;
21
22
use std:: fmt:: Write ;
22
23
@@ -394,7 +395,7 @@ impl TapeDecoder {
394
395
}
395
396
// Decoding a string
396
397
DecoderState :: String => {
397
- let s = iter. advance_until ( |b| matches ! ( b , b '\\' | b'"' ) ) ;
398
+ let s = iter. skip_chrs ( b '\\', b'"' ) ;
398
399
self . bytes . extend_from_slice ( s) ;
399
400
400
401
match next ! ( iter) {
@@ -582,7 +583,7 @@ impl TapeDecoder {
582
583
self . bytes. len( )
583
584
) ;
584
585
585
- let strings = std :: str :: from_utf8 ( & self . bytes )
586
+ let strings = simdutf8 :: basic :: from_utf8 ( & self . bytes )
586
587
. map_err ( |_| ArrowError :: JsonError ( "Encountered non-UTF-8 data" . to_string ( ) ) ) ?;
587
588
588
589
for offset in self . offsets . iter ( ) . copied ( ) {
@@ -615,29 +616,33 @@ impl TapeDecoder {
615
616
}
616
617
617
618
/// A cursor over a borrowed byte buffer that provides some helper functionality
/// for the tape decoder (peeking, bulk skipping and slicing the unread tail).
struct BufIter<'a> {
    /// The complete input buffer being decoded.
    buf: &'a [u8],
    /// Index into `buf` of the next byte to be yielded.
    pos: usize,
}
619
623
620
624
impl < ' a > BufIter < ' a > {
621
625
fn new ( buf : & ' a [ u8 ] ) -> Self {
622
- Self ( buf. iter ( ) )
626
+ Self { buf, pos : 0 }
623
627
}
624
628
629
+ #[ inline]
625
630
fn as_slice ( & self ) -> & ' a [ u8 ] {
626
- self . 0 . as_slice ( )
631
+ & self . buf [ self . pos .. ]
627
632
}
628
633
634
+ #[ inline]
629
635
fn is_empty ( & self ) -> bool {
630
- self . 0 . len ( ) == 0
636
+ self . pos >= self . buf . len ( )
631
637
}
632
638
633
639
fn peek ( & self ) -> Option < u8 > {
634
- self . 0 . as_slice ( ) . first ( ) . copied ( )
640
+ self . buf . get ( self . pos ) . copied ( )
635
641
}
636
642
643
+ #[ inline]
637
644
fn advance ( & mut self , skip : usize ) {
638
- for _ in 0 ..skip {
639
- self . 0 . next ( ) ;
640
- }
645
+ self . pos += skip;
641
646
}
642
647
643
648
fn advance_until < F : FnMut ( u8 ) -> bool > ( & mut self , f : F ) -> & [ u8 ] {
@@ -654,6 +659,20 @@ impl<'a> BufIter<'a> {
654
659
}
655
660
}
656
661
662
+ fn skip_chrs ( & mut self , c1 : u8 , c2 : u8 ) -> & [ u8 ] {
663
+ let s = self . as_slice ( ) ;
664
+ match memchr2 ( c1, c2, s) {
665
+ Some ( p) => {
666
+ self . advance ( p) ;
667
+ & s[ ..p]
668
+ }
669
+ None => {
670
+ self . advance ( s. len ( ) ) ;
671
+ s
672
+ }
673
+ }
674
+ }
675
+
657
676
fn skip_whitespace ( & mut self ) {
658
677
self . advance_until ( |b| !json_whitespace ( b) ) ;
659
678
}
@@ -663,11 +682,14 @@ impl Iterator for BufIter<'_> {
663
682
type Item = u8 ;
664
683
665
684
fn next ( & mut self ) -> Option < Self :: Item > {
666
- self . 0 . next ( ) . copied ( )
685
+ let b = self . peek ( ) ;
686
+ self . pos += 1 ;
687
+ b
667
688
}
668
689
669
690
fn size_hint ( & self ) -> ( usize , Option < usize > ) {
670
- self . 0 . size_hint ( )
691
+ let s = self . buf . len ( ) . checked_sub ( self . pos ) . unwrap_or_default ( ) ;
692
+ ( s, Some ( s) )
671
693
}
672
694
}
673
695
0 commit comments