@@ -42,6 +42,7 @@ struct PackedBlockContext {
42
42
43
43
// NOTE(review): presumably a tag value identifying an encrypted block
// context - confirm against the use sites, which are not visible here.
const ENCRYPTED_BLOCK_CONTEXT : u32 = 1 ;
44
44
// NOTE(review): presumably the counterpart tag for an unencrypted block
// context - confirm against the use sites.
const UNENCRYPTED_BLOCK_CONTEXT : u32 = 2 ;
45
// 128 KiB. The `RawLayout` helpers use this as the byte stride of one record
// when converting block indices to file offsets.
// NOTE(review): presumably this must match the `recordsize` of the backing
// ZFS dataset - confirm.
+ const ZFS_RECORDSIZE : u64 = 128 * 1024 ;
45
46
46
47
// Size of one `PackedBlockContext` in bytes, widened to `u64` so that it can
// be combined with block sizes in offset arithmetic.
const BLOCK_CONTEXT_SIZE_BYTES : u64 =
47
48
std:: mem:: size_of :: < PackedBlockContext > ( ) as u64 ;
@@ -511,17 +512,21 @@ impl RawInnerV2 {
511
512
512
513
let ( start_pos, mut parity) =
513
514
self . layout . block_pos ( writes[ 0 ] . offset . value ) ;
515
+ let start_block = writes[ 0 ] . offset ;
516
+ let mut block = start_block. value ;
514
517
515
518
// TODO we're overestimating capacity here, because we can write
516
519
// contexts in pairs.
517
520
let mut iovecs = Vec :: with_capacity ( n_blocks * 2 ) ;
518
521
519
522
let mut ctx_slice = ctxs. as_slice ( ) ;
520
523
let mut write_slice = writes;
524
+ let padding = vec ! [ 0u8 ; self . layout. padding_size( ) as usize ] ;
521
525
while !write_slice. is_empty ( ) || !ctx_slice. is_empty ( ) {
522
526
match parity {
523
527
Parity :: FirstBlock => {
524
528
iovecs. push ( IoSlice :: new ( & write_slice[ 0 ] . data ) ) ;
529
+ block += 1 ;
525
530
write_slice = & write_slice[ 1 ..] ;
526
531
parity = Parity :: FirstContext ;
527
532
}
@@ -547,13 +552,15 @@ impl RawInnerV2 {
547
552
// contexts).
548
553
iovecs. push ( IoSlice :: new ( & write_slice[ 0 ] . data ) ) ;
549
554
write_slice = & write_slice[ 1 ..] ;
555
+ if self . layout . has_padding_after ( block) {
556
+ iovecs. push ( IoSlice :: new ( & padding) ) ;
557
+ }
550
558
parity = Parity :: FirstBlock ;
559
+ block += 1 ;
551
560
}
552
561
}
553
562
}
554
563
555
- let start_block = writes[ 0 ] . offset ;
556
-
557
564
let expected_bytes =
558
565
n_blocks as u64 * ( block_size as u64 + BLOCK_CONTEXT_SIZE_BYTES ) ;
559
566
@@ -637,52 +644,88 @@ impl RawInnerV2 {
637
644
let mut buf_slice = & mut buf[ ..] ;
638
645
let mut ctx_slice = & mut ctxs[ ..] ;
639
646
647
+ // This is awkward: we know how many blocks and contexts we're reading,
648
+ // and have pre-allocated data for them. However, we don't know how
649
+ // many chunks of padding we may need to read! As such, we'll store a
650
+ // `Vec<Option<IoSliceMut>>`, and use `None` to represent padding reads;
651
+ // then, we'll go through and splice them in once we know their total
652
+ // size.
653
+ let mut block = start_block. value ;
654
+ let mut padding_count = 0 ;
640
655
while !ctx_slice. is_empty ( ) || !buf_slice. is_empty ( ) {
641
656
match parity {
642
657
Parity :: FirstBlock => {
643
658
let ( b, next) = buf_slice. split_at_mut ( block_size) ;
644
- iovecs. push ( IoSliceMut :: new ( b) ) ;
659
+ iovecs. push ( Some ( IoSliceMut :: new ( b) ) ) ;
645
660
buf_slice = next;
646
661
parity = Parity :: FirstContext ;
662
+ block += 1 ;
647
663
}
648
664
Parity :: FirstContext => {
649
665
if ctx_slice. len ( ) > 1 {
650
666
let ( b, next) = ctx_slice. split_at_mut ( 2 ) ;
651
- iovecs. push ( IoSliceMut :: new ( b. as_bytes_mut ( ) ) ) ;
667
+ iovecs. push ( Some ( IoSliceMut :: new ( b. as_bytes_mut ( ) ) ) ) ;
652
668
ctx_slice = next;
653
669
parity = Parity :: SecondBlock ;
654
670
} else {
655
671
let ( b, next) = ctx_slice. split_at_mut ( 1 ) ;
656
- iovecs. push ( IoSliceMut :: new ( b. as_bytes_mut ( ) ) ) ;
672
+ iovecs. push ( Some ( IoSliceMut :: new ( b. as_bytes_mut ( ) ) ) ) ;
657
673
ctx_slice = next;
658
674
parity = Parity :: SecondContext ;
659
675
}
660
676
}
661
677
Parity :: SecondContext => {
662
678
let ( b, next) = ctx_slice. split_at_mut ( 1 ) ;
663
- iovecs. push ( IoSliceMut :: new ( b. as_bytes_mut ( ) ) ) ;
679
+ iovecs. push ( Some ( IoSliceMut :: new ( b. as_bytes_mut ( ) ) ) ) ;
664
680
ctx_slice = next;
665
681
parity = Parity :: SecondBlock ;
666
682
}
667
683
Parity :: SecondBlock => {
668
- if buf_slice. len ( ) > block_size {
684
+ let has_padding = self . layout . has_padding_after ( block) ;
685
+ if buf_slice. len ( ) > block_size && !has_padding {
669
686
let ( b, next) = buf_slice. split_at_mut ( block_size * 2 ) ;
670
- iovecs. push ( IoSliceMut :: new ( b) ) ;
687
+ iovecs. push ( Some ( IoSliceMut :: new ( b) ) ) ;
671
688
buf_slice = next;
672
689
parity = Parity :: FirstContext ;
690
+ block += 2 ;
673
691
} else {
674
692
let ( b, next) = buf_slice. split_at_mut ( block_size) ;
675
- iovecs. push ( IoSliceMut :: new ( b) ) ;
693
+ iovecs. push ( Some ( IoSliceMut :: new ( b) ) ) ;
676
694
buf_slice = next;
695
+ if has_padding {
696
+ iovecs. push ( None ) ;
697
+ padding_count += 1 ;
698
+ }
677
699
parity = Parity :: FirstBlock ;
700
+ block += 1 ;
678
701
}
679
702
}
680
703
}
681
704
}
682
705
683
- let expected_bytes =
706
+ // How many bytes do we expect `preadv` to return?
707
+ let mut expected_bytes =
684
708
n_blocks as u64 * ( block_size as u64 + BLOCK_CONTEXT_SIZE_BYTES ) ;
685
709
710
+ // Now that we know the total number of padded reads, replace the `None`
711
+ // with borrowed chunks of a dummy array (`padding`) and unwrap all of
712
+ // the IoVecs.
713
+ let mut padding = vec ! [ ] ;
714
+ if padding_count > 0 {
715
+ let padding_size = self . layout . padding_size ( ) as usize ;
716
+ padding. resize ( padding_size * padding_count, 0u8 ) ;
717
+ expected_bytes += padding. len ( ) as u64 ;
718
+ for ( iov, p) in iovecs
719
+ . iter_mut ( )
720
+ . filter ( |b| b. is_none ( ) )
721
+ . zip ( padding. chunks_mut ( padding_size) )
722
+ {
723
+ * iov = Some ( IoSliceMut :: new ( p) ) ;
724
+ }
725
+ }
726
+ let mut iovecs: Vec < _ > =
727
+ iovecs. into_iter ( ) . map ( Option :: unwrap) . collect ( ) ;
728
+
686
729
// Finally we get to read the actual data. That's why we're here
687
730
cdt:: extent__read__file__start!( || {
688
731
( job_id. 0 , self . extent_number, n_blocks as u64 )
@@ -778,7 +821,19 @@ impl RawLayout {
778
821
779
822
/// Returns the byte offset of the `block_written` bitpacked array
780
823
fn block_written_array_offset ( & self ) -> u64 {
781
- self . block_count ( ) * ( self . block_size ( ) + BLOCK_CONTEXT_SIZE_BYTES )
824
+ let bpr = self . blocks_per_record ( ) ;
825
+ let bc = self . block_count ( ) ;
826
+
827
+ if bc % bpr == 0 {
828
+ ( bc / bpr) * ZFS_RECORDSIZE
829
+ } else {
830
+ let record_count = bc / bpr;
831
+ let trailing_blocks = bc - record_count * bpr;
832
+
833
+ record_count * ZFS_RECORDSIZE
834
+ + trailing_blocks
835
+ * ( self . block_size ( ) + BLOCK_CONTEXT_SIZE_BYTES )
836
+ }
782
837
}
783
838
784
839
/// Returns the size of the `block_written` bitpacked array, in bytes
@@ -885,7 +940,11 @@ impl RawLayout {
885
940
///
886
941
/// This offset could either be block data or context, depending on parity!
887
942
fn block_pos ( & self , block : u64 ) -> ( u64 , Parity ) {
888
- let pos = block * ( self . block_size ( ) + BLOCK_CONTEXT_SIZE_BYTES ) ;
943
+ let bpr = self . blocks_per_record ( ) ;
944
+ let record = block / bpr;
945
+ let block = block % bpr;
946
+ let pos = record * ZFS_RECORDSIZE
947
+ + block * ( self . block_size ( ) + BLOCK_CONTEXT_SIZE_BYTES ) ;
889
948
let parity = match block % 2 {
890
949
0 => Parity :: FirstBlock ,
891
950
1 => Parity :: SecondContext ,
@@ -896,13 +955,43 @@ impl RawLayout {
896
955
897
956
/// Returns the position of the given block's context
898
957
fn context_slot ( & self , block : u64 ) -> u64 {
899
- let pos = block * ( self . block_size ( ) + BLOCK_CONTEXT_SIZE_BYTES ) ;
958
+ let bpr = self . blocks_per_record ( ) ;
959
+ let record = block / bpr;
960
+ let block = block % bpr;
961
+ let pos = record * ZFS_RECORDSIZE
962
+ + block * ( self . block_size ( ) + BLOCK_CONTEXT_SIZE_BYTES ) ;
900
963
match block % 2 {
901
964
0 => pos + self . block_size ( ) ,
902
965
1 => pos,
903
966
_ => unreachable ! ( ) ,
904
967
}
905
968
}
969
+
970
+ /// Returns the number of blocks that fit into a ZFS recordsize
971
+ fn blocks_per_record ( & self ) -> u64 {
972
+ // Each block contains data and a single context slot
973
+ let bytes_per_block = self . block_size ( ) + BLOCK_CONTEXT_SIZE_BYTES ;
974
+ // We guarantee that there are an even number of blocks per record, for
975
+ // simplicity (so that padding always comes after `Parity::SecondBlock`)
976
+ 2 * ( ZFS_RECORDSIZE / ( 2 * bytes_per_block) )
977
+ }
978
+
979
+ /// Checks whether there is padding after the given block
980
+ fn has_padding_after ( & self , block : u64 ) -> bool {
981
+ // No padding at the end of the file
982
+ if block == self . block_count ( ) - 1 {
983
+ return false ;
984
+ }
985
+ // Otherwise, there's padding at the end of each block-pair-group
986
+ let bpr = self . blocks_per_record ( ) ;
987
+ ( block % bpr) == bpr - 1
988
+ }
989
+
990
+ /// Returns the size of `recordsize` padding
991
+ fn padding_size ( & self ) -> u64 {
992
+ let bpr = self . blocks_per_record ( ) ;
993
+ ZFS_RECORDSIZE - bpr * ( self . block_size ( ) + BLOCK_CONTEXT_SIZE_BYTES )
994
+ }
906
995
}
907
996
908
997
/// Represents position in a block-context pair.
0 commit comments