@@ -66,6 +66,8 @@ template <typename T, typename Filter, typename ExtractValues, bool kIsDense>
66
66
using DecoderVisitor =
67
67
velox::dwio::common::ColumnVisitor<T, Filter, ExtractValues, kIsDense >;
68
68
69
+ using vector_size_t = velox::vector_size_t ;
70
+
69
71
// Extra parameters that need to be persisted/used during a single call of
70
72
// readWithVisitor at column reader level, which might span multiple calls of
71
73
// readWithVisitor (one per chunk) in decoders.
@@ -75,14 +77,18 @@ struct ReadWithVisitorParams {
75
77
// across potentially multiple chunks.
76
78
std::function<uint64_t *()> makeReaderNulls;
77
79
80
+ // Initialize `SelectiveColumnReader::returnReaderNulls_' field. Needs to be
81
+ // called after decoding nulls in `NullableEncoding'.
82
+ std::function<void ()> initReturnReaderNulls;
83
+
78
84
// Create the result nulls if not already exists. Similar to
79
85
// `makeReaderNulls', we create one single buffer for all the results nulls
80
86
// across potential multiple chunks during one read.
81
87
std::function<void ()> prepareResultNulls;
82
88
83
89
// Number of rows scanned so far. Contains rows scanned in previous chunks
84
90
// during this read call as well.
85
- velox:: vector_size_t numScanned;
91
+ vector_size_t numScanned;
86
92
};
87
93
88
94
class Encoding {
@@ -342,13 +348,13 @@ class BufferedEncoding {
342
348
std::array<T, BufferSize> buffer_;
343
349
};
344
350
345
- template <typename Visitor1, typename Visitor2>
346
- void checkCurrentRowEqual (const Visitor1& v1, const Visitor2& v2) {
347
- if (v1.atEnd ()) {
348
- NIMBLE_DASSERT (v2.atEnd (), " " );
351
+ template <typename T, typename PhysicalType>
352
+ T castFromPhysicalType (const PhysicalType& value) {
353
+ if constexpr (isFloatingPointType<T>()) {
354
+ static_assert (sizeof (T) == sizeof (PhysicalType));
355
+ return reinterpret_cast <const T&>(value);
349
356
} else {
350
- NIMBLE_DASSERT (!v2.atEnd (), " " );
351
- NIMBLE_DASSERT (v1.currentRow () == v2.currentRow (), " " );
357
+ return value;
352
358
}
353
359
}
354
360
@@ -362,10 +368,7 @@ void readWithVisitorSlow(
362
368
constexpr bool kExtractToReader = std::is_same_v<
363
369
typename DecoderVisitor::Extract,
364
370
velox::dwio::common::ExtractToReader>;
365
- const uint64_t * nulls = nullptr ;
366
- if (auto & nullsBuf = visitor.reader ().nullsInReadRange ()) {
367
- nulls = nullsBuf->template as <uint64_t >();
368
- }
371
+ auto * nulls = visitor.reader ().rawNullsInReadRange ();
369
372
if constexpr (kExtractToReader ) {
370
373
params.prepareResultNulls ();
371
374
}
@@ -378,32 +381,122 @@ void readWithVisitorSlow(
378
381
numNonNulls -=
379
382
velox::bits::countNulls (nulls, numScanned, visitor.currentRow ());
380
383
}
381
- skip (numNonNulls);
384
+ if (numNonNulls > 0 ) {
385
+ skip (numNonNulls);
386
+ }
382
387
numScanned = visitor.currentRow () + 1 ;
383
388
}
384
389
if (nulls && velox::bits::isBitNull (nulls, visitor.currentRow ())) {
385
390
if (!visitor.allowNulls ()) {
386
- visitor.setRowIndex (visitor. rowIndex () + 1 );
391
+ visitor.addRowIndex ( 1 );
387
392
atEnd = visitor.atEnd ();
388
393
} else if (kExtractToReader && visitor.reader ().returnReaderNulls ()) {
389
- visitor.setRowIndex (visitor. rowIndex () + 1 );
390
- visitor.setNumValues (visitor. reader (). numValues () + 1 );
394
+ visitor.addRowIndex ( 1 );
395
+ visitor.addNumValues ( 1 );
391
396
atEnd = visitor.atEnd ();
392
397
} else {
393
398
visitor.processNull (atEnd);
394
399
}
395
400
} else {
396
- auto value = decodeOne ();
397
- if constexpr (isFloatingPointType<T>()) {
398
- if constexpr (sizeof (T) != sizeof (value)) {
399
- NIMBLE_UNREACHABLE (typeid (decltype (value)).name ());
400
- }
401
- visitor.process (reinterpret_cast <const T&>(value), atEnd);
402
- } else {
403
- visitor.process (value, atEnd);
401
+ visitor.process (castFromPhysicalType<T>(decodeOne ()), atEnd);
402
+ }
403
+ }
404
+ }
405
+
406
+ template <typename TEncoding, typename V>
407
+ void readWithVisitorFast (
408
+ TEncoding& encoding,
409
+ V& visitor,
410
+ ReadWithVisitorParams& params,
411
+ const uint64_t * nulls) {
412
+ constexpr bool kOutputNulls = !V::kHasFilter && !V::kHasHook ;
413
+ const auto numRows = visitor.numRows () - visitor.rowIndex ();
414
+ auto & outerRows = visitor.outerNonNullRows ();
415
+ if (!nulls) {
416
+ encoding.template bulkScan <false >(
417
+ visitor,
418
+ params.numScanned ,
419
+ visitor.rows () + visitor.rowIndex (),
420
+ numRows,
421
+ velox::iota (visitor.numRows (), outerRows) + visitor.rowIndex ());
422
+ return ;
423
+ }
424
+ // TODO: Store last non null index and num non-nulls so far in decoder to
425
+ // accelerate multi-chunk decoding.
426
+ const auto numNonNullsSoFar =
427
+ velox::bits::countNonNulls (nulls, 0 , params.numScanned );
428
+ if constexpr (V::dense) {
429
+ NIMBLE_DASSERT (
430
+ !visitor.reader ().hasNulls () || visitor.reader ().returnReaderNulls (),
431
+ " " );
432
+ outerRows.resize (numRows);
433
+ auto numNonNulls = velox::simd::indicesOfSetBits (
434
+ nulls, visitor.rowIndex (), visitor.numRows (), outerRows.data ());
435
+ outerRows.resize (numNonNulls);
436
+ if (outerRows.empty ()) {
437
+ if constexpr (kOutputNulls ) {
438
+ visitor.addNumValues (numRows);
404
439
}
440
+ visitor.addRowIndex (numRows);
441
+ } else {
442
+ encoding.template bulkScan <true >(
443
+ visitor,
444
+ numNonNullsSoFar,
445
+ visitor.rows () + numNonNullsSoFar,
446
+ numNonNulls,
447
+ outerRows.data ());
448
+ }
449
+ return ;
450
+ }
451
+ auto & innerRows = visitor.innerNonNullRows ();
452
+ int32_t tailSkip = -1 ;
453
+ uint64_t * resultNulls = nullptr ;
454
+ uint8_t * chunkResultNulls = nullptr ;
455
+ if constexpr (kOutputNulls ) {
456
+ params.prepareResultNulls ();
457
+ resultNulls = visitor.reader ().rawResultNulls ();
458
+ chunkResultNulls = reinterpret_cast <uint8_t *>(resultNulls) +
459
+ velox::bits::nbytes (visitor.rowIndex ());
460
+ }
461
+ bool anyNulls =
462
+ velox::dwio::common::nonNullRowsFromSparse<V::kHasFilter , kOutputNulls >(
463
+ nulls,
464
+ velox::RowSet (visitor.rows () + visitor.rowIndex (), numRows),
465
+ innerRows,
466
+ outerRows,
467
+ chunkResultNulls,
468
+ tailSkip);
469
+ if (anyNulls) {
470
+ visitor.setHasNulls ();
471
+ }
472
+ if (kOutputNulls && visitor.rowIndex () % 8 != 0 ) {
473
+ velox::bits::copyBits (
474
+ resultNulls,
475
+ velox::bits::roundUp (visitor.rowIndex (), 8 ),
476
+ resultNulls,
477
+ visitor.rowIndex (),
478
+ numRows);
479
+ }
480
+ if (!V::kHasFilter && visitor.rowIndex () > 0 ) {
481
+ for (auto & row : outerRows) {
482
+ row += visitor.rowIndex ();
405
483
}
406
484
}
485
+ if (innerRows.empty ()) {
486
+ if constexpr (kOutputNulls ) {
487
+ visitor.addNumValues (numRows);
488
+ }
489
+ visitor.addRowIndex (numRows);
490
+ encoding.skip (tailSkip - numNonNullsSoFar);
491
+ } else {
492
+ encoding.template bulkScan <true >(
493
+ visitor,
494
+ numNonNullsSoFar,
495
+ innerRows.data (),
496
+ innerRows.size (),
497
+ outerRows.data ());
498
+ encoding.skip (tailSkip);
499
+ }
407
500
}
408
501
409
502
} // namespace detail
0 commit comments