@@ -328,34 +328,36 @@ void testDataTypeWriter(
328
328
const TypePtr& type,
329
329
std::vector<std::optional<T>>& data,
330
330
const uint32_t sequence = 0 ,
331
- DwrfFormat format = DwrfFormat:: kDwrf ) {
331
+ dwio::common::FileFormat fileFormat = dwio::common::FileFormat::DWRF ) {
332
332
// Generate a seed and randomly shuffle the data
333
333
uint32_t seed = Random::rand32 ();
334
334
std::shuffle (data.begin (), data.end (), std::default_random_engine (seed));
335
335
336
336
auto config = std::make_shared<Config>();
337
337
auto pool = memory::memoryManager ()->addLeafPool ();
338
- WriterContext context{config, memory::memoryManager ()->addRootPool ()};
338
+ WriterContext context{
339
+ config, memory::memoryManager ()->addRootPool (), fileFormat};
339
340
context.initBuffer ();
340
341
auto rowType = ROW ({type});
341
342
auto dataTypeWithId = TypeWithId::create (type, 1 );
342
343
343
344
// write
344
- auto writer = BaseColumnWriter::create (
345
- context, *dataTypeWithId, sequence, nullptr , format );
345
+ auto writer =
346
+ BaseColumnWriter::create ( context, *dataTypeWithId, sequence, nullptr );
346
347
auto size = data.size ();
347
348
auto batch = populateBatch (data, pool.get (), type);
348
349
const size_t stripeCount = 2 ;
349
350
const size_t strideCount = 3 ;
350
351
351
352
for (auto stripeI = 0 ; stripeI < stripeCount; ++stripeI) {
352
353
proto::StripeFooter sf;
354
+ auto sfw = StripeFooterWriteWrapper (&sf);
353
355
for (auto strideI = 0 ; strideI < strideCount; ++strideI) {
354
356
writer->write (batch, common::Ranges::of (0 , size));
355
357
writer->createIndexEntry ();
356
358
}
357
- writer->flush ([&sf ](uint32_t /* unused */ ) -> proto::ColumnEncoding& {
358
- return *sf. add_encoding ();
359
+ writer->flush ([&sfw ](uint32_t /* unused */ ) -> ColumnEncodingWriteWrapper {
360
+ return sfw. addEncoding ();
359
361
});
360
362
361
363
TestStripeStreams streams (context, sf, rowType, pool.get ());
@@ -461,7 +463,7 @@ TEST_F(ColumnWriterTest, TestNullBooleanWriter) {
461
463
}
462
464
463
465
TEST_F (ColumnWriterTest, testDecimalWriter) {
464
- const auto format = DwrfFormat:: kOrc ;
466
+ const auto format = dwio::common::FileFormat::ORC ;
465
467
auto genShortDecimals = [&](bool hasNull) {
466
468
std::vector<std::optional<int64_t >> shortDecimals;
467
469
for (auto i = 0 ; i < ITERATIONS; ++i) {
@@ -1003,6 +1005,7 @@ void testMapWriter(
1003
1005
}
1004
1006
1005
1007
proto::StripeFooter sf;
1008
+ auto sfw = StripeFooterWriteWrapper (&sf);
1006
1009
std::vector<VectorPtr> writtenBatches;
1007
1010
1008
1011
// Write map/row
@@ -1020,8 +1023,8 @@ void testMapWriter(
1020
1023
writtenBatches.push_back (toWrite);
1021
1024
}
1022
1025
1023
- writer->flush ([&sf ](uint32_t /* unused */ ) -> proto::ColumnEncoding& {
1024
- return *sf. add_encoding ();
1026
+ writer->flush ([&sfw ](uint32_t /* unused */ ) -> ColumnEncodingWriteWrapper {
1027
+ return sfw. addEncoding ();
1025
1028
});
1026
1029
1027
1030
auto validate = [&](bool returnFlatVector = false ) {
@@ -1145,6 +1148,7 @@ void testMapWriterRow(
1145
1148
}
1146
1149
1147
1150
proto::StripeFooter sf;
1151
+ auto sfw = StripeFooterWriteWrapper (&sf);
1148
1152
std::vector<VectorPtr> writtenBatches;
1149
1153
1150
1154
// Write map/row
@@ -1156,8 +1160,8 @@ void testMapWriterRow(
1156
1160
writer->createIndexEntry ();
1157
1161
writtenBatches.push_back (toWrite);
1158
1162
1159
- writer->flush ([&sf ](uint32_t /* unused */ ) -> proto::ColumnEncoding& {
1160
- return *sf. add_encoding ();
1163
+ writer->flush ([&sfw ](uint32_t /* unused */ ) -> ColumnEncodingWriteWrapper {
1164
+ return sfw. addEncoding ();
1161
1165
});
1162
1166
1163
1167
auto validate = [&](bool returnFlatVector = false ) {
@@ -2174,15 +2178,16 @@ struct IntegerColumnWriterTypedTestCase {
2174
2178
2175
2179
for (size_t i = 0 ; i != flushCount; ++i) {
2176
2180
proto::StripeFooter stripeFooter;
2181
+ auto sfw = StripeFooterWriteWrapper (&stripeFooter);
2177
2182
for (size_t j = 0 ; j != repetitionCount; ++j) {
2178
2183
columnWriter->write (batch, common::Ranges::of (0 , batch->size ()));
2179
2184
postProcess (*columnWriter, i, j);
2180
2185
columnWriter->createIndexEntry ();
2181
2186
}
2182
2187
// We only flush once per stripe.
2183
2188
columnWriter->flush (
2184
- [&stripeFooter ](uint32_t /* unused */ ) -> proto::ColumnEncoding& {
2185
- return *stripeFooter. add_encoding ();
2189
+ [&sfw ](uint32_t /* unused */ ) -> ColumnEncodingWriteWrapper {
2190
+ return sfw. addEncoding ();
2186
2191
});
2187
2192
2188
2193
// Read and verify.
@@ -3408,6 +3413,7 @@ struct StringColumnWriterTestCase {
3408
3413
3409
3414
for (size_t i = 0 ; i != flushCount; ++i) {
3410
3415
proto::StripeFooter stripeFooter;
3416
+ auto sfw = StripeFooterWriteWrapper (&stripeFooter);
3411
3417
// Write Stride
3412
3418
for (size_t j = 0 ; j != repetitionCount; ++j) {
3413
3419
// TODO: break the batch into multiple strides.
@@ -3418,8 +3424,8 @@ struct StringColumnWriterTestCase {
3418
3424
3419
3425
// Flush when all strides are written (once per stripe).
3420
3426
columnWriter->flush (
3421
- [&stripeFooter ](uint32_t /* unused */ ) -> proto::ColumnEncoding& {
3422
- return *stripeFooter. add_encoding ();
3427
+ [&sfw ](uint32_t /* unused */ ) -> ColumnEncodingWriteWrapper {
3428
+ return sfw. addEncoding ();
3423
3429
});
3424
3430
3425
3431
// Read and verify.
@@ -4248,8 +4254,9 @@ TEST_F(ColumnWriterTest, IntDictWriterDirectValueOverflow) {
4248
4254
writer->write (vector, common::Ranges::of (0 , size));
4249
4255
writer->createIndexEntry ();
4250
4256
proto::StripeFooter sf;
4251
- writer->flush ([&sf](auto /* unused */ ) -> proto::ColumnEncoding& {
4252
- return *sf.add_encoding ();
4257
+ auto sfw = StripeFooterWriteWrapper (&sf);
4258
+ writer->flush ([&sfw](auto /* unused */ ) -> ColumnEncodingWriteWrapper {
4259
+ return sfw.addEncoding ();
4253
4260
});
4254
4261
auto & enc = sf.encoding (0 );
4255
4262
ASSERT_EQ (enc.kind (), proto::ColumnEncoding_Kind_DICTIONARY);
@@ -4293,8 +4300,9 @@ TEST_F(ColumnWriterTest, ShortDictWriterDictValueOverflow) {
4293
4300
writer->write (vector, common::Ranges::of (0 , size));
4294
4301
writer->createIndexEntry ();
4295
4302
proto::StripeFooter sf;
4296
- writer->flush ([&sf](auto /* unused */ ) -> proto::ColumnEncoding& {
4297
- return *sf.add_encoding ();
4303
+ auto sfw = StripeFooterWriteWrapper (&sf);
4304
+ writer->flush ([&sfw](uint32_t /* unused */ ) -> ColumnEncodingWriteWrapper {
4305
+ return sfw.addEncoding ();
4298
4306
});
4299
4307
auto & enc = sf.encoding (0 );
4300
4308
ASSERT_EQ (enc.kind (), proto::ColumnEncoding_Kind_DICTIONARY);
@@ -4334,8 +4342,9 @@ TEST_F(ColumnWriterTest, RemovePresentStream) {
4334
4342
writer->write (vector, common::Ranges::of (0 , size));
4335
4343
writer->createIndexEntry ();
4336
4344
proto::StripeFooter sf;
4337
- writer->flush ([&sf](auto /* unused */ ) -> proto::ColumnEncoding& {
4338
- return *sf.add_encoding ();
4345
+ auto sfw = StripeFooterWriteWrapper (&sf);
4346
+ writer->flush ([&sfw](uint32_t /* unused */ ) -> ColumnEncodingWriteWrapper {
4347
+ return sfw.addEncoding ();
4339
4348
});
4340
4349
4341
4350
// get data stream
@@ -4372,8 +4381,9 @@ TEST_F(ColumnWriterTest, ColumnIdInStream) {
4372
4381
writer->write (vector, common::Ranges::of (0 , size));
4373
4382
writer->createIndexEntry ();
4374
4383
proto::StripeFooter sf;
4375
- writer->flush ([&sf](auto /* unused */ ) -> proto::ColumnEncoding& {
4376
- return *sf.add_encoding ();
4384
+ auto sfw = StripeFooterWriteWrapper (&sf);
4385
+ writer->flush ([&sfw](uint32_t /* unused */ ) -> ColumnEncodingWriteWrapper {
4386
+ return sfw.addEncoding ();
4377
4387
});
4378
4388
4379
4389
// get data stream
@@ -4501,8 +4511,9 @@ struct DictColumnWriterTestCase {
4501
4511
writer->createIndexEntry ();
4502
4512
4503
4513
proto::StripeFooter sf;
4504
- writer->flush ([&sf](uint32_t /* unused */ ) -> proto::ColumnEncoding& {
4505
- return *sf.add_encoding ();
4514
+ auto sfw = StripeFooterWriteWrapper (&sf);
4515
+ writer->flush ([&sfw](uint32_t /* unused */ ) -> ColumnEncodingWriteWrapper {
4516
+ return sfw.addEncoding ();
4506
4517
});
4507
4518
4508
4519
// Reading the vector out
0 commit comments