@@ -34,11 +34,6 @@ DEFINE_bool(
34
34
true ,
35
35
" When true, generates plans with aggregations over sorted inputs" );
36
36
37
- DEFINE_bool (
38
- enable_window_reference_verification,
39
- false ,
40
- " When true, the results of the window aggregation are compared to reference DB results" );
41
-
42
37
using facebook::velox::fuzzer::CallableSignature;
43
38
using facebook::velox::fuzzer::SignatureTemplate;
44
39
@@ -83,21 +78,10 @@ class AggregationFuzzer : public AggregationFuzzerBase {
83
78
84
79
// Number of iterations using aggregations over distinct inputs.
85
80
size_t numDistinctInputs{0 };
86
- // Number of iterations using window expressions.
87
- size_t numWindow{0 };
88
81
89
82
void print (size_t numIterations) const ;
90
83
};
91
84
92
- // Return 'true' if query plans failed.
93
- bool verifyWindow (
94
- const std::vector<std::string>& partitionKeys,
95
- const std::vector<std::string>& sortingKeys,
96
- const std::string& aggregate,
97
- const std::vector<RowVectorPtr>& input,
98
- bool customVerification,
99
- bool enableWindowVerification);
100
-
101
85
// Return 'true' if query plans failed.
102
86
bool verifyAggregation (
103
87
const std::vector<std::string>& groupingKeys,
@@ -374,110 +358,85 @@ void AggregationFuzzer::go() {
374
358
std::vector<TypePtr> argTypes = signature.args ;
375
359
std::vector<std::string> argNames = makeNames (argTypes.size ());
376
360
377
- // 10% of times test window operator.
361
+ const bool sortedInputs = FLAGS_enable_sorted_aggregations &&
362
+ canSortInputs (signature) && vectorFuzzer_.coinToss (0.2 );
363
+
364
+ // Exclude approx_xxx aggregations since their verifiers may not be able
365
+ // to verify the results. The approx_percentile verifier would discard
366
+ // the distinct property when calculating the expected result, say the
367
+ // expected result of the verifier would be approx_percentile(x), which
368
+ // may be different from the actual result of approx_percentile(distinct
369
+ // x).
370
+ const bool distinctInputs = !sortedInputs &&
371
+ (signature.name .find (" approx_" ) == std::string::npos) &&
372
+ supportsDistinctInputs (signature, orderableGroupKeys_) &&
373
+ vectorFuzzer_.coinToss (0.2 );
374
+
375
+ auto call = makeFunctionCall (
376
+ signature.name , argNames, sortedInputs, distinctInputs);
377
+
378
+ // 20% of times use mask.
379
+ std::vector<std::string> masks;
380
+ if (vectorFuzzer_.coinToss (0.2 )) {
381
+ ++stats_.numMask ;
382
+
383
+ masks.push_back (" m0" );
384
+ argTypes.push_back (BOOLEAN ());
385
+ argNames.push_back (masks.back ());
386
+ }
387
+
388
+ // 10% of times use global aggregation (no grouping keys).
389
+ std::vector<std::string> groupingKeys;
378
390
if (vectorFuzzer_.coinToss (0.1 )) {
379
- ++stats_.numWindow ;
391
+ ++stats_.numGlobal ;
392
+ } else {
393
+ ++stats_.numGroupBy ;
394
+ groupingKeys = generateKeys (" g" , argNames, argTypes);
395
+ }
380
396
381
- auto call = makeFunctionCall (signature. name , argNames, false );
397
+ auto input = generateInputData (argNames, argTypes, signature );
382
398
383
- auto partitionKeys = generateKeys (" p" , argNames, argTypes);
384
- auto sortingKeys = generateSortingKeys (" s" , argNames, argTypes);
385
- auto input = generateInputDataWithRowNumber (
386
- argNames, argTypes, partitionKeys, {}, sortingKeys, signature);
399
+ logVectors (input);
387
400
388
- logVectors (input);
401
+ std::shared_ptr<ResultVerifier> customVerifier;
402
+ if (customVerification) {
403
+ customVerifier = customVerificationFunctions_.at (signature.name );
404
+ }
389
405
390
- bool failed = verifyWindow (
391
- partitionKeys,
392
- sortingKeys,
406
+ if (sortedInputs) {
407
+ ++stats_.numSortedInputs ;
408
+ bool failed = verifySortedAggregation (
409
+ groupingKeys,
393
410
call,
411
+ masks,
394
412
input,
395
413
customVerification,
396
- FLAGS_enable_window_reference_verification );
414
+ customVerifier );
397
415
if (failed) {
398
416
signatureWithStats.second .numFailed ++;
399
417
}
400
- } else {
401
- const bool sortedInputs = FLAGS_enable_sorted_aggregations &&
402
- canSortInputs (signature) && vectorFuzzer_.coinToss (0.2 );
403
-
404
- // Exclude approx_xxx aggregations since their verifiers may not be able
405
- // to verify the results. The approx_percentile verifier would discard
406
- // the distinct property when calculating the expected result, say the
407
- // expected result of the verifier would be approx_percentile(x), which
408
- // may be different from the actual result of approx_percentile(distinct
409
- // x).
410
- const bool distinctInputs = !sortedInputs &&
411
- (signature.name .find (" approx_" ) == std::string::npos) &&
412
- supportsDistinctInputs (signature, orderableGroupKeys_) &&
413
- vectorFuzzer_.coinToss (0.2 );
414
-
415
- auto call = makeFunctionCall (
416
- signature.name , argNames, sortedInputs, distinctInputs);
417
-
418
- // 20% of times use mask.
419
- std::vector<std::string> masks;
420
- if (vectorFuzzer_.coinToss (0.2 )) {
421
- ++stats_.numMask ;
422
-
423
- masks.push_back (" m0" );
424
- argTypes.push_back (BOOLEAN ());
425
- argNames.push_back (masks.back ());
426
- }
427
-
428
- // 10% of times use global aggregation (no grouping keys).
429
- std::vector<std::string> groupingKeys;
430
- if (vectorFuzzer_.coinToss (0.1 )) {
431
- ++stats_.numGlobal ;
432
- } else {
433
- ++stats_.numGroupBy ;
434
- groupingKeys = generateKeys (" g" , argNames, argTypes);
435
- }
436
-
437
- auto input = generateInputData (argNames, argTypes, signature);
438
-
439
- logVectors (input);
440
-
441
- std::shared_ptr<ResultVerifier> customVerifier;
442
- if (customVerification) {
443
- customVerifier = customVerificationFunctions_.at (signature.name );
418
+ } else if (distinctInputs) {
419
+ ++stats_.numDistinctInputs ;
420
+ bool failed = verifyDistinctAggregation (
421
+ groupingKeys,
422
+ call,
423
+ masks,
424
+ input,
425
+ customVerification,
426
+ customVerifier);
427
+ if (failed) {
428
+ signatureWithStats.second .numFailed ++;
444
429
}
445
-
446
- if (sortedInputs) {
447
- ++stats_.numSortedInputs ;
448
- bool failed = verifySortedAggregation (
449
- groupingKeys,
450
- call,
451
- masks,
452
- input,
453
- customVerification,
454
- customVerifier);
455
- if (failed) {
456
- signatureWithStats.second .numFailed ++;
457
- }
458
- } else if (distinctInputs) {
459
- ++stats_.numDistinctInputs ;
460
- bool failed = verifyDistinctAggregation (
461
- groupingKeys,
462
- call,
463
- masks,
464
- input,
465
- customVerification,
466
- customVerifier);
467
- if (failed) {
468
- signatureWithStats.second .numFailed ++;
469
- }
470
- } else {
471
- bool failed = verifyAggregation (
472
- groupingKeys,
473
- {call},
474
- masks,
475
- input,
476
- customVerification,
477
- customVerifier);
478
- if (failed) {
479
- signatureWithStats.second .numFailed ++;
480
- }
430
+ } else {
431
+ bool failed = verifyAggregation (
432
+ groupingKeys,
433
+ {call},
434
+ masks,
435
+ input,
436
+ customVerification,
437
+ customVerifier);
438
+ if (failed) {
439
+ signatureWithStats.second .numFailed ++;
481
440
}
482
441
}
483
442
}
@@ -689,63 +648,6 @@ void makeStreamingPlansWithTableScan(
689
648
.planNode ());
690
649
}
691
650
692
- bool AggregationFuzzer::verifyWindow (
693
- const std::vector<std::string>& partitionKeys,
694
- const std::vector<std::string>& sortingKeys,
695
- const std::string& aggregate,
696
- const std::vector<RowVectorPtr>& input,
697
- bool customVerification,
698
- bool enableWindowVerification) {
699
- std::stringstream frame;
700
- if (!partitionKeys.empty ()) {
701
- frame << " partition by " << folly::join (" , " , partitionKeys);
702
- }
703
- if (!sortingKeys.empty ()) {
704
- frame << " order by " << folly::join (" , " , sortingKeys);
705
- }
706
-
707
- auto plan = PlanBuilder ()
708
- .values (input)
709
- .window ({fmt::format (" {} over ({})" , aggregate, frame.str ())})
710
- .planNode ();
711
- if (persistAndRunOnce_) {
712
- persistReproInfo ({{plan, {}}}, reproPersistPath_);
713
- }
714
- try {
715
- auto resultOrError = execute (plan);
716
- if (resultOrError.exceptionPtr ) {
717
- ++stats_.numFailed ;
718
- }
719
-
720
- if (!customVerification && enableWindowVerification) {
721
- if (resultOrError.result ) {
722
- auto referenceResult =
723
- computeReferenceResults (plan, referenceQueryRunner_.get ());
724
- stats_.updateReferenceQueryStats (referenceResult.second );
725
- if (auto expectedResult = referenceResult.first ) {
726
- ++stats_.numVerified ;
727
- VELOX_CHECK (
728
- assertEqualResults (
729
- expectedResult.value (),
730
- plan->outputType (),
731
- {resultOrError.result }),
732
- " Velox and reference DB results don't match" );
733
- LOG (INFO) << " Verified results against reference DB" ;
734
- }
735
- }
736
- } else {
737
- ++stats_.numVerificationSkipped ;
738
- }
739
-
740
- return resultOrError.exceptionPtr != nullptr ;
741
- } catch (...) {
742
- if (!reproPersistPath_.empty ()) {
743
- persistReproInfo ({{plan, {}}}, reproPersistPath_);
744
- }
745
- throw ;
746
- }
747
- }
748
-
749
651
bool AggregationFuzzer::verifyAggregation (
750
652
const std::vector<std::string>& groupingKeys,
751
653
const std::vector<std::string>& aggregates,
@@ -1045,8 +947,6 @@ void AggregationFuzzer::Stats::print(size_t numIterations) const {
1045
947
<< printPercentageStat (numDistinct, numIterations);
1046
948
LOG (ERROR) << " Total aggregations over distinct inputs: "
1047
949
<< printPercentageStat (numDistinctInputs, numIterations);
1048
- LOG (ERROR) << " Total window expressions: "
1049
- << printPercentageStat (numWindow, numIterations);
1050
950
AggregationFuzzerBase::Stats::print (numIterations);
1051
951
}
1052
952
0 commit comments