Skip to content

Commit 9376a2d

Browse files
kagamiorifacebook-github-bot
authored and committed
misc(fuzzer): Remove verifyWindow() in AggregationFuzzer (#12391)
Summary: Pull Request resolved: #12391 We used to test window operations in the aggregation fuzzer before the window fuzzer was built. Since we now have a window fuzzer that has much better coverage, remove the verifyWindow method in AggregationFuzzer. Reviewed By: natashasehgal Differential Revision: D69886065 fbshipit-source-id: 94df9dc6ee9a9adfbe943a4952489f43c1ec7e0d
1 parent 9323f22 commit 9376a2d

File tree

1 file changed

+67
-167
lines changed

1 file changed

+67
-167
lines changed

velox/exec/fuzzer/AggregationFuzzer.cpp

+67-167
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,6 @@ DEFINE_bool(
3434
true,
3535
"When true, generates plans with aggregations over sorted inputs");
3636

37-
DEFINE_bool(
38-
enable_window_reference_verification,
39-
false,
40-
"When true, the results of the window aggregation are compared to reference DB results");
41-
4237
using facebook::velox::fuzzer::CallableSignature;
4338
using facebook::velox::fuzzer::SignatureTemplate;
4439

@@ -83,21 +78,10 @@ class AggregationFuzzer : public AggregationFuzzerBase {
8378

8479
// Number of iterations using aggregations over distinct inputs.
8580
size_t numDistinctInputs{0};
86-
// Number of iterations using window expressions.
87-
size_t numWindow{0};
8881

8982
void print(size_t numIterations) const;
9083
};
9184

92-
// Return 'true' if query plans failed.
93-
bool verifyWindow(
94-
const std::vector<std::string>& partitionKeys,
95-
const std::vector<std::string>& sortingKeys,
96-
const std::string& aggregate,
97-
const std::vector<RowVectorPtr>& input,
98-
bool customVerification,
99-
bool enableWindowVerification);
100-
10185
// Return 'true' if query plans failed.
10286
bool verifyAggregation(
10387
const std::vector<std::string>& groupingKeys,
@@ -374,110 +358,85 @@ void AggregationFuzzer::go() {
374358
std::vector<TypePtr> argTypes = signature.args;
375359
std::vector<std::string> argNames = makeNames(argTypes.size());
376360

377-
// 10% of times test window operator.
361+
const bool sortedInputs = FLAGS_enable_sorted_aggregations &&
362+
canSortInputs(signature) && vectorFuzzer_.coinToss(0.2);
363+
364+
// Exclude approx_xxx aggregations since their verifiers may not be able
365+
// to verify the results. The approx_percentile verifier would discard
366+
// the distinct property when calculating the expected result, say the
367+
// expected result of the verifier would be approx_percentile(x), which
368+
// may be different from the actual result of approx_percentile(distinct
369+
// x).
370+
const bool distinctInputs = !sortedInputs &&
371+
(signature.name.find("approx_") == std::string::npos) &&
372+
supportsDistinctInputs(signature, orderableGroupKeys_) &&
373+
vectorFuzzer_.coinToss(0.2);
374+
375+
auto call = makeFunctionCall(
376+
signature.name, argNames, sortedInputs, distinctInputs);
377+
378+
// 20% of times use mask.
379+
std::vector<std::string> masks;
380+
if (vectorFuzzer_.coinToss(0.2)) {
381+
++stats_.numMask;
382+
383+
masks.push_back("m0");
384+
argTypes.push_back(BOOLEAN());
385+
argNames.push_back(masks.back());
386+
}
387+
388+
// 10% of times use global aggregation (no grouping keys).
389+
std::vector<std::string> groupingKeys;
378390
if (vectorFuzzer_.coinToss(0.1)) {
379-
++stats_.numWindow;
391+
++stats_.numGlobal;
392+
} else {
393+
++stats_.numGroupBy;
394+
groupingKeys = generateKeys("g", argNames, argTypes);
395+
}
380396

381-
auto call = makeFunctionCall(signature.name, argNames, false);
397+
auto input = generateInputData(argNames, argTypes, signature);
382398

383-
auto partitionKeys = generateKeys("p", argNames, argTypes);
384-
auto sortingKeys = generateSortingKeys("s", argNames, argTypes);
385-
auto input = generateInputDataWithRowNumber(
386-
argNames, argTypes, partitionKeys, {}, sortingKeys, signature);
399+
logVectors(input);
387400

388-
logVectors(input);
401+
std::shared_ptr<ResultVerifier> customVerifier;
402+
if (customVerification) {
403+
customVerifier = customVerificationFunctions_.at(signature.name);
404+
}
389405

390-
bool failed = verifyWindow(
391-
partitionKeys,
392-
sortingKeys,
406+
if (sortedInputs) {
407+
++stats_.numSortedInputs;
408+
bool failed = verifySortedAggregation(
409+
groupingKeys,
393410
call,
411+
masks,
394412
input,
395413
customVerification,
396-
FLAGS_enable_window_reference_verification);
414+
customVerifier);
397415
if (failed) {
398416
signatureWithStats.second.numFailed++;
399417
}
400-
} else {
401-
const bool sortedInputs = FLAGS_enable_sorted_aggregations &&
402-
canSortInputs(signature) && vectorFuzzer_.coinToss(0.2);
403-
404-
// Exclude approx_xxx aggregations since their verifiers may not be able
405-
// to verify the results. The approx_percentile verifier would discard
406-
// the distinct property when calculating the expected result, say the
407-
// expected result of the verifier would be approx_percentile(x), which
408-
// may be different from the actual result of approx_percentile(distinct
409-
// x).
410-
const bool distinctInputs = !sortedInputs &&
411-
(signature.name.find("approx_") == std::string::npos) &&
412-
supportsDistinctInputs(signature, orderableGroupKeys_) &&
413-
vectorFuzzer_.coinToss(0.2);
414-
415-
auto call = makeFunctionCall(
416-
signature.name, argNames, sortedInputs, distinctInputs);
417-
418-
// 20% of times use mask.
419-
std::vector<std::string> masks;
420-
if (vectorFuzzer_.coinToss(0.2)) {
421-
++stats_.numMask;
422-
423-
masks.push_back("m0");
424-
argTypes.push_back(BOOLEAN());
425-
argNames.push_back(masks.back());
426-
}
427-
428-
// 10% of times use global aggregation (no grouping keys).
429-
std::vector<std::string> groupingKeys;
430-
if (vectorFuzzer_.coinToss(0.1)) {
431-
++stats_.numGlobal;
432-
} else {
433-
++stats_.numGroupBy;
434-
groupingKeys = generateKeys("g", argNames, argTypes);
435-
}
436-
437-
auto input = generateInputData(argNames, argTypes, signature);
438-
439-
logVectors(input);
440-
441-
std::shared_ptr<ResultVerifier> customVerifier;
442-
if (customVerification) {
443-
customVerifier = customVerificationFunctions_.at(signature.name);
418+
} else if (distinctInputs) {
419+
++stats_.numDistinctInputs;
420+
bool failed = verifyDistinctAggregation(
421+
groupingKeys,
422+
call,
423+
masks,
424+
input,
425+
customVerification,
426+
customVerifier);
427+
if (failed) {
428+
signatureWithStats.second.numFailed++;
444429
}
445-
446-
if (sortedInputs) {
447-
++stats_.numSortedInputs;
448-
bool failed = verifySortedAggregation(
449-
groupingKeys,
450-
call,
451-
masks,
452-
input,
453-
customVerification,
454-
customVerifier);
455-
if (failed) {
456-
signatureWithStats.second.numFailed++;
457-
}
458-
} else if (distinctInputs) {
459-
++stats_.numDistinctInputs;
460-
bool failed = verifyDistinctAggregation(
461-
groupingKeys,
462-
call,
463-
masks,
464-
input,
465-
customVerification,
466-
customVerifier);
467-
if (failed) {
468-
signatureWithStats.second.numFailed++;
469-
}
470-
} else {
471-
bool failed = verifyAggregation(
472-
groupingKeys,
473-
{call},
474-
masks,
475-
input,
476-
customVerification,
477-
customVerifier);
478-
if (failed) {
479-
signatureWithStats.second.numFailed++;
480-
}
430+
} else {
431+
bool failed = verifyAggregation(
432+
groupingKeys,
433+
{call},
434+
masks,
435+
input,
436+
customVerification,
437+
customVerifier);
438+
if (failed) {
439+
signatureWithStats.second.numFailed++;
481440
}
482441
}
483442
}
@@ -689,63 +648,6 @@ void makeStreamingPlansWithTableScan(
689648
.planNode());
690649
}
691650

692-
bool AggregationFuzzer::verifyWindow(
693-
const std::vector<std::string>& partitionKeys,
694-
const std::vector<std::string>& sortingKeys,
695-
const std::string& aggregate,
696-
const std::vector<RowVectorPtr>& input,
697-
bool customVerification,
698-
bool enableWindowVerification) {
699-
std::stringstream frame;
700-
if (!partitionKeys.empty()) {
701-
frame << "partition by " << folly::join(", ", partitionKeys);
702-
}
703-
if (!sortingKeys.empty()) {
704-
frame << " order by " << folly::join(", ", sortingKeys);
705-
}
706-
707-
auto plan = PlanBuilder()
708-
.values(input)
709-
.window({fmt::format("{} over ({})", aggregate, frame.str())})
710-
.planNode();
711-
if (persistAndRunOnce_) {
712-
persistReproInfo({{plan, {}}}, reproPersistPath_);
713-
}
714-
try {
715-
auto resultOrError = execute(plan);
716-
if (resultOrError.exceptionPtr) {
717-
++stats_.numFailed;
718-
}
719-
720-
if (!customVerification && enableWindowVerification) {
721-
if (resultOrError.result) {
722-
auto referenceResult =
723-
computeReferenceResults(plan, referenceQueryRunner_.get());
724-
stats_.updateReferenceQueryStats(referenceResult.second);
725-
if (auto expectedResult = referenceResult.first) {
726-
++stats_.numVerified;
727-
VELOX_CHECK(
728-
assertEqualResults(
729-
expectedResult.value(),
730-
plan->outputType(),
731-
{resultOrError.result}),
732-
"Velox and reference DB results don't match");
733-
LOG(INFO) << "Verified results against reference DB";
734-
}
735-
}
736-
} else {
737-
++stats_.numVerificationSkipped;
738-
}
739-
740-
return resultOrError.exceptionPtr != nullptr;
741-
} catch (...) {
742-
if (!reproPersistPath_.empty()) {
743-
persistReproInfo({{plan, {}}}, reproPersistPath_);
744-
}
745-
throw;
746-
}
747-
}
748-
749651
bool AggregationFuzzer::verifyAggregation(
750652
const std::vector<std::string>& groupingKeys,
751653
const std::vector<std::string>& aggregates,
@@ -1045,8 +947,6 @@ void AggregationFuzzer::Stats::print(size_t numIterations) const {
1045947
<< printPercentageStat(numDistinct, numIterations);
1046948
LOG(ERROR) << "Total aggregations over distinct inputs: "
1047949
<< printPercentageStat(numDistinctInputs, numIterations);
1048-
LOG(ERROR) << "Total window expressions: "
1049-
<< printPercentageStat(numWindow, numIterations);
1050950
AggregationFuzzerBase::Stats::print(numIterations);
1051951
}
1052952

0 commit comments

Comments
 (0)