Skip to content

Commit 52b80a7

Browse files
zacw7 authored and facebook-github-bot committed
Support UNKNOWN type in array_distinct (facebookincubator#9500)
Summary: Pull Request resolved: facebookincubator#9500

Test Plan:
1. New unit test added.
2. Ran the fuzzer for 1 hour:
```
buck run //velox/expression/tests/facebook:fb_velox_expression_fuzzer_test2
buck run //velox/expression/tests/facebook:fb_velox_expression_fuzzer_test2 with --enable_variadic_signatures --lazy_vector_generation_ratio=0.2 --velox_fuzzer_enable_complex_types --velox_fuzzer_enable_column_reuse --velox_fuzzer_enable_expression_reuse --retry_with_try --enable_dereference --assign_function_tickets="array_distinct=40" --duration_sec=3600
```

Reviewed By: kagamiori
Differential Revision: D56176342
Pulled By: zacw7
fbshipit-source-id: 5bd6992ad05ba2d2fef2a963ee0c775761a523b1
1 parent c240cf3 commit 52b80a7

File tree

2 files changed

+84
-5
lines changed

2 files changed

+84
-5
lines changed

velox/functions/prestosql/ArrayDistinct.cpp

+63-5
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,6 @@ class ArrayDistinctFunction : public exec::VectorFunction {
7070
context.moveOrCopyResult(localResult, rows, result);
7171
}
7272

73-
private:
7473
VectorPtr applyFlat(
7574
const SelectivityVector& rows,
7675
const VectorPtr& arg,
@@ -93,8 +92,8 @@ class ArrayDistinctFunction : public exec::VectorFunction {
9392
// Pointers and cursors to the raw data.
9493
vector_size_t indicesCursor = 0;
9594
auto* rawNewIndices = newIndices->asMutable<vector_size_t>();
96-
auto* rawSizes = newLengths->asMutable<vector_size_t>();
97-
auto* rawOffsets = newOffsets->asMutable<vector_size_t>();
95+
auto* rawNewSizes = newLengths->asMutable<vector_size_t>();
96+
auto* rawNewOffsets = newOffsets->asMutable<vector_size_t>();
9897

9998
// Process the rows: store unique values in the hash table.
10099
folly::F14FastSet<T> uniqueSet;
@@ -103,7 +102,7 @@ class ArrayDistinctFunction : public exec::VectorFunction {
103102
auto size = arrayVector->sizeAt(row);
104103
auto offset = arrayVector->offsetAt(row);
105104

106-
rawOffsets[row] = indicesCursor;
105+
rawNewOffsets[row] = indicesCursor;
107106
bool hasNulls = false;
108107
for (vector_size_t i = offset; i < offset + size; ++i) {
109108
if (elements->isNullAt(i)) {
@@ -121,7 +120,7 @@ class ArrayDistinctFunction : public exec::VectorFunction {
121120
}
122121

123122
uniqueSet.clear();
124-
rawSizes[row] = indicesCursor - rawOffsets[row];
123+
rawNewSizes[row] = indicesCursor - rawNewOffsets[row];
125124
});
126125

127126
newIndices->setSize(indicesCursor * sizeof(vector_size_t));
@@ -140,6 +139,58 @@ class ArrayDistinctFunction : public exec::VectorFunction {
140139
}
141140
};
142141

142+
// Specialization of applyFlat() for arrays whose element type is UNKNOWN.
//
// For every selected row, a non-empty input array yields a one-element output
// array and an empty input array yields an empty output array. No hash set is
// used: every emitted entry reuses the same element index, namely the offset
// of the first non-empty row that was processed.
// NOTE(review): presumably this is valid because UNKNOWN-typed elements are
// always null, so any single element represents the distinct set - confirm.
//
// @param rows Rows to process; rows.end() is used as the output row count.
// @param arg Input vector, accessed as an ArrayVector.
// @param context Evaluation context; supplies the memory pool.
// @return A new ArrayVector with the input's type, built from the new
// offsets, sizes and elements.
template <>
143+
VectorPtr ArrayDistinctFunction<UnknownType>::applyFlat(
144+
const SelectivityVector& rows,
145+
const VectorPtr& arg,
146+
exec::EvalCtx& context) const {
147+
auto arrayVector = arg->as<ArrayVector>();
148+
auto elementsVector = arrayVector->elements();
149+
vector_size_t rowCount = rows.end();
150+
151+
// Allocate new buffers for indices, lengths and offsets. Each is requested
// with rowCount entries; that is enough for the indices because a row emits
// at most one element below.
152+
memory::MemoryPool* pool = context.pool();
153+
BufferPtr newIndices = allocateIndices(rowCount, pool);
154+
BufferPtr newLengths = allocateSizes(rowCount, pool);
155+
BufferPtr newOffsets = allocateOffsets(rowCount, pool);
156+
157+
// Raw pointers into the buffers, plus a cursor counting emitted elements.
158+
vector_size_t indicesCursor = 0;
159+
auto* rawNewIndices = newIndices->asMutable<vector_size_t>();
160+
auto* rawNewSizes = newLengths->asMutable<vector_size_t>();
161+
auto* rawNewOffsets = newOffsets->asMutable<vector_size_t>();
162+
163+
rows.applyToSelected([&](vector_size_t row) {
164+
auto size = arrayVector->sizeAt(row);
165+
auto offset = arrayVector->offsetAt(row);
166+
167+
rawNewOffsets[row] = indicesCursor;
168+
if (size > 0) {
// Nothing emitted yet: record this row's first element offset in slot 0 so
// that all later non-empty rows can reuse it.
169+
if (FOLLY_UNLIKELY(indicesCursor == 0)) {
170+
rawNewIndices[0] = offset;
171+
}
172+
rawNewSizes[row] = 1;
// Emit exactly one element, pointing at the index stored in slot 0.
173+
rawNewIndices[indicesCursor++] = rawNewIndices[0];
174+
} else {
// Empty input array stays empty.
175+
rawNewSizes[row] = 0;
176+
}
177+
});
178+
179+
// Shrink the indices buffer to the number of elements actually emitted.
newIndices->setSize(indicesCursor * sizeof(vector_size_t));
// NOTE(review): presumably transpose() wraps the original elements using
// newIndices as a selection/dictionary - confirm against BaseVector.
180+
auto newElements =
181+
BaseVector::transpose(newIndices, std::move(elementsVector));
182+
183+
// NOTE(review): the nullptr and trailing 0 look like the nulls buffer and the
// null count - confirm against the ArrayVector constructor.
return std::make_shared<ArrayVector>(
184+
pool,
185+
arrayVector->type(),
186+
nullptr,
187+
rowCount,
188+
std::move(newOffsets),
189+
std::move(newLengths),
190+
std::move(newElements),
191+
0);
192+
}
193+
143194
// Validate number of parameters and types.
144195
void validateType(const std::vector<exec::VectorFunctionArg>& inputArgs) {
145196
VELOX_USER_CHECK_EQ(
@@ -169,6 +220,9 @@ std::shared_ptr<exec::VectorFunction> create(
169220
const core::QueryConfig& /*config*/) {
170221
validateType(inputArgs);
171222
auto elementType = inputArgs.front().type->childAt(0);
223+
if (elementType->isUnKnown()) {
224+
return std::make_shared<ArrayDistinctFunction<UnknownType>>();
225+
}
172226

173227
return VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH(
174228
createTyped, elementType->kind(), inputArgs);
@@ -184,6 +238,10 @@ std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
184238
.argumentType(fmt::format("array({})", type))
185239
.build());
186240
}
241+
signatures.push_back(exec::FunctionSignatureBuilder()
242+
.returnType("array(unknown)")
243+
.argumentType("array(unknown)")
244+
.build());
187245
return signatures;
188246
}
189247

velox/functions/prestosql/tests/ArrayDistinctTest.cpp

+21
Original file line numberDiff line numberDiff line change
@@ -329,3 +329,24 @@ TEST_F(ArrayDistinctTest, constant) {
329329
expected = makeConstantArray<int64_t>(size, {6});
330330
assertEqualVectors(expected, result);
331331
}
332+
333+
// Checks array_distinct on arrays of UNKNOWN element type: an empty array is
// returned unchanged, and arrays containing only nulls collapse to a single
// null.
TEST_F(ArrayDistinctTest, unknownType) {
334+
// Case 1: a single empty array.
// array_distinct(ARRAY[]) -> []
335+
auto emptyArrayVector = makeArrayVector<UnknownValue>({{}});
336+
auto result =
337+
evaluate("array_distinct(c0)", makeRowVector({emptyArrayVector}));
338+
assertEqualVectors(emptyArrayVector, result);
339+
340+
// Case 2: three rows built over four null elements.
// array_distinct(ARRAY[null, null, null]) -> [null]
341+
// array_distinct(ARRAY[]) -> []
342+
// array_distinct(ARRAY[null]) -> [null]
// NOTE(review): assumes the first makeArrayVector argument lists per-row
// start offsets into the elements vector ({0, 3, 3} -> rows of 3, 0 and 1
// elements) - confirm against the test helper.
343+
auto nullArrayVector = makeArrayVector(
344+
{0, 3, 3},
345+
makeNullableFlatVector<UnknownValue>(
346+
{std::nullopt, std::nullopt, std::nullopt, std::nullopt}));
// Expected: rows of 1, 0 and 1 elements over two null elements.
347+
auto expected = makeArrayVector(
348+
{0, 1, 1},
349+
makeNullableFlatVector<UnknownValue>({std::nullopt, std::nullopt}));
350+
result = evaluate("array_distinct(c0)", makeRowVector({nullArrayVector}));
351+
assertEqualVectors(expected, result);
352+
}

0 commit comments

Comments
 (0)