@@ -70,7 +70,6 @@ class ArrayDistinctFunction : public exec::VectorFunction {
70
70
context.moveOrCopyResult (localResult, rows, result);
71
71
}
72
72
73
- private:
74
73
VectorPtr applyFlat (
75
74
const SelectivityVector& rows,
76
75
const VectorPtr& arg,
@@ -93,8 +92,8 @@ class ArrayDistinctFunction : public exec::VectorFunction {
93
92
// Pointers and cursors to the raw data.
94
93
vector_size_t indicesCursor = 0 ;
95
94
auto * rawNewIndices = newIndices->asMutable <vector_size_t >();
96
- auto * rawSizes = newLengths->asMutable <vector_size_t >();
97
- auto * rawOffsets = newOffsets->asMutable <vector_size_t >();
95
+ auto * rawNewSizes = newLengths->asMutable <vector_size_t >();
96
+ auto * rawNewOffsets = newOffsets->asMutable <vector_size_t >();
98
97
99
98
// Process the rows: store unique values in the hash table.
100
99
folly::F14FastSet<T> uniqueSet;
@@ -103,7 +102,7 @@ class ArrayDistinctFunction : public exec::VectorFunction {
103
102
auto size = arrayVector->sizeAt (row);
104
103
auto offset = arrayVector->offsetAt (row);
105
104
106
- rawOffsets [row] = indicesCursor;
105
+ rawNewOffsets [row] = indicesCursor;
107
106
bool hasNulls = false ;
108
107
for (vector_size_t i = offset; i < offset + size; ++i) {
109
108
if (elements->isNullAt (i)) {
@@ -121,7 +120,7 @@ class ArrayDistinctFunction : public exec::VectorFunction {
121
120
}
122
121
123
122
uniqueSet.clear ();
124
- rawSizes [row] = indicesCursor - rawOffsets [row];
123
+ rawNewSizes [row] = indicesCursor - rawNewOffsets [row];
125
124
});
126
125
127
126
newIndices->setSize (indicesCursor * sizeof (vector_size_t ));
@@ -140,6 +139,58 @@ class ArrayDistinctFunction : public exec::VectorFunction {
140
139
}
141
140
};
142
141
142
+ template <>
143
+ VectorPtr ArrayDistinctFunction<UnknownType>::applyFlat(
144
+ const SelectivityVector& rows,
145
+ const VectorPtr& arg,
146
+ exec::EvalCtx& context) const {
147
+ auto arrayVector = arg->as <ArrayVector>();
148
+ auto elementsVector = arrayVector->elements ();
149
+ vector_size_t rowCount = rows.end ();
150
+
151
+ // Allocate new vectors for indices, length and offsets.
152
+ memory::MemoryPool* pool = context.pool ();
153
+ BufferPtr newIndices = allocateIndices (rowCount, pool);
154
+ BufferPtr newLengths = allocateSizes (rowCount, pool);
155
+ BufferPtr newOffsets = allocateOffsets (rowCount, pool);
156
+
157
+ // Pointers and cursors to the raw data.
158
+ vector_size_t indicesCursor = 0 ;
159
+ auto * rawNewIndices = newIndices->asMutable <vector_size_t >();
160
+ auto * rawNewSizes = newLengths->asMutable <vector_size_t >();
161
+ auto * rawNewOffsets = newOffsets->asMutable <vector_size_t >();
162
+
163
+ rows.applyToSelected ([&](vector_size_t row) {
164
+ auto size = arrayVector->sizeAt (row);
165
+ auto offset = arrayVector->offsetAt (row);
166
+
167
+ rawNewOffsets[row] = indicesCursor;
168
+ if (size > 0 ) {
169
+ if (FOLLY_UNLIKELY (indicesCursor == 0 )) {
170
+ rawNewIndices[0 ] = offset;
171
+ }
172
+ rawNewSizes[row] = 1 ;
173
+ rawNewIndices[indicesCursor++] = rawNewIndices[0 ];
174
+ } else {
175
+ rawNewSizes[row] = 0 ;
176
+ }
177
+ });
178
+
179
+ newIndices->setSize (indicesCursor * sizeof (vector_size_t ));
180
+ auto newElements =
181
+ BaseVector::transpose (newIndices, std::move (elementsVector));
182
+
183
+ return std::make_shared<ArrayVector>(
184
+ pool,
185
+ arrayVector->type (),
186
+ nullptr ,
187
+ rowCount,
188
+ std::move (newOffsets),
189
+ std::move (newLengths),
190
+ std::move (newElements),
191
+ 0 );
192
+ }
193
+
143
194
// Validate number of parameters and types.
144
195
void validateType (const std::vector<exec::VectorFunctionArg>& inputArgs) {
145
196
VELOX_USER_CHECK_EQ (
@@ -169,6 +220,9 @@ std::shared_ptr<exec::VectorFunction> create(
169
220
const core::QueryConfig& /* config*/ ) {
170
221
validateType (inputArgs);
171
222
auto elementType = inputArgs.front ().type ->childAt (0 );
223
+ if (elementType->isUnKnown ()) {
224
+ return std::make_shared<ArrayDistinctFunction<UnknownType>>();
225
+ }
172
226
173
227
return VELOX_DYNAMIC_SCALAR_TYPE_DISPATCH (
174
228
createTyped, elementType->kind (), inputArgs);
@@ -184,6 +238,10 @@ std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
184
238
.argumentType (fmt::format (" array({})" , type))
185
239
.build ());
186
240
}
241
+ signatures.push_back (exec::FunctionSignatureBuilder ()
242
+ .returnType (" array(unknown)" )
243
+ .argumentType (" array(unknown)" )
244
+ .build ());
187
245
return signatures;
188
246
}
189
247
0 commit comments