13
13
* See the License for the specific language governing permissions and
14
14
* limitations under the License.
15
15
*/
16
+ #include " velox/exec/AddressableNonNullValueList.h"
16
17
#include " velox/exec/Aggregate.h"
17
18
#include " velox/exec/Strings.h"
18
19
#include " velox/expression/FunctionSignature.h"
19
- #include " velox/functions/lib/CheckedArithmeticImpl.h"
20
20
#include " velox/functions/prestosql/aggregates/AggregateNames.h"
21
21
#include " velox/vector/FlatVector.h"
22
22
@@ -32,7 +32,7 @@ struct Accumulator {
32
32
AlignedStlAllocator<std::pair<const K, S>, 16 >>::Type;
33
33
ValuesMap sums;
34
34
35
- explicit Accumulator (HashStringAllocator* allocator)
35
+ explicit Accumulator (const TypePtr& /* type */ , HashStringAllocator* allocator)
36
36
: sums{AlignedStlAllocator<std::pair<const K, S>, 16 >(allocator)} {}
37
37
38
38
size_t size () const {
@@ -41,18 +41,20 @@ struct Accumulator {
41
41
42
42
void addValues (
43
43
const MapVector* mapVector,
44
- const SimpleVector<K>* mapKeys,
45
- const SimpleVector<S>* mapValues,
44
+ const VectorPtr& mapKeys,
45
+ const VectorPtr& mapValues,
46
46
vector_size_t row,
47
47
HashStringAllocator* allocator) {
48
+ auto keys = mapKeys->template as <SimpleVector<K>>();
49
+ auto values = mapValues->template as <SimpleVector<S>>();
48
50
auto offset = mapVector->offsetAt (row);
49
51
auto size = mapVector->sizeAt (row);
50
52
51
53
for (auto i = 0 ; i < size; ++i) {
52
54
// Ignore null map keys.
53
- if (!mapKeys ->isNullAt (offset + i)) {
54
- auto key = mapKeys ->valueAt (offset + i);
55
- addValue (key, mapValues , offset + i, mapValues ->typeKind ());
55
+ if (!keys ->isNullAt (offset + i)) {
56
+ auto key = keys ->valueAt (offset + i);
57
+ addValue (key, values , offset + i, values ->typeKind ());
56
58
}
57
59
}
58
60
}
@@ -94,13 +96,16 @@ struct Accumulator {
94
96
}
95
97
96
98
vector_size_t extractValues (
97
- FlatVector<K> & mapKeys,
98
- FlatVector<S> & mapValues,
99
+ VectorPtr & mapKeys,
100
+ VectorPtr & mapValues,
99
101
vector_size_t offset) {
102
+ auto keys = mapKeys->asFlatVector <K>();
103
+ auto values = mapValues->asFlatVector <S>();
104
+
100
105
auto index = offset;
101
106
for (const auto & [key, sum] : sums) {
102
- mapKeys. set (index , key);
103
- mapValues. set (index , sum);
107
+ keys-> set (index , key);
108
+ values-> set (index , sum);
104
109
105
110
++index ;
106
111
}
@@ -115,26 +120,30 @@ struct StringViewAccumulator {
115
120
116
121
Strings strings;
117
122
118
- explicit StringViewAccumulator (HashStringAllocator* allocator)
119
- : base{allocator} {}
123
+ explicit StringViewAccumulator (
124
+ const TypePtr& type,
125
+ HashStringAllocator* allocator)
126
+ : base{type, allocator} {}
120
127
121
128
size_t size () const {
122
129
return base.size ();
123
130
}
124
131
125
132
void addValues (
126
133
const MapVector* mapVector,
127
- const SimpleVector<StringView>* mapKeys,
128
- const SimpleVector<S>* mapValues,
134
+ const VectorPtr& mapKeys,
135
+ const VectorPtr& mapValues,
129
136
vector_size_t row,
130
137
HashStringAllocator* allocator) {
138
+ auto keys = mapKeys->template as <SimpleVector<StringView>>();
139
+ auto values = mapValues->template as <SimpleVector<S>>();
131
140
auto offset = mapVector->offsetAt (row);
132
141
auto size = mapVector->sizeAt (row);
133
142
134
143
for (auto i = 0 ; i < size; ++i) {
135
144
// Ignore null map keys.
136
- if (!mapKeys ->isNullAt (offset + i)) {
137
- auto key = mapKeys ->valueAt (offset + i);
145
+ if (!keys ->isNullAt (offset + i)) {
146
+ auto key = keys ->valueAt (offset + i);
138
147
139
148
if (!key.isInline ()) {
140
149
auto it = base.sums .find (key);
@@ -145,19 +154,95 @@ struct StringViewAccumulator {
145
154
}
146
155
}
147
156
148
- base.addValue (key, mapValues , offset + i, mapValues ->typeKind ());
157
+ base.addValue (key, values , offset + i, values ->typeKind ());
149
158
}
150
159
}
151
160
}
152
161
153
162
vector_size_t extractValues (
154
- FlatVector<StringView> & mapKeys,
155
- FlatVector<S> & mapValues,
163
+ VectorPtr & mapKeys,
164
+ VectorPtr & mapValues,
156
165
vector_size_t offset) {
157
166
return base.extractValues (mapKeys, mapValues, offset);
158
167
}
159
168
};
160
169
170
+ // / Maintains a map with keys of type array, map or struct.
171
+ template <typename V>
172
+ struct ComplexTypeAccumulator {
173
+ using ValueMap = folly::F14FastMap<
174
+ AddressableNonNullValueList::Entry,
175
+ int64_t ,
176
+ AddressableNonNullValueList::Hash,
177
+ AddressableNonNullValueList::EqualTo,
178
+ AlignedStlAllocator<
179
+ std::pair<const AddressableNonNullValueList::Entry, int64_t >,
180
+ 16 >>;
181
+
182
+ // / A set of pointers to values stored in AddressableNonNullValueList.
183
+ ValueMap sums;
184
+
185
+ // / Stores unique non-null keys.
186
+ AddressableNonNullValueList serializedKeys;
187
+
188
+ ComplexTypeAccumulator (const TypePtr& type, HashStringAllocator* allocator)
189
+ : sums{
190
+ 0 ,
191
+ AddressableNonNullValueList::Hash{},
192
+ AddressableNonNullValueList::EqualTo{type},
193
+ AlignedStlAllocator<
194
+ std::pair<const AddressableNonNullValueList::Entry, int64_t >,
195
+ 16 >(allocator)} {}
196
+
197
+ void addValues (
198
+ const MapVector* mapVector,
199
+ const VectorPtr& mapKeys,
200
+ const VectorPtr& mapValues,
201
+ vector_size_t row,
202
+ HashStringAllocator* allocator) {
203
+ auto offset = mapVector->offsetAt (row);
204
+ auto size = mapVector->sizeAt (row);
205
+ auto values = mapValues->template as <SimpleVector<V>>();
206
+
207
+ for (auto i = 0 ; i < size; ++i) {
208
+ if (!mapKeys->isNullAt (offset + i)) {
209
+ auto entry =
210
+ serializedKeys.append (*mapKeys.get (), offset + i, allocator);
211
+
212
+ auto it = sums.find (entry);
213
+ if (it == sums.end ()) {
214
+ // New entry.
215
+ sums[entry] = values->valueAt (offset + i);
216
+ } else {
217
+ // Existing entry.
218
+ sums[entry] += values->valueAt (offset + i);
219
+ }
220
+ }
221
+ }
222
+ }
223
+
224
+ vector_size_t extractValues (
225
+ VectorPtr& mapKeys,
226
+ VectorPtr& mapValues,
227
+ vector_size_t offset) {
228
+ auto values = mapValues->asFlatVector <V>();
229
+ auto index = offset;
230
+
231
+ for (const auto & [position, count] : sums) {
232
+ AddressableNonNullValueList::read (position, *mapKeys.get (), index );
233
+ values->set (index , count);
234
+ ++index ;
235
+ }
236
+
237
+ return sums.size ();
238
+ }
239
+
240
+ size_t size () const {
241
+ return sums.size ();
242
+ }
243
+ };
244
+
245
+ // Defines unique accumulators dependent on type.
161
246
template <typename K, typename S>
162
247
struct AccumulatorTypeTraits {
163
248
using AccumulatorType = Accumulator<K, S>;
@@ -168,6 +253,12 @@ struct AccumulatorTypeTraits<StringView, S> {
168
253
using AccumulatorType = StringViewAccumulator<S>;
169
254
};
170
255
256
+ template <typename V>
257
+ struct AccumulatorTypeTraits <ComplexType, V> {
258
+ using AccumulatorType = ComplexTypeAccumulator<V>;
259
+ };
260
+
261
+ // Defines common aggregator.
171
262
template <typename K, typename S>
172
263
class MapUnionSumAggregate : public exec ::Aggregate {
173
264
public:
@@ -190,12 +281,18 @@ class MapUnionSumAggregate : public exec::Aggregate {
190
281
VELOX_CHECK (mapVector);
191
282
mapVector->resize (numGroups);
192
283
193
- auto mapKeys = mapVector->mapKeys ()-> as <FlatVector<K>> ();
194
- auto mapValues = mapVector->mapValues ()-> as <FlatVector<S>> ();
284
+ auto mapKeysPtr = mapVector->mapKeys ();
285
+ auto mapValuesPtr = mapVector->mapValues ();
195
286
196
287
auto numElements = countElements (groups, numGroups);
197
- mapKeys->resize (numElements);
198
- mapValues->resize (numElements);
288
+ mapVector->mapValues ()->as <FlatVector<S>>()->resize (numElements);
289
+
290
+ // ComplexType cannot be resized the same.
291
+ if constexpr (!std::is_same_v<K, ComplexType>) {
292
+ mapVector->mapKeys ()->as <FlatVector<K>>()->resize (numElements);
293
+ } else {
294
+ mapVector->mapKeys ()->resize (numElements);
295
+ }
199
296
200
297
auto rawNulls = mapVector->mutableRawNulls ();
201
298
vector_size_t offset = 0 ;
@@ -208,7 +305,7 @@ class MapUnionSumAggregate : public exec::Aggregate {
208
305
clearNull (rawNulls, i);
209
306
210
307
auto mapSize = value<AccumulatorType>(group)->extractValues (
211
- *mapKeys, *mapValues , offset);
308
+ mapKeysPtr, mapValuesPtr , offset);
212
309
mapVector->setOffsetAndSize (i, offset, mapSize);
213
310
offset += mapSize;
214
311
}
@@ -227,8 +324,8 @@ class MapUnionSumAggregate : public exec::Aggregate {
227
324
bool /* mayPushdown*/ ) override {
228
325
decodedMaps_.decode (*args[0 ], rows);
229
326
auto mapVector = decodedMaps_.base ()->template as <MapVector>();
230
- auto mapKeys = mapVector->mapKeys ()-> template as <SimpleVector<K>>() ;
231
- auto mapValues = mapVector->mapValues ()-> template as <SimpleVector<S>>() ;
327
+ auto mapKeys = mapVector->mapKeys ();
328
+ auto mapValues = mapVector->mapValues ();
232
329
233
330
rows.applyToSelected ([&](auto row) {
234
331
if (!decodedMaps_.isNullAt (row)) {
@@ -249,8 +346,8 @@ class MapUnionSumAggregate : public exec::Aggregate {
249
346
bool /* mayPushdown */ ) override {
250
347
decodedMaps_.decode (*args[0 ], rows);
251
348
auto mapVector = decodedMaps_.base ()->template as <MapVector>();
252
- auto mapKeys = mapVector->mapKeys ()-> template as <SimpleVector<K>>() ;
253
- auto mapValues = mapVector->mapValues ()-> template as <SimpleVector<S>>() ;
349
+ auto mapKeys = mapVector->mapKeys ();
350
+ auto mapValues = mapVector->mapValues ();
254
351
255
352
auto groupMap = value<AccumulatorType>(group);
256
353
@@ -285,7 +382,7 @@ class MapUnionSumAggregate : public exec::Aggregate {
285
382
folly::Range<const vector_size_t *> indices) override {
286
383
setAllNulls (groups, indices);
287
384
for (auto index : indices) {
288
- new (groups[index ] + offset_) AccumulatorType{allocator_};
385
+ new (groups[index ] + offset_) AccumulatorType{resultType_, allocator_};
289
386
}
290
387
}
291
388
@@ -304,8 +401,8 @@ class MapUnionSumAggregate : public exec::Aggregate {
304
401
void addMap (
305
402
AccumulatorType& groupMap,
306
403
const MapVector* mapVector,
307
- const SimpleVector<K>* mapKeys,
308
- const SimpleVector<S>* mapValues,
404
+ const VectorPtr& mapKeys,
405
+ const VectorPtr& mapValues,
309
406
vector_size_t row) const {
310
407
auto decodedRow = decodedMaps_.index (row);
311
408
groupMap.addValues (mapVector, mapKeys, mapValues, decodedRow, allocator_);
@@ -340,7 +437,8 @@ std::unique_ptr<exec::Aggregate> createMapUnionSumAggregate(
340
437
case TypeKind::DOUBLE:
341
438
return std::make_unique<MapUnionSumAggregate<K, double >>(resultType);
342
439
default :
343
- VELOX_UNREACHABLE ();
440
+ VELOX_UNREACHABLE (
441
+ " Unexpected value type {}" , mapTypeKindToName (valueKind));
344
442
}
345
443
}
346
444
@@ -350,35 +448,14 @@ void registerMapUnionSumAggregate(
350
448
const std::string& prefix,
351
449
bool withCompanionFunctions,
352
450
bool overwrite) {
353
- const std::vector<std::string> keyTypes = {
354
- " tinyint" ,
355
- " smallint" ,
356
- " integer" ,
357
- " bigint" ,
358
- " real" ,
359
- " double" ,
360
- " varchar" ,
361
- " json" };
362
- const std::vector<std::string> valueTypes = {
363
- " tinyint" ,
364
- " smallint" ,
365
- " integer" ,
366
- " bigint" ,
367
- " double" ,
368
- " real" ,
369
- };
370
-
371
- std::vector<std::shared_ptr<exec::AggregateFunctionSignature>> signatures;
372
- for (auto keyType : keyTypes) {
373
- for (auto valueType : valueTypes) {
374
- auto mapType = fmt::format (" map({},{})" , keyType, valueType);
375
- signatures.push_back (exec::AggregateFunctionSignatureBuilder ()
376
- .returnType (mapType)
377
- .intermediateType (mapType)
378
- .argumentType (mapType)
379
- .build ());
380
- }
381
- }
451
+ std::vector<std::shared_ptr<exec::AggregateFunctionSignature>> signatures{
452
+ exec::AggregateFunctionSignatureBuilder ()
453
+ .typeVariable (" K" )
454
+ .typeVariable (" V" )
455
+ .returnType (" map(K,V)" )
456
+ .intermediateType (" map(K,V)" )
457
+ .argumentType (" map(K,V)" )
458
+ .build ()};
382
459
383
460
auto name = prefix + kMapUnionSum ;
384
461
exec::registerAggregateFunction (
@@ -395,6 +472,8 @@ void registerMapUnionSumAggregate(
395
472
auto & mapType = argTypes[0 ]->asMap ();
396
473
auto keyTypeKind = mapType.keyType ()->kind ();
397
474
auto valueTypeKind = mapType.valueType ()->kind ();
475
+ const auto keyType = resultType->childAt (0 );
476
+
398
477
switch (keyTypeKind) {
399
478
case TypeKind::TINYINT:
400
479
return createMapUnionSumAggregate<int8_t >(
@@ -416,8 +495,14 @@ void registerMapUnionSumAggregate(
416
495
case TypeKind::VARCHAR:
417
496
return createMapUnionSumAggregate<StringView>(
418
497
valueTypeKind, resultType);
498
+ case TypeKind::ARRAY:
499
+ case TypeKind::MAP:
500
+ case TypeKind::ROW:
501
+ return createMapUnionSumAggregate<ComplexType>(
502
+ valueTypeKind, resultType);
419
503
default :
420
- VELOX_UNREACHABLE ();
504
+ VELOX_UNREACHABLE (
505
+ " Unexpected key type {}" , mapTypeKindToName (keyTypeKind));
421
506
}
422
507
},
423
508
withCompanionFunctions,
0 commit comments