Skip to content

Commit d034c5e

Browse files
committed
Enable map_concat function
1 parent 620d498 commit d034c5e

File tree

8 files changed

+33
-4
lines changed

8 files changed

+33
-4
lines changed

backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala

+2-1
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,7 @@ object CHExpressionUtil {
204204
STACK -> DefaultValidator(),
205205
RAISE_ERROR -> DefaultValidator(),
206206
WIDTH_BUCKET -> DefaultValidator(),
207-
MAKE_DATE -> DefaultValidator()
207+
MAKE_DATE -> DefaultValidator(),
208+
MAP_CONCAT -> DefaultValidator()
208209
)
209210
}

backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala

+18
Original file line numberDiff line numberDiff line change
@@ -614,6 +614,24 @@ abstract class ScalarFunctionsValidateSuite extends FunctionsValidateSuite {
614614
}
615615
}
616616

617+
test("map_concat") {
618+
withTempPath {
619+
path =>
620+
Seq(
621+
Map[String, Int]("a" -> 1, "b" -> 2),
622+
Map[String, Int]("a" -> 2, "b" -> 3),
623+
null
624+
)
625+
.toDF("m")
626+
.write
627+
.parquet(path.getCanonicalPath)
628+
spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("map_tbl")
629+
runQueryAndCompare("select map_concat(m, map('c', 4)) from map_tbl") {
630+
checkGlutenOperatorMatch[ProjectExecTransformer]
631+
}
632+
}
633+
}
634+
617635
test("test transform_keys function") {
618636
withTempPath {
619637
path =>

cpp/core/config/GlutenConfig.h

+1
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ const std::string kSparkRedactionString = "*********(redacted)";
7575

7676
const std::string kSparkLegacyTimeParserPolicy = "spark.sql.legacy.timeParserPolicy";
7777
const std::string kShuffleFileBufferSize = "spark.shuffle.file.buffer";
78+
const std::string kSparkMapKeyDedupPolicy = "spark.sql.mapKeyDedupPolicy";
7879

7980
std::unordered_map<std::string, std::string>
8081
parseConfMap(JNIEnv* env, const uint8_t* planData, const int32_t planDataLength);

cpp/velox/compute/WholeStageResultIterator.cc

+6
Original file line numberDiff line numberDiff line change
@@ -565,6 +565,12 @@ std::unordered_map<std::string, std::string> WholeStageResultIterator::getQueryC
565565
configs[velox::core::QueryConfig::kSparkLegacyDateFormatter] = "false";
566566
}
567567

568+
if (veloxCfg_->get<std::string>(kSparkMapKeyDedupPolicy, "") == "EXCEPTION") {
569+
configs[velox::core::QueryConfig::kSparkThrowExceptionOnDuplicateMapKeys] = "true";
570+
} else {
571+
configs[velox::core::QueryConfig::kSparkThrowExceptionOnDuplicateMapKeys] = "false";
572+
}
573+
568574
const auto setIfExists = [&](const std::string& glutenKey, const std::string& veloxKey) {
569575
const auto valueOptional = veloxCfg_->get<std::string>(glutenKey);
570576
if (valueOptional.hasValue()) {

ep/build-velox/src/get_velox.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@
1616

1717
set -exu
1818

19-
VELOX_REPO=https://github.com/oap-project/velox.git
20-
VELOX_BRANCH=2025_02_19
19+
VELOX_REPO=https://github.com/rui-mo/velox.git
20+
VELOX_BRANCH=test_map_concat
2121
VELOX_HOME=""
2222

2323
OS=`uname -s`

gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala

+1
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,7 @@ object ExpressionMappings {
258258
// Map functions
259259
Sig[CreateMap](CREATE_MAP),
260260
Sig[GetMapValue](GET_MAP_VALUE),
261+
Sig[MapConcat](MAP_CONCAT),
261262
Sig[MapKeys](MAP_KEYS),
262263
Sig[MapValues](MAP_VALUES),
263264
Sig[MapFromArrays](MAP_FROM_ARRAYS),

shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala

+2-1
Original file line numberDiff line numberDiff line change
@@ -502,7 +502,8 @@ object GlutenConfig {
502502
(
503503
GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD.key,
504504
GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD.defaultValue.get.toString),
505-
(SPARK_SHUFFLE_SPILL_COMPRESS, SPARK_SHUFFLE_SPILL_COMPRESS_DEFAULT.toString)
505+
(SPARK_SHUFFLE_SPILL_COMPRESS, SPARK_SHUFFLE_SPILL_COMPRESS_DEFAULT.toString),
506+
(SQLConf.MAP_KEY_DEDUP_POLICY.key, SQLConf.MAP_KEY_DEDUP_POLICY.defaultValueString)
506507
)
507508
keyWithDefault.forEach(e => nativeConfMap.put(e._1, conf.getOrElse(e._1, e._2)))
508509
GlutenConfigUtil.mapByteConfValue(

shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala

+1
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,7 @@ object ExpressionNames {
281281
// Map functions
282282
final val CREATE_MAP = "map"
283283
final val GET_MAP_VALUE = "get_map_value"
284+
final val MAP_CONCAT = "map_concat"
284285
final val MAP_KEYS = "map_keys"
285286
final val MAP_VALUES = "map_values"
286287
final val MAP_FROM_ARRAYS = "map_from_arrays"

0 commit comments

Comments
 (0)