Skip to content

Commit 3658be0

Browse files
committed
Merge branch 'main' into opt_nested_funcs
2 parents 5a86bfa + b3c0052 commit 3658be0

File tree

33 files changed

+613
-79
lines changed

33 files changed

+613
-79
lines changed

.github/workflows/velox_nightly.yml

+108
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one or more
2+
# contributor license agreements. See the NOTICE file distributed with
3+
# this work for additional information regarding copyright ownership.
4+
# The ASF licenses this file to You under the Apache License, Version 2.0
5+
# (the "License"); you may not use this file except in compliance with
6+
# the License. You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
name: Velox backend nightly release

# Runs when this workflow file itself changes in a pull request, and on a daily cron schedule.
on:
  pull_request:
    paths:
      - '.github/workflows/velox_nightly.yml'
  schedule:
    - cron: '0 0 * * *'

env:
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  CCACHE_DIR: "${{ github.workspace }}/.ccache"

# Only one run per repository / ref / workflow; a newer run cancels the one in progress.
concurrency:
  group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
  cancel-in-progress: true

jobs:
  # Builds the native libraries inside the apache/gluten:vcpkg-centos-7 image and publishes
  # both the native libs and the Arrow jars from the container's Maven repository as artifacts.
  build-native-lib:
    runs-on: ubuntu-20.04
    steps:
      - uses: actions/checkout@v4
      - name: Get Ccache
        uses: actions/cache/restore@v4
        with:
          path: '${{ env.CCACHE_DIR }}'
          key: ccache-centos7-release-default-${{github.sha}}
          restore-keys: |
            ccache-centos7-release-default
      - name: Build Gluten velox third party
        run: |
          docker run -v $GITHUB_WORKSPACE:/work -w /work apache/gluten:vcpkg-centos-7 bash -c "
            df -a
            cd /work
            export CCACHE_DIR=/work/.ccache
            bash dev/ci-velox-buildstatic-centos-7.sh
            ccache -s
            mkdir -p /work/.m2/repository/org/apache/arrow/
            cp -r /root/.m2/repository/org/apache/arrow/* /work/.m2/repository/org/apache/arrow/
          "
      - name: Upload native libs
        uses: actions/upload-artifact@v4
        with:
          path: ./cpp/build/releases/
          name: velox-native-lib-${{github.sha}}
          retention-days: 1
      - name: Upload Artifact Arrow Jar
        uses: actions/upload-artifact@v4
        with:
          path: .m2/repository/org/apache/arrow/
          name: velox-arrow-jar-centos-7-${{github.sha}}

  # Downloads the artifacts produced by build-native-lib, builds the Spark 3.5 bundle with Maven
  # in a centos:8 container and uploads the resulting jar as a dated nightly artifact.
  build-bundle-package-centos8:
    needs: build-native-lib
    runs-on: ubuntu-22.04
    container: centos:8
    steps:
      - uses: actions/checkout@v4
      - name: Download All Artifacts
        uses: actions/download-artifact@v4
        with:
          name: velox-native-lib-${{github.sha}}
          path: ./cpp/build/releases
      - name: Download All Arrow Jar Artifacts
        uses: actions/download-artifact@v4
        with:
          name: velox-arrow-jar-centos-7-${{github.sha}}
          path: /root/.m2/repository/org/apache/arrow/
      - name: Setup java and maven
        # The sed commands switch the yum repos from the mirrorlist to vault.centos.org
        # before installing the JDK and unpacking Maven into /usr/lib/maven.
        run: |
          sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-* && \
          sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-* && \
          yum update -y && yum install -y java-1.8.0-openjdk-devel wget && \
          wget https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz && \
          tar -xvf apache-maven-3.8.8-bin.tar.gz && \
          mv apache-maven-3.8.8 /usr/lib/maven
      - name: Get current date
        id: date
        # Step outputs are written to the GITHUB_OUTPUT file; the `::set-output` workflow
        # command is deprecated.
        run: echo "date=$(date +'%Y-%m-%d')" >> "$GITHUB_OUTPUT"
      - name: Build for Spark 3.5
        run: |
          cd $GITHUB_WORKSPACE/ && \
          export MAVEN_HOME=/usr/lib/maven && \
          export PATH=${PATH}:${MAVEN_HOME}/bin && \
          mvn clean install -Pspark-3.5 -Pbackends-velox -Pceleborn -Puniffle -DskipTests -Dmaven.source.skip
      - name: Upload bundle package
        uses: actions/upload-artifact@v4
        with:
          name: nightly-gluten-velox-bundle-package-spark35-${{ steps.date.outputs.date }}
          path: package/target/gluten-velox-bundle-*.jar
          retention-days: 7
107+
108+

backends-clickhouse/pom.xml

+2-2
Original file line numberDiff line numberDiff line change
@@ -130,9 +130,9 @@
130130
</exclusions>
131131
</dependency>
132132
<dependency>
133-
<groupId>io.glutenproject</groupId>
133+
<groupId>com.google.protobuf</groupId>
134134
<artifactId>protobuf-java</artifactId>
135-
<version>${custom.protobuf.version}</version>
135+
<version>${protobuf.version}</version>
136136
</dependency>
137137
<dependency>
138138
<groupId>org.apache.gluten</groupId>

backends-velox/pom.xml

+2-2
Original file line numberDiff line numberDiff line change
@@ -157,9 +157,9 @@
157157
<scope>compile</scope>
158158
</dependency>
159159
<dependency>
160-
<groupId>io.glutenproject</groupId>
160+
<groupId>com.google.protobuf</groupId>
161161
<artifactId>protobuf-java</artifactId>
162-
<version>${custom.protobuf.version}</version>
162+
<version>${protobuf.version}</version>
163163
</dependency>
164164
<dependency>
165165
<groupId>org.apache.gluten</groupId>

backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala

+60
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ import org.apache.commons.lang3.ClassUtils
5858

5959
import javax.ws.rs.core.UriBuilder
6060

61+
import java.util.Locale
62+
6163
class VeloxSparkPlanExecApi extends SparkPlanExecApi {
6264

6365
/** Transform GetArrayItem to Substrait. */
@@ -700,6 +702,64 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi {
700702
GenericExpressionTransformer(substraitExprName, children, expr)
701703
}
702704

705+
/**
 * Generate an expression transformer to transform JsonToStructs to Substrait.
 *
 * A [[GlutenNotSupportException]] is thrown when any of the following holds:
 *   - 'spark.sql.json.enablePartialResults' is false or not defined,
 *   - the expression carries any options,
 *   - 'spark.sql.caseSensitive' is true,
 *   - the struct schema has field names that collide when compared case-insensitively,
 *   - the struct schema contains the configured corrupt-record column.
 *
 * @param substraitExprName
 *   name passed through to the resulting transformer
 * @param children
 *   already-transformed child expressions
 * @param expr
 *   the original JsonToStructs expression
 * @return
 *   a GenericExpressionTransformer built from the three arguments
 */
override def genFromJsonTransformer(
    substraitExprName: String,
    children: Seq[ExpressionTransformer],
    expr: JsonToStructs): ExpressionTransformer = {
  val enablePartialResults =
    try {
      SQLConf.get.getConfString("spark.sql.json.enablePartialResults").toBoolean
    } catch {
      case _: NoSuchElementException =>
        // Before spark 3.4, this config is not defined, and partial result parsing is not
        // supported. Therefore we need to return false.
        false
    }
  if (!enablePartialResults) {
    // Velox only supports partial results mode. We need to fall back this when
    // 'spark.sql.json.enablePartialResults' is set to false or not defined.
    throw new GlutenNotSupportException(
      "'from_json' with 'spark.sql.json.enablePartialResults = false' is not supported in Velox")
  }
  if (expr.options.nonEmpty) {
    throw new GlutenNotSupportException("'from_json' with options is not supported in Velox")
  }
  if (SQLConf.get.caseSensitiveAnalysis) {
    throw new GlutenNotSupportException(
      "'from_json' with 'spark.sql.caseSensitive = true' is not supported in Velox")
  }

  val hasDuplicateKey = expr.schema match {
    case s: StructType =>
      // Two fields collide if their names are equal ignoring case; exact duplicates are a
      // special case of that, so one distinct-count over the lower-cased names covers both.
      val lowerCasedNames = s.names.map(_.toLowerCase(Locale.ROOT))
      lowerCasedNames.distinct.length != lowerCasedNames.length
    case _ =>
      false
  }
  if (hasDuplicateKey) {
    throw new GlutenNotSupportException(
      "'from_json' with duplicate keys is not supported in Velox")
  }

  val hasCorruptRecord = expr.schema match {
    case s: StructType =>
      // Read the configured column name once instead of once per field.
      val corruptRecordColumn = SQLConf.get.getConf(SQLConf.COLUMN_NAME_OF_CORRUPT_RECORD)
      s.exists(_.name == corruptRecordColumn)
    case _ =>
      false
  }
  if (hasCorruptRecord) {
    throw new GlutenNotSupportException(
      "'from_json' with column corrupt record is not supported in Velox")
  }
  GenericExpressionTransformer(substraitExprName, children, expr)
}
762+
703763
/** Generate an expression transformer to transform NamedStruct to Substrait. */
704764
override def genNamedStructTransformer(
705765
substraitExprName: String,

0 commit comments

Comments
 (0)