Skip to content

Commit de3aeb1

Browse files
authored
[GLUTEN-8266][VL][CI] Pre-install spark sources in docker image (#8290)
1 parent c2bf8f0 commit de3aeb1

File tree

3 files changed

+23
-16
lines changed

3 files changed

+23
-16
lines changed

.github/workflows/util/install_spark_resources.sh

+5 -5
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919

2020
set -e
2121

22-
INSTALL_DIR=$GITHUB_WORKSPACE
22+
INSTALL_DIR=/opt/
2323
case "$1" in
2424
3.2)
2525
# Spark-3.2
@@ -79,12 +79,12 @@ case "$1" in
7979
wget -nv https://archive.apache.org/dist/spark/spark-3.5.2/spark-3.5.2-bin-hadoop3.tgz && \
8080
tar --strip-components=1 -xf spark-3.5.2-bin-hadoop3.tgz spark-3.5.2-bin-hadoop3/jars/ && \
8181
rm -rf spark-3.5.2-bin-hadoop3.tgz && \
82-
mkdir -p ${INSTALL_DIR}/shims/spark35/spark_home/assembly/target/scala-2.13 && \
83-
mv jars ${INSTALL_DIR}/shims/spark35/spark_home/assembly/target/scala-2.13 && \
82+
mkdir -p ${INSTALL_DIR}/shims/spark35-scala2.13/spark_home/assembly/target/scala-2.13 && \
83+
mv jars ${INSTALL_DIR}/shims/spark35-scala2.13/spark_home/assembly/target/scala-2.13 && \
8484
wget -nv https://github.com/apache/spark/archive/refs/tags/v3.5.2.tar.gz && \
8585
tar --strip-components=1 -xf v3.5.2.tar.gz spark-3.5.2/sql/core/src/test/resources/ && \
86-
mkdir -p shims/spark35/spark_home/ && \
87-
mv sql shims/spark35/spark_home/
86+
mkdir -p shims/spark35-scala2.13/spark_home/ && \
87+
mv sql shims/spark35-scala2.13/spark_home/
8888
;;
8989
*)
9090
echo "Spark version is expected to be specified."

.github/workflows/velox_backend.yml

+11 -11
Original file line number | Diff line number | Diff line change
@@ -621,7 +621,7 @@ jobs:
621621
cd $GITHUB_WORKSPACE/
622622
export SPARK_SCALA_VERSION=2.12
623623
$MVN_CMD clean test -Pspark-3.2 -Pspark-ut -Pbackends-velox -Pceleborn -Piceberg \
624-
-Pdelta -Phudi -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark32/spark_home/" \
624+
-Pdelta -Phudi -DargLine="-Dspark.test.home=/opt/shims/spark32/spark_home/" \
625625
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
626626
- name: Upload test report
627627
if: always()
@@ -659,7 +659,7 @@ jobs:
659659
run: |
660660
cd $GITHUB_WORKSPACE/
661661
$MVN_CMD clean test -Pspark-3.2 -Pspark-ut -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi \
662-
-DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark32/spark_home/" -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
662+
-DargLine="-Dspark.test.home=/opt/shims/spark32/spark_home/" -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
663663
- name: Upload test report
664664
if: always()
665665
uses: actions/upload-artifact@v4
@@ -696,7 +696,7 @@ jobs:
696696
cd $GITHUB_WORKSPACE/
697697
export SPARK_SCALA_VERSION=2.12
698698
$MVN_CMD clean test -Pspark-3.3 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \
699-
-DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark33/spark_home/" \
699+
-DargLine="-Dspark.test.home=/opt/shims/spark33/spark_home/" \
700700
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
701701
- name: Upload test report
702702
if: always()
@@ -735,7 +735,7 @@ jobs:
735735
run: |
736736
cd $GITHUB_WORKSPACE/
737737
$MVN_CMD clean test -Pspark-3.3 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \
738-
-DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark33/spark_home/" \
738+
-DargLine="-Dspark.test.home=/opt/shims/spark33/spark_home/" \
739739
-DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
740740
- name: Upload test report
741741
if: always()
@@ -773,7 +773,7 @@ jobs:
773773
cd $GITHUB_WORKSPACE/
774774
export SPARK_SCALA_VERSION=2.12
775775
$MVN_CMD clean test -Pspark-3.4 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \
776-
-DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark34/spark_home/" \
776+
-DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/" \
777777
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
778778
- name: Upload test report
779779
if: always()
@@ -812,7 +812,7 @@ jobs:
812812
run: |
813813
cd $GITHUB_WORKSPACE/
814814
$MVN_CMD clean test -Pspark-3.4 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut -Phudi \
815-
-DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark34/spark_home/" \
815+
-DargLine="-Dspark.test.home=/opt/shims/spark34/spark_home/" \
816816
-DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
817817
- name: Upload test report
818818
if: always()
@@ -850,7 +850,7 @@ jobs:
850850
cd $GITHUB_WORKSPACE/
851851
export SPARK_SCALA_VERSION=2.12
852852
$MVN_CMD clean test -Pspark-3.5 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \
853-
-DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark35/spark_home/" \
853+
-DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/" \
854854
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
855855
- name: Upload test report
856856
if: always()
@@ -894,7 +894,7 @@ jobs:
894894
cd $GITHUB_WORKSPACE/
895895
export SPARK_SCALA_VERSION=2.13
896896
$MVN_CMD clean test -Pspark-3.5 -Pscala-2.13 -Pbackends-velox -Pceleborn -Piceberg \
897-
-Pdelta -Pspark-ut -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark35/spark_home/" \
897+
-Pdelta -Pspark-ut -DargLine="-Dspark.test.home=/opt/shims/spark35-scala2.13/spark_home/" \
898898
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
899899
- name: Upload test report
900900
if: always()
@@ -926,7 +926,7 @@ jobs:
926926
run: |
927927
cd $GITHUB_WORKSPACE/
928928
$MVN_CMD clean test -Pspark-3.5 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Phudi -Pspark-ut \
929-
-DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark35/spark_home/" \
929+
-DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/" \
930930
-DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
931931
- name: Upload test report
932932
if: always()
@@ -964,7 +964,7 @@ jobs:
964964
cd $GITHUB_WORKSPACE/
965965
export SPARK_SCALA_VERSION=2.12
966966
$MVN_CMD clean test -Pspark-3.5 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut \
967-
-DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark35/spark_home/ -Dspark.gluten.ras.enabled=true" \
967+
-DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/ -Dspark.gluten.ras.enabled=true" \
968968
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
969969
- name: Upload test report
970970
uses: actions/upload-artifact@v4
@@ -995,7 +995,7 @@ jobs:
995995
run: |
996996
cd $GITHUB_WORKSPACE/
997997
$MVN_CMD clean test -Pspark-3.5 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut \
998-
-DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark35/spark_home/ -Dspark.gluten.ras.enabled=true" \
998+
-DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/ -Dspark.gluten.ras.enabled=true" \
999999
-DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
10001000
- name: Upload test report
10011001
uses: actions/upload-artifact@v4

dev/docker/Dockerfile.centos8-dynamic-build

+7
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,13 @@ RUN wget --no-check-certificate https://downloads.apache.org/maven/maven-3/3.8.8
1515
mv apache-maven-3.8.8 /usr/lib/maven
1616
ENV PATH=${PATH}:/usr/lib/maven/bin
1717

18+
RUN wget -nv https://archive.apache.org/dist/celeborn/celeborn-0.3.2-incubating/apache-celeborn-0.3.2-incubating-bin.tgz -P /opt/
19+
RUN wget -nv https://archive.apache.org/dist/celeborn/celeborn-0.4.2/apache-celeborn-0.4.2-bin.tgz -P /opt/
20+
RUN wget -nv https://archive.apache.org/dist/celeborn/celeborn-0.5.2/apache-celeborn-0.5.2-bin.tgz -P /opt/
21+
1822
RUN git clone --depth=1 https://github.com/apache/incubator-gluten /opt/gluten
1923

24+
RUN cd /opt/gluten/.github/workflows/util/ && ./install_spark_resources.sh 3.2 && ./install_spark_resources.sh 3.3 \
25+
&& ./install_spark_resources.sh 3.4 && ./install_spark_resources.sh 3.5 && ./install_spark_resources.sh 3.5-scala2.13
26+
2027
RUN cd /opt/gluten && source /opt/rh/gcc-toolset-11/enable && ./dev/builddeps-veloxbe.sh --run_setup_script=ON build_arrow && rm -rf /opt/gluten

0 commit comments

Comments
 (0)