From ef1edb4e5eee19cefe6ad9c229c614ed802bb5ce Mon Sep 17 00:00:00 2001 From: Furqaanahmed Khan Date: Mon, 21 Oct 2024 11:13:39 -0700 Subject: [PATCH 01/22] fix: connection timeout error --- .github/workflows/python.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 04fa4f7fc9..efe72108d8 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -117,10 +117,10 @@ jobs: SPARK_VERSION: ${{ matrix.spark }} HADOOP_VERSION: ${{ matrix.hadoop }} run: | - wget https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz - wget https://repo.osgeo.org/repository/release/javax/media/jai_core/${JAI_CORE_VERSION}/jai_core-${JAI_CORE_VERSION}.jar - wget https://repo.osgeo.org/repository/release/javax/media/jai_codec/${JAI_CODEC_VERSION}/jai_codec-${JAI_CODEC_VERSION}.jar - wget https://repo.osgeo.org/repository/release/javax/media/jai_imageio/${JAI_IMAGEIO_VERSION}/jai_imageio-${JAI_IMAGEIO_VERSION}.jar + wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz + wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_core/${JAI_CORE_VERSION}/jai_core-${JAI_CORE_VERSION}.jar + wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_codec/${JAI_CODEC_VERSION}/jai_codec-${JAI_CODEC_VERSION}.jar + wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_imageio/${JAI_IMAGEIO_VERSION}/jai_imageio-${JAI_IMAGEIO_VERSION}.jar tar -xzf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz mv -v jai_core-${JAI_CORE_VERSION}.jar spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}/jars/ mv -v jai_codec-${JAI_CODEC_VERSION}.jar spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}/jars/ From 55010cfddb1537130721c6be880cbac9fff32f63 Mon Sep 17 00:00:00 2001 From: Furqaanahmed Khan Date: Mon, 21 Oct 2024 15:47:42 -0700 Subject: [PATCH 02/22] fix: try some ideas --- .github/workflows/python.yml | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index efe72108d8..e11d1d0e12 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -113,18 +113,6 @@ jobs: run: | SPARK_COMPAT_VERSION=${SPARK_VERSION:0:3} mvn -q clean install -DskipTests -Dspark=${SPARK_COMPAT_VERSION} -Dscala=${SCALA_VERSION:0:4} -Dgeotools - - env: - SPARK_VERSION: ${{ matrix.spark }} - HADOOP_VERSION: ${{ matrix.hadoop }} - run: | - wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz - wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_core/${JAI_CORE_VERSION}/jai_core-${JAI_CORE_VERSION}.jar - wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_codec/${JAI_CODEC_VERSION}/jai_codec-${JAI_CODEC_VERSION}.jar - wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_imageio/${JAI_IMAGEIO_VERSION}/jai_imageio-${JAI_IMAGEIO_VERSION}.jar - tar -xzf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz - mv -v jai_core-${JAI_CORE_VERSION}.jar spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}/jars/ - mv -v jai_codec-${JAI_CODEC_VERSION}.jar spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}/jars/ - mv -v jai_imageio-${JAI_IMAGEIO_VERSION}.jar spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}/jars/ - run: sudo apt-get -y install python3-pip python-dev-is-python3 - run: sudo pip3 install -U setuptools - run: sudo pip3 install -U wheel @@ -145,6 +133,17 @@ jobs: pipenv install pyspark==${SPARK_VERSION} pipenv install --dev pipenv graph + - env: + SPARK_VERSION: ${{ matrix.spark }} + HADOOP_VERSION: ${{ matrix.hadoop }} + PYTHON_VERSION: ${{ matrix.python }} + run: | + wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_core/${JAI_CORE_VERSION}/jai_core-${JAI_CORE_VERSION}.jar + wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_codec/${JAI_CODEC_VERSION}/jai_codec-${JAI_CODEC_VERSION}.jar + wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_imageio/${JAI_IMAGEIO_VERSION}/jai_imageio-${JAI_IMAGEIO_VERSION}.jar + mv -v jai_core-${JAI_CORE_VERSION}.jar /usr/local/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars + mv -v jai_codec-${JAI_CODEC_VERSION}.jar /usr/local/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars + mv -v jai_imageio-${JAI_IMAGEIO_VERSION}.jar /usr/local/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars - env: SPARK_VERSION: ${{ matrix.spark }} HADOOP_VERSION: ${{ matrix.hadoop }} From 5d5ed76cc3441937f4a623694e0a9606fe4d3c2f Mon Sep 17 00:00:00 2001 From: Furqaanahmed Khan Date: Mon, 21 Oct 2024 15:56:14 -0700 Subject: [PATCH 03/22] fix: try some ideas 2/? --- .github/workflows/python.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index e11d1d0e12..a93fae73dc 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -133,6 +133,7 @@ jobs: pipenv install pyspark==${SPARK_VERSION} pipenv install --dev pipenv graph + pipenv run pip show pyspark - env: SPARK_VERSION: ${{ matrix.spark }} HADOOP_VERSION: ${{ matrix.hadoop }} From c1a2250f736ea98f9300c0b02928f83ab2cdef3f Mon Sep 17 00:00:00 2001 From: Furqaanahmed Khan Date: Mon, 21 Oct 2024 16:27:28 -0700 Subject: [PATCH 04/22] fix: try some ideas 3/? --- .github/workflows/python.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index a93fae73dc..a55e0c4f4b 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -129,6 +129,7 @@ jobs: echo "Patching Pipfile to use Shapely 1.x" sed -i 's/^shapely.*$/shapely="<2.0.0"/g' Pipfile fi + export PIPENV_CUSTOM_VENV_NAME=python-${PYTHON_VERSION} pipenv --python ${PYTHON_VERSION} pipenv install pyspark==${SPARK_VERSION} pipenv install --dev @@ -142,6 +143,7 @@ jobs: wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_core/${JAI_CORE_VERSION}/jai_core-${JAI_CORE_VERSION}.jar wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_codec/${JAI_CODEC_VERSION}/jai_codec-${JAI_CODEC_VERSION}.jar wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_imageio/${JAI_IMAGEIO_VERSION}/jai_imageio-${JAI_IMAGEIO_VERSION}.jar + /home/runner/.local/share/virtualenvs/python-5OHwJwPm/lib/python3.8/site-packages mv -v jai_core-${JAI_CORE_VERSION}.jar /usr/local/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars mv -v jai_codec-${JAI_CODEC_VERSION}.jar /usr/local/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars mv -v jai_imageio-${JAI_IMAGEIO_VERSION}.jar /usr/local/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars From 097a0e8bdb10781d43c27a663115ef198bd63743 Mon Sep 17 00:00:00 2001 From: Furqaanahmed Khan Date: Mon, 21 Oct 2024 16:35:45 -0700 Subject: [PATCH 05/22] fix: try some ideas 4/? --- .github/workflows/python.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index a55e0c4f4b..099a85d9a8 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -143,10 +143,9 @@ jobs: wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_core/${JAI_CORE_VERSION}/jai_core-${JAI_CORE_VERSION}.jar wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_codec/${JAI_CODEC_VERSION}/jai_codec-${JAI_CODEC_VERSION}.jar wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_imageio/${JAI_IMAGEIO_VERSION}/jai_imageio-${JAI_IMAGEIO_VERSION}.jar - /home/runner/.local/share/virtualenvs/python-5OHwJwPm/lib/python3.8/site-packages - mv -v jai_core-${JAI_CORE_VERSION}.jar /usr/local/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars - mv -v jai_codec-${JAI_CODEC_VERSION}.jar /usr/local/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars - mv -v jai_imageio-${JAI_IMAGEIO_VERSION}.jar /usr/local/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars + mv -v jai_core-${JAI_CORE_VERSION}.jar /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars + mv -v jai_codec-${JAI_CODEC_VERSION}.jar /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars + mv -v jai_imageio-${JAI_IMAGEIO_VERSION}.jar /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars - env: SPARK_VERSION: ${{ matrix.spark }} HADOOP_VERSION: ${{ matrix.hadoop }} From 439915f86b5deb19a7b2712b9d1548271ab846b9 Mon Sep 17 00:00:00 2001 From: Furqaanahmed Khan Date: Mon, 21 Oct 2024 17:04:13 -0700 Subject: [PATCH 06/22] fix: add debugger --- .github/workflows/python.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 099a85d9a8..91417722bd 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -94,6 +94,8 @@ jobs: hadoop: '2.7' steps: - uses: actions/checkout@v4 + - name: Setup tmate session + uses: mxschmitt/action-tmate@v3 - uses: actions/setup-java@v4 with: distribution: 'zulu' From bf737778ead8214a6dc6b0a1a4316fe47dae16ce Mon Sep 17 00:00:00 2001 From: Furqaanahmed Khan Date: Mon, 21 Oct 2024 17:17:11 -0700 Subject: [PATCH 07/22] fix: add debugger 2/? --- .github/workflows/python.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 91417722bd..509ad2ca00 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -95,7 +95,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Setup tmate session - uses: mxschmitt/action-tmate@v3 + uses: mxschmitt/action-tmate@v3.19 - uses: actions/setup-java@v4 with: distribution: 'zulu' From a4bcae39c5c36a7c1fd85ef0641042d0d013b1ed Mon Sep 17 00:00:00 2001 From: Furqaanahmed Khan Date: Mon, 21 Oct 2024 17:21:47 -0700 Subject: [PATCH 08/22] fix: add debugger 3/? --- .github/workflows/python.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 509ad2ca00..fb4d75135f 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -94,8 +94,6 @@ jobs: hadoop: '2.7' steps: - uses: actions/checkout@v4 - - name: Setup tmate session - uses: mxschmitt/action-tmate@v3.19 - uses: actions/setup-java@v4 with: distribution: 'zulu' @@ -109,6 +107,8 @@ jobs: path: ~/.m2 key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} restore-keys: ${{ runner.os }}-m2 + - name: Setup tmate session + uses: mxschmitt/action-tmate@v3.19 - env: SPARK_VERSION: ${{ matrix.spark }} SCALA_VERSION: ${{ matrix.scala }} From b22a842b4d69a241907b7b3ae828b68b3d47d452 Mon Sep 17 00:00:00 2001 From: Furqaanahmed Khan Date: Tue, 22 Oct 2024 15:04:33 -0700 Subject: [PATCH 09/22] fix: add debugger 4/? --- .github/workflows/python.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index fb4d75135f..3dbe13ed11 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -107,8 +107,6 @@ jobs: path: ~/.m2 key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }} restore-keys: ${{ runner.os }}-m2 - - name: Setup tmate session - uses: mxschmitt/action-tmate@v3.19 - env: SPARK_VERSION: ${{ matrix.spark }} SCALA_VERSION: ${{ matrix.scala }} @@ -148,6 +146,8 @@ jobs: mv -v jai_core-${JAI_CORE_VERSION}.jar /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars mv -v jai_codec-${JAI_CODEC_VERSION}.jar /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars mv -v jai_imageio-${JAI_IMAGEIO_VERSION}.jar /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars + - name: Setup tmate session + uses: mxschmitt/action-tmate@v3.19 - env: SPARK_VERSION: ${{ matrix.spark }} HADOOP_VERSION: ${{ matrix.hadoop }} From 66c2755fcf4e85c09fb055d3dcf1a37de2185219 Mon Sep 17 00:00:00 2001 From: Furqaanahmed Khan Date: Thu, 24 Oct 2024 13:29:57 -0700 Subject: [PATCH 10/22] fix: try some idea 5/? --- .github/workflows/python.yml | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 3dbe13ed11..ca5f1b5cad 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -146,8 +146,6 @@ jobs: mv -v jai_core-${JAI_CORE_VERSION}.jar /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars mv -v jai_codec-${JAI_CODEC_VERSION}.jar /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars mv -v jai_imageio-${JAI_IMAGEIO_VERSION}.jar /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars - - name: Setup tmate session - uses: mxschmitt/action-tmate@v3.19 - env: SPARK_VERSION: ${{ matrix.spark }} HADOOP_VERSION: ${{ matrix.hadoop }} @@ -155,7 +153,8 @@ jobs: - env: SPARK_VERSION: ${{ matrix.spark }} HADOOP_VERSION: ${{ matrix.hadoop }} - run: (export SPARK_HOME=$PWD/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION};export PYTHONPATH=$SPARK_HOME/python;cd python;pipenv run pytest tests) + PYTHON_VERSION: ${{ matrix.python }} + run: (export SPARK_HOME=/home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark;cd python;pipenv run pytest tests) - env: SPARK_VERSION: ${{ matrix.spark }} HADOOP_VERSION: ${{ matrix.hadoop }} @@ -166,8 +165,7 @@ jobs: exit fi - export SPARK_HOME=$PWD/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} - export PYTHONPATH=$SPARK_HOME/python + export SPARK_HOME=/home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark export SPARK_REMOTE=local cd python From 438519aef79c280cf46076e62324589fc449da0b Mon Sep 17 00:00:00 2001 From: Furqaanahmed Khan Date: Thu, 24 Oct 2024 13:46:28 -0700 Subject: [PATCH 11/22] fix: add debugger --- .github/workflows/python.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index ca5f1b5cad..50acb35517 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -155,6 +155,8 @@ jobs: HADOOP_VERSION: ${{ matrix.hadoop }} PYTHON_VERSION: ${{ matrix.python }} run: (export SPARK_HOME=/home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark;cd python;pipenv run pytest tests) + - name: Setup tmate session + uses: mxschmitt/action-tmate@v3.19 - env: SPARK_VERSION: ${{ matrix.spark }} HADOOP_VERSION: ${{ matrix.hadoop }} From 73f005bf2c7da313fbe0444f7b32ff24f64f3054 Mon Sep 17 00:00:00 2001 From: Furqaanahmed Khan Date: Thu, 24 Oct 2024 14:06:15 -0700 Subject: [PATCH 12/22] fix: add debugger 2/? --- .github/workflows/python.yml | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 50acb35517..63e2e5c13a 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -129,6 +129,7 @@ jobs: echo "Patching Pipfile to use Shapely 1.x" sed -i 's/^shapely.*$/shapely="<2.0.0"/g' Pipfile fi + export WORKON_HOME=./.venv export PIPENV_CUSTOM_VENV_NAME=python-${PYTHON_VERSION} pipenv --python ${PYTHON_VERSION} pipenv install pyspark==${SPARK_VERSION} @@ -143,20 +144,20 @@ jobs: wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_core/${JAI_CORE_VERSION}/jai_core-${JAI_CORE_VERSION}.jar wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_codec/${JAI_CODEC_VERSION}/jai_codec-${JAI_CODEC_VERSION}.jar wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_imageio/${JAI_IMAGEIO_VERSION}/jai_imageio-${JAI_IMAGEIO_VERSION}.jar - mv -v jai_core-${JAI_CORE_VERSION}.jar /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars - mv -v jai_codec-${JAI_CODEC_VERSION}.jar /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars - mv -v jai_imageio-${JAI_IMAGEIO_VERSION}.jar /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars + mv -v jai_core-${JAI_CORE_VERSION}.jar .venv/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars + mv -v jai_codec-${JAI_CODEC_VERSION}.jar .venv/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars + mv -v jai_imageio-${JAI_IMAGEIO_VERSION}.jar .venv/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars - env: SPARK_VERSION: ${{ matrix.spark }} HADOOP_VERSION: ${{ matrix.hadoop }} - run: find spark-shaded/target -name sedona-*.jar -exec cp {} spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}/jars/ \; + run: find spark-shaded/target -name sedona-*.jar -exec cp {} .venv/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars/ \; + - name: Setup tmate session + uses: mxschmitt/action-tmate@v3.19 - env: SPARK_VERSION: ${{ matrix.spark }} HADOOP_VERSION: ${{ matrix.hadoop }} PYTHON_VERSION: ${{ matrix.python }} - run: (export SPARK_HOME=/home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark;cd python;pipenv run pytest tests) - - name: Setup tmate session - uses: mxschmitt/action-tmate@v3.19 + run: (export SPARK_HOME=.venv/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark;cd python;pipenv run pytest tests) - env: SPARK_VERSION: ${{ matrix.spark }} HADOOP_VERSION: ${{ matrix.hadoop }} @@ -167,7 +168,7 @@ jobs: exit fi - export SPARK_HOME=/home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark + export SPARK_HOME=.venv/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark export SPARK_REMOTE=local cd python From 188ef4323bdfc5f4e7ca90b1a2db698fbb520c7c Mon Sep 17 00:00:00 2001 From: Furqaanahmed Khan Date: Thu, 24 Oct 2024 14:25:38 -0700 Subject: [PATCH 13/22] fix: add debugger 3/? --- .github/workflows/python.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 63e2e5c13a..18f99f7c04 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -129,7 +129,7 @@ jobs: echo "Patching Pipfile to use Shapely 1.x" sed -i 's/^shapely.*$/shapely="<2.0.0"/g' Pipfile fi - export WORKON_HOME=./.venv + export WORKON_HOME=$PWD/.venv export PIPENV_CUSTOM_VENV_NAME=python-${PYTHON_VERSION} pipenv --python ${PYTHON_VERSION} pipenv install pyspark==${SPARK_VERSION} From 40521f518ea76080ef8f8c90ebfab7c7ad2ff61a Mon Sep 17 00:00:00 2001 From: Furqaanahmed Khan Date: Thu, 24 Oct 2024 14:38:41 -0700 Subject: [PATCH 14/22] fix: try some ideas 6/? --- .github/workflows/python.yml | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 18f99f7c04..84fd365c1c 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -129,7 +129,7 @@ jobs: echo "Patching Pipfile to use Shapely 1.x" sed -i 's/^shapely.*$/shapely="<2.0.0"/g' Pipfile fi - export WORKON_HOME=$PWD/.venv + export VENV_DIR=/home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION} export PIPENV_CUSTOM_VENV_NAME=python-${PYTHON_VERSION} pipenv --python ${PYTHON_VERSION} pipenv install pyspark==${SPARK_VERSION} @@ -144,20 +144,18 @@ jobs: wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_core/${JAI_CORE_VERSION}/jai_core-${JAI_CORE_VERSION}.jar wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_codec/${JAI_CODEC_VERSION}/jai_codec-${JAI_CODEC_VERSION}.jar wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_imageio/${JAI_IMAGEIO_VERSION}/jai_imageio-${JAI_IMAGEIO_VERSION}.jar - mv -v jai_core-${JAI_CORE_VERSION}.jar .venv/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars - mv -v jai_codec-${JAI_CODEC_VERSION}.jar .venv/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars - mv -v jai_imageio-${JAI_IMAGEIO_VERSION}.jar .venv/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars + mv -v jai_core-${JAI_CORE_VERSION}.jar $VENV_DIR/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars + mv -v jai_codec-${JAI_CODEC_VERSION}.jar $VENV_DIR/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars + mv -v jai_imageio-${JAI_IMAGEIO_VERSION}.jar $VENV_DIR/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars - env: SPARK_VERSION: ${{ matrix.spark }} HADOOP_VERSION: ${{ matrix.hadoop }} - run: find spark-shaded/target -name sedona-*.jar -exec cp {} .venv/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars/ \; - - name: Setup tmate session - uses: mxschmitt/action-tmate@v3.19 + run: find spark-shaded/target -name sedona-*.jar -exec cp {} $VENV_DIR/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars/ \; - env: SPARK_VERSION: ${{ matrix.spark }} HADOOP_VERSION: ${{ matrix.hadoop }} PYTHON_VERSION: ${{ matrix.python }} - run: (export SPARK_HOME=.venv/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark;cd python;pipenv run pytest tests) + run: (export SPARK_HOME=$VENV_DIR/lib/python${PYTHON_VERSION}/site-packages/pyspark;cd python;source $VENV_DIR/bin/activate;pytest tests) - env: SPARK_VERSION: ${{ matrix.spark }} HADOOP_VERSION: ${{ matrix.hadoop }} @@ -168,9 +166,10 @@ jobs: exit fi - export SPARK_HOME=.venv/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark + export SPARK_HOME=$VENV_DIR/lib/python${PYTHON_VERSION}/site-packages/pyspark export SPARK_REMOTE=local cd python - pipenv install "pyspark[connect]==${SPARK_VERSION}" - pipenv run pytest tests/sql/test_dataframe_api.py + source $VENV_DIR/bin/activate + pip install "pyspark[connect]==${SPARK_VERSION}" + pytest tests/sql/test_dataframe_api.py From 50c7d4907c31bd674e5457020a35e975b86ed55c Mon Sep 17 00:00:00 2001 From: Furqaanahmed Khan Date: Thu, 24 Oct 2024 14:48:29 -0700 Subject: [PATCH 15/22] fix: try some ideas 7/? --- .github/workflows/python.yml | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 84fd365c1c..0fdce0243b 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -129,7 +129,6 @@ jobs: echo "Patching Pipfile to use Shapely 1.x" sed -i 's/^shapely.*$/shapely="<2.0.0"/g' Pipfile fi - export VENV_DIR=/home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION} export PIPENV_CUSTOM_VENV_NAME=python-${PYTHON_VERSION} pipenv --python ${PYTHON_VERSION} pipenv install pyspark==${SPARK_VERSION} @@ -144,18 +143,24 @@ jobs: wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_core/${JAI_CORE_VERSION}/jai_core-${JAI_CORE_VERSION}.jar wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_codec/${JAI_CODEC_VERSION}/jai_codec-${JAI_CODEC_VERSION}.jar wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_imageio/${JAI_IMAGEIO_VERSION}/jai_imageio-${JAI_IMAGEIO_VERSION}.jar - mv -v jai_core-${JAI_CORE_VERSION}.jar $VENV_DIR/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars - mv -v jai_codec-${JAI_CODEC_VERSION}.jar $VENV_DIR/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars - mv -v jai_imageio-${JAI_IMAGEIO_VERSION}.jar $VENV_DIR/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars + mv -v jai_core-${JAI_CORE_VERSION}.jar /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars + mv -v jai_codec-${JAI_CODEC_VERSION}.jar /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars + mv -v jai_imageio-${JAI_IMAGEIO_VERSION}.jar /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars - env: SPARK_VERSION: ${{ matrix.spark }} HADOOP_VERSION: ${{ matrix.hadoop }} - run: find spark-shaded/target -name sedona-*.jar -exec cp {} $VENV_DIR/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars/ \; + run: find spark-shaded/target -name sedona-*.jar -exec cp {} /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars/ \; - env: SPARK_VERSION: ${{ matrix.spark }} HADOOP_VERSION: ${{ matrix.hadoop }} PYTHON_VERSION: ${{ matrix.python }} - run: (export SPARK_HOME=$VENV_DIR/lib/python${PYTHON_VERSION}/site-packages/pyspark;cd python;source $VENV_DIR/bin/activate;pytest tests) + run: | + export SPARK_HOME=/home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark + cd python + source /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/bin/activate + pytest tests + - name: Setup tmate session + uses: mxschmitt/action-tmate@v3.19 - env: SPARK_VERSION: ${{ matrix.spark }} HADOOP_VERSION: ${{ matrix.hadoop }} @@ -166,10 +171,10 @@ jobs: exit fi - export SPARK_HOME=$VENV_DIR/lib/python${PYTHON_VERSION}/site-packages/pyspark + export SPARK_HOME=/home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark export SPARK_REMOTE=local cd python - source $VENV_DIR/bin/activate + source /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/bin/activate pip install "pyspark[connect]==${SPARK_VERSION}" pytest tests/sql/test_dataframe_api.py From 43ba73844b54b601fd426c52b8f5849f4dce50f2 Mon Sep 17 00:00:00 2001 From: Furqaanahmed Khan Date: Thu, 24 Oct 2024 15:11:00 -0700 Subject: [PATCH 16/22] fix: try some ideas 8/? --- .github/workflows/python.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 0fdce0243b..c37f673cc6 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -150,6 +150,8 @@ jobs: SPARK_VERSION: ${{ matrix.spark }} HADOOP_VERSION: ${{ matrix.hadoop }} run: find spark-shaded/target -name sedona-*.jar -exec cp {} /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars/ \; + - name: Setup tmate session + uses: mxschmitt/action-tmate@v3.19 - env: SPARK_VERSION: ${{ matrix.spark }} HADOOP_VERSION: ${{ matrix.hadoop }} @@ -159,8 +161,6 @@ jobs: cd python source /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/bin/activate pytest tests - - name: Setup tmate session - uses: mxschmitt/action-tmate@v3.19 - env: SPARK_VERSION: ${{ matrix.spark }} HADOOP_VERSION: ${{ matrix.hadoop }} From 2a328923824dc949b51d9881fcad2f8ac00540a6 Mon Sep 17 00:00:00 2001 From: Furqaanahmed Khan Date: Thu, 24 Oct 2024 15:20:51 -0700 Subject: [PATCH 17/22] fix: it should work now. --- .github/workflows/python.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index c37f673cc6..2e93aa27f0 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -149,9 +149,8 @@ jobs: - env: SPARK_VERSION: ${{ matrix.spark }} HADOOP_VERSION: ${{ matrix.hadoop }} + PYTHON_VERSION: ${{ matrix.python }} run: find spark-shaded/target -name sedona-*.jar -exec cp {} /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars/ \; - - name: Setup tmate session - uses: mxschmitt/action-tmate@v3.19 - env: SPARK_VERSION: ${{ matrix.spark }} HADOOP_VERSION: ${{ matrix.hadoop }} @@ -161,11 +160,13 @@ jobs: cd python source /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/bin/activate pytest tests + - name: Setup tmate session + uses: mxschmitt/action-tmate@v3.19 - env: SPARK_VERSION: ${{ matrix.spark }} HADOOP_VERSION: ${{ matrix.hadoop }} run: | - if [ ! -f "spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}/sbin/start-connect-server.sh" ] + if [ ! -f "/home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/sbin/start-connect-server.sh" ] then echo "Skipping connect tests for Spark $SPARK_VERSION" exit From c7d63aaa762ab81d92d0de63b6c2ab0d7f39600b Mon Sep 17 00:00:00 2001 From: Furqaanahmed Khan Date: Thu, 24 Oct 2024 15:50:12 -0700 Subject: [PATCH 18/22] fix: remove the debugger --- .github/workflows/python.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 2e93aa27f0..b8f6b7b58c 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -160,8 +160,6 @@ jobs: cd python source /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/bin/activate pytest tests - - name: Setup tmate session - uses: mxschmitt/action-tmate@v3.19 - env: SPARK_VERSION: ${{ matrix.spark }} HADOOP_VERSION: ${{ matrix.hadoop }} From 460fab0615ad98e3c85f814380f3b3f2146c642b Mon Sep 17 00:00:00 2001 From: Furqaanahmed Khan Date: Thu, 24 Oct 2024 16:16:38 -0700 Subject: [PATCH 19/22] cleaning up --- .github/workflows/python.yml | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index b8f6b7b58c..77421c6108 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -41,57 +41,44 @@ jobs: - spark: '3.5.0' scala: '2.12.8' python: '3.10' - hadoop: '3' shapely: '1' - spark: '3.5.0' scala: '2.12.8' python: '3.10' - hadoop: '3' - spark: '3.5.0' scala: '2.12.8' python: '3.9' - hadoop: '3' - spark: '3.5.0' scala: '2.12.8' python: '3.8' - hadoop: '3' - spark: '3.4.0' scala: '2.12.8' python: '3.10' - hadoop: '3' - spark: '3.4.0' scala: '2.12.8' python: '3.9' - hadoop: '3' - spark: '3.4.0' scala: '2.12.8' python: '3.8' - hadoop: '3' - spark: '3.4.0' scala: '2.12.8' python: '3.7' - hadoop: '3' - spark: '3.4.0' scala: '2.12.8' python: '3.7' - hadoop: '3' shapely: '1' - spark: '3.3.0' scala: '2.12.8' python: '3.8' - hadoop: '3' - spark: '3.2.0' scala: '2.12.8' python: '3.7' - hadoop: '2.7' - spark: '3.1.2' scala: '2.12.8' python: '3.7' - hadoop: '2.7' - spark: '3.0.3' scala: '2.12.8' python: '3.7' - hadoop: '2.7' steps: - uses: actions/checkout@v4 - uses: actions/setup-java@v4 @@ -136,8 +123,6 @@ jobs: pipenv graph pipenv run pip show pyspark - env: - SPARK_VERSION: ${{ matrix.spark }} - HADOOP_VERSION: ${{ matrix.hadoop }} PYTHON_VERSION: ${{ matrix.python }} run: | wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_core/${JAI_CORE_VERSION}/jai_core-${JAI_CORE_VERSION}.jar @@ -147,13 +132,9 @@ jobs: mv -v jai_codec-${JAI_CODEC_VERSION}.jar /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars mv -v jai_imageio-${JAI_IMAGEIO_VERSION}.jar /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars - env: - SPARK_VERSION: ${{ matrix.spark }} - HADOOP_VERSION: ${{ matrix.hadoop }} PYTHON_VERSION: ${{ matrix.python }} run: find spark-shaded/target -name sedona-*.jar -exec cp {} /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars/ \; - env: - SPARK_VERSION: ${{ matrix.spark }} - HADOOP_VERSION: ${{ matrix.hadoop }} PYTHON_VERSION: ${{ matrix.python }} run: | export SPARK_HOME=/home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark @@ -161,8 +142,7 @@ jobs: source /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/bin/activate pytest tests - env: - SPARK_VERSION: ${{ matrix.spark }} - HADOOP_VERSION: ${{ matrix.hadoop }} + PYTHON_VERSION: ${{ matrix.python }} run: | if [ ! -f "/home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/sbin/start-connect-server.sh" ] then From 5eae5d718bc2f28e0c1539683736528b650ee85c Mon Sep 17 00:00:00 2001 From: Furqaanahmed Khan Date: Fri, 25 Oct 2024 08:30:57 -0700 Subject: [PATCH 20/22] introduce global environment --- .github/workflows/python.yml | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 77421c6108..5160cd8c5c 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -79,6 +79,8 @@ jobs: - spark: '3.0.3' scala: '2.12.8' python: '3.7' + env: + VENV_PATH: /home/runner/.local/share/virtualenvs/python-${{ matrix.python }} steps: - uses: actions/checkout@v4 - uses: actions/setup-java@v4 @@ -128,32 +130,32 @@ jobs: wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_core/${JAI_CORE_VERSION}/jai_core-${JAI_CORE_VERSION}.jar wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_codec/${JAI_CODEC_VERSION}/jai_codec-${JAI_CODEC_VERSION}.jar wget --retry-connrefused --waitretry=10 --read-timeout=20 --timeout=15 --tries=5 https://repo.osgeo.org/repository/release/javax/media/jai_imageio/${JAI_IMAGEIO_VERSION}/jai_imageio-${JAI_IMAGEIO_VERSION}.jar - mv -v jai_core-${JAI_CORE_VERSION}.jar /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars - mv -v jai_codec-${JAI_CODEC_VERSION}.jar /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars - mv -v jai_imageio-${JAI_IMAGEIO_VERSION}.jar /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars + mv -v jai_core-${JAI_CORE_VERSION}.jar ${VENV_PATH}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars + mv -v jai_codec-${JAI_CODEC_VERSION}.jar ${VENV_PATH}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars + mv -v jai_imageio-${JAI_IMAGEIO_VERSION}.jar ${VENV_PATH}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars - env: PYTHON_VERSION: ${{ matrix.python }} - run: find spark-shaded/target -name sedona-*.jar -exec cp {} /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars/ \; + run: find spark-shaded/target -name sedona-*.jar -exec cp {} ${VENV_PATH}/lib/python${PYTHON_VERSION}/site-packages/pyspark/jars/ \; - env: PYTHON_VERSION: ${{ matrix.python }} run: | - export SPARK_HOME=/home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark + export SPARK_HOME=${VENV_PATH}/lib/python${PYTHON_VERSION}/site-packages/pyspark cd python - source /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/bin/activate + source ${VENV_PATH}/bin/activate pytest tests - env: PYTHON_VERSION: ${{ matrix.python }} run: | - if [ ! -f "/home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark/sbin/start-connect-server.sh" ] + if [ ! -f "${VENV_PATH}/lib/python${PYTHON_VERSION}/site-packages/pyspark/sbin/start-connect-server.sh" ] then echo "Skipping connect tests for Spark $SPARK_VERSION" exit fi - export SPARK_HOME=/home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/lib/python${PYTHON_VERSION}/site-packages/pyspark + export SPARK_HOME=${VENV_PATH}/lib/python${PYTHON_VERSION}/site-packages/pyspark export SPARK_REMOTE=local cd python - source /home/runner/.local/share/virtualenvs/python-${PYTHON_VERSION}/bin/activate + source ${VENV_PATH}/bin/activate pip install "pyspark[connect]==${SPARK_VERSION}" pytest tests/sql/test_dataframe_api.py From 0932b511fe4b7f73b68416ba1376100fae96e7af Mon Sep 17 00:00:00 2001 From: Furqaan Khan <46216254+furqaankhan@users.noreply.github.com> Date: Fri, 25 Oct 2024 11:38:12 -0400 Subject: [PATCH 21/22] Update .github/workflows/python.yml Co-authored-by: Jia Yu --- .github/workflows/python.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 5160cd8c5c..0ad1d51727 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -123,7 +123,6 @@ jobs: pipenv install pyspark==${SPARK_VERSION} pipenv install --dev pipenv graph - pipenv run pip show pyspark - env: PYTHON_VERSION: ${{ matrix.python }} run: | From 8b0d034438ab07606deafdfa2bff1efbc3dfb21d Mon Sep 17 00:00:00 2001 From: Furqaanahmed Khan Date: Mon, 28 Oct 2024 15:45:36 -0700 Subject: [PATCH 22/22] fix: docker timeout issues --- .../sedona-jupyterlab.dockerfile | 6 ++---- docker/spark.sh | 13 +++---------- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/docker/sedona-spark-jupyterlab/sedona-jupyterlab.dockerfile b/docker/sedona-spark-jupyterlab/sedona-jupyterlab.dockerfile index 1b39637340..6596229af5 100644 --- a/docker/sedona-spark-jupyterlab/sedona-jupyterlab.dockerfile +++ b/docker/sedona-spark-jupyterlab/sedona-jupyterlab.dockerfile @@ -19,7 +19,6 @@ FROM ubuntu:22.04 ARG shared_workspace=/opt/workspace ARG spark_version=3.4.1 -ARG hadoop_version=3 ARG hadoop_s3_version=3.3.4 ARG aws_sdk_version=1.12.402 ARG spark_xml_version=0.16.0 @@ -29,8 +28,7 @@ ARG spark_extension_version=2.11.0 # Set up envs ENV SHARED_WORKSPACE=${shared_workspace} -ENV SPARK_HOME /opt/spark -RUN mkdir ${SPARK_HOME} +ENV SPARK_HOME /usr/local/lib/python3.10/dist-packages/pyspark ENV SEDONA_HOME /opt/sedona RUN mkdir ${SEDONA_HOME} @@ -44,7 +42,7 @@ COPY ./ ${SEDONA_HOME}/ RUN chmod +x ${SEDONA_HOME}/docker/spark.sh RUN chmod +x ${SEDONA_HOME}/docker/sedona.sh -RUN ${SEDONA_HOME}/docker/spark.sh ${spark_version} ${hadoop_version} ${hadoop_s3_version} ${aws_sdk_version} ${spark_xml_version} +RUN ${SEDONA_HOME}/docker/spark.sh ${spark_version} ${hadoop_s3_version} ${aws_sdk_version} ${spark_xml_version} # Install Python dependencies COPY docker/sedona-spark-jupyterlab/requirements.txt /opt/requirements.txt diff --git a/docker/spark.sh b/docker/spark.sh index 8cca154a34..bd935e8a45 100755 --- a/docker/spark.sh +++ b/docker/spark.sh @@ -19,10 +19,9 @@ set -e # Define variables spark_version=$1 -hadoop_version=$2 -hadoop_s3_version=$3 -aws_sdk_version=$4 -spark_xml_version=$5 +hadoop_s3_version=$2 +aws_sdk_version=$3 +spark_xml_version=$4 # Set up OS libraries apt-get update @@ -30,9 +29,6 @@ apt-get install -y openjdk-19-jdk-headless curl python3-pip maven pip3 install --upgrade pip && pip3 install pipenv # Download Spark jar and set up PySpark -curl https://archive.apache.org/dist/spark/spark-"${spark_version}"/spark-"${spark_version}"-bin-hadoop"${hadoop_version}".tgz -o spark.tgz -tar -xf spark.tgz && mv spark-"${spark_version}"-bin-hadoop"${hadoop_version}"/* "${SPARK_HOME}"/ -rm spark.tgz && rm -rf spark-"${spark_version}"-bin-hadoop"${hadoop_version}" pip3 install pyspark=="${spark_version}" # Add S3 jars @@ -42,9 +38,6 @@ curl https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/"${aws_sdk # Add spark-xml jar curl https://repo1.maven.org/maven2/com/databricks/spark-xml_2.12/"${spark_xml_version}"/spark-xml_2.12-"${spark_xml_version}".jar -o "${SPARK_HOME}"/jars/spark-xml_2.12-"${spark_xml_version}".jar -# Set up master IP address and executor memory -cp "${SPARK_HOME}"/conf/spark-defaults.conf.template "${SPARK_HOME}"/conf/spark-defaults.conf - # Install required libraries for GeoPandas on Apple chip mac apt-get install -y gdal-bin libgdal-dev