[CI] Add pre-commit hook shellcheck-py to lint shell files #1626

Merged 1 commit on Oct 24, 2024
4 changes: 4 additions & 0 deletions .pre-commit-config.yaml
@@ -70,6 +70,10 @@ repos:
         exclude: ^\.github/.*$
         types: [markdown]
         files: \.(md|mdown|markdown)$
+  - repo: https://github.com/shellcheck-py/shellcheck-py
+    rev: v0.10.0.1
+    hooks:
+      - id: shellcheck
   - repo: https://github.com/adrienverge/yamllint
     rev: v1.35.1
     hooks:
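With this hook in place, ShellCheck runs on the repository's shell scripts at commit time. It can also be run by hand across the whole tree using the standard pre-commit workflow (usage example, not part of this diff):

    pip install pre-commit
    pre-commit install
    pre-commit run shellcheck --all-files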
1 change: 1 addition & 0 deletions .shellcheckrc
@@ -0,0 +1 @@
+disable=SC2004,SC2041,SC2155,SC2181
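The new .shellcheckrc silences a few checks so the existing scripts pass without larger rewrites: SC2004 (the $ prefix on variables inside $((...)) arithmetic, as in start.sh's echo $(($mem_value * 1024))), SC2155 (declaring and assigning a variable in one statement, which masks the assigned command's exit status), SC2181 (testing $? instead of testing the command directly), and SC2041. For instance, start.sh keeps the $?-style error check; the direct form ShellCheck would otherwise suggest is sketched below it (a sketch only, not part of this change):

    # pattern kept in start.sh (SC2181 disabled)
    DRIVER_MEM_MB=$(convert_to_mb "$DRIVER_MEM")
    if [ $? -ne 0 ]; then
        echo "Error converting DRIVER_MEM to megabytes." >&2
        exit 1
    fi

    # roughly equivalent direct form that SC2181 prefers
    if ! DRIVER_MEM_MB=$(convert_to_mb "$DRIVER_MEM"); then
        echo "Error converting DRIVER_MEM to megabytes." >&2
        exit 1
    fi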
14 changes: 7 additions & 7 deletions docker/sedona-spark-jupyterlab/build.sh
@@ -23,7 +23,7 @@ BUILD_MODE=$3
 GEOTOOLS_VERSION=${4:-auto}

 SEDONA_SPARK_VERSION=${SPARK_VERSION:0:3}
-if [ ${SPARK_VERSION:0:1} -eq "3" ] && [ ${SPARK_VERSION:2:1} -le "3" ]; then
+if [ "${SPARK_VERSION:0:1}" -eq "3" ] && [ "${SPARK_VERSION:2:1}" -le "3" ]; then
 # 3.0, 3.1, 3.2, 3.3
 SEDONA_SPARK_VERSION=3.0
 fi
@@ -42,7 +42,7 @@ get_latest_version_with_suffix() {

 # Fetch the maven-metadata.xml file
 METADATA_URL="${BASE_URL}maven-metadata.xml"
-METADATA_XML=$(curl -s $METADATA_URL)
+METADATA_XML=$(curl -s "$METADATA_URL")

 # Extract versions from the XML
 VERSIONS=$(echo "$METADATA_XML" | grep -o '<version>[^<]*</version>' | awk -F'[<>]' '{print $3}')
@@ -52,7 +52,7 @@ get_latest_version_with_suffix() {
 # Filter versions that end with the specified suffix and find the largest one
 for VERSION in $VERSIONS; do
 if [[ $VERSION == *$SUFFIX ]]; then
-if [[ -z $LATEST_VERSION ]] || version_gt $VERSION $LATEST_VERSION; then
+if [[ -z $LATEST_VERSION ]] || version_gt "$VERSION" "$LATEST_VERSION"; then
 LATEST_VERSION=$VERSION
 fi
 fi
@@ -61,7 +61,7 @@ get_latest_version_with_suffix() {
 if [[ -z $LATEST_VERSION ]]; then
 exit 1
 else
-echo $LATEST_VERSION
+echo "$LATEST_VERSION"
 fi
 }

@@ -80,7 +80,7 @@ if [ "$SEDONA_VERSION" = "latest" ]; then
 echo "Using latest geotools-wrapper version: $GEOTOOLS_WRAPPER_VERSION"

 # The compilation must take place outside Docker to avoid unnecessary maven packages
-mvn clean install -DskipTests -Dspark=${SEDONA_SPARK_VERSION} -Dscala=2.12
+mvn clean install -DskipTests -Dspark="${SEDONA_SPARK_VERSION}" -Dscala=2.12
 fi

 # -- Building the image
@@ -92,7 +92,7 @@ if [ -z "$BUILD_MODE" ] || [ "$BUILD_MODE" = "local" ]; then
 --build-arg sedona_version="${SEDONA_VERSION}" \
 --build-arg geotools_wrapper_version="${GEOTOOLS_WRAPPER_VERSION}" \
 -f docker/sedona-spark-jupyterlab/sedona-jupyterlab.dockerfile \
--t apache/sedona:${SEDONA_VERSION} .
+-t apache/sedona:"${SEDONA_VERSION}" .
 else
 # If release, build the image for cross-platform
 docker buildx build --platform linux/amd64,linux/arm64 \
@@ -103,5 +103,5 @@ else
 --build-arg sedona_version="${SEDONA_VERSION}" \
 --build-arg geotools_wrapper_version="${GEOTOOLS_WRAPPER_VERSION}" \
 -f docker/sedona-spark-jupyterlab/sedona-jupyterlab.dockerfile \
--t apache/sedona:${SEDONA_VERSION} .
+-t apache/sedona:"${SEDONA_VERSION}" .
 fi
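Nearly all of the script changes are the same ShellCheck fix (SC2086): quoting expansions so that values containing spaces or glob characters are passed as a single, unmodified argument. A minimal illustration of what unquoted expansion does (generic sketch, not taken from the Sedona scripts):

    VALUE='a  *  b'
    echo $VALUE      # word-splits and glob-expands: spaces collapse and * can match files in the current directory
    echo "$VALUE"    # prints the value verbatim: a  *  b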
18 changes: 9 additions & 9 deletions docker/sedona-spark-jupyterlab/start.sh
@@ -29,7 +29,7 @@ convert_to_mb() {
 echo $(($mem_value * 1024))
 ;;
 [mM])
-echo $mem_value
+echo "$mem_value"
 ;;
 *)
 echo "Invalid memory unit: $mem_str" >&2
@@ -39,13 +39,13 @@ convert_to_mb() {
 }

 # Convert DRIVER_MEM and EXECUTOR_MEM to megabytes
-DRIVER_MEM_MB=$(convert_to_mb $DRIVER_MEM)
+DRIVER_MEM_MB=$(convert_to_mb "$DRIVER_MEM")
 if [ $? -ne 0 ]; then
 echo "Error converting DRIVER_MEM to megabytes." >&2
 exit 1
 fi

-EXECUTOR_MEM_MB=$(convert_to_mb $EXECUTOR_MEM)
+EXECUTOR_MEM_MB=$(convert_to_mb "$EXECUTOR_MEM")
 if [ $? -ne 0 ]; then
 echo "Error converting EXECUTOR_MEM to megabytes." >&2
 exit 1
@@ -58,7 +58,7 @@ TOTAL_PHYSICAL_MEM_MB=$(free -m | awk '/^Mem:/{print $2}')
 TOTAL_REQUIRED_MEM_MB=$(($DRIVER_MEM_MB + $EXECUTOR_MEM_MB))

 # Compare total required memory with total physical memory
-if [ $TOTAL_REQUIRED_MEM_MB -gt $TOTAL_PHYSICAL_MEM_MB ]; then
+if [ $TOTAL_REQUIRED_MEM_MB -gt "$TOTAL_PHYSICAL_MEM_MB" ]; then
 echo "Error: Insufficient memory" >&2
 echo " total: $TOTAL_PHYSICAL_MEM_MB MB" >&2
 echo " required: $TOTAL_REQUIRED_MEM_MB MB (driver: $DRIVER_MEM_MB MB, executor: $EXECUTOR_MEM_MB MB)" >&2
@@ -68,14 +68,14 @@ if [ $TOTAL_REQUIRED_MEM_MB -gt $TOTAL_PHYSICAL_MEM_MB ]; then
 fi

 # Configure spark
-cp ${SPARK_HOME}/conf/spark-env.sh.template ${SPARK_HOME}/conf/spark-env.sh
-echo "SPARK_WORKER_MEMORY=${EXECUTOR_MEM}" >> ${SPARK_HOME}/conf/spark-env.sh
-echo "spark.driver.memory $DRIVER_MEM" >> ${SPARK_HOME}/conf/spark-defaults.conf
-echo "spark.executor.memory $EXECUTOR_MEM" >> ${SPARK_HOME}/conf/spark-defaults.conf
+cp "${SPARK_HOME}"/conf/spark-env.sh.template "${SPARK_HOME}"/conf/spark-env.sh
+echo "SPARK_WORKER_MEMORY=${EXECUTOR_MEM}" >> "${SPARK_HOME}"/conf/spark-env.sh
+echo "spark.driver.memory $DRIVER_MEM" >> "${SPARK_HOME}"/conf/spark-defaults.conf
+echo "spark.executor.memory $EXECUTOR_MEM" >> "${SPARK_HOME}"/conf/spark-defaults.conf

 # Start spark standalone cluster
 service ssh start
-${SPARK_HOME}/sbin/start-all.sh
+"${SPARK_HOME}"/sbin/start-all.sh

 # Start jupyter lab
 exec jupyter lab --ip=0.0.0.0 --port=8888 --no-browser --allow-root --NotebookApp.token=
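For context, convert_to_mb normalizes DRIVER_MEM and EXECUTOR_MEM to megabytes before the free-memory comparison. Judging from the visible case arms (a gigabyte-style value multiplied by 1024, a megabyte-style value passed through), usage is roughly as follows; the exact accepted formats are an assumption, since the full function is not shown in this diff:

    DRIVER_MEM=4g
    DRIVER_MEM_MB=$(convert_to_mb "$DRIVER_MEM")       # -> 4096
    EXECUTOR_MEM=512m
    EXECUTOR_MEM_MB=$(convert_to_mb "$EXECUTOR_MEM")   # -> 512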
18 changes: 9 additions & 9 deletions docker/sedona.sh
@@ -25,29 +25,29 @@ spark_extension_version=$4

 spark_compat_version=${spark_version:0:3}
 sedona_spark_version=${spark_compat_version}
-if [ ${spark_version:0:1} -eq "3" ] && [ ${spark_version:2:1} -le "3" ]; then
+if [ "${spark_version:0:1}" -eq "3" ] && [ "${spark_version:2:1}" -le "3" ]; then
 # 3.0, 3.1, 3.2, 3.3
 sedona_spark_version=3.0
 fi

-if [ $sedona_version = "latest" ]; then
+if [ "$sedona_version" = "latest" ]; then
 # Code to execute when SEDONA_VERSION is "latest"
-cp ${SEDONA_HOME}/spark-shaded/target/sedona-spark-shaded-*.jar ${SPARK_HOME}/jars/
-cd ${SEDONA_HOME}/python;pip3 install .
+cp "${SEDONA_HOME}"/spark-shaded/target/sedona-spark-shaded-*.jar "${SPARK_HOME}"/jars/
+cd "${SEDONA_HOME}"/python;pip3 install .
 else
 # Code to execute when SEDONA_VERSION is not "latest"
 # Download Sedona
-curl https://repo1.maven.org/maven2/org/apache/sedona/sedona-spark-shaded-${sedona_spark_version}_2.12/${sedona_version}/sedona-spark-shaded-${sedona_spark_version}_2.12-${sedona_version}.jar -o $SPARK_HOME/jars/sedona-spark-shaded-${sedona_spark_version}_2.12-${sedona_version}.jar
+curl https://repo1.maven.org/maven2/org/apache/sedona/sedona-spark-shaded-"${sedona_spark_version}"_2.12/"${sedona_version}"/sedona-spark-shaded-"${sedona_spark_version}"_2.12-"${sedona_version}".jar -o "$SPARK_HOME"/jars/sedona-spark-shaded-"${sedona_spark_version}"_2.12-"${sedona_version}".jar

 # Install Sedona Python
-pip3 install apache-sedona==${sedona_version}
+pip3 install apache-sedona=="${sedona_version}"
 fi

 # Download gresearch spark extension
-curl https://repo1.maven.org/maven2/uk/co/gresearch/spark/spark-extension_2.12/${spark_extension_version}-${spark_compat_version}/spark-extension_2.12-${spark_extension_version}-${spark_compat_version}.jar -o $SPARK_HOME/jars/spark-extension_2.12-${spark_extension_version}-${spark_compat_version}.jar
+curl https://repo1.maven.org/maven2/uk/co/gresearch/spark/spark-extension_2.12/"${spark_extension_version}"-"${spark_compat_version}"/spark-extension_2.12-"${spark_extension_version}"-"${spark_compat_version}".jar -o "$SPARK_HOME"/jars/spark-extension_2.12-"${spark_extension_version}"-"${spark_compat_version}".jar

 # Install Spark extension Python
-pip3 install pyspark-extension==${spark_extension_version}.${spark_compat_version}
+pip3 install pyspark-extension=="${spark_extension_version}"."${spark_compat_version}"

 # Download GeoTools jar
-curl https://repo1.maven.org/maven2/org/datasyslab/geotools-wrapper/${geotools_wrapper_version}/geotools-wrapper-${geotools_wrapper_version}.jar -o $SPARK_HOME/jars/geotools-wrapper-${geotools_wrapper_version}.jar
+curl https://repo1.maven.org/maven2/org/datasyslab/geotools-wrapper/"${geotools_wrapper_version}"/geotools-wrapper-"${geotools_wrapper_version}".jar -o "$SPARK_HOME"/jars/geotools-wrapper-"${geotools_wrapper_version}".jar
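Note that these curl and pip lines quote only the expansions, not the whole argument. In shell, adjacent quoted and unquoted parts of a word concatenate before the command sees them, so partial quoting produces the same single argument as quoting the entire string (illustrative sketch with a made-up version number):

    version=1.7.0
    echo geotools-wrapper-"${version}".jar    # one argument: geotools-wrapper-1.7.0.jar
    echo "geotools-wrapper-${version}.jar"    # identical result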
16 changes: 8 additions & 8 deletions docker/spark.sh
@@ -30,20 +30,20 @@ apt-get install -y openjdk-19-jdk-headless curl python3-pip maven
 pip3 install --upgrade pip && pip3 install pipenv

 # Download Spark jar and set up PySpark
-curl https://archive.apache.org/dist/spark/spark-${spark_version}/spark-${spark_version}-bin-hadoop${hadoop_version}.tgz -o spark.tgz
-tar -xf spark.tgz && mv spark-${spark_version}-bin-hadoop${hadoop_version}/* ${SPARK_HOME}/
-rm spark.tgz && rm -rf spark-${spark_version}-bin-hadoop${hadoop_version}
-pip3 install pyspark==${spark_version}
+curl https://archive.apache.org/dist/spark/spark-"${spark_version}"/spark-"${spark_version}"-bin-hadoop"${hadoop_version}".tgz -o spark.tgz
+tar -xf spark.tgz && mv spark-"${spark_version}"-bin-hadoop"${hadoop_version}"/* "${SPARK_HOME}"/
+rm spark.tgz && rm -rf spark-"${spark_version}"-bin-hadoop"${hadoop_version}"
+pip3 install pyspark=="${spark_version}"

 # Add S3 jars
-curl https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/${hadoop_s3_version}/hadoop-aws-${hadoop_s3_version}.jar -o ${SPARK_HOME}/jars/hadoop-aws-${hadoop_s3_version}.jar
-curl https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/${aws_sdk_version}/aws-java-sdk-bundle-${aws_sdk_version}.jar -o ${SPARK_HOME}/jars/aws-java-sdk-bundle-${aws_sdk_version}.jar
+curl https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/"${hadoop_s3_version}"/hadoop-aws-"${hadoop_s3_version}".jar -o "${SPARK_HOME}"/jars/hadoop-aws-"${hadoop_s3_version}".jar
+curl https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/"${aws_sdk_version}"/aws-java-sdk-bundle-"${aws_sdk_version}".jar -o "${SPARK_HOME}"/jars/aws-java-sdk-bundle-"${aws_sdk_version}".jar

 # Add spark-xml jar
-curl https://repo1.maven.org/maven2/com/databricks/spark-xml_2.12/${spark_xml_version}/spark-xml_2.12-${spark_xml_version}.jar -o ${SPARK_HOME}/jars/spark-xml_2.12-${spark_xml_version}.jar
+curl https://repo1.maven.org/maven2/com/databricks/spark-xml_2.12/"${spark_xml_version}"/spark-xml_2.12-"${spark_xml_version}".jar -o "${SPARK_HOME}"/jars/spark-xml_2.12-"${spark_xml_version}".jar

 # Set up master IP address and executor memory
-cp ${SPARK_HOME}/conf/spark-defaults.conf.template ${SPARK_HOME}/conf/spark-defaults.conf
+cp "${SPARK_HOME}"/conf/spark-defaults.conf.template "${SPARK_HOME}"/conf/spark-defaults.conf

 # Install required libraries for GeoPandas on Apple chip mac
 apt-get install -y gdal-bin libgdal-dev