Skip to content

Commit dd0e40e

Browse files
authored
Merge branch 'main' into add-cuda-docker-images
2 parents 08c6bcd + 98436ca commit dd0e40e

File tree

550 files changed

+14838
-4544
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

550 files changed

+14838
-4544
lines changed

.github/workflows/build_pyvelox.yml

+16-26
Original file line numberDiff line numberDiff line change
@@ -46,15 +46,13 @@ concurrency:
4646
cancel-in-progress: true
4747

4848
jobs:
49-
# TODO: https://github.com/facebookincubator/velox/issues/9014
50-
if: false
5149
build_wheels:
5250
name: Build wheels on ${{ matrix.os }}
5351
runs-on: ${{ matrix.os }}
5452
strategy:
5553
fail-fast: false
5654
matrix:
57-
os: [ubuntu-22.04]
55+
os: [8-core-ubuntu]
5856
steps:
5957
- uses: actions/checkout@v3
6058
with:
@@ -92,6 +90,7 @@ jobs:
9290
9391
- run: mkdir -p ccache
9492
- name: "Restore ccache"
93+
if: false
9594
uses: actions/cache/restore@v3
9695
id: restore-cache
9796
with:
@@ -101,57 +100,48 @@ jobs:
101100
ccache-wheels-${{ matrix.os }}-
102101
103102
- name: Install macOS dependencies
104-
if: matrix.os == 'macos-11'
103+
if: startsWith(matrix.os, 'macos')
105104
run: |
106-
echo "OPENSSL_ROOT_DIR=/usr/local/opt/openssl@1.1/" >> $GITHUB_ENV
107-
bash scripts/setup-macos.sh &&
108-
bash scripts/setup-macos.sh install_folly
105+
export INSTALL_PREFIX="$GITHUB_WORKSPACE/dependencies"
106+
echo "CMAKE_PREFIX_PATH=$INSTALL_PREFIX" >> $GITHUB_ENV
107+
bash scripts/setup-macos.sh
109108
110109
- name: "Create sdist"
111-
if: matrix.os == 'ubuntu-22.04'
110+
if: matrix.os == '8-core-ubuntu'
112111
env:
113112
BUILD_VERSION: "${{ inputs.version || steps.version.outputs.build_version }}"
114113
run: |
115114
python setup.py sdist --dist-dir wheelhouse
116115
117116
- name: Build wheels
118-
uses: pypa/cibuildwheel@v2.22.0
117+
uses: pypa/cibuildwheel@v2.23.0
119118
env:
120119
# required for preadv/pwritev
121120
MACOSX_DEPLOYMENT_TARGET: "11.0"
122121
CIBW_ARCHS: "x86_64"
123-
# On PRs only build for Python 3.7
124-
CIBW_BUILD: ${{ github.event_name == 'pull_request' && 'cp37-*' || 'cp3*' }}
122+
# Only build for 3.12 for now
123+
CIBW_BUILD: ${{ github.event_name == 'pull_request' && 'cp312-*' || 'cp312-*' }}
125124
CIBW_SKIP: "*musllinux* cp36-*"
126-
CIBW_MANYLINUX_X86_64_IMAGE: "ghcr.io/facebookincubator/velox-dev:torcharrow-avx"
127-
CIBW_BEFORE_ALL_LINUX: >
128-
mkdir -p /output &&
129-
cp -R /host${{ github.workspace }}/ccache /output/ccache &&
130-
ccache -s
131-
CIBW_ENVIRONMENT_PASS_LINUX: CCACHE_DIR BUILD_VERSION
125+
CIBW_MANYLINUX_X86_64_IMAGE: "ghcr.io/facebookincubator/velox-dev:pyvelox"
126+
CIBW_ENVIRONMENT_PASS_LINUX: "CCACHE_DIR BUILD_VERSION CMAKE_PREFIX_PATH"
132127
CIBW_TEST_EXTRAS: "tests"
133128
CIBW_TEST_COMMAND: "cd {project}/pyvelox && python -m unittest -v"
134-
CIBW_TEST_SKIP: "*macos*"
135-
CCACHE_DIR: "${{ matrix.os != 'macos-11' && '/output' || github.workspace }}/ccache"
129+
CIBW_TEST_SKIP: "*"
130+
# CCACHE_DIR: "${{ matrix.os != 'macos-11' && '/output' || github.workspace }}/ccache"
136131
BUILD_VERSION: "${{ inputs.version || steps.version.outputs.build_version }}"
137132
with:
138133
output-dir: wheelhouse
139134

140-
- name: "Move ccache to workspace"
141-
if: matrix.os != 'macos-11'
142-
run: |
143-
mkdir -p ccache
144-
cp -R ./wheelhouse/ccache/* ccache
145-
146135
- name: "Save ccache"
136+
if: false
147137
uses: actions/cache/save@v3
148138
id: cache
149139
with:
150140
path: "ccache"
151141
key: ccache-wheels-${{ matrix.os }}-${{ github.sha }}
152142

153143
- name: "Rename wheel compatibility tag"
154-
if: matrix.os == 'macos-11'
144+
if: startsWith(matrix.os, 'macos')
155145
run: |
156146
brew install rename
157147
cd wheelhouse

.github/workflows/docker.yml

+3
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ jobs:
5151
- name: Centos 9
5252
file: "scripts/centos.dockerfile"
5353
tags: "ghcr.io/facebookincubator/velox-dev:centos9"
54+
- name: Pyvelox
55+
file: "scripts/pyvelox.dockerfile"
56+
tags: "ghcr.io/facebookincubator/velox-dev:pyvelox"
5457
- name: Dev
5558
file: "scripts/ubuntu-22.04-cpp.dockerfile"
5659
args: ""

.github/workflows/linux-build-base.yml

+2
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ jobs:
3939
GTest_SOURCE: BUNDLED
4040
simdjson_SOURCE: BUNDLED
4141
xsimd_SOURCE: BUNDLED
42+
geos_SOURCE: BUNDLED
4243
CUDA_VERSION: "12.8"
4344
USE_CLANG: "${{ inputs.use-clang && 'true' || 'false' }}"
4445
steps:
@@ -88,6 +89,7 @@ jobs:
8889
"-DVELOX_ENABLE_BENCHMARKS=ON"
8990
"-DVELOX_ENABLE_EXAMPLES=ON"
9091
"-DVELOX_ENABLE_ARROW=ON"
92+
"-DVELOX_ENABLE_GEO=ON"
9193
"-DVELOX_ENABLE_PARQUET=ON"
9294
"-DVELOX_ENABLE_HDFS=ON"
9395
"-DVELOX_ENABLE_S3=ON"

.github/workflows/scheduled.yml

+10-8
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,7 @@ jobs:
318318
uses: actions/upload-artifact@v4
319319
with:
320320
name: cache_fuzzer
321-
path: velox/_build/debug/velox/exec/tests/velox_cache_fuzzer_test
321+
path: velox/_build/debug/velox/exec/fuzzer/velox_cache_fuzzer
322322
retention-days: "${{ env.RETENTION }}"
323323

324324
- name: Upload table evolution fuzzer
@@ -751,6 +751,8 @@ jobs:
751751
container: ghcr.io/facebookincubator/velox-dev:centos9
752752
needs: compile
753753
timeout-minutes: 120
754+
# Temporarily disabled on PRs until the flakiness is fixed (see #12167)
755+
if: ${{ github.event_name != 'pull_request' }}
754756
steps:
755757

756758
- name: Download cache fuzzer
@@ -760,24 +762,24 @@ jobs:
760762

761763
- name: Run Cache Fuzzer
762764
run: |
763-
mkdir -p /tmp/cache_fuzzer_test/logs/
764-
chmod -R 777 /tmp/cache_fuzzer_test
765-
chmod +x velox_cache_fuzzer_test
766-
./velox_cache_fuzzer_test \
765+
mkdir -p /tmp/cache_fuzzer/logs/
766+
chmod -R 777 /tmp/cache_fuzzer
767+
chmod +x velox_cache_fuzzer
768+
./velox_cache_fuzzer \
767769
--seed ${RANDOM} \
768770
--duration_sec $DURATION \
769771
--minloglevel=0 \
770772
--stderrthreshold=2 \
771-
--log_dir=/tmp/cache_fuzzer_test/logs \
773+
--log_dir=/tmp/cache_fuzzer/logs \
772774
&& echo -e "\n\Cache fuzzer run finished successfully."
773775
774776
- name: Archive Cache production artifacts
775777
if: ${{ !cancelled() }}
776778
uses: actions/upload-artifact@v4
777779
with:
778-
name: cache-fuzzer-test-logs
780+
name: cache-fuzzer-logs
779781
path: |
780-
/tmp/cache_fuzzer_test
782+
/tmp/cache_fuzzer
781783
782784
table-evolution-fuzzer-run:
783785
name: Table Evolution Fuzzer

CMake/resolve_dependency_modules/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ by Velox. See details on bundling below.
4141
| DuckDB (testing) | 0.8.1 | Yes |
4242
| cpr (testing) | 1.10.15 | Yes |
4343
| arrow | 15.0.0 | Yes |
44+
| geos | 3.13.0 | Yes |
4445

4546
# Bundled Dependency Management
4647
This module provides a dependency management system that allows us to automatically fetch and build dependencies from source if needed.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Copyright (c) Facebook, Inc. and its affiliates.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
include_guard(GLOBAL)
15+
16+
# GEOS Configuration
17+
set(VELOX_GEOS_BUILD_VERSION 3.13.0)
18+
set(VELOX_GEOS_BUILD_SHA256_CHECKSUM
19+
47ec83ff334d672b9e4426695f15da6e6368244214971fabf386ff8ef6df39e4)
20+
string(CONCAT VELOX_GEOS_SOURCE_URL "https://download.osgeo.org/geos/"
21+
"geos-${VELOX_GEOS_BUILD_VERSION}.tar.bz2")
22+
23+
velox_resolve_dependency_url(GEOS)
24+
25+
FetchContent_Declare(
26+
geos
27+
URL ${VELOX_GEOS_SOURCE_URL}
28+
URL_HASH ${VELOX_GEOS_BUILD_SHA256_CHECKSUM})
29+
set(BUILD_SHARED_LIBS ${VELOX_BUILD_SHARED})
30+
FetchContent_MakeAvailable(geos)
31+
unset(BUILD_SHARED_LIBS)

CMake/resolve_dependency_modules/gflags.cmake

+13
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,20 @@ set(GFLAGS_IS_SUBPROJECT ON)
4343

4444
# Workaround for https://github.com/gflags/gflags/issues/277
4545
unset(BUILD_SHARED_LIBS)
46+
if(DEFINED CACHE{BUILD_SHARED_LIBS})
47+
set(CACHED_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
48+
unset(BUILD_SHARED_LIBS CACHE)
49+
endif()
50+
4651
FetchContent_MakeAvailable(gflags)
52+
53+
# Restore the cached BUILD_SHARED_LIBS that was unset for the workaround for
# https://github.com/gflags/gflags/issues/277
54+
if(DEFINED CACHED_BUILD_SHARED_LIBS)
55+
set(BUILD_SHARED_LIBS
56+
${CACHED_BUILD_SHARED_LIBS}
57+
CACHE BOOL "Restored after setting up gflags" FORCE)
58+
endif()
59+
4760
# This causes find_package(gflags) in other dependencies to search in the build
4861
# directory and prevents the system gflags from being found when they don't use
4962
# the target directly (like folly).

CMake/resolve_dependency_modules/gtest.cmake

+2-2
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,12 @@ velox_resolve_dependency_url(GTEST)
2424

2525
message(STATUS "Building gtest from source")
2626
FetchContent_Declare(
27-
gtest
27+
googletest
2828
URL ${VELOX_GTEST_SOURCE_URL}
2929
URL_HASH ${VELOX_GTEST_BUILD_SHA256_CHECKSUM}
3030
OVERRIDE_FIND_PACKAGE SYSTEM EXCLUDE_FROM_ALL)
3131

32-
FetchContent_MakeAvailable(gtest)
32+
FetchContent_MakeAvailable(googletest)
3333

3434
# Mask compilation warning in clang 16.
3535
target_compile_options(gtest PRIVATE -Wno-implicit-int-float-conversion)

CMakeLists.txt

+11-2
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ option(
7979
option(VELOX_MONO_LIBRARY "Build single unified library." OFF)
8080
option(ENABLE_ALL_WARNINGS "Enable -Wall and -Wextra compiler warnings." ON)
8181
option(VELOX_BUILD_SHARED "Build Velox as shared libraries." OFF)
82+
option(VELOX_SKIP_WAVE_BRANCH_KERNEL_TEST "Disable Wave branch kernel test."
83+
OFF)
8284
# While it's possible to build both in one go we currently want to build either
8385
# static or shared.
8486
cmake_dependent_option(
@@ -138,6 +140,7 @@ option(VELOX_ENABLE_ABFS "Build Abfs Connector" OFF)
138140
option(VELOX_ENABLE_HDFS "Build Hdfs Connector" OFF)
139141
option(VELOX_ENABLE_PARQUET "Enable Parquet support" ON)
140142
option(VELOX_ENABLE_ARROW "Enable Arrow support" OFF)
143+
option(VELOX_ENABLE_GEO "Enable Geospatial support" OFF)
141144
option(VELOX_ENABLE_REMOTE_FUNCTIONS "Enable remote function support" OFF)
142145
option(VELOX_ENABLE_CCACHE "Use ccache if installed." ON)
143146

@@ -203,6 +206,7 @@ if(${VELOX_BUILD_PYTHON_PACKAGE})
203206
set(VELOX_ENABLE_AGGREGATES ON)
204207
set(VELOX_ENABLE_SPARK_FUNCTIONS ON)
205208
set(VELOX_BUILD_RUNNER OFF)
209+
set(VELOX_BUILD_TEST_UTILS ON) # required for velox_exec_test_lib
206210
endif()
207211

208212
if(${VELOX_ENABLE_DUCKDB})
@@ -493,7 +497,7 @@ if(${VELOX_BUILD_MINIMAL_WITH_DWIO}
493497

494498
# Locate or build protobuf.
495499
velox_set_source(Protobuf)
496-
velox_resolve_dependency(Protobuf CONFIG 3.21.7 REQUIRED)
500+
velox_resolve_dependency(Protobuf 3.21.7 REQUIRED)
497501
include_directories(${Protobuf_INCLUDE_DIRS})
498502
endif()
499503

@@ -594,7 +598,7 @@ if(NOT VELOX_DISABLE_GOOGLETEST)
594598
velox_set_source(GTest)
595599
velox_resolve_dependency(GTest)
596600
set(VELOX_GTEST_INCUDE_DIR
597-
"${gtest_SOURCE_DIR}/googletest/include"
601+
"${gtest_SOURCE_DIR}/include"
598602
PARENT_SCOPE)
599603
endif()
600604

@@ -626,4 +630,9 @@ if(VELOX_ENABLE_ARROW)
626630
velox_resolve_dependency(Arrow)
627631
endif()
628632

633+
if(VELOX_ENABLE_GEO)
634+
velox_set_source(geos)
635+
velox_resolve_dependency(geos)
636+
endif()
637+
629638
add_subdirectory(velox)

CONTRIBUTING.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,7 @@ with a benchmark.
287287
line arguments.
288288

289289
```
290-
# Test the new function in isolation. Use --only flag to restrict the set of functions
290+
# Test the new function in isolation. Use --only flag to restrict the set of functions
291291
# and run for 60 seconds or longer.
292292
velox_expression_fuzzer_test --only <my-new-function-name> --duration_sec 60 --logtostderr=1 --enable_variadic_signatures --velox_fuzzer_enable_complex_types --velox_fuzzer_enable_decimal_type --lazy_vector_generation_ratio 0.2 --velox_fuzzer_enable_column_reuse --velox_fuzzer_enable_expression_reuse
293293

scripts/pyvelox.dockerfile

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Copyright (c) Facebook, Inc. and its affiliates.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
# Build the manylinux-based container used to build the PyVelox wheels
15+
FROM quay.io/pypa/manylinux_2_28_x86_64:latest
16+
17+
COPY scripts/setup-helper-functions.sh /
18+
COPY scripts/setup-manylinux.sh /
19+
20+
# Build static folly to reduce wheel size (folly.so is ~120M)
21+
ENV VELOX_BUILD_SHARED=OFF
22+
# The removal of the build dir has to happen in the same layer as the build
23+
# to minimize the image size. gh & jq are required for CI
24+
RUN mkdir build && ( cd build && bash /setup-manylinux.sh ) && rm -rf build && \
25+
dnf install -y -q 'dnf-command(config-manager)' && \
26+
dnf config-manager --add-repo 'https://cli.github.com/packages/rpm/gh-cli.repo' && \
27+
dnf install -y -q gh jq && \
28+
dnf clean all
29+
30+
ENV CC=/opt/rh/gcc-toolset-12/root/bin/gcc \
31+
CXX=/opt/rh/gcc-toolset-12/root/bin/g++
32+
33+
ENTRYPOINT ["/bin/bash", "-c", "source /opt/rh/gcc-toolset-12/enable && exec \"$@\"", "--"]
34+
CMD ["/bin/bash"]

scripts/setup-centos9.sh

+11-1
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ export CFLAGS=${CXXFLAGS//"-std=c++17"/} # Used by LZO.
3636
CMAKE_BUILD_TYPE="${BUILD_TYPE:-Release}"
3737
VELOX_BUILD_SHARED=${VELOX_BUILD_SHARED:-"OFF"} #Build folly and gflags shared for use in libvelox.so.
3838
BUILD_DUCKDB="${BUILD_DUCKDB:-true}"
39+
BUILD_GEOS="${BUILD_GEOS:-true}"
3940
USE_CLANG="${USE_CLANG:-false}"
4041
export INSTALL_PREFIX=${INSTALL_PREFIX:-"/usr/local"}
4142
DEPENDENCY_DIR=${DEPENDENCY_DIR:-$(pwd)/deps-download}
@@ -48,6 +49,7 @@ THRIFT_VERSION="v0.16.0"
4849
ARROW_VERSION="15.0.0"
4950
STEMMER_VERSION="2.2.0"
5051
DUCKDB_VERSION="v0.8.1"
52+
GEOS_VERSION="3.13.0"
5153

5254
function dnf_install {
5355
dnf install -y -q --setopt=install_weak_deps=False "$@"
@@ -175,7 +177,7 @@ function install_mvfst {
175177
}
176178

177179
function install_duckdb {
178-
if $BUILD_DUCKDB ; then
180+
if [[ "$BUILD_DUCKDB" == "true" ]]; then
179181
echo 'Building DuckDB'
180182
wget_and_untar https://github.com/duckdb/duckdb/archive/refs/tags/${DUCKDB_VERSION}.tar.gz duckdb
181183
cmake_install_dir duckdb -DBUILD_UNITTESTS=OFF -DENABLE_SANITIZER=OFF -DENABLE_UBSAN=OFF -DBUILD_SHELL=OFF -DEXPORT_DLL_SYMBOLS=OFF -DCMAKE_BUILD_TYPE=Release
@@ -239,6 +241,13 @@ function install_cuda {
239241
cuda-nvrtc-devel-$dashed
240242
}
241243

244+
function install_geos {
245+
if [[ "$BUILD_GEOS" == "true" ]]; then
246+
wget_and_untar https://github.com/libgeos/geos/archive/${GEOS_VERSION}.tar.gz geos
247+
cmake_install_dir geos -DBUILD_TESTING=OFF
248+
fi
249+
}
250+
242251
function install_velox_deps {
243252
run_and_time install_velox_deps_from_dnf
244253
run_and_time install_conda
@@ -258,6 +267,7 @@ function install_velox_deps {
258267
run_and_time install_stemmer
259268
run_and_time install_thrift
260269
run_and_time install_arrow
270+
run_and_time install_geos
261271
}
262272

263273
(return 2> /dev/null) && return # If script was sourced, don't run commands.

0 commit comments

Comments
 (0)