Skip to content

Commit 5d4db25

Browse files
kgpaifacebook-github-bot
authored andcommitted
Add support for PyVelox - python velox bindings. (facebookincubator#3077)
Summary: This PR adds : 1. Creates the PyVelox python package/ extension 2. Only has basic type definitions currently and related tests 3. Supports creation of the package via setup.py 4. Note: Some of pybind template magic requires type definitions to be in header since otherwise we see weird errors in metas internal systems. I will follow up subsequently with PRs that add: 1. CI support to Build python package on every PR 2. Creation of vectors and registration of udfs etc. (in collaboration with Voltron). 3. Expression eval. If you have python installed, you can pull this PR and try out the build as follows: ``` $ DEBUG=1 python setup.py develop kpai@kpai-mbp /Users/kpai/src/Velox [pyvelox2]% python Python 3.9.7 (default, Sep 16 2021, 08:50:36) [Clang 10.0.0 ] :: Anaconda, Inc. on darwin Type "help", "copyright", "credits" or "license" for more information. >>> import pyvelox.pyvelox as pv >>> pv.BooleanType() <pyvelox.pyvelox.BooleanType object at 0x7fc190121830> >>> ``` Pull Request resolved: facebookincubator#3077 Reviewed By: pedroerp Differential Revision: D41475596 Pulled By: kgpai fbshipit-source-id: 09ea8cb33a95a9cfb42eda8cd900dab52f6b96a1
1 parent d188613 commit 5d4db25

11 files changed

+603
-2
lines changed

CMakeLists.txt

+20-2
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,24 @@ if(${VELOX_ENABLE_BENCHMARKS} OR ${VELOX_ENABLE_BENCHMARKS_BASIC})
110110
set(VELOX_BUILD_TEST_UTILS ON)
111111
endif()
112112

113+
if(${VELOX_BUILD_PYTHON_PACKAGE})
114+
set(VELOX_BUILD_TESTING OFF)
115+
set(VELOX_ENABLE_PRESTO_FUNCTIONS ON)
116+
set(VELOX_ENABLE_DUCKDB OFF)
117+
set(VELOX_ENABLE_EXPRESSION ON)
118+
set(VELOX_ENABLE_PARSE OFF)
119+
set(VELOX_ENABLE_EXEC OFF)
120+
set(VELOX_ENABLE_AGGREGATES OFF)
121+
set(VELOX_ENABLE_HIVE_CONNECTOR OFF)
122+
set(VELOX_ENABLE_TPCH_CONNECTOR OFF)
123+
set(VELOX_ENABLE_SPARK_FUNCTIONS OFF)
124+
set(VELOX_ENABLE_EXAMPLES OFF)
125+
set(VELOX_ENABLE_S3 OFF)
126+
set(VELOX_ENABLE_SUBSTRAIT OFF)
127+
set(VELOX_CODEGEN_SUPPORT OFF)
128+
set(VELOX_ENABLE_BENCHMARKS_BASIC OFF)
129+
endif()
130+
113131
if(VELOX_ENABLE_S3)
114132
# Set AWS_ROOT_DIR if you have a custom install location of AWS SDK CPP.
115133
if(AWSSDK_ROOT_DIR)
@@ -291,10 +309,10 @@ if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
291309
link_directories("${ICU_INCLUDE_DIRS}/../lib")
292310
endif()
293311

294-
if(VELOX_BUILD_PYTHON_PACKAGE)
295-
message(STATUS "Adding pybind11")
312+
if(${VELOX_BUILD_PYTHON_PACKAGE})
296313
set(pybind11_SOURCE AUTO)
297314
resolve_dependency(pybind11 REQUIRED_VERSION 2.10.0)
315+
add_subdirectory(pyvelox)
298316
endif()
299317

300318
# Locate or build folly.

Makefile

+11
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ CPU_TARGET ?= "avx"
6262
FUZZER_SEED ?= 123456
6363
FUZZER_DURATION_SEC ?= 60
6464

65+
PYTHON_EXECUTABLE ?= $(shell which python)
66+
6567
all: release #: Build the release version
6668

6769
clean: #: Delete all build artifacts
@@ -145,3 +147,12 @@ help: #: Show the help messages
145147
@cat $(firstword $(MAKEFILE_LIST)) | \
146148
awk '/^[-a-z]+:/' | \
147149
awk -F: '{ printf("%-20s %s\n", $$1, $$NF) }'
150+
151+
# PyVelox helper targets. None of them produces a file named after the target,
# so they are declared phony. The trailing "#:" text is what the `help` target
# prints as the description (it shows the last ':'-separated field of the line),
# so descriptions must not contain a colon.
.PHONY: python-clean python-build python-test

python-clean:			#: Delete the PyVelox build artifacts
	DEBUG=1 ${PYTHON_EXECUTABLE} setup.py clean

python-build:			#: Build PyVelox in development (debug) mode
	DEBUG=1 ${PYTHON_EXECUTABLE} setup.py develop

python-test: python-build	#: Build PyVelox and run its Python unit tests
	DEBUG=1 ${PYTHON_EXECUTABLE} -m unittest -v

pyvelox/CMakeLists.txt

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Copyright (c) Facebook, Inc. and its affiliates.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# Two flavours of the `pyvelox` target, selected by VELOX_BUILD_PYTHON_PACKAGE:
# a Python extension module (ON) or a plain library (otherwise).
if(VELOX_BUILD_PYTHON_PACKAGE)
16+
message("Creating pyvelox module")
17+
# Make headers addressable relative to the top-level source directory.
include_directories(SYSTEM ${CMAKE_SOURCE_DIR})
18+
# CREATE_PYVELOX_MODULE enables the PYBIND11_MODULE entry point that is guarded
# by #ifdef in pyvelox.cpp.
add_definitions(-DCREATE_PYVELOX_MODULE)
19+
# Define our Python module:
20+
pybind11_add_module(pyvelox MODULE pyvelox.cpp pyvelox.h)
21+
22+
# Link with Velox:
23+
target_link_libraries(pyvelox PRIVATE velox_type)
24+
25+
# Install the built module directly into the root of the install prefix.
install(TARGETS pyvelox LIBRARY DESTINATION .)
26+
else()
27+
# Torcharrow will not use pyvelox as an extension module for compatibility
28+
# reasons.
29+
message("Creating pyvelox library")
30+
# CREATE_PYVELOX_MODULE is not defined on this path, so only the binding
# helpers are compiled and no module entry point is emitted.
add_library(pyvelox pyvelox.cpp pyvelox.h)
31+
target_link_libraries(pyvelox velox_type pybind11::module)
32+
endif()

pyvelox/README.md

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# PyVelox: Python bindings and extensions for Velox
2+
3+
**This library is currently in Alpha stage and does not have a stable release. The API and implementation may change based on
4+
user feedback or performance. Future changes may not be backward compatible.
5+
If you have suggestions on the API or use cases you'd like to be covered, please open a
6+
GitHub issue. We'd love to hear thoughts and feedback.**
7+
8+
9+
## Prerequisites
10+
11+
You will need Python 3.7 or later. Also, we highly recommend installing a [Miniconda](https://docs.conda.io/en/latest/miniconda.html#latest-miniconda-installer-links) environment.
12+
13+
First, set up an environment. If you are using conda, create a conda environment:
14+
```
15+
conda create --name pyveloxenv python=3.7
16+
conda activate pyveloxenv
17+
```
18+
19+
20+
### From Source
21+
22+
Currently PyVelox can only be built from source. You will need Python 3.7 or later and a C++17 compiler.
23+
24+
25+
#### Install Dependencies
26+
27+
On macOS
28+
29+
[HomeBrew](https://brew.sh/) is required to install development tools on macOS.
30+
Run the script referenced [here](https://github.com/facebookincubator/velox#setting-up-on-macos) to install all the macOS-specific dependencies.
31+
32+
On Linux
33+
Run the script referenced [here](https://github.com/facebookincubator/velox#setting-up-on-linux-ubuntu-2004-or-later) to install the dependencies on Linux.
34+
35+
36+
#### Install PyVelox
37+
For local development, you can build with debug mode:
38+
```
39+
DEBUG=1 python setup.py develop
40+
```
41+
42+
And run unit tests with
43+
```
44+
python -m unittest -v
45+
```

pyvelox/__init__.py

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright (c) Facebook, Inc. and its affiliates.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.

pyvelox/pyvelox.cpp

+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/*
2+
* Copyright (c) Facebook, Inc. and its affiliates.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include "pyvelox.h" // @manual
18+
19+
namespace facebook::velox::py {
20+
using namespace velox;
21+
namespace py = pybind11;
22+
23+
/// Renders `type` as JSON text: the object produced by Type::serialize() is
/// passed to folly::json::serialize() together with
/// velox::getSerializationOptions().
///
/// @param type Type to serialize; it is dereferenced unconditionally.
/// @return The JSON string.
std::string serializeType(const std::shared_ptr<const velox::Type>& type) {
  const auto serialized = type->serialize();
  const auto& options = velox::getSerializationOptions();
  return folly::json::serialize(serialized, options);
}
27+
28+
// Python extension entry point. It is compiled only when CREATE_PYVELOX_MODULE
// is defined; otherwise this file contributes serializeType() alone.
#ifdef CREATE_PYVELOX_MODULE
29+
// Defines the native module named `pyvelox`; `m` is the module being populated.
PYBIND11_MODULE(pyvelox, m) {
30+
// Module-level docstring.
m.doc() = R"pbdoc(
31+
PyVelox native code module
32+
-----------------------
33+
)pbdoc";
34+
35+
// Register the Velox type bindings; asLocalModule keeps its default (true).
addVeloxBindings(m);
36+
37+
// The version string is hard-coded to "dev".
m.attr("__version__") = "dev";
38+
}
39+
#endif
40+
} // namespace facebook::velox::py

pyvelox/pyvelox.h

+171
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
/*
2+
* Copyright (c) Facebook, Inc. and its affiliates.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#pragma once
18+
19+
#include <pybind11/pybind11.h>
20+
#include <pybind11/stl.h>
21+
#include <pybind11/stl_bind.h>
22+
#include <velox/type/Type.h>
23+
#include "folly/json.h"
24+
25+
namespace facebook::velox::py {
26+
27+
std::string serializeType(const std::shared_ptr<const velox::Type>& type);
28+
29+
/// Adds Velox Python Bindings to the module m.
30+
///
31+
/// This function adds the following bindings:
32+
/// * velox::TypeKind enum
33+
/// * velox::Type and its derived types
34+
/// * Basic functions on Type and its derived types.
35+
///
36+
/// @param m Module to add bindings too.
37+
/// @param asLocalModule If true then these bindings are only visible inside
38+
/// the module. Refer to
39+
/// https://pybind11.readthedocs.io/en/stable/advanced/classes.html#module-local-class-bindings
40+
/// for further details.
41+
inline void addVeloxBindings(pybind11::module& m, bool asLocalModule = true) {
42+
// Inlining these bindings since adding them to the cpp file results in a
43+
// ASAN error.
44+
using namespace velox;
45+
namespace py = pybind11;
46+
47+
// Add TypeKind enum.
48+
py::enum_<velox::TypeKind>(m, "TypeKind", py::module_local(asLocalModule))
49+
.value("BOOLEAN", velox::TypeKind::BOOLEAN)
50+
.value("TINYINT", velox::TypeKind::TINYINT)
51+
.value("SMALLINT", velox::TypeKind::SMALLINT)
52+
.value("INTEGER", velox::TypeKind::INTEGER)
53+
.value("BIGINT", velox::TypeKind::BIGINT)
54+
.value("REAL", velox::TypeKind::REAL)
55+
.value("DOUBLE", velox::TypeKind::DOUBLE)
56+
.value("VARCHAR", velox::TypeKind::VARCHAR)
57+
.value("VARBINARY", velox::TypeKind::VARBINARY)
58+
.value("TIMESTAMP", velox::TypeKind::TIMESTAMP)
59+
.value("OPAQUE", velox::TypeKind::OPAQUE)
60+
.value("ARRAY", velox::TypeKind::ARRAY)
61+
.value("MAP", velox::TypeKind::MAP)
62+
.value("ROW", velox::TypeKind::ROW)
63+
.export_values();
64+
65+
// Create VeloxType bound to velox::Type.
66+
py::class_<Type, std::shared_ptr<Type>> type(
67+
m, "VeloxType", py::module_local(asLocalModule));
68+
69+
// Adding all the derived types of Type here.
70+
py::class_<BooleanType, Type, std::shared_ptr<BooleanType>> booleanType(
71+
m, "BooleanType", py::module_local(asLocalModule));
72+
py::class_<IntegerType, Type, std::shared_ptr<IntegerType>> integerType(
73+
m, "IntegerType", py::module_local(asLocalModule));
74+
py::class_<BigintType, Type, std::shared_ptr<BigintType>> bigintType(
75+
m, "BigintType", py::module_local(asLocalModule));
76+
py::class_<SmallintType, Type, std::shared_ptr<SmallintType>> smallintType(
77+
m, "SmallintType", py::module_local(asLocalModule));
78+
py::class_<TinyintType, Type, std::shared_ptr<TinyintType>> tinyintType(
79+
m, "TinyintType", py::module_local(asLocalModule));
80+
py::class_<RealType, Type, std::shared_ptr<RealType>> realType(
81+
m, "RealType", py::module_local(asLocalModule));
82+
py::class_<DoubleType, Type, std::shared_ptr<DoubleType>> doubleType(
83+
m, "DoubleType", py::module_local(asLocalModule));
84+
py::class_<TimestampType, Type, std::shared_ptr<TimestampType>> timestampType(
85+
m, "TimestampType", py::module_local(asLocalModule));
86+
py::class_<VarcharType, Type, std::shared_ptr<VarcharType>> varcharType(
87+
m, "VarcharType", py::module_local(asLocalModule));
88+
py::class_<VarbinaryType, Type, std::shared_ptr<VarbinaryType>> varbinaryType(
89+
m, "VarbinaryType", py::module_local(asLocalModule));
90+
py::class_<ArrayType, Type, std::shared_ptr<ArrayType>> arrayType(
91+
m, "ArrayType", py::module_local(asLocalModule));
92+
py::class_<MapType, Type, std::shared_ptr<MapType>> mapType(
93+
m, "MapType", py::module_local(asLocalModule));
94+
py::class_<RowType, Type, std::shared_ptr<RowType>> rowType(
95+
m, "RowType", py::module_local(asLocalModule));
96+
py::class_<FixedSizeArrayType, Type, std::shared_ptr<FixedSizeArrayType>>
97+
fixedArrayType(m, "FixedSizeArrayType", py::module_local(asLocalModule));
98+
99+
// Basic operations on Type.
100+
type.def("__str__", &Type::toString);
101+
// Gcc doesnt support the below kind of templatization.
102+
#if defined(__clang__)
103+
// Adds equality and inequality comparison operators.
104+
type.def(py::self == py::self);
105+
type.def(py::self != py::self);
106+
#endif
107+
type.def(
108+
"cpp_size_in_bytes",
109+
&Type::cppSizeInBytes,
110+
"Return the C++ size in bytes");
111+
type.def(
112+
"is_fixed_width",
113+
&Type::isFixedWidth,
114+
"Check if the type is fixed width");
115+
type.def(
116+
"is_primitive_type",
117+
&Type::isPrimitiveType,
118+
"Check if the type is a primitive type");
119+
type.def("kind", &Type::kind, "Returns the kind of the type");
120+
type.def("serialize", &serializeType, "Serializes the type as JSON");
121+
122+
booleanType.def(py::init());
123+
tinyintType.def(py::init());
124+
smallintType.def(py::init());
125+
integerType.def(py::init());
126+
bigintType.def(py::init());
127+
realType.def(py::init());
128+
doubleType.def(py::init());
129+
varcharType.def(py::init());
130+
varbinaryType.def(py::init());
131+
timestampType.def(py::init());
132+
arrayType.def(py::init<std::shared_ptr<Type>>());
133+
arrayType.def(
134+
"element_type", &ArrayType::elementType, "Return the element type");
135+
fixedArrayType.def(py::init<int, velox::TypePtr>())
136+
.def("element_type", &velox::FixedSizeArrayType::elementType)
137+
.def("fixed_width", &velox::FixedSizeArrayType::fixedElementsWidth);
138+
mapType.def(py::init<std::shared_ptr<Type>, std::shared_ptr<Type>>());
139+
mapType.def("key_type", &MapType::keyType, "Return the key type");
140+
mapType.def("value_type", &MapType::valueType, "Return the value type");
141+
142+
rowType.def(py::init<
143+
std::vector<std::string>,
144+
std::vector<std::shared_ptr<const Type>>>());
145+
rowType.def("size", &RowType::size, "Return the number of columns");
146+
rowType.def(
147+
"child_at",
148+
&RowType::childAt,
149+
"Return the type of the column at a given index",
150+
py::arg("idx"));
151+
rowType.def(
152+
"find_child",
153+
[](const std::shared_ptr<RowType>& type, const std::string& name) {
154+
return type->findChild(name);
155+
},
156+
"Return the type of the column with the given name",
157+
py::arg("name"));
158+
rowType.def(
159+
"get_child_idx",
160+
&RowType::getChildIdx,
161+
"Return the index of the column with the given name",
162+
py::arg("name"));
163+
rowType.def(
164+
"name_of",
165+
&RowType::nameOf,
166+
"Return the name of the column at the given index",
167+
py::arg("idx"));
168+
rowType.def("names", &RowType::names, "Return the names of the columns");
169+
}
170+
171+
} // namespace facebook::velox::py

pyvelox/test/__init__.py

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright (c) Facebook, Inc. and its affiliates.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.

0 commit comments

Comments
 (0)