Skip to content

Commit 0bb7e64

Browse files
Yohahaha and facebook-github-bot
authored and committed
refactor(sparksql): Speed up sparksql compilation by splitting function registrations (facebookincubator#11565)
Summary: This PR speeds up sparksql compilation by splitting function registrations into multiple source files, grouped by function type. It adds a new 'velox_functions_spark' target for the registrations and renames the previous 'velox_functions_spark' to 'velox_functions_spark_impl'. Compilation time was measured with the `velox_functions_spark_test` target to mimic the usual development loop: build -> modify a cpp file -> build. Compilation is about 1.5x faster (165s to 104s) in release mode, and the speedup is larger in debug mode. Fixes facebookincubator#11564. Pull Request resolved: facebookincubator#11565 Reviewed By: miaoever, kagamiori Differential Revision: D66688101 Pulled By: xiaoxmeng fbshipit-source-id: 54ba372f08c4ec91062b3d07e8e2b81aabbdef59
1 parent 46fd360 commit 0bb7e64

35 files changed

+947
-778
lines changed

pyvelox/signatures.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919
#include "velox/functions/FunctionRegistry.h"
2020
#include "velox/functions/prestosql/aggregates/RegisterAggregateFunctions.h"
2121
#include "velox/functions/prestosql/registration/RegistrationFunctions.h"
22-
#include "velox/functions/sparksql/Register.h"
2322
#include "velox/functions/sparksql/aggregates/Register.h"
23+
#include "velox/functions/sparksql/registration/Register.h"
2424

2525
namespace facebook::velox::py {
2626

velox/expression/fuzzer/SparkExpressionFuzzerTest.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,12 @@
2424

2525
#include "velox/exec/fuzzer/ReferenceQueryRunner.h"
2626
#include "velox/expression/fuzzer/FuzzerRunner.h"
27-
#include "velox/functions/sparksql/Register.h"
2827
#include "velox/functions/sparksql/fuzzer/AddSubtractArgGenerator.h"
2928
#include "velox/functions/sparksql/fuzzer/DivideArgGenerator.h"
3029
#include "velox/functions/sparksql/fuzzer/MakeTimestampArgGenerator.h"
3130
#include "velox/functions/sparksql/fuzzer/MultiplyArgGenerator.h"
3231
#include "velox/functions/sparksql/fuzzer/UnscaledValueArgGenerator.h"
32+
#include "velox/functions/sparksql/registration/Register.h"
3333

3434
using namespace facebook::velox::functions::sparksql::fuzzer;
3535
using facebook::velox::fuzzer::ArgGenerator;

velox/expression/tests/ExpressionRunnerTest.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
#include "velox/exec/fuzzer/ReferenceQueryRunner.h"
2828
#include "velox/expression/tests/ExpressionVerifier.h"
2929
#include "velox/functions/prestosql/registration/RegistrationFunctions.h"
30-
#include "velox/functions/sparksql/Register.h"
30+
#include "velox/functions/sparksql/registration/Register.h"
3131
#include "velox/vector/VectorSaver.h"
3232

3333
using namespace facebook::velox;

velox/functions/sparksql/Bitwise.cpp

-160
This file was deleted.

velox/functions/sparksql/Bitwise.h

+106-3
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,115 @@
1515
*/
1616
#pragma once
1717

18-
#include <string>
1918
#include "velox/functions/Macros.h"
20-
#include "velox/functions/lib/RegistrationHelpers.h"
2119

2220
namespace facebook::velox::functions::sparksql {
2321

24-
void registerBitwiseFunctions(const std::string& prefix);
22+
/// Simple function computing the bitwise AND of two operands of the same
/// type. The outer template parameter is not referenced by the body.
template <typename T>
struct BitwiseAndFunction {
  /// @param result Receives `lhs & rhs`.
  /// @param lhs Left operand.
  /// @param rhs Right operand.
  template <typename TValue>
  FOLLY_ALWAYS_INLINE void call(TValue& result, TValue lhs, TValue rhs) {
    result = lhs & rhs;
  }
};
29+
30+
/// Simple function computing the bitwise OR of two operands of the same
/// type. The outer template parameter is not referenced by the body.
template <typename T>
struct BitwiseOrFunction {
  /// @param result Receives `lhs | rhs`.
  /// @param lhs Left operand.
  /// @param rhs Right operand.
  template <typename TValue>
  FOLLY_ALWAYS_INLINE void call(TValue& result, TValue lhs, TValue rhs) {
    result = lhs | rhs;
  }
};
37+
38+
/// Simple function computing the bitwise XOR of two operands of the same
/// type. The outer template parameter is not referenced by the body.
template <typename T>
struct BitwiseXorFunction {
  /// @param result Receives `lhs ^ rhs`.
  /// @param lhs Left operand.
  /// @param rhs Right operand.
  template <typename TValue>
  FOLLY_ALWAYS_INLINE void call(TValue& result, TValue lhs, TValue rhs) {
    result = lhs ^ rhs;
  }
};
45+
46+
/// Simple function computing the bitwise complement of its operand.
/// The outer template parameter is not referenced by the body.
template <typename T>
struct BitwiseNotFunction {
  /// @param result Receives `~operand`.
  /// @param operand Value whose bits are inverted.
  template <typename TValue>
  FOLLY_ALWAYS_INLINE void call(TValue& result, TValue operand) {
    result = ~operand;
  }
};
53+
54+
/// Simple function computing `a << b`.
///
/// For 32-bit and 64-bit signed inputs the shift distance is first wrapped
/// into [0, bit width), so negative or oversized distances are accepted
/// (presumably mirroring Java/Spark shift semantics -- TODO confirm). The
/// shift itself is performed on the unsigned counterpart of the value:
/// before C++20, left-shifting a negative signed value, or shifting a bit
/// into/past the sign bit beyond what the unsigned type can hold, is
/// undefined behaviour, whereas the unsigned shift simply discards the
/// overflowing bits.
///
/// Other input types keep the plain `a << b` behaviour with no wrapping.
template <typename T>
struct ShiftLeftFunction {
  /// @param result Receives the shifted value.
  /// @param a Value to shift.
  /// @param b Shift distance; may be negative or >= the bit width of `a`
  /// when `a` is int32_t or int64_t.
  template <typename TInput1, typename TInput2>
  FOLLY_ALWAYS_INLINE void call(TInput1& result, TInput1 a, TInput2 b) {
    if constexpr (std::is_same_v<TInput1, int32_t>) {
      result = static_cast<int32_t>(
          static_cast<uint32_t>(a) << wrapShift<TInput2>(b, 32));
    } else if constexpr (std::is_same_v<TInput1, int64_t>) {
      result = static_cast<int64_t>(
          static_cast<uint64_t>(a) << wrapShift<TInput2>(b, 64));
    } else {
      result = a << b;
    }
  }

 private:
  /// Wraps 'shift' into [0, width). Equivalent to the former two-step
  /// normalisation (`b % width + width` for negatives, then `b % width`
  /// for values >= width).
  template <typename TShift>
  static FOLLY_ALWAYS_INLINE TShift wrapShift(TShift shift, TShift width) {
    shift %= width;
    if (shift < 0) {
      // C++ '%' truncates toward zero, so a negative distance yields a
      // non-positive remainder; move it into the positive range.
      shift += width;
    }
    return shift;
  }
};
77+
78+
/// Simple function computing `a >> b`.
///
/// When `a` is int32_t or int64_t the shift distance is wrapped into
/// [0, bit width) before shifting, so negative or oversized distances are
/// accepted. Other input types are shifted by `b` unchanged.
template <typename T>
struct ShiftRightFunction {
  /// @param result Receives the shifted value.
  /// @param a Value to shift.
  /// @param b Shift distance.
  template <typename TInput1, typename TInput2>
  FOLLY_ALWAYS_INLINE void call(TInput1& result, TInput1 a, TInput2 b) {
    constexpr bool kIsInt32 = std::is_same_v<TInput1, int32_t>;
    constexpr bool kIsInt64 = std::is_same_v<TInput1, int64_t>;
    if constexpr (kIsInt32 || kIsInt64) {
      constexpr int kWidth = kIsInt32 ? 32 : 64;
      // '%' truncates toward zero, so a negative distance leaves a
      // non-positive remainder that is lifted back into [0, kWidth).
      b %= kWidth;
      if (b < 0) {
        b += kWidth;
      }
    }
    result = a >> b;
  }
};
101+
102+
template <typename T>
103+
struct BitCountFunction {
104+
template <typename TInput>
105+
FOLLY_ALWAYS_INLINE void call(int32_t& result, TInput num) {
106+
constexpr int kMaxBits = sizeof(TInput) * CHAR_BIT;
107+
auto value = static_cast<uint64_t>(num);
108+
result = bits::countBits(&value, 0, kMaxBits);
109+
}
110+
};
111+
112+
template <typename T>
113+
struct BitGetFunction {
114+
template <typename TInput>
115+
FOLLY_ALWAYS_INLINE void call(int8_t& result, TInput num, int32_t pos) {
116+
constexpr int kMaxBits = sizeof(TInput) * CHAR_BIT;
117+
VELOX_USER_CHECK_GE(
118+
pos,
119+
0,
120+
"The value of 'pos' argument must be greater than or equal to zero.");
121+
VELOX_USER_CHECK_LT(
122+
pos,
123+
kMaxBits,
124+
"The value of 'pos' argument must not exceed the number of bits in 'x' - 1.");
125+
result = (num >> pos) & 1;
126+
}
127+
};
25128

26129
} // namespace facebook::velox::functions::sparksql

velox/functions/sparksql/CMakeLists.txt

+6-17
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,9 @@
1414

1515
add_subdirectory(specialforms)
1616
velox_add_library(
17-
velox_functions_spark
17+
velox_functions_spark_impl
1818
ArrayGetFunction.cpp
1919
ArraySort.cpp
20-
Bitwise.cpp
2120
Comparisons.cpp
2221
DecimalArithmetic.cpp
2322
DecimalCompare.cpp
@@ -27,34 +26,22 @@ velox_add_library(
2726
MakeTimestamp.cpp
2827
Map.cpp
2928
RegexFunctions.cpp
30-
Register.cpp
31-
RegisterArithmetic.cpp
32-
RegisterCompare.cpp
3329
Size.cpp
3430
String.cpp
3531
UnscaledValueFunction.cpp)
3632

37-
# GCC 12 has a bug where it does not respect "pragma ignore" directives and ends
38-
# up failing compilation in an openssl header included by a hash-related
39-
# function.
40-
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND NOT VELOX_MONO_LIBRARY)
41-
target_compile_options(velox_functions_spark
42-
PRIVATE -Wno-deprecated-declarations)
43-
endif()
44-
4533
velox_link_libraries(
46-
velox_functions_spark
34+
velox_functions_spark_impl
4735
velox_functions_lib
4836
velox_functions_prestosql_impl
4937
velox_functions_spark_specialforms
50-
velox_is_null_functions
5138
velox_functions_util
5239
Folly::folly
5340
simdjson::simdjson)
5441

5542
if(NOT VELOX_MONO_LIBRARY)
56-
set_property(TARGET velox_functions_spark PROPERTY JOB_POOL_COMPILE
57-
high_memory_pool)
43+
set_property(TARGET velox_functions_spark_impl PROPERTY JOB_POOL_COMPILE
44+
high_memory_pool)
5845
endif()
5946

6047
add_subdirectory(window)
@@ -72,3 +59,5 @@ endif()
7259
if(${VELOX_ENABLE_BENCHMARKS})
7360
add_subdirectory(benchmarks)
7461
endif()
62+
63+
add_subdirectory(registration)

velox/functions/sparksql/JsonObjectKeys.h

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
*/
1616
#pragma once
1717

18+
#include "velox/functions/Macros.h"
1819
#include "velox/functions/prestosql/json/SIMDJsonUtil.h"
1920

2021
namespace facebook::velox::functions::sparksql {

0 commit comments

Comments
 (0)