Skip to content

Commit 2a6bb60

Browse files
Bikramjeet Vigfacebook-github-bot
Bikramjeet Vig
authored andcommitted
Use a seperate memory pool for input vectors in ExpressionRunner
Summary: At present, the expression runner utilizes the identical memory pool for both the deserialization of input vectors and the execution of the expression evaluation. Typically, inputs are produced by a distinct operator that has a separate pool from the FilterProject operator. Therefore, to mimic this scenario, this modification ensures that we use separate memory pools. Reviewed By: kagamiori Differential Revision: D54277992
1 parent 2b2030a commit 2a6bb60

File tree

5 files changed

+53
-11
lines changed

5 files changed

+53
-11
lines changed

velox/docs/develop/testing/fuzzer.rst

+4
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,10 @@ ExpressionRunner supports the following flags:
284284

285285
* ``--store_result_path`` optional directory path for storing the results of evaluating SQL expression or query in 'common', 'simplified' or 'query' modes.
286286

287+
* ``--findMinimalSubExpression`` optional Whether to find minimum failing subexpression on result mismatch. Set to false by default.
288+
289+
* ``--useSeperatePoolForInput`` optional If true (default), expression evaluator and input vectors use different memory pools. This helps trigger code-paths that can depend on vectors having different pools. For eg, when copying a flat string vector copies of the strings stored in the string buffers need to be created. If however, the pools were the same between the vectors then the buffers can simply be shared between them instead.
290+
287291
Example command:
288292

289293
::

velox/expression/tests/ExpressionRunner.cpp

+11-6
Original file line numberDiff line numberDiff line change
@@ -115,21 +115,26 @@ void ExpressionRunner::run(
115115
vector_size_t numRows,
116116
const std::string& storeResultPath,
117117
const std::string& lazyColumnListPath,
118-
bool findMinimalSubExpression) {
118+
bool findMinimalSubExpression,
119+
bool useSeperatePoolForInput) {
119120
VELOX_CHECK(!sql.empty());
120-
121+
auto memoryManager = memory::memoryManager();
121122
std::shared_ptr<core::QueryCtx> queryCtx{std::make_shared<core::QueryCtx>()};
122-
std::shared_ptr<memory::MemoryPool> pool{
123-
memory::deprecatedAddDefaultLeafMemoryPool()};
123+
std::shared_ptr<memory::MemoryPool> deserializerPool{
124+
memoryManager->addLeafPool()};
125+
std::shared_ptr<memory::MemoryPool> pool = useSeperatePoolForInput
126+
? memoryManager->addLeafPool("exprEval")
127+
: deserializerPool;
124128
core::ExecCtx execCtx{pool.get(), queryCtx.get()};
125129

126130
RowVectorPtr inputVector;
131+
127132
if (inputPath.empty()) {
128133
inputVector = std::make_shared<RowVector>(
129-
pool.get(), ROW({}), nullptr, 1, std::vector<VectorPtr>{});
134+
deserializerPool.get(), ROW({}), nullptr, 1, std::vector<VectorPtr>{});
130135
} else {
131136
inputVector = std::dynamic_pointer_cast<RowVector>(
132-
restoreVectorFromFile(inputPath.c_str(), pool.get()));
137+
restoreVectorFromFile(inputPath.c_str(), deserializerPool.get()));
133138
VELOX_CHECK_NOT_NULL(
134139
inputVector,
135140
"Input vector is not a RowVector: {}",

velox/expression/tests/ExpressionRunner.h

+11-1
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,15 @@ class ExpressionRunner {
4949
/// @param lazyColumnListPath The path to on-disk vector of column indices
5050
/// that specify which columns of the input row vector should be wrapped in
5151
/// lazy.
52+
/// @param findMinimalSubExpression Whether to find minimum failing
53+
/// subexpression on result mismatch.
54+
/// @param useSeperatePoolForInput Whether to use separate memory pools for
55+
/// input vector and expression evaluation. This helps trigger
56+
/// code-paths that can depend on vectors having different pools. For
57+
/// eg, when copying a flat string vector copies of the strings stored
58+
/// in the string buffers need to be created. If however, the pools
59+
/// were the same between the vectors then the buffers can simply be
60+
/// shared between them instead.
5261
///
5362
/// User can refer to 'VectorSaver' class to see how to serialize/preserve
5463
/// vectors to disk.
@@ -61,7 +70,8 @@ class ExpressionRunner {
6170
vector_size_t numRows,
6271
const std::string& storeResultPath,
6372
const std::string& lazyColumnListPath,
64-
bool findMinimalSubExpression = false);
73+
bool findMinimalSubExpression = false,
74+
bool useSeperatePoolForInput = true);
6575

6676
/// Parse comma-separated SQL expressions. This should be treated as private
6777
/// except for tests.

velox/expression/tests/ExpressionRunnerTest.cpp

+13-2
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,16 @@ DEFINE_string(
8686
"indices that specify which columns of the input row vector should "
8787
"be wrapped in lazy.");
8888

89+
DEFINE_bool(
90+
use_seperate_memory_pool_for_input_vector,
91+
true,
92+
"If true, expression evaluator and input vectors use different memory pools."
93+
" This helps trigger code-paths that can depend on vectors having different"
94+
" pools. For eg, when copying a flat string vector copies of the strings"
95+
" stored in the string buffers need to be created. If however, the pools"
96+
" were the same between the vectors then the buffers can simply be shared"
97+
" between them instead.");
98+
8999
static bool validateMode(const char* flagName, const std::string& value) {
90100
static const std::unordered_set<std::string> kModes = {
91101
"common", "simplified", "verify", "query"};
@@ -207,7 +217,7 @@ int main(int argc, char** argv) {
207217
sql = restoreStringFromFile(FLAGS_sql_path.c_str());
208218
VELOX_CHECK(!sql.empty());
209219
}
210-
220+
memory::initializeMemoryManager({});
211221
test::ExpressionRunner::run(
212222
FLAGS_input_path,
213223
sql,
@@ -217,5 +227,6 @@ int main(int argc, char** argv) {
217227
FLAGS_num_rows,
218228
FLAGS_store_result_path,
219229
FLAGS_lazy_column_list_path,
220-
FLAGS_find_minimal_subexpression);
230+
FLAGS_find_minimal_subexpression,
231+
FLAGS_use_seperate_memory_pool_for_input_vector);
221232
}

velox/expression/tests/ExpressionRunnerUnitTest.cpp

+14-2
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,20 @@ TEST_F(ExpressionRunnerUnitTest, run) {
6363
saveVectorToFile(inputVector.get(), inputPath);
6464
saveVectorToFile(resultVector.get(), resultPath);
6565

66-
EXPECT_NO_THROW(ExpressionRunner::run(
67-
inputPath, "length(c0)", "", resultPath, "verify", 0, "", ""));
66+
for (bool useSeperatePoolForInput : {true, false}) {
67+
LOG(INFO) << "Using useSeperatePoolForInput: " << useSeperatePoolForInput;
68+
EXPECT_NO_THROW(ExpressionRunner::run(
69+
inputPath,
70+
"length(c0)",
71+
"",
72+
resultPath,
73+
"verify",
74+
0,
75+
"",
76+
"",
77+
false,
78+
useSeperatePoolForInput));
79+
}
6880
}
6981

7082
TEST_F(ExpressionRunnerUnitTest, persistAndReproComplexSql) {

0 commit comments

Comments
 (0)