Skip to content

Commit 55b3102

Browse files
Bikramjeet Vigfacebook-github-bot
Bikramjeet Vig
authored andcommitted
Use a seperate memory pool for input vectors in ExpressionRunner
Summary: At present, the expression runner utilizes the identical memory pool for both the deserialization of input vectors and the execution of the expression evaluation. Typically, inputs are produced by a distinct operator that has a separate pool from the FilterProject operator. Therefore, to mimic this scenario, this modification ensures that we use separate memory pools. Differential Revision: D54277992
1 parent 2b2030a commit 55b3102

File tree

4 files changed

+48
-11
lines changed

4 files changed

+48
-11
lines changed

velox/expression/tests/ExpressionRunner.cpp

+11-6
Original file line numberDiff line numberDiff line change
@@ -115,21 +115,26 @@ void ExpressionRunner::run(
115115
vector_size_t numRows,
116116
const std::string& storeResultPath,
117117
const std::string& lazyColumnListPath,
118-
bool findMinimalSubExpression) {
118+
bool findMinimalSubExpression,
119+
bool useSeperatePoolForInput) {
119120
VELOX_CHECK(!sql.empty());
120-
121+
auto memoryManager = memory::memoryManager();
121122
std::shared_ptr<core::QueryCtx> queryCtx{std::make_shared<core::QueryCtx>()};
122-
std::shared_ptr<memory::MemoryPool> pool{
123-
memory::deprecatedAddDefaultLeafMemoryPool()};
123+
std::shared_ptr<memory::MemoryPool> deserializerPool{
124+
memoryManager->addLeafPool()};
125+
std::shared_ptr<memory::MemoryPool> pool = useSeperatePoolForInput
126+
? memoryManager->addLeafPool("exprEval")
127+
: deserializerPool;
124128
core::ExecCtx execCtx{pool.get(), queryCtx.get()};
125129

126130
RowVectorPtr inputVector;
131+
127132
if (inputPath.empty()) {
128133
inputVector = std::make_shared<RowVector>(
129-
pool.get(), ROW({}), nullptr, 1, std::vector<VectorPtr>{});
134+
deserializerPool.get(), ROW({}), nullptr, 1, std::vector<VectorPtr>{});
130135
} else {
131136
inputVector = std::dynamic_pointer_cast<RowVector>(
132-
restoreVectorFromFile(inputPath.c_str(), pool.get()));
137+
restoreVectorFromFile(inputPath.c_str(), deserializerPool.get()));
133138
VELOX_CHECK_NOT_NULL(
134139
inputVector,
135140
"Input vector is not a RowVector: {}",

velox/expression/tests/ExpressionRunner.h

+10-1
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,14 @@ class ExpressionRunner {
4949
/// @param lazyColumnListPath The path to on-disk vector of column indices
5050
/// that specify which columns of the input row vector should be wrapped in
5151
/// lazy.
52+
/// @param findMinimalSubExpression Whether to find minimal subexpression
53+
/// that can be evaluated without relying on other subexpressions.
54+
/// @param useSeperatePoolForInput Whether to use separate memory pool for
55+
/// input vector. This helps trigger code-paths that can depend on
56+
/// vectors having different pools. For eg, when copying a flat string
57+
/// vector copies of the strings stored in the string buffers need to
58+
/// be created. If however, the pools were the same between the vectors
59+
/// then the buffers can simply be shared between them instead.
5260
///
5361
/// User can refer to 'VectorSaver' class to see how to serialize/preserve
5462
/// vectors to disk.
@@ -61,7 +69,8 @@ class ExpressionRunner {
6169
vector_size_t numRows,
6270
const std::string& storeResultPath,
6371
const std::string& lazyColumnListPath,
64-
bool findMinimalSubExpression = false);
72+
bool findMinimalSubExpression = false,
73+
bool useSeperatePoolForInput = true);
6574

6675
/// Parse comma-separated SQL expressions. This should be treated as private
6776
/// except for tests.

velox/expression/tests/ExpressionRunnerTest.cpp

+13-2
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,16 @@ DEFINE_string(
8686
"indices that specify which columns of the input row vector should "
8787
"be wrapped in lazy.");
8888

89+
DEFINE_bool(
90+
use_seperate_memory_pool_for_input_vector,
91+
true,
92+
"If true, a separate memory pool for creating input vectors is used."
93+
" This helps trigger code-paths that can depend on vectors having different"
94+
" pools. For eg, when copying a flat string vector copies of the strings"
95+
" stored in the string buffers need to be created. If however, the pools"
96+
" were the same between the vectors then the buffers can simply be shared"
97+
" between them instead.");
98+
8999
static bool validateMode(const char* flagName, const std::string& value) {
90100
static const std::unordered_set<std::string> kModes = {
91101
"common", "simplified", "verify", "query"};
@@ -207,7 +217,7 @@ int main(int argc, char** argv) {
207217
sql = restoreStringFromFile(FLAGS_sql_path.c_str());
208218
VELOX_CHECK(!sql.empty());
209219
}
210-
220+
memory::initializeMemoryManager({});
211221
test::ExpressionRunner::run(
212222
FLAGS_input_path,
213223
sql,
@@ -217,5 +227,6 @@ int main(int argc, char** argv) {
217227
FLAGS_num_rows,
218228
FLAGS_store_result_path,
219229
FLAGS_lazy_column_list_path,
220-
FLAGS_find_minimal_subexpression);
230+
FLAGS_find_minimal_subexpression,
231+
FLAGS_use_seperate_memory_pool_for_input_vector);
221232
}

velox/expression/tests/ExpressionRunnerUnitTest.cpp

+14-2
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,20 @@ TEST_F(ExpressionRunnerUnitTest, run) {
6363
saveVectorToFile(inputVector.get(), inputPath);
6464
saveVectorToFile(resultVector.get(), resultPath);
6565

66-
EXPECT_NO_THROW(ExpressionRunner::run(
67-
inputPath, "length(c0)", "", resultPath, "verify", 0, "", ""));
66+
for (bool useSeperatePoolForInput : {true, false}) {
67+
LOG(INFO) << "Using useSeperatePoolForInput: " << useSeperatePoolForInput;
68+
EXPECT_NO_THROW(ExpressionRunner::run(
69+
inputPath,
70+
"length(c0)",
71+
"",
72+
resultPath,
73+
"verify",
74+
0,
75+
"",
76+
"",
77+
false,
78+
useSeperatePoolForInput));
79+
}
6880
}
6981

7082
TEST_F(ExpressionRunnerUnitTest, persistAndReproComplexSql) {

0 commit comments

Comments
 (0)