Skip to content

Commit 78e13e5

Browse files
zhli1142015facebook-github-bot
authored andcommitted
fix(hashjoin): Create new VectorHashers for listNullKeyRows to prevent dangling pointer access (facebookincubator#12106)
Summary: This PR addresses an issue in the `listNullKeyRows` function that occurs in hash mode. In this function, `hashers_` from HashTable is used to construct the new HashLookup. The `joinProbe` function requires accessing `hashers_[0]->decodedVector().base()` for key comparison. However, this pointer can be dangling, causing the error described below. We propose fixing this issue by using separate VectorHashers with properly decoded vectors (1 row with null keys). ``` [ RUN ] HashJoinTest/MultiThreadedHashJoinTest.hashModeNullAwareAntiJoinWithFilterAndNullKey/1 unknown file: Failure C++ exception with description "Exception: VeloxUserError Error Source: USER Error Code: INVALID_ARGUMENT Reason: Specified element is not found : -96 Retriable: False Function: mapTypeKindToName File: /var/git/velox/velox/type/Type.cpp Line: 116 Stack trace: # 0 facebook::velox::VeloxException::VeloxException(char const*, unsigned long, char const*, std::basic_string_view<char, std::char_traits<char> >, std::basic_string_view<char, std::char_traits<char> >, std::basic_string_view<char, std::char_traits<char> >, std::basic_string_view<char, std::char_traits<char> >, bool, facebook::velox::VeloxException::Type, std::basic_string_view<char, std::char_traits<char> >) # 1 void facebook::velox::detail::veloxCheckFail<facebook::velox::VeloxUserError, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&>(facebook::velox::detail::VeloxCheckFailArgs const&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) # 2 facebook::velox::mapTypeKindToName[abi:cxx11](facebook::velox::TypeKind const&) # 3 facebook::velox::exec::HashTable<false>::compareKeys(char const*, facebook::velox::exec::HashLookup&, int) # 4 facebook::velox::exec::HashTable<false>::joinProbe(facebook::velox::exec::HashLookup&) # 5 facebook::velox::exec::HashTable<false>::listNullKeyRows(facebook::velox::exec::BaseHashTable::NullKeyRowsIterator*, int, char**) # 6 facebook::velox::exec::HashProbe::applyFilterOnTableRowsForNullAwareJoin(facebook::velox::SelectivityVector const&, facebook::velox::SelectivityVector&, std::function<int (char**, int)>) [clone .part.0] # 7 facebook::velox::exec::HashProbe::evalFilterForNullAwareJoin(int, bool) # 8 facebook::velox::exec::HashProbe::evalFilter(int) # 9 facebook::velox::exec::HashProbe::getOutputInternal(bool) # 10 facebook::velox::exec::HashProbe::getOutput() # 11 facebook::velox::exec::Driver::runInternal(std::shared_ptr<facebook::velox::exec::Driver>&, std::shared_ptr<facebook::velox::exec::BlockingState>&, std::shared_ptr<facebook::velox::RowVector>&)::{lambda()https://github.com/facebookincubator/velox/issues/5}::operator()() const # 12 facebook::velox::exec::Driver::runInternal(std::shared_ptr<facebook::velox::exec::Driver>&, std::shared_ptr<facebook::velox::exec::BlockingState>&, std::shared_ptr<facebook::velox::RowVector>&) # 13 facebook::velox::exec::Driver::run(std::shared_ptr<facebook::velox::exec::Driver>) # 14 void folly::detail::function::call_<facebook::velox::exec::Driver::enqueue(std::shared_ptr<facebook::velox::exec::Driver>)::{lambda()facebookincubator#1}, true, false, void>(, folly::detail::function::Data&) # 15 folly::ThreadPoolExecutor::runTask(std::shared_ptr<folly::ThreadPoolExecutor::Thread> const&, folly::ThreadPoolExecutor::Task&&) # 16 folly::CPUThreadPoolExecutor::threadRun(std::shared_ptr<folly::ThreadPoolExecutor::Thread>) # 17 void folly::detail::function::call_<std::_Bind<void (folly::ThreadPoolExecutor::*(folly::ThreadPoolExecutor*, std::shared_ptr<folly::ThreadPoolExecutor::Thread>))(std::shared_ptr<folly::ThreadPoolExecutor::Thread>)>, true, false, void>(, folly::detail::function::Data&) # 18 0x00000000000dc252 # 19 0x0000000000094ac2 # 20 0x000000000012684f " thrown in the test body. ``` Pull Request resolved: facebookincubator#12106 Reviewed By: Yuhta Differential Revision: D68544820 Pulled By: xiaoxmeng fbshipit-source-id: 6c82b61f940ffcb5288902859fc0e83a8199a568
1 parent aeba902 commit 78e13e5

File tree

6 files changed

+104
-10
lines changed

6 files changed

+104
-10
lines changed

velox/exec/HashProbe.cpp

+19-1
Original file line numberDiff line numberDiff line change
@@ -1380,11 +1380,13 @@ SelectivityVector HashProbe::evalFilterForNullAwareJoin(
13801380
}
13811381

13821382
if (buildSideHasNullKeys_) {
1383+
prepareNullKeyProbeHashers();
13831384
BaseHashTable::NullKeyRowsIterator iter;
13841385
nullKeyProbeRows.deselect(filterPassedRows);
13851386
applyFilterOnTableRowsForNullAwareJoin(
13861387
nullKeyProbeRows, filterPassedRows, [&](char** data, int32_t maxRows) {
1387-
return table_->listNullKeyRows(&iter, maxRows, data);
1388+
return table_->listNullKeyRows(
1389+
&iter, maxRows, data, nullKeyProbeHashers_);
13881390
});
13891391
}
13901392
BaseHashTable::RowsIterator iter;
@@ -1399,6 +1401,22 @@ SelectivityVector HashProbe::evalFilterForNullAwareJoin(
13991401
return filterPassedRows;
14001402
}
14011403

1404+
void HashProbe::prepareNullKeyProbeHashers() {
1405+
if (nullKeyProbeHashers_.empty()) {
1406+
nullKeyProbeHashers_ =
1407+
createVectorHashers(probeType_, joinNode_->leftKeys());
1408+
// Null-aware joins allow only one join key.
1409+
VELOX_CHECK_EQ(nullKeyProbeHashers_.size(), 1);
1410+
if (table_->hashMode() == BaseHashTable::HashMode::kHash) {
1411+
nullKeyProbeInput_ =
1412+
BaseVector::create(nullKeyProbeHashers_[0]->type(), 1, pool());
1413+
nullKeyProbeInput_->setNull(0, true);
1414+
SelectivityVector selectivity(1);
1415+
nullKeyProbeHashers_[0]->decode(*nullKeyProbeInput_, selectivity);
1416+
}
1417+
}
1418+
}
1419+
14021420
int32_t HashProbe::evalFilter(int32_t numRows) {
14031421
if (!filter_) {
14041422
return numRows;

velox/exec/HashProbe.h

+12
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,12 @@ class HashProbe : public Operator {
165165
vector_size_t numRows,
166166
bool filterPropagateNulls);
167167

168+
// Prepares the hashers for probing with null keys.
169+
// Initializes `nullKeyProbeHashers_` if empty, ensuring it has exactly one
170+
// hasher. If the table's hash mode is `kHash`, creates and decodes a null
171+
// input vector.
172+
void prepareNullKeyProbeHashers();
173+
168174
// Combine the selected probe-side rows with all or null-join-key (depending
169175
// on the iterator) build side rows and evaluate the filter. Mark probe rows
170176
// that pass the filter in 'filterPassedRows'. Used in null-aware join
@@ -680,6 +686,12 @@ class HashProbe : public Operator {
680686

681687
// The spilled probe partitions remaining to restore.
682688
SpillPartitionSet inputSpillPartitionSet_;
689+
690+
// VectorHashers used for listing rows with null keys.
691+
std::vector<std::unique_ptr<VectorHasher>> nullKeyProbeHashers_;
692+
693+
// Input vector used for listing rows with null keys.
694+
VectorPtr nullKeyProbeInput_;
683695
};
684696

685697
inline std::ostream& operator<<(std::ostream& os, ProbeOperatorState state) {

velox/exec/HashTable.cpp

+10-4
Original file line numberDiff line numberDiff line change
@@ -2033,11 +2033,14 @@ template <>
20332033
int32_t HashTable<false>::listNullKeyRows(
20342034
NullKeyRowsIterator* iter,
20352035
int32_t maxRows,
2036-
char** rows) {
2036+
char** rows,
2037+
const std::vector<std::unique_ptr<VectorHasher>>& hashers) {
20372038
if (!iter->initialized) {
20382039
VELOX_CHECK_GT(nextOffset_, 0);
2040+
// Null-aware joins allow only one join key.
20392041
VELOX_CHECK_EQ(hashers_.size(), 1);
2040-
HashLookup lookup(hashers_);
2042+
VELOX_CHECK_EQ(hashers_.size(), hashers.size());
2043+
HashLookup lookup(hashers);
20412044
if (hashMode_ == HashMode::kHash) {
20422045
lookup.hashes.push_back(VectorHasher::kNullHash);
20432046
} else {
@@ -2075,8 +2078,11 @@ int32_t HashTable<false>::listNullKeyRows(
20752078
}
20762079

20772080
template <>
2078-
int32_t
2079-
HashTable<true>::listNullKeyRows(NullKeyRowsIterator*, int32_t, char**) {
2081+
int32_t HashTable<true>::listNullKeyRows(
2082+
NullKeyRowsIterator*,
2083+
int32_t,
2084+
char**,
2085+
const std::vector<std::unique_ptr<VectorHasher>>&) {
20802086
VELOX_UNREACHABLE();
20812087
}
20822088

velox/exec/HashTable.h

+7-3
Original file line numberDiff line numberDiff line change
@@ -279,8 +279,11 @@ class BaseHashTable {
279279

280280
/// Returns all rows with null keys. Used by null-aware joins (e.g. anti or
281281
/// left semi project).
282-
virtual int32_t
283-
listNullKeyRows(NullKeyRowsIterator* iter, int32_t maxRows, char** rows) = 0;
282+
virtual int32_t listNullKeyRows(
283+
NullKeyRowsIterator* iter,
284+
int32_t maxRows,
285+
char** rows,
286+
const std::vector<std::unique_ptr<VectorHasher>>& hashers) = 0;
284287

285288
virtual void prepareJoinTable(
286289
std::vector<std::unique_ptr<BaseHashTable>> tables,
@@ -531,7 +534,8 @@ class HashTable : public BaseHashTable {
531534
int32_t listNullKeyRows(
532535
NullKeyRowsIterator* iter,
533536
int32_t maxRows,
534-
char** rows) override;
537+
char** rows,
538+
const std::vector<std::unique_ptr<VectorHasher>>& hashers) override;
535539

536540
void clear(bool freeTable) override;
537541

velox/exec/tests/HashJoinTest.cpp

+45
Original file line numberDiff line numberDiff line change
@@ -1222,6 +1222,51 @@ TEST_P(MultiThreadedHashJoinTest, nullAwareAntiJoinWithFilterAndNullKey) {
12221222
}
12231223
}
12241224

1225+
TEST_P(
1226+
MultiThreadedHashJoinTest,
1227+
hashModeNullAwareAntiJoinWithFilterAndNullKey) {
1228+
// Use float type keys to trigger hash mode table.
1229+
auto probeVectors = makeBatches(50, [&](int32_t /*unused*/) {
1230+
return makeRowVector(
1231+
{"t0", "t1"},
1232+
{
1233+
makeNullableFlatVector<float>({std::nullopt, 1, 2}),
1234+
makeFlatVector<int32_t>({1, 1, 2}),
1235+
});
1236+
});
1237+
auto buildVectors = makeBatches(5, [&](int32_t /*unused*/) {
1238+
return makeRowVector(
1239+
{"u0", "u1"},
1240+
{
1241+
makeNullableFlatVector<float>({std::nullopt, 2, 3}),
1242+
makeFlatVector<int32_t>({0, 2, 3}),
1243+
});
1244+
});
1245+
1246+
std::vector<std::string> filters({"u1 < t1", "u1 + t1 = 0"});
1247+
for (const std::string& filter : filters) {
1248+
const auto referenceSql = fmt::format(
1249+
"SELECT t.* FROM t WHERE t0 NOT IN (SELECT u0 FROM u WHERE {})",
1250+
filter);
1251+
1252+
auto testProbeVectors = probeVectors;
1253+
auto testBuildVectors = buildVectors;
1254+
HashJoinBuilder(*pool_, duckDbQueryRunner_, driverExecutor_.get())
1255+
.numDrivers(numDrivers_)
1256+
.probeKeys({"t0"})
1257+
.probeVectors(std::move(testProbeVectors))
1258+
.buildKeys({"u0"})
1259+
.buildVectors(std::move(testBuildVectors))
1260+
.joinType(core::JoinType::kAnti)
1261+
.nullAware(true)
1262+
.joinFilter(filter)
1263+
.joinOutputLayout({"t0", "t1"})
1264+
.referenceQuery(referenceSql)
1265+
.checkSpillStats(false)
1266+
.run();
1267+
}
1268+
}
1269+
12251270
TEST_P(MultiThreadedHashJoinTest, nullAwareAntiJoinWithFilterOnNullableColumn) {
12261271
const std::string referenceSql =
12271272
"SELECT t.* FROM t WHERE t0 NOT IN (SELECT u0 FROM u WHERE t1 <> u1)";

velox/exec/tests/HashTableTest.cpp

+11-2
Original file line numberDiff line numberDiff line change
@@ -547,7 +547,14 @@ class HashTableTest : public testing::TestWithParam<bool>,
547547
ASSERT_EQ(table->hashMode(), mode);
548548
std::vector<char*> rows(nullValues.size());
549549
BaseHashTable::NullKeyRowsIterator iter;
550-
auto numRows = table->listNullKeyRows(&iter, rows.size(), rows.data());
550+
std::vector<std::unique_ptr<VectorHasher>> probeHashers;
551+
probeHashers.push_back(std::make_unique<VectorHasher>(keys->type(), 0));
552+
auto nullKeyProbeInput = BaseVector::create(keys->type(), 1, pool());
553+
nullKeyProbeInput->setNull(0, true);
554+
SelectivityVector selectivity(1);
555+
probeHashers[0]->decode(*nullKeyProbeInput, selectivity);
556+
auto numRows =
557+
table->listNullKeyRows(&iter, rows.size(), rows.data(), probeHashers);
551558
ASSERT_EQ(numRows, nullValues.size());
552559
auto actual =
553560
BaseVector::create<FlatVector<int64_t>>(BIGINT(), numRows, pool());
@@ -558,7 +565,9 @@ class HashTableTest : public testing::TestWithParam<bool>,
558565
nullValues.erase(it);
559566
}
560567
ASSERT_TRUE(nullValues.empty());
561-
ASSERT_EQ(0, table->listNullKeyRows(&iter, rows.size(), rows.data()));
568+
ASSERT_EQ(
569+
0,
570+
table->listNullKeyRows(&iter, rows.size(), rows.data(), probeHashers));
562571
}
563572

564573
// Bitmap of positions in batches_ that end up in the table.

0 commit comments

Comments
 (0)