Skip to content

Commit c900c94

Browse files
Bikramjeet Vigfacebook-github-bot
Bikramjeet Vig
authored andcommitted
Minor refactor in hash build (facebookincubator#8933)
Summary: Includes refactoring as per the follow up comments in facebookincubator#8757 after it was merged. Differential Revision: D54441115
1 parent d270ace commit c900c94

File tree

2 files changed

+27
-31
lines changed

2 files changed

+27
-31
lines changed

velox/exec/HashBuild.cpp

+24-28
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ HashBuild::HashBuild(
120120
tableType_ = ROW(std::move(names), std::move(types));
121121
setupTable();
122122
setupSpiller();
123-
intermediateStateCleared_ = false;
123+
stateCleared_ = false;
124124
}
125125

126126
void HashBuild::initialize() {
@@ -784,11 +784,11 @@ bool HashBuild::finishHashBuild() {
784784
}
785785
}
786786
{
787-
std::lock_guard<std::mutex> l(build->intermediateStateMutex_);
787+
std::lock_guard<std::mutex> l(build->mutex_);
788788
VELOX_CHECK(
789-
!build->intermediateStateCleared_,
790-
"Intermediate state for a peer is empty. It might have been "
791-
"already closed.");
789+
!build->stateCleared_,
790+
"Internal state for a peer is empty. It might have already"
791+
" been closed.");
792792
numRows += build->table_->rows()->numRows();
793793
}
794794
otherBuilds.push_back(build);
@@ -800,22 +800,22 @@ bool HashBuild::finishHashBuild() {
800800
otherTables.reserve(peers.size());
801801
SpillPartitionSet spillPartitions;
802802
for (auto* build : otherBuilds) {
803-
std::unique_ptr<Spiller> buildSpiller;
803+
std::unique_ptr<Spiller> spiller;
804804
{
805-
std::lock_guard<std::mutex> l(build->intermediateStateMutex_);
805+
std::lock_guard<std::mutex> l(build->mutex_);
806806
VELOX_CHECK(
807-
!build->intermediateStateCleared_,
808-
"Intermediate state for a peer is empty. It might have been "
809-
"already closed.");
810-
build->intermediateStateCleared_ = true;
807+
!build->stateCleared_,
808+
"Internal state for a peer is empty. It might have already"
809+
" been closed.");
810+
build->stateCleared_ = true;
811811
VELOX_CHECK_NOT_NULL(build->table_);
812812
otherTables.push_back(std::move(build->table_));
813-
buildSpiller = std::move(build->spiller_);
813+
spiller = std::move(build->spiller_);
814814
}
815-
if (buildSpiller != nullptr) {
816-
buildSpiller->finishSpill(spillPartitions);
815+
if (spiller != nullptr) {
816+
spiller->finishSpill(spillPartitions);
817+
build->recordSpillStats(spiller.get());
817818
}
818-
build->recordSpillStats(buildSpiller.get());
819819
}
820820

821821
if (spiller_ != nullptr) {
@@ -847,7 +847,7 @@ bool HashBuild::finishHashBuild() {
847847
addRuntimeStats();
848848
if (joinBridge_->setHashTable(
849849
std::move(table_), std::move(spillPartitions), joinHasNullKeys_)) {
850-
intermediateStateCleared_ = true;
850+
stateCleared_ = true;
851851
spillGroup_->restart();
852852
}
853853

@@ -938,7 +938,7 @@ void HashBuild::setupSpillInput(HashJoinBridge::SpillInput spillInput) {
938938

939939
setupTable();
940940
setupSpiller(spillInput.spillPartition.get());
941-
intermediateStateCleared_ = false;
941+
stateCleared_ = false;
942942

943943
// Start to process spill input.
944944
processSpillInput();
@@ -1126,10 +1126,7 @@ void HashBuild::reclaim(
11261126

11271127
TestValue::adjust("facebook::velox::exec::HashBuild::reclaim", this);
11281128

1129-
// can another thread call close() while hashbuild is in arbitration and
1130-
// reclaim is called on it?
11311129
if (exceededMaxSpillLevelLimit_) {
1132-
// NOTE: we might have reached to the max spill limit.
11331130
return;
11341131
}
11351132

@@ -1142,9 +1139,9 @@ void HashBuild::reclaim(
11421139
LOG(WARNING) << "Can't reclaim from hash build operator, state_["
11431140
<< stateName(state_) << "], nonReclaimableSection_["
11441141
<< nonReclaimableSection_ << "], spiller_["
1145-
<< (intermediateStateCleared_ || spiller_->finalized()
1146-
? "finalized"
1147-
: "non-finalized")
1142+
<< (stateCleared_ ? "cleared"
1143+
: (spiller_->finalized() ? "finalized"
1144+
: "non-finalized"))
11481145
<< "] " << pool()->name()
11491146
<< ", usage: " << succinctBytes(pool()->currentBytes());
11501147
return;
@@ -1227,13 +1224,12 @@ bool HashBuild::nonReclaimableState() const {
12271224
// 1) the hash table has been built by the last build thread (inidicated
12281225
// by state_)
12291226
// 2) the last build operator has transferred ownership of 'this' operator's
1230-
// intermediate state (table_ and spiller_) to itself
1227+
// internal state (table_ and spiller_) to itself.
12311228
// 3) it has completed spilling before reaching either of the previous
12321229
// two states.
12331230
return ((state_ != State::kRunning) && (state_ != State::kWaitForBuild) &&
12341231
(state_ != State::kYield)) ||
1235-
nonReclaimableSection_ || intermediateStateCleared_ ||
1236-
spiller_->finalized();
1232+
nonReclaimableSection_ || !spiller_ || spiller_->finalized();
12371233
}
12381234

12391235
void HashBuild::close() {
@@ -1242,8 +1238,8 @@ void HashBuild::close() {
12421238
{
12431239
// Free up major memory usage. Gate access to them as they can be accessed
12441240
// by the last build thread that finishes building the hash table.
1245-
std::lock_guard<std::mutex> l(intermediateStateMutex_);
1246-
intermediateStateCleared_ = true;
1241+
std::lock_guard<std::mutex> l(mutex_);
1242+
stateCleared_ = true;
12471243
joinBridge_.reset();
12481244
spiller_.reset();
12491245
table_.reset();

velox/exec/HashBuild.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -264,18 +264,18 @@ class HashBuild final : public Operator {
264264
// The row type used for hash table build and disk spilling.
265265
RowTypePtr tableType_;
266266

267-
// Used to serialize access to intermediate state variables (like 'table_' and
267+
// Used to serialize access to the internal state (like 'table_' and
268268
// 'spiller_'). This is only required when variables are accessed
269269
// concurrently, that is, when a thread tries to close the operator while
270270
// another thread is building the hash table. Refer to 'close()' and
271271
// finishHashBuild()' for more details.
272-
std::mutex intermediateStateMutex_;
272+
std::mutex mutex_;
273273

274274
// Indicates if the intermediate state ('table_' and 'spiller_') has
275275
// been cleared. This can happen either when the operator is closed or when
276276
// the last hash build operator transfers ownership of them to itself while
277277
// building the final hash table.
278-
bool intermediateStateCleared_{false};
278+
bool stateCleared_{false};
279279

280280
// Container for the rows being accumulated.
281281
std::unique_ptr<BaseHashTable> table_;

0 commit comments

Comments
 (0)