Skip to content

Commit b6e6bca

Browse files
ppebay authored and lifflander committed
#2201: checkpoint of non-breaking changes (documentation and style)
1 parent 45e947d commit b6e6bca

File tree

1 file changed

+16
-8
lines changed

1 file changed

+16
-8
lines changed

src/vt/vrt/collection/balance/temperedlb/temperedlb.cc

+16-8
Original file line number | Diff line number | Diff line change
@@ -455,9 +455,9 @@ void TemperedLB::inputParams(balance::ConfigEntry* config) {
455455
void TemperedLB::runLB(LoadType total_load) {
456456
bool should_lb = false;
457457

458+
// Compute load statistics
458459
this_load = total_load;
459460
stats = *getStats();
460-
461461
auto const avg = stats.at(lb::Statistic::Rank_load_modeled).at(
462462
lb::StatisticQuantity::avg
463463
);
@@ -481,10 +481,12 @@ void TemperedLB::runLB(LoadType total_load) {
481481
target_max_load_ = avg;
482482
}
483483

484-
if (avg > 0.0000000001) {
484+
// Use an absolute minimal bound on average load to load-balance
485+
if (avg > 1e-10) {
485486
should_lb = max > (run_temperedlb_tolerance + 1.0) * target_max_load_;
486487
}
487488

489+
// Report statistics from head rank
488490
if (theContext()->getNode() == 0) {
489491
vt_debug_print(
490492
terse, temperedlb,
@@ -501,6 +503,7 @@ void TemperedLB::runLB(LoadType total_load) {
501503
}
502504
}
503505

506+
// Perform load rebalancing when deemed necessary
504507
if (should_lb) {
505508
doLBStages(imb);
506509
}
@@ -814,15 +817,16 @@ void TemperedLB::propagateRound(uint8_t k_cur, bool sync, EpochType epoch) {
814817
selected.insert(this_node);
815818
}
816819

820+
// Determine fanout factor capped by number of nodes
817821
auto const fanout = std::min(f_, static_cast<decltype(f_)>(num_nodes - 1));
818-
819822
vt_debug_print(
820823
verbose, temperedlb,
821824
"TemperedLB::propagateRound: trial={}, iter={}, k_max={}, k_cur={}, "
822825
"selected.size()={}, fanout={}\n",
823826
trial_, iter_, k_max_, k_cur, selected.size(), fanout
824827
);
825828

829+
// Iterate over fanout factor
826830
for (int i = 0; i < fanout; i++) {
827831
// This implies full knowledge of all processors
828832
if (selected.size() >= static_cast<size_t>(num_nodes)) {
@@ -849,6 +853,7 @@ void TemperedLB::propagateRound(uint8_t k_cur, bool sync, EpochType epoch) {
849853

850854
// Send message with load
851855
if (sync) {
856+
// Message in synchronous mode
852857
auto msg = makeMessage<LoadMsgSync>(this_node, load_info_);
853858
if (epoch != no_epoch) {
854859
envelopeSetEpoch(msg->env, epoch);
@@ -858,6 +863,7 @@ void TemperedLB::propagateRound(uint8_t k_cur, bool sync, EpochType epoch) {
858863
LoadMsgSync, &TemperedLB::propagateIncomingSync
859864
>(msg.get());
860865
} else {
866+
// Message in asynchronous mode
861867
auto msg = makeMessage<LoadMsgAsync>(this_node, load_info_, k_cur);
862868
if (epoch != no_epoch) {
863869
envelopeSetEpoch(msg->env, epoch);
@@ -1216,8 +1222,10 @@ std::vector<TemperedLB::ObjIDType> TemperedLB::orderObjects(
12161222
void TemperedLB::decide() {
12171223
auto lazy_epoch = theTerm()->makeEpochCollective("TemperedLB: decide");
12181224

1225+
// Initialize transfer and rejection counters
12191226
int n_transfers = 0, n_rejected = 0;
12201227

1228+
// Try to migrate objects only from overloaded nodes
12211229
if (is_overloaded_) {
12221230
std::vector<NodeType> under = makeUnderloaded();
12231231
std::unordered_map<NodeType, ObjsType> migrate_objs;
@@ -1250,6 +1258,7 @@ void TemperedLB::decide() {
12501258
}
12511259
// Rebuild the CMF with the new loads taken into account
12521260
auto cmf = createCMF(under);
1261+
12531262
// Select a node using the CMF
12541263
auto const selected_node = sampleFromCMF(under, cmf);
12551264

@@ -1259,16 +1268,15 @@ void TemperedLB::decide() {
12591268
selected_node, load_info_.size()
12601269
);
12611270

1271+
// Find load of selected node
12621272
auto load_iter = load_info_.find(selected_node);
12631273
vtAssert(load_iter != load_info_.end(), "Selected node not found");
1264-
1265-
// The load of the node selected
12661274
auto& selected_load = load_iter->second;
12671275

1276+
// Evaluate criterion for proposed transfer
12681277
bool eval = Criterion(criterion_)(
12691278
this_new_load_, selected_load, obj_load, target_max_load_
12701279
);
1271-
12721280
vt_debug_print(
12731281
verbose, temperedlb,
12741282
"TemperedLB::decide: trial={}, iter={}, under.size()={}, "
@@ -1288,9 +1296,10 @@ void TemperedLB::decide() {
12881296
eval
12891297
);
12901298

1299+
// Decide about proposed migration based on criterion evaluation
12911300
if (eval) {
12921301
++n_transfers;
1293-
// transfer the object load in seconds
1302+
// Transfer the object load in seconds
12941303
// to match the object load units on the receiving end
12951304
migrate_objs[selected_node][obj_id] = obj_load;
12961305

@@ -1315,7 +1324,6 @@ void TemperedLB::decide() {
13151324
auto node = migration.first;
13161325
lazyMigrateObjsTo(lazy_epoch, node, migration.second);
13171326
}
1318-
13191327
} else {
13201328
// do nothing (underloaded-based algorithm), waits to get work from
13211329
// overloaded nodes

0 commit comments

Comments
 (0)