@@ -1852,18 +1852,22 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr<LockedInfoMsg> msg) {
1852
1852
auto const src_after_mem = current_memory_usage_ - src_bytes + try_bytes;
1853
1853
auto const try_after_mem = try_total_bytes + src_bytes - try_bytes;
1854
1854
1855
+ // Check whether strict bounds on memory are satisfied
1855
1856
if (src_after_mem > mem_thresh_ or try_after_mem > mem_thresh_) {
1856
1857
return - std::numeric_limits<double >::infinity ();
1857
1858
}
1858
1859
1860
+ // Compute maximum work of original arrangement
1859
1861
auto const before_work_src = this_new_load_;
1860
1862
auto const before_work_try = try_total_load;
1861
1863
auto const w_max_0 = std::max (before_work_src, before_work_try);
1862
1864
1865
+ // Compute maximum work of proposed new arrangement
1863
1866
auto const after_work_src = this_new_load_ - src_load + try_load;
1864
1867
auto const after_work_try = before_work_try + src_load - try_load;
1865
1868
auto const w_max_new = std::max (after_work_src, after_work_try);
1866
1869
1870
+ // Return criterion value
1867
1871
return w_max_0 - w_max_new;
1868
1872
};
1869
1873
@@ -2258,7 +2262,7 @@ void TemperedLB::swapClusters() {
2258
2262
theTerm ()->pushEpoch (lazy_epoch);
2259
2263
2260
2264
auto criterion = [this ](auto src_cluster, auto try_cluster) -> double {
2261
- // this does not handle empty cluster swaps
2265
+ // FIXME: this does not handle swaps with an empty cluster
2262
2266
auto const & [src_id, src_bytes, src_load] = src_cluster;
2263
2267
auto const & [try_rank, try_id, try_bytes, try_load, try_mem] = try_cluster;
2264
2268
@@ -2362,9 +2366,9 @@ void TemperedLB::swapClusters() {
2362
2366
getSharedBlocksHere ().size (), mem_thresh_, this_new_load_
2363
2367
);
2364
2368
2365
- int n_rejected = 0 ;
2366
2369
2367
2370
// Report on rejection rate in debug mode
2371
+ int n_rejected = 0 ;
2368
2372
if (theConfig ()->vt_debug_temperedlb ) {
2369
2373
runInEpochCollective (" TemperedLB::swapClusters -> compute rejection" , [=] {
2370
2374
proxy_.allreduce <&TemperedLB::rejectionStatsHandler, collective::PlusOp>(
0 commit comments