Skip to content

Commit ec44525

Browse files
committed
#2201: temperedlb: add a bunch of prints for debugging
1 parent 62c6bbe commit ec44525

File tree

2 files changed

+32
-4
lines changed

2 files changed

+32
-4
lines changed

src/vt/vrt/collection/balance/temperedlb/temperedlb.cc

+31-4
Original file line numberDiff line numberDiff line change
@@ -2403,8 +2403,15 @@ void TemperedLB::considerSubClustersAfterLock(MsgSharedPtr<LockedInfoMsg> msg) {
24032403
}
24042404

24052405
void TemperedLB::considerSwapsAfterLock(MsgSharedPtr<LockedInfoMsg> msg) {
2406+
consider_swaps_counter_++;
24062407
is_swapping_ = true;
24072408

2409+
vt_debug_print(
2410+
verbose, temperedlb,
2411+
"considerSwapsAfterLock: consider_swaps_counter_={} start\n",
2412+
consider_swaps_counter_
2413+
);
2414+
24082415
auto const this_node = theContext()->getNode();
24092416

24102417
NodeInfo this_info{
@@ -2532,6 +2539,11 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr<LockedInfoMsg> msg) {
25322539
] = removeClusterToSend(src_shared_id);
25332540

25342541
runInEpochRooted("giveCluster", [&]{
2542+
vt_debug_print(
2543+
verbose, temperedlb,
2544+
"considerSwapsAfterLock: giveCluster swapping {} for {}, epoch={:x}\n",
2545+
src_shared_id, try_shared_id, theMsg()->getEpoch()
2546+
);
25352547
proxy_[try_rank].template send<&TemperedLB::giveCluster>(
25362548
this_node,
25372549
give_shared_blocks_size,
@@ -2556,7 +2568,14 @@ void TemperedLB::considerSwapsAfterLock(MsgSharedPtr<LockedInfoMsg> msg) {
25562568

25572569
proxy_[try_rank].template send<&TemperedLB::releaseLock>();
25582570

2571+
vt_debug_print(
2572+
verbose, temperedlb,
2573+
"considerSwapsAfterLock: consider_swaps_counter_={} finish\n",
2574+
consider_swaps_counter_
2575+
);
2576+
25592577
is_swapping_ = false;
2578+
consider_swaps_counter_--;
25602579

25612580
if (pending_actions_.size() > 0) {
25622581
auto action = pending_actions_.back();
@@ -2618,8 +2637,10 @@ void TemperedLB::giveCluster(
26182637

26192638
vt_debug_print(
26202639
normal, temperedlb,
2621-
"giveCluster: total memory usage={}, shared blocks here={}, "
2622-
"memory_threshold={}, give_cluster={}, take_cluster={}\n", computeMemoryUsage(),
2640+
"giveCluster: from_rank={}, epoch={:x} total memory usage={}, shared blocks here={}, "
2641+
"memory_threshold={}, give_cluster={}, take_cluster={}\n",
2642+
from_rank, theMsg()->getEpoch(),
2643+
computeMemoryUsage(),
26232644
getSharedBlocksHere().size(), mem_thresh_,
26242645
give_shared_blocks_size.begin()->first, take_cluster
26252646
);
@@ -2650,8 +2671,8 @@ void TemperedLB::lockObtained(LockedInfoMsg* in_msg) {
26502671

26512672
vt_debug_print(
26522673
normal, temperedlb,
2653-
"lockObtained: is_locked_={}, is_subclustering_={}\n",
2654-
is_locked_, is_subclustering_
2674+
"lockObtained: is_locked_={}, is_subclustering_={}, is_swapping_={}\n",
2675+
is_locked_, is_subclustering_, is_swapping_
26552676
);
26562677

26572678
auto cur_epoch = theMsg()->getEpoch();
@@ -2678,6 +2699,12 @@ void TemperedLB::lockObtained(LockedInfoMsg* in_msg) {
26782699
} else if (is_swapping_) {
26792700
pending_actions_.push_back(action);
26802701
} else {
2702+
vt_debug_print(
2703+
normal, temperedlb,
2704+
"lockObtained: running action immediately\n"
2705+
);
2706+
2707+
26812708
action();
26822709
}
26832710
}

src/vt/vrt/collection/balance/temperedlb/temperedlb.h

+1
Original file line numberDiff line numberDiff line change
@@ -516,6 +516,7 @@ struct TemperedLB : BaseLB {
516516
bool is_subclustering_ = false;
517517
/// Ready to satisfy locks
518518
bool ready_to_satisfy_locks_ = false;
519+
int consider_swaps_counter_ = 0;
519520
};
520521

521522
}}}} /* end namespace vt::vrt::collection::lb */

0 commit comments

Comments
 (0)