Skip to content

Commit b736aaa

Browse files
authored
Merge pull request #2244 from DARMA-tasking/2243-replay-using-phase-modulus
2243 allow replay to repeat phases using modulus
2 parents 26c8c78 + eee59c7 commit b736aaa

File tree

4 files changed

+31
-15
lines changed

4 files changed

+31
-15
lines changed

src/vt/vrt/collection/balance/workload_replay.cc

+16-7
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ namespace vt { namespace vrt { namespace collection {
5656
namespace balance { namespace replay {
5757

5858
void replayWorkloads(
59-
PhaseType initial_phase, PhaseType phases_to_run
59+
PhaseType initial_phase, PhaseType phases_to_run, PhaseType phase_mod
6060
) {
6161
// read in object loads from json files
6262
auto const filename = theConfig()->getLBDataFileIn();
@@ -67,11 +67,11 @@ void replayWorkloads(
6767
&LBManager::statsHandler
6868
>(theLBManager()->getProxy());
6969

70-
replayWorkloads(initial_phase, phases_to_run, workloads, stats_cb);
70+
replayWorkloads(initial_phase, phases_to_run, phase_mod, workloads, stats_cb);
7171
}
7272

7373
void replayWorkloads(
74-
PhaseType initial_phase, PhaseType phases_to_run,
74+
PhaseType initial_phase, PhaseType phases_to_run, PhaseType phase_mod,
7575
std::shared_ptr<LBDataHolder> workloads,
7676
Callback<std::vector<balance::LoadData>> stats_cb
7777
) {
@@ -102,6 +102,8 @@ void replayWorkloads(
102102
// simulate the given number of phases
103103
auto stop_phase = initial_phase + phases_to_run;
104104
for (PhaseType phase = initial_phase; phase < stop_phase; phase++) {
105+
PhaseType input_phase = phase_mod == 0 ? phase : phase % phase_mod;
106+
105107
// reapply the base load model in case we overwrote it on a previous iteration
106108
theLBManager()->setLoadModel(base_load_model);
107109

@@ -113,7 +115,7 @@ void replayWorkloads(
113115

114116
// point the load model at the workloads for the relevant phase
115117
runInEpochCollective("WorkloadReplayDriver -> updateLoads", [=] {
116-
base_load_model->updateLoads(phase);
118+
base_load_model->updateLoads(input_phase);
117119
});
118120

119121
if (theConfig()->vt_debug_replay) {
@@ -123,7 +125,7 @@ void replayWorkloads(
123125
++count;
124126
vt_debug_print(
125127
normal, replay,
126-
"workload for element {} is here on phase {}\n", workload_id, phase
128+
"workload for element {} is here on input_phase {}\n", workload_id, input_phase
127129
);
128130
}
129131
}
@@ -161,7 +163,7 @@ void replayWorkloads(
161163
}
162164

163165
if (this_rank == 0) {
164-
vt_print(replay, "Simulating phase {}...\n", phase);
166+
vt_print(replay, "Simulating phase {} using inputs from phase {}...\n", phase, input_phase);
165167
}
166168

167169
if (theConfig()->vt_debug_replay) {
@@ -227,12 +229,19 @@ void replayWorkloads(
227229
auto cb = theCB()->makeFunc<ReassignmentMsg>(
228230
vt::pipe::LifetimeEnum::Once, postLBWork
229231
);
230-
theLBManager()->selectStartLB(phase, cb);
232+
auto lb = theLBManager()->decideLBToRun(phase, true);
233+
auto const start_time = timing::getCurrentTime();
234+
theLBManager()->startLB(input_phase, lb, cb);
235+
auto const total_time = timing::getCurrentTime() - start_time;
236+
if (lb != LBType::NoLB) {
237+
vt_print(replay, "Time in load balancer: {}\n", total_time);
238+
}
231239
});
232240
runInEpochCollective("WorkloadReplayDriver -> destroyLB", [&] {
233241
theLBManager()->destroyLB();
234242
});
235243
auto last_phase_info = theLBManager()->getPhaseInfo();
244+
last_phase_info->phase = phase;
236245
thePhase()->printSummary(last_phase_info);
237246
}
238247
}

src/vt/vrt/collection/balance/workload_replay.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ namespace balance { namespace replay {
7474
* object exists during any given phase.
7575
*/
7676
void replayWorkloads(
77-
PhaseType initial_phase, PhaseType phases_to_run
77+
PhaseType initial_phase, PhaseType phases_to_run, PhaseType phase_mod
7878
);
7979

8080
/**
@@ -92,7 +92,7 @@ void replayWorkloads(
9292
* same rank as the object exists during any given phase.
9393
*/
9494
void replayWorkloads(
95-
PhaseType initial_phase, PhaseType phases_to_run,
95+
PhaseType initial_phase, PhaseType phases_to_run, PhaseType phase_mod,
9696
std::shared_ptr<LBDataHolder> workloads,
9797
Callback<std::vector<balance::LoadData>> stats_cb
9898
);

tests/unit/collection/test_workload_data_migrator.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -878,7 +878,7 @@ TEST_F(TestWorkloadReplay, test_run_replay_verify_some_stats) {
878878

879879
// then replay them but allow the lb to place objects differently
880880
vt::vrt::collection::balance::replay::replayWorkloads(
881-
initial_phase, num_phases, lbdh, stats_cb
881+
initial_phase, num_phases, 0, lbdh, stats_cb
882882
);
883883
}
884884

tools/workload_replay/simulate_replay.cc

+12-5
Original file line numberDiff line numberDiff line change
@@ -50,21 +50,28 @@ int main(int argc, char** argv) {
5050
vt::initialize(argc, argv);
5151

5252
vtAbortIf(
53-
argc != 3,
54-
"Must have two app-specific arguments: <initial phase> <phases to run>\n"
53+
argc < 3 or argc > 4,
54+
"Must have two or three app-specific arguments:\n"
55+
" <initial phase> <phases to run> [phase modulus]\n"
5556
"The json workload files needs to be specified using\n"
56-
"--vt_lb_data_file_in and --vt_lb_data_dir_in"
57+
" --vt_lb_data_in, --vt_lb_data_file_in, and --vt_lb_data_dir_in"
5758
);
5859

5960
// initial phase to simulate
6061
PhaseType initial_phase = atoi(argv[1]);
6162
// number of phases to simulate
6263
PhaseType phases_to_run = atoi(argv[2]);
64+
// phase modulus to apply to input
65+
PhaseType phase_mod = 0;
66+
67+
if (argc > 3) {
68+
phase_mod = atoi(argv[3]);
69+
}
6370

6471
// the workloads used will be those specified with the command-line arguments
65-
// --vt_lb_data_file_in and --vt_lb_data_dir_in
72+
// --vt_lb_data_in, --vt_lb_data_file_in, and --vt_lb_data_dir_in
6673
vt::vrt::collection::balance::replay::replayWorkloads(
67-
initial_phase, phases_to_run
74+
initial_phase, phases_to_run, phase_mod
6875
);
6976

7077
vt::finalize();

0 commit comments

Comments
 (0)