Skip to content

Commit 6511cdf

Browse files
committed
group start after comm init
1 parent b4701a1 commit 6511cdf

File tree

1 file changed

+10
-0
lines changed

1 file changed

+10
-0
lines changed

src/xccl/ProcessGroupXCCL.cpp

+10
Original file line number | Diff line number | Diff line change
@@ -303,6 +303,11 @@ std::shared_ptr<xcclComm_t> ProcessGroupXCCL::getXCCLComm(
303303

304304
at::xpu::OptionalXPUGuard gpuGuard(device);
305305

306+
for (const auto i : c10::irange(xcclActiveGroupCounter_)) {
307+
(void)i;
308+
ccl::group_end();
309+
}
310+
306311
int numRanks, rank;
307312
if (!singleP2POp) {
308313
numRanks = getSize();
@@ -342,6 +347,11 @@ std::shared_ptr<xcclComm_t> ProcessGroupXCCL::getXCCLComm(
342347
-1, // globalRankStride
343348
size_); // worldSize
344349

350+
for (const auto i : c10::irange(xcclActiveGroupCounter_)) {
351+
(void)i;
352+
ccl::group_start();
353+
}
354+
345355
std::lock_guard<std::mutex> lock(mutex_);
346356
devXCCLCommMap_.emplace(deviceKey, XCCLComm);
347357
xcclStreamsMap_.emplace(deviceKey, std::move(stream));

0 commit comments

Comments
 (0)