Commit 29283c2 (1 parent: 0fdb0ab)
1 file changed: +7 -1 lines
Columns: original file line number | diff line number | diff line change
@@ -351,7 +351,13 @@ std::shared_ptr<xcclComm_t> ProcessGroupXCCL::getXCCLComm(
351
351
(void )i;
352
352
ccl::group_start ();
353
353
}
354
-
354
+ // The oneCCL group API requires retaining the SYCL queue (xcclstream) object
355
+ // for the lifetime of the communicator. If the XPU stream is created
356
+ // within the collective operation, it would be destroyed earlier than the
357
+ // communicator after the operation ends. Therefore, the XPU stream is stored
358
+ // in a map alongside the communicator. Similarly, oneCCLv2 also requires
359
+ // retaining the SYCL queue pointer for collective operations, so this change
360
+ // will be necessary in oneCCLv2 as well.
355
361
ccl::stream xccl_stream = ccl::create_stream (q);
356
362
std::lock_guard<std::mutex> lock (mutex_);
357
363
devXCCLCommMap_.emplace (deviceKey, XCCLComm);
You can’t perform that action at this time.
0 commit comments