We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent f07f6e5, commit ba8b85f (Copy full SHA for ba8b85f)
src/xccl/ProcessGroupXCCL.cpp
@@ -973,6 +973,15 @@ c10::intrusive_ptr<Work> ProcessGroupXCCL::allreduce(
973
xcclReduceOp,
974
comm,
975
ccl::create_stream(stream.queue()));
976
+#if !defined(XCCL_HAS_AVG)
977
+ if (opts.reduceOp == ReduceOp::AVG) {
978
+ auto divisor = getSize();
979
+ c10::StreamGuard guard(stream);
980
+ c10::xpu::XPUCachingAllocator::recordStream(
981
+ output.storage().data_ptr(), stream);
982
+ output.div_(divisor);
983
+ }
984
+#endif
985
return;
986
},
987
OpType::ALLREDUCE,
0 commit comments