Skip to content

Commit 7df7228

Browse files
apavliuk55 and nkogteva authored
[NVIDIA] Add IsCudaGraphCompatible() to all operations (#658)
* [NVIDIA] Add IsCudaGraphCompatible() to all operations * Fixes after rebase --------- Co-authored-by: Nadezhda <nadezhda.ageeva@intel.com>
1 parent 6b2a339 commit 7df7228

File tree

106 files changed

+492
-2
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

106 files changed

+492
-2
lines changed

modules/nvidia_plugin/src/cuda_operation_base.hpp

+3
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ class IOperationExec {
4242
Inputs inputTensors,
4343
Outputs outputTensors,
4444
const Workbuffers& workbuffers) const = 0;
45+
virtual bool IsCudaGraphCompatible() const = 0;
4546
virtual void InitSharedImmutableWorkbuffers(const Buffers&) = 0;
4647
virtual WorkbufferRequest GetWorkBufferRequest() const = 0;
4748
virtual const WorkbufferIds& GetWorkbufferIds() const = 0;
@@ -76,6 +77,8 @@ class OperationBase : public IOperationExec, public IOperationMeta, public std::
7677
IndexCollection&& inputIds,
7778
IndexCollection&& outputIds);
7879

80+
bool IsCudaGraphCompatible() const override { return false; }
81+
7982
WorkbufferRequest GetWorkBufferRequest() const override {
8083
return {}; // Most operators do not need workbuffers
8184
}

modules/nvidia_plugin/src/ops/activation_forward_cudnn_base.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -59,5 +59,7 @@ void ActivationForwardCuDnnOpBase::Execute(const InferenceRequestContext& contex
5959
outputTensors[0].get());
6060
}
6161

62+
bool ActivationForwardCuDnnOpBase::IsCudaGraphCompatible() const { return true; }
63+
6264
} // namespace nvidia_gpu
6365
} // namespace ov

modules/nvidia_plugin/src/ops/activation_forward_cudnn_base.hpp

+3
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,14 @@ class ActivationForwardCuDnnOpBase : public OperationCuDnn {
2525
const ov::Node& node,
2626
IndexCollection&& inputIds,
2727
IndexCollection&& outputIds);
28+
2829
void Execute(const InferenceRequestContext& context,
2930
Inputs inputTensors,
3031
Outputs outputTensors,
3132
const Workbuffers&) const override;
3233

34+
bool IsCudaGraphCompatible() const override;
35+
3336
protected:
3437
std::unique_ptr<CUDA::DnnActivationDescriptor> op_desc_;
3538
CUDA::DnnTensorDescriptor x_desc_;

modules/nvidia_plugin/src/ops/avgpool.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ void AvgPoolOp::Execute(const InferenceRequestContext& context,
3030
outputs[PoolingImpl::output_index].get());
3131
}
3232

33+
bool AvgPoolOp::IsCudaGraphCompatible() const { return true; }
34+
3335
OPERATION_REGISTER(AvgPoolOp, AvgPool);
3436

3537
} // namespace nvidia_gpu

modules/nvidia_plugin/src/ops/avgpool.hpp

+3
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,14 @@ class AvgPoolOp : public OperationCuDnn {
1717
const std::shared_ptr<ov::Node>& node,
1818
IndexCollection&& inputIds,
1919
IndexCollection&& outputIds);
20+
2021
void Execute(const InferenceRequestContext& context,
2122
Inputs inputTensors,
2223
Outputs outputTensors,
2324
const Workbuffers& workbuffers) const override;
2425

26+
bool IsCudaGraphCompatible() const override;
27+
2528
private:
2629
PoolingImpl impl_;
2730
};

modules/nvidia_plugin/src/ops/broadcast.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ void BroadcastOp::Execute(const InferenceRequestContext& context,
6565
(*kernel_)(stream, inputs[0].get(), broadcast_params_->mapper(workbuffers.immutable_buffers), outputs[0].get());
6666
}
6767

68+
bool BroadcastOp::IsCudaGraphCompatible() const { return true; }
69+
6870
WorkbufferRequest BroadcastOp::GetWorkBufferRequest() const { return {immutable_buffer_sizes_, {}}; }
6971

7072
void BroadcastOp::InitSharedImmutableWorkbuffers(const Buffers& buffers) {

modules/nvidia_plugin/src/ops/broadcast.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ class BroadcastOp : public OperationBase {
2727
WorkbufferRequest GetWorkBufferRequest() const override;
2828
void InitSharedImmutableWorkbuffers(const Buffers& buffers) override;
2929

30+
bool IsCudaGraphCompatible() const override;
31+
3032
private:
3133
std::vector<WorkbufferRequest::size_in_bytes_t> immutable_buffer_sizes_;
3234
std::unique_ptr<NumpyBroadcastParams> broadcast_params_;

modules/nvidia_plugin/src/ops/clamp_cuda.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -51,5 +51,7 @@ void ClampCudaOp::Execute(const InferenceRequestContext& context,
5151
(*kernel_)(context.getThreadContext().stream().get(), inputTensors[0].get(), outputTensors[0].get());
5252
}
5353

54+
bool ClampCudaOp::IsCudaGraphCompatible() const { return true; }
55+
5456
} // namespace nvidia_gpu
5557
} // namespace ov

modules/nvidia_plugin/src/ops/clamp_cuda.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ class ClampCudaOp : public OperationBase {
2626
Outputs outputTensors,
2727
const Workbuffers& workbuffers) const override;
2828

29+
bool IsCudaGraphCompatible() const override;
30+
2931
private:
3032
std::optional<kernel::Clamp> kernel_;
3133
};

modules/nvidia_plugin/src/ops/clamp_cudnn.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,8 @@ void ClampCuDnnOp::Execute(const InferenceRequestContext& context,
9797
outputTensors[0].get());
9898
}
9999

100+
bool ClampCuDnnOp::IsCudaGraphCompatible() const { return true; }
101+
100102
void ClampCuDnnOp::InitSharedImmutableWorkbuffers(const Buffers& buffers) {
101103
switch (data_type_) {
102104
case CUDNN_DATA_FLOAT:

modules/nvidia_plugin/src/ops/clamp_cudnn.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ class ClampCuDnnOp : public OperationCuDnn {
3333
Outputs outputTensors,
3434
const Workbuffers& workbuffers) const override;
3535

36+
bool IsCudaGraphCompatible() const override;
3637
void InitSharedImmutableWorkbuffers(const Buffers& buffers) override;
3738
WorkbufferRequest GetWorkBufferRequest() const override;
3839

modules/nvidia_plugin/src/ops/comparison.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,8 @@ Comparison::Comparison(const CreationContext& context,
8484
threads_per_block};
8585
}
8686

87+
bool Comparison::IsCudaGraphCompatible() const { return true; }
88+
8789
void Comparison::Execute(const InferenceRequestContext& context,
8890
Inputs inputs,
8991
Outputs outputs,

modules/nvidia_plugin/src/ops/comparison.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ class Comparison : public OperationBase {
1818
IndexCollection&& outputIds,
1919
kernel::Comparison::Op_t operation_type);
2020

21+
bool IsCudaGraphCompatible() const override;
22+
2123
private:
2224
void calculateOffsets();
2325
void Execute(const InferenceRequestContext& context,

modules/nvidia_plugin/src/ops/concat.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ void ConcatOp::Execute(const InferenceRequestContext& context,
9595
outputs[0].get());
9696
}
9797

98+
bool ConcatOp::IsCudaGraphCompatible() const { return false; }
99+
98100
OPERATION_REGISTER(ConcatOp, Concat);
99101
} // namespace nvidia_gpu
100102
} // namespace ov

modules/nvidia_plugin/src/ops/concat.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ class ConcatOp : public OperationBase {
2828
WorkbufferRequest GetWorkBufferRequest() const override;
2929
void InitSharedImmutableWorkbuffers(const Buffers&) override;
3030

31+
bool IsCudaGraphCompatible() const override;
32+
3133
private:
3234
size_t immutableWbSize() const { return concat_kernel_.value().immutableWbSize(); }
3335
size_t mutableWbSize() const { return concat_kernel_.value().mutableWbSize(); }

modules/nvidia_plugin/src/ops/convert.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ void ConvertOp::Execute(const InferenceRequestContext& context,
5555
(*convert_kernel_)(stream.get(), outputs[0].get(), inputs[0].get());
5656
}
5757

58+
bool ConvertOp::IsCudaGraphCompatible() const { return true; }
59+
5860
OPERATION_REGISTER(ConvertOp, Convert);
5961

6062
} // namespace nvidia_gpu

modules/nvidia_plugin/src/ops/convert.hpp

+4
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,14 @@ class ConvertOp : public OperationBase {
1818
const std::shared_ptr<ov::Node>& node,
1919
IndexCollection&& inputIds,
2020
IndexCollection&& outputIds);
21+
2122
void Execute(const InferenceRequestContext& context,
2223
Inputs inputTensors,
2324
Outputs outputTensors,
2425
const Workbuffers& workbuffers) const override;
26+
27+
bool IsCudaGraphCompatible() const override;
28+
2529
using Type_t = ov::element::Type_t;
2630
using convert_t = void (*)(
2731
const CUDA::Stream&, size_t, CUDA::DevicePointer<void*>, CUDA::DevicePointer<const void*>, unsigned, unsigned);

modules/nvidia_plugin/src/ops/convert_color_i420.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ class I420ConvertColorBase : public OperationBase {
9191
}
9292
}
9393

94+
bool IsCudaGraphCompatible() const override { return true; }
95+
9496
private:
9597
std::optional<TKernel> kernel_;
9698
};

modules/nvidia_plugin/src/ops/convert_color_nv12.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,8 @@ class NV12ConvertColorBase : public OperationBase {
9090
}
9191
}
9292

93+
bool IsCudaGraphCompatible() const override { return true; }
94+
9395
private:
9496
std::optional<TKernel> kernel_;
9597
};

modules/nvidia_plugin/src/ops/convolution_backprop_data.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,11 @@ void ConvBackpropDataOp<T>::Execute(const InferenceRequestContext& context,
4242
outputs[ConvBackpropDataOp::ArgIndices::dinput].get()));
4343
}
4444

45+
template <typename T>
46+
bool ConvBackpropDataOp<T>::IsCudaGraphCompatible() const {
47+
return true;
48+
}
49+
4550
OPERATION_REGISTER(ConvolutionBackpropDataOp, ConvolutionBackpropData);
4651
OPERATION_REGISTER(GroupConvolutionBackpropDataOp, GroupConvolutionBackpropData);
4752

modules/nvidia_plugin/src/ops/convolution_backprop_data.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ class ConvBackpropDataOp : public OperationCuDnn {
3232
Outputs outputTensors,
3333
const Workbuffers& workbuffers) const override;
3434

35+
bool IsCudaGraphCompatible() const override;
36+
3537
void InitSharedImmutableWorkbuffers(const IOperationExec::Buffers&) override;
3638
WorkbufferRequest GetWorkBufferRequest() const override;
3739

modules/nvidia_plugin/src/ops/convolution_cudnn.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ void ConvolutionCuDnn::Execute(const InferenceRequestContext& context,
4444
throwIfError(status);
4545
}
4646

47+
bool ConvolutionCuDnn::IsCudaGraphCompatible() const { return true; }
48+
4749
WorkbufferRequest ConvolutionCuDnn::GetWorkBufferRequest() const {
4850
if (descs_.Algo().memory != 0)
4951
return {{}, {descs_.Algo().memory}};

modules/nvidia_plugin/src/ops/convolution_cudnn.hpp

+3
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,11 @@ class ConvolutionCuDnn : public OperationCuDnn {
2626
Inputs inputTensors,
2727
Outputs outputTensors,
2828
const Workbuffers&) const override;
29+
2930
WorkbufferRequest GetWorkBufferRequest() const override;
3031

32+
bool IsCudaGraphCompatible() const override;
33+
3134
private:
3235
Convolution::Details::ConvolutionDescriptorsCuDnn descs_;
3336
};

modules/nvidia_plugin/src/ops/convolution_cudnn_be.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,8 @@ void ConvolutionCuDnnBE::Execute(const InferenceRequestContext& context,
148148
throwIfError(::cudnnBackendExecute(context.getThreadContext().dnnHandle().get(), plan->get(), variantPack->get()));
149149
}
150150

151+
bool ConvolutionCuDnnBE::IsCudaGraphCompatible() const { return false; }
152+
151153
std::shared_ptr<CUDA::DnnBETensorDescriptor> ConvolutionCuDnnBE::MakeTensorDescriptor(int64_t id,
152154
cudnnDataType_t element_type,
153155
const ov::Shape& shape) {

modules/nvidia_plugin/src/ops/convolution_cudnn_be.hpp

+3
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,9 @@ class ConvolutionCuDnnBE : public OperationCuDnn {
3232
Inputs inputTensors,
3333
Outputs outputTensors,
3434
const Workbuffers& workbuffers) const override;
35+
36+
bool IsCudaGraphCompatible() const override;
37+
3538
WorkbufferRequest GetWorkBufferRequest() const override;
3639

3740
private:

modules/nvidia_plugin/src/ops/cudnn_tensor_op_base.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,8 @@ void CuDnnTensorOpBase::Execute(const InferenceRequestContext& context,
166166
outputTensors[0].get());
167167
}
168168

169+
bool CuDnnTensorOpBase::IsCudaGraphCompatible() const { return true; }
170+
169171
CuDnnTensorOpBase::IoParams::IoParams(const ov::Node& node, const Type& io_type, int index)
170172
: type_(convertDataType<cudnnDataType_t>(io_type == Type::INPUT ? node.get_input_element_type(index)
171173
: node.get_output_element_type(index))),

modules/nvidia_plugin/src/ops/cudnn_tensor_op_base.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ class CuDnnTensorOpBase : public OperationCuDnn {
2424
Outputs outputTensors,
2525
const Workbuffers& workbuffers) const override;
2626

27+
bool IsCudaGraphCompatible() const override;
28+
2729
private:
2830
struct IoParams {
2931
const cudnnDataType_t type_;

modules/nvidia_plugin/src/ops/detection_output.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,8 @@ void DetectionOutputOp::Execute(const InferenceRequestContext& context,
102102
}
103103
}
104104

105+
bool DetectionOutputOp::IsCudaGraphCompatible() const { return true; }
106+
105107
void DetectionOutputOp::InitSharedImmutableWorkbuffers(const Buffers& buffers) {
106108
kernel_.value().initSharedImmutableWorkbuffers(buffers);
107109
}

modules/nvidia_plugin/src/ops/detection_output.hpp

+3
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,14 @@ class DetectionOutputOp : public OperationBase {
1919
const NodeOp& node,
2020
IndexCollection&& inputIds,
2121
IndexCollection&& outputIds);
22+
2223
void Execute(const InferenceRequestContext& context,
2324
Inputs inputTensors,
2425
Outputs outputTensors,
2526
const Workbuffers& workbuffers) const override;
2627

28+
bool IsCudaGraphCompatible() const override;
29+
2730
void InitSharedImmutableWorkbuffers(const Buffers& buffers) override;
2831
WorkbufferRequest GetWorkBufferRequest() const override;
2932

modules/nvidia_plugin/src/ops/elementwise_binary.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ class ElementwiseBinaryOp : public OperationBase {
5959
static_cast<void*>(outputTensors[0].get()));
6060
}
6161

62+
bool IsCudaGraphCompatible() const override { return true; }
63+
6264
void InitSharedImmutableWorkbuffers(const IOperationExec::Buffers& buffers) override {
6365
in0_broadcast_params_->initWorkbuffers(buffers);
6466
in1_broadcast_params_->initWorkbuffers(buffers);

modules/nvidia_plugin/src/ops/fake_quantize.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ FakeQuantizeOp::FakeQuantizeOp(const CreationContext &context,
4545
convertDataType<ov::nvidia_gpu::kernel::Type_t>(element_type), output_size, max_threads_per_block, levels};
4646
}
4747

48+
bool FakeQuantizeOp::IsCudaGraphCompatible() const { return true; }
49+
4850
void FakeQuantizeOp::Execute(const InferenceRequestContext &context,
4951
Inputs inputTensors,
5052
Outputs outputTensors,

modules/nvidia_plugin/src/ops/fake_quantize.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ class FakeQuantizeOp : public OperationBase {
2020
IndexCollection&& inputIds,
2121
IndexCollection&& outputIds);
2222

23+
bool IsCudaGraphCompatible() const override;
24+
2325
private:
2426
void Execute(const InferenceRequestContext& context,
2527
Inputs inputTensors,

modules/nvidia_plugin/src/ops/floor.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ void FloorOp::Execute(const InferenceRequestContext& context,
4242
(*kernel_)(stream.get(), inputTensors[0].get(), outputTensors[0].get());
4343
}
4444

45+
bool FloorOp::IsCudaGraphCompatible() const { return true; }
46+
4547
OPERATION_REGISTER(FloorOp, Floor);
4648

4749
} // namespace nvidia_gpu

modules/nvidia_plugin/src/ops/floor.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ class FloorOp : public OperationBase {
2323
Outputs outputTensors,
2424
const Workbuffers& workbuffers) const override;
2525

26+
bool IsCudaGraphCompatible() const override;
27+
2628
private:
2729
std::optional<kernel::Floor> kernel_;
2830
};

modules/nvidia_plugin/src/ops/fully_connected.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ void FullyConnectedOp::Execute(const InferenceRequestContext& context,
5454
matmul_op_.Execute(context, inputs.first(inputs.size() - 1), outputs, workbuffers);
5555
}
5656

57+
bool FullyConnectedOp::IsCudaGraphCompatible() const { return true; }
58+
5759
OPERATION_REGISTER(FullyConnectedOp, FullyConnected);
5860
} // namespace nvidia_gpu
5961
} // namespace ov

modules/nvidia_plugin/src/ops/fully_connected.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ class FullyConnectedOp : public OperationCuBlas {
2626
Outputs outputTensors,
2727
const Workbuffers& workbuffers) const override;
2828

29+
bool IsCudaGraphCompatible() const override;
30+
2931
private:
3032
MatMulOp matmul_op_;
3133
size_t bias_size_ = 0;

modules/nvidia_plugin/src/ops/fused_convolution_backprop_data.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ void FusedConvolutionBackpropDataOp::Execute(const InferenceRequestContext& cont
7777
outputs[ArgIndices3Ins::dinput].get()));
7878
}
7979

80+
bool FusedConvolutionBackpropDataOp::IsCudaGraphCompatible() const { return true; }
81+
8082
void FusedConvolutionBackpropDataOp::InitSharedImmutableWorkbuffers(const IOperationExec::Buffers& buffers) {
8183
OPENVINO_ASSERT(buffers.size() == 1, "Node name: ", GetName());
8284
const size_t repeat = conv_in_bytes_ / add_in_bytes_;

modules/nvidia_plugin/src/ops/fused_convolution_backprop_data.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ class FusedConvolutionBackpropDataOp : public OperationCuDnn {
2525
Inputs inputTensors,
2626
Outputs outputTensors,
2727
const Workbuffers&) const override;
28+
29+
bool IsCudaGraphCompatible() const override;
2830
void InitSharedImmutableWorkbuffers(const IOperationExec::Buffers& buffers) override;
2931
WorkbufferRequest GetWorkBufferRequest() const override;
3032

modules/nvidia_plugin/src/ops/fused_convolution_cudnn.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ void FusedConvolutionCuDnn::Execute(const InferenceRequestContext& context,
9595
outputs[ArgIndices::output].get()));
9696
}
9797

98+
bool FusedConvolutionCuDnn::IsCudaGraphCompatible() const { return true; }
99+
98100
WorkbufferRequest FusedConvolutionCuDnn::GetWorkBufferRequest() const {
99101
if (conv_descs_->Algo().memory != 0)
100102
return {{}, {conv_descs_->Algo().memory}};

modules/nvidia_plugin/src/ops/fused_convolution_cudnn.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ class FusedConvolutionCuDnn : public OperationCuDnn {
3434
Inputs inputTensors,
3535
Outputs outputTensors,
3636
const Workbuffers&) const override;
37+
38+
bool IsCudaGraphCompatible() const override;
3739
void InitSharedImmutableWorkbuffers(const IOperationExec::Buffers&) override {}
3840
WorkbufferRequest GetWorkBufferRequest() const override;
3941

0 commit comments

Comments (0)