// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "result.hpp"

#include <cuda_runtime.h>

#include <cuda_operation_registry.hpp>
#include <openvino/core/except.hpp>
#include <transformations/rt_info/fused_names_attribute.hpp>
#include <utility>

#include "nop_op.hpp"
#include "transformations/utils/utils.hpp"

namespace ov {
namespace nvidia_gpu {
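
// ResultOp implements ov::op::v0::Result for the CUDA plugin: it moves the
// device-side buffer produced by the network into the user-visible output
// tensor of the inference request.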
ResultOp::ResultOp(const CreationContext& context,
                   const NodeOp& node,
                   IndexCollection&& inputIds,
                   IndexCollection&& outputIds)
    : OperationBase(context, node, std::move(inputIds), std::move(outputIds)) {
    // Cache every (fused) tensor name this result's value may be requested under.
    output_tensor_names_ = GetOutputTensorName(node);
}

void ResultOp::Execute(const InferenceRequestContext& context,
                       Inputs inputs,
                       Outputs outputs,
                       const Workbuffers&) const {
    // A result consumes exactly one device buffer and owns no device outputs.
    OPENVINO_ASSERT(inputs.size() == 1, "Node name: ", GetName());
    OPENVINO_ASSERT(outputs.size() == 0, "Node name: ", GetName());
    // Pick the first cached name that the request maps to an output tensor.
    std::shared_ptr<ov::Tensor> tensor;
    for (const auto& outputName : output_tensor_names_) {
        if (context.getTensorMappingContext().has_output_tensor(outputName)) {
            tensor = context.getTensorMappingContext().get_output_tensor(outputName);
            break;
        }
    }
    OPENVINO_ASSERT(tensor != nullptr, "Node name: ", GetName());
    // Asynchronous device-to-host copy on the request's CUDA stream.
    context.getThreadContext().stream().download(tensor->data(), inputs[0], tensor->get_byte_size());
}
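
// Execute() issues a single asynchronous copy on the request stream, so the
// whole operation can be captured into a CUDA graph.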
CudaGraphCompatibility ResultOp::GetCudaGraphCompatibility() const { return CudaGraphCompatibility::FULL; }

std::optional<std::size_t> ResultOp::GetOutputTensorSubIndex(const ov::Output<ov::Node>& node) {
    const auto& opRegistry = OperationRegistry::getInstance();
    const auto& opType = opRegistry.getOperationType(node.get_node()->shared_from_this());
    if (opType && std::type_index(typeid(NopOp)) == opType.value()) {
        // NopOp is transparent: look through it and search its inputs recursively.
        for (const auto& in : node.get_node()->input_values()) {
            const auto& idx = GetOutputTensorSubIndex(in);
            if (idx) {
                return idx;
            }
        }
    } else if (node.get_node()->get_output_size() > 1) {
        // Only a multi-output producer needs a sub-index to identify the port.
        return node.get_index();
    }
    return std::nullopt;
}
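
// A Result is fed by a single value; return the fused names recorded on its
// producer, since graph transformations may have merged several original
// tensor names onto one node.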
std::vector<std::string> ResultOp::GetOutputTensorName(const ov::op::v0::Result& node) {
    const auto& input = node.input_value(0);
    return ov::getFusedNamesVector(input.get_node()->shared_from_this());
}
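
// CUDA graph capture counterpart of Execute(): resolve the output tensor the
// same way, then record the copy in the CudaGraphContext instead of issuing it.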
void ResultOp::Capture(InferenceRequestContext& context,
                       Inputs inputs,
                       Outputs outputs,
                       const Workbuffers&) const {
    OPENVINO_ASSERT(inputs.size() == 1, "Node name: ", GetName());
    OPENVINO_ASSERT(outputs.size() == 0, "Node name: ", GetName());
    std::shared_ptr<ov::Tensor> tensor;
    std::string outputTensorName{};
    // Resolve both the tensor and the name it is registered under, because the
    // graph context keys the recorded copy by tensor name.
    for (const auto& outputName : output_tensor_names_) {
        if (context.getTensorMappingContext().has_output_tensor(outputName)) {
            tensor = context.getTensorMappingContext().get_output_tensor(outputName);
            outputTensorName = outputName;
            break;
        }
    }
    OPENVINO_ASSERT(tensor != nullptr, "Node name: ", GetName());
    context.getCudaGraphContext().add_result(
        outputTensorName, context.getThreadContext().stream(), tensor->data(), inputs[0], tensor->get_byte_size());
}
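
// Register ResultOp as the plugin implementation for ov::op::v0::Result nodes.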
OPERATION_REGISTER(ResultOp, Result);
} // namespace nvidia_gpu
} // namespace ov