@@ -67,14 +67,11 @@ using std::endl;
67
67
#pragma GCC diagnostic ignored "-Wunused-function"
68
68
using InferenceOutput = std::map<std::string, ov::Tensor>;
69
69
using InferenceInput = std::map<std::string, ov::Tensor>;
70
- // TODO
71
- // * why std::map
72
- // * no ret code from infer()
73
- // * no ret code from load()
70
+
74
71
namespace ovms {
75
72
static OVMS_DataType OVPrecision2CAPI (ov::element::Type_t datatype);
76
73
static ov::element::Type_t CAPI2OVPrecision (OVMS_DataType datatype);
77
- static ov::Tensor makeOvTensorO (OVMS_DataType datatype, const int64_t * shape, size_t dimCount, const void * voutputData, size_t bytesize);
74
+ static ov::Tensor makeOvTensor (OVMS_DataType datatype, const int64_t * shape, size_t dimCount, const void * voutputData, size_t bytesize);
78
75
79
76
OVMSInferenceAdapter::OVMSInferenceAdapter (const std::string& servableName, uint32_t servableVersion, OVMS_Server* cserver) :
80
77
servableName (servableName),
@@ -103,31 +100,22 @@ InferenceOutput OVMSInferenceAdapter::infer(const InferenceInput& input) {
103
100
// PREPARE EACH INPUT
104
101
// extract single tensor
105
102
for (const auto & [name, input_tensor] : input) {
106
- // TODO validate existence of tag key in map
107
- // or handle inference when there is no need for mapping
108
103
const char * realInputName = name.c_str ();
109
- #if 0
110
- const float* input_tensor_access = reinterpret_cast<float*>(input_tensor.data());
111
- std::stringstream ss;
112
- ss << " Adapter received tensor: [ ";
113
- for (int x = 0; x < 10; ++x) {
114
- ss << input_tensor_access[x] << " ";
115
- }
116
- ss << " ]";
117
- LOG(INFO) << ss.str();
118
- #endif
119
104
const auto & ovinputShape = input_tensor.get_shape ();
120
- std::vector<int64_t > inputShape{ovinputShape.begin (), ovinputShape.end ()}; // TODO error handling shape conversion
105
+ if (std::any_of (ovinputShape.begin (), ovinputShape.end (), [](size_t dim) {
106
+ return dim > std::numeric_limits<int64_t >::max ();})) {
107
+ throw std::runtime_error (" Cannot use C-API with dimension size greater than int64_t max value" );
108
+ }
109
+ std::vector<int64_t > inputShape{ovinputShape.begin (), ovinputShape.end ()};
121
110
OVMS_DataType inputDataType = OVPrecision2CAPI (input_tensor.get_element_type ());
122
- ASSERT_CAPI_STATUS_NULL (OVMS_InferenceRequestAddInput (request, realInputName, inputDataType, inputShape.data (), inputShape.size ())); // TODO retcode
111
+ ASSERT_CAPI_STATUS_NULL (OVMS_InferenceRequestAddInput (request, realInputName, inputDataType, inputShape.data (), inputShape.size ()));
123
112
const uint32_t NOT_USED_NUM = 0 ;
124
- // TODO handle hardcoded buffertype, notUsedNum additional options? side packets?
125
113
ASSERT_CAPI_STATUS_NULL (OVMS_InferenceRequestInputSetData (request,
126
114
realInputName,
127
115
reinterpret_cast <void *>(input_tensor.data ()),
128
116
input_tensor.get_byte_size (),
129
117
OVMS_BUFFERTYPE_CPU,
130
- NOT_USED_NUM)); // TODO retcode
118
+ NOT_USED_NUM));
131
119
}
132
120
// ////////////////
133
121
// INFERENCE
@@ -147,13 +135,10 @@ InferenceOutput OVMSInferenceAdapter::infer(const InferenceInput& input) {
147
135
return output;
148
136
}
149
137
CREATE_GUARD (responseGuard, OVMS_InferenceResponse, response);
150
- // verify GetOutputCount
151
138
uint32_t outputCount = 42 ;
152
139
ASSERT_CAPI_STATUS_NULL (OVMS_InferenceResponseOutputCount (response, &outputCount));
153
140
uint32_t parameterCount = 42 ;
154
141
ASSERT_CAPI_STATUS_NULL (OVMS_InferenceResponseParameterCount (response, &parameterCount));
155
- // TODO handle output filtering. Graph definition could suggest
156
- // that we are not interested in all outputs from OVMS Inference
157
142
const void * voutputData;
158
143
size_t bytesize = 42 ;
159
144
OVMS_DataType datatype = (OVMS_DataType)199 ;
@@ -164,18 +149,19 @@ InferenceOutput OVMSInferenceAdapter::infer(const InferenceInput& input) {
164
149
const char * outputName{nullptr };
165
150
for (size_t i = 0 ; i < outputCount; ++i) {
166
151
ASSERT_CAPI_STATUS_NULL (OVMS_InferenceResponseOutput (response, i, &outputName, &datatype, &shape, &dimCount, &voutputData, &bytesize, &bufferType, &deviceId));
167
- output[outputName] = makeOvTensorO (datatype, shape, dimCount, voutputData, bytesize); // TODO optimize FIXME
152
+ output[outputName] = makeOvTensor (datatype, shape, dimCount, voutputData, bytesize);
168
153
}
169
154
return output;
170
155
}
156
+
171
157
void OVMSInferenceAdapter::loadModel (const std::shared_ptr<const ov::Model>& model, ov::Core& core,
172
158
const std::string& device, const ov::AnyMap& compilationConfig) {
173
159
// no need to load but we need to extract metadata
174
160
OVMS_ServableMetadata* servableMetadata = nullptr ;
175
161
ASSERT_CAPI_STATUS_NULL (OVMS_GetServableMetadata (cserver, servableName.c_str (), servableVersion, &servableMetadata));
162
+ CREATE_GUARD (metadataGuard, OVMS_ServableMetadata, servableMetadata);
176
163
uint32_t inputCount = 0 ;
177
164
uint32_t outputCount = 0 ;
178
- // TODO ensure Metadata object removal in all paths
179
165
ASSERT_CAPI_STATUS_NULL (OVMS_ServableMetadataInputCount (servableMetadata, &inputCount));
180
166
ASSERT_CAPI_STATUS_NULL (OVMS_ServableMetadataOutputCount (servableMetadata, &outputCount));
181
167
@@ -190,7 +176,6 @@ void OVMSInferenceAdapter::loadModel(const std::shared_ptr<const ov::Model>& mod
190
176
inputNames.emplace_back (tensorName);
191
177
shape_min_max_t inputMinMax;
192
178
for (size_t i = 0 ; i < dimCount; ++i) {
193
- // TODO test adapter dynamic shapes
194
179
inputMinMax.first .emplace_back (shapeMin[i]);
195
180
inputMinMax.second .emplace_back (shapeMax[i]);
196
181
}
@@ -203,7 +188,6 @@ void OVMSInferenceAdapter::loadModel(const std::shared_ptr<const ov::Model>& mod
203
188
const ov::AnyMap* servableMetadataRtInfo;
204
189
ASSERT_CAPI_STATUS_NULL (OVMS_ServableMetadataInfo (servableMetadata, reinterpret_cast <const void **>(&servableMetadataRtInfo)));
205
190
this ->modelConfig = *servableMetadataRtInfo;
206
- OVMS_ServableMetadataDelete (servableMetadata);
207
191
}
208
192
209
193
ov::PartialShape OVMSInferenceAdapter::getInputShape (const std::string& inputName) const {
@@ -294,7 +278,7 @@ static ov::element::Type_t CAPI2OVPrecision(OVMS_DataType datatype) {
294
278
return it->second ;
295
279
}
296
280
297
- static ov::Tensor makeOvTensorO (OVMS_DataType datatype, const int64_t * shape, size_t dimCount, const void * voutputData, size_t bytesize) {
281
+ static ov::Tensor makeOvTensor (OVMS_DataType datatype, const int64_t * shape, size_t dimCount, const void * voutputData, size_t bytesize) {
298
282
ov::Shape ovShape;
299
283
for (size_t i = 0 ; i < dimCount; ++i) {
300
284
ovShape.push_back (shape[i]);
0 commit comments