@@ -78,17 +78,9 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>& initStructs,
       _graphInputDescriptors(_graph->get_input_descriptors()),
       _graphOutputDescriptors(_graph->get_output_descriptors()),
       _levelZeroInputTensors(_metadata.inputs.size(), std::vector<std::shared_ptr<ov::ITensor>>(1, nullptr)),
-      _levelZeroOutputTensors(_metadata.outputs.size(), nullptr),
-      _profilingPool(_initStructs, _graph, zeroProfiling::POOL_SIZE),
-      _profilingQuery(_initStructs, 0) {
+      _levelZeroOutputTensors(_metadata.outputs.size(), nullptr) {
     _logger.debug("ZeroInferRequest::ZeroInferRequest - SyncInferRequest");

-    auto proftype = config.get<PROFILING_TYPE>();
-    if (proftype == ov::intel_npu::ProfilingType::INFER) {
-        _logger.debug("ZeroInferRequest::ZeroInferRequest - profiling type == ov::intel_npu::ProfilingType::INFER");
-        _npuProfiling = std::make_shared<zeroProfiling::NpuInferProfiling>(_initStructs, _config.get<LOG_LEVEL>());
-    }
-
     _outputAllocator = std::make_shared<const zeroMemory::HostMemAllocator>(_initStructs);
     _inputAllocator =
         std::make_shared<const zeroMemory::HostMemAllocator>(_initStructs, ZE_HOST_MEM_ALLOC_FLAG_BIAS_WRITE_COMBINED);
@@ -195,14 +187,8 @@ void ZeroInferRequest::create_pipeline() {
     _logger.debug("ZeroInferRequest::create_pipeline - constructing pipeline");

     // Construct pipeline
-    _pipeline = std::make_unique<Pipeline>(_config,
-                                           _initStructs,
-                                           _graph,
-                                           _profilingPool,
-                                           _profilingQuery,
-                                           _npuProfiling,
-                                           _levelZeroInputTensors,
-                                           _levelZeroOutputTensors);
+    _pipeline =
+        std::make_unique<Pipeline>(_config, _initStructs, _graph, _levelZeroInputTensors, _levelZeroOutputTensors);

     _logger.debug("ZeroInferRequest::create_pipeline - SyncInferRequest completed");
 }
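Read together, the two hunks above imply a slimmer `Pipeline` surface: the inference request no longer injects `_profilingPool`, `_profilingQuery`, or `_npuProfiling`, and the hunks below show it calling `update_graph_arguments*` and `get_profiling_info()` on the pipeline instead. The sketch below is a compilable stand-in for that inferred interface, with stub types in place of the real plugin headers; the parameter types are guesses from the call sites in this diff, not the actual declarations.

```cpp
#include <cstddef>
#include <cstdint>
#include <memory>
#include <vector>

// Stand-in types so the sketch compiles; the real ones live in the intel_npu
// plugin sources and are richer than shown here.
struct Config {};
struct ZeroInitStructsHolder {};
struct IGraph {};
struct ITensor {};
struct ProfilingInfo {};

// Sketch of the Pipeline surface implied by this diff: profiling objects are
// no longer passed in, and command-list update/close is folded into single
// update_graph_arguments* calls.
class Pipeline {
public:
    Pipeline(const Config& config,
             const std::shared_ptr<ZeroInitStructsHolder>& initStructs,
             const std::shared_ptr<IGraph>& graph,
             const std::vector<std::vector<std::shared_ptr<ITensor>>>& inputTensors,
             const std::vector<std::shared_ptr<ITensor>>& outputTensors);

    // Replaces the updateCommandList() + closeCommandList() pair at each call
    // site in this diff; closing is presumably handled internally now.
    void update_graph_arguments(uint32_t argIndex, const void* data, std::size_t byteSize);

    // Batched variant used from set_tensors(); the last argument selects the
    // command list of one batch entry (the old updateCommandListIndex()).
    void update_graph_arguments_batching(uint32_t argIndex, const void* data, std::size_t commandListIndex);

    // Profiling is now owned and reported by the pipeline itself
    // (see the get_profiling_info() hunk below).
    std::vector<ProfilingInfo> get_profiling_info() const;
};
```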
@@ -241,12 +227,11 @@ void ZeroInferRequest::set_tensor_data(const std::shared_ptr<ov::ITensor>& tensor,

         OPENVINO_ASSERT(levelZeroTensors->data(), "Empty buffer");

-        OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "updateCommandList");
-        _pipeline->updateCommandList(
+        OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "update_graph_arguments");
+        _pipeline->update_graph_arguments(
             isInput ? _graph->get_input_descriptors().at(index).idx : _graph->get_output_descriptors().at(index).idx,
             levelZeroTensors->data(),
             levelZeroTensors->get_byte_size());
-        _pipeline->closeCommandList();
     }
 }

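The rename is not cosmetic: the explicit `closeCommandList()` call disappears from this call site (and every other one below), which suggests `update_graph_arguments` re-seals the command list internally. A minimal sketch of the contrast, with placeholder types, assuming the close step really does move inside the call:

```cpp
#include <cstddef>
#include <cstdint>

// Hypothetical stand-in; the names mirror this diff, the bodies are placeholders.
struct PipelineSketch {
    void updateCommandList(uint32_t, const void*, std::size_t) {}
    void closeCommandList() {}

    // Assumed semantics: one call both patches the argument and re-seals.
    void update_graph_arguments(uint32_t argIndex, const void* data, std::size_t byteSize) {
        updateCommandList(argIndex, data, byteSize);
        closeCommandList();
    }
};

int main() {
    PipelineSketch pipeline;
    int buffer[4] = {};

    // Old call sites (removed in this diff): mutate, then seal explicitly.
    pipeline.updateCommandList(0, buffer, sizeof(buffer));
    pipeline.closeCommandList();

    // New call site: one call does both, so a forgotten close can no longer
    // leave the command list open.
    pipeline.update_graph_arguments(0, buffer, sizeof(buffer));
}
```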
@@ -269,12 +254,11 @@ void ZeroInferRequest::set_remote_tensor_data(const std::shared_ptr<ZeroRemoteTensor>& tensor,
         auto data = tensor->get_original_memory();
         OPENVINO_ASSERT(data, "Empty buffer");

-        OV_ITT_TASK_NEXT(ZERO_SET_REMOTE_TENSOR, "updateCommandList");
-        _pipeline->updateCommandList(
+        OV_ITT_TASK_NEXT(ZERO_SET_REMOTE_TENSOR, "update_graph_arguments");
+        _pipeline->update_graph_arguments(
             isInput ? _graph->get_input_descriptors().at(index).idx : _graph->get_output_descriptors().at(index).idx,
             data,
             tensor->get_byte_size());
-        _pipeline->closeCommandList();
     }
 }

@@ -384,12 +368,12 @@ void ZeroInferRequest::set_tensors(const ov::Output<const ov::Node>& port,
         }

         if (_pipelineIsCreated) {
-            OV_ITT_TASK_NEXT(SET_TENSORS, "updateCommandList");
-
             OPENVINO_ASSERT(data, "Empty buffer");
+            OV_ITT_TASK_NEXT(SET_TENSORS, "updateCommandList");

-            _pipeline->updateCommandListIndex(_graph->get_input_descriptors().at(foundPort.idx).idx, data, i);
-            _pipeline->closeCommandListIndex(i);
+            _pipeline->update_graph_arguments_batching(_graph->get_input_descriptors().at(foundPort.idx).idx,
+                                                       data,
+                                                       i);
         }
     }
 }
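For batched inputs, the `updateCommandListIndex()` / `closeCommandListIndex(i)` pair collapses the same way into `update_graph_arguments_batching`, where the trailing index selects the per-batch command list. A rough sketch of the loop shape with stand-in types (the real code above also does ITT tracing and asserts):

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical stand-ins; the real types come from OpenVINO / intel_npu.
struct TensorSketch {
    void* data = nullptr;
};

struct PipelineSketch {
    // One command list per batch entry; the last argument picks which one to
    // patch (replacing updateCommandListIndex + closeCommandListIndex).
    void update_graph_arguments_batching(uint32_t /*argIndex*/, const void* /*data*/, std::size_t /*entry*/) {}
};

// Shape of the set_tensors() loop above, simplified: user tensor i becomes
// the graph input argument of batch entry i.
void set_batched_input(PipelineSketch& pipeline, uint32_t argIndex, const std::vector<TensorSketch>& tensors) {
    for (std::size_t i = 0; i < tensors.size(); ++i) {
        pipeline.update_graph_arguments_batching(argIndex, tensors[i].data, i);
    }
}
```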
@@ -442,7 +426,6 @@ ov::SoPtr<ov::ITensor> ZeroInferRequest::get_tensor(const ov::Output<const ov::Node>& port) const {
 }

 void ZeroInferRequest::update_pipeline_if_memory_changed() {
-    bool closePipeline = false;
     size_t ioIndex = 0;

     for (const auto& levelZeroTensor : _levelZeroInputTensors) {
@@ -459,10 +442,9 @@ void ZeroInferRequest::update_pipeline_if_memory_changed() {
             _logger.debug("Update input graph descriptor with the new tensor");
             OPENVINO_ASSERT(zeroTensor->data(), "Empty buffer");

-            _pipeline->updateCommandList(_graph->get_input_descriptors().at(ioIndex).idx,
-                                         zeroTensor->data(),
-                                         zeroTensor->get_byte_size());
-            closePipeline = true;
+            _pipeline->update_graph_arguments(_graph->get_input_descriptors().at(ioIndex).idx,
+                                              zeroTensor->data(),
+                                              zeroTensor->get_byte_size());

             if (!inputDescriptor.isStateInput) {
                 zeroTensor->reset_memory_flag();
@@ -487,25 +469,18 @@ void ZeroInferRequest::update_pipeline_if_memory_changed() {
             _logger.debug("Update output graph descriptor with the new tensor");
             OPENVINO_ASSERT(zeroTensor->data(), "Empty buffer");

-            _pipeline->updateCommandList(_graph->get_output_descriptors().at(ioIndex).idx,
-                                         zeroTensor->data(),
-                                         zeroTensor->get_byte_size());
-            closePipeline = true;
+            _pipeline->update_graph_arguments(_graph->get_output_descriptors().at(ioIndex).idx,
+                                              zeroTensor->data(),
+                                              zeroTensor->get_byte_size());

             zeroTensor->reset_memory_flag();
         }

         ++ioIndex;
     }
-
-    if (closePipeline) {
-        _pipeline->closeCommandList();
-    }
 }

 void ZeroInferRequest::update_states_if_memory_changed() {
-    bool closePipeline = false;
-
     for (const auto& variableState : _variableStates) {
         auto zeroState = std::dynamic_pointer_cast<ZeroVariableState>(variableState._ptr);

@@ -522,27 +497,21 @@ void ZeroInferRequest::update_states_if_memory_changed() {

                 void* userBuffer = !remoteTensor ? zeroState->get_state()->data() : remoteTensor->get_original_memory();

-                _pipeline->updateCommandList(_graphInputDescriptors.at(zeroState->get_tensor_index()).idx,
-                                             userBuffer,
-                                             zeroState->get_state()->get_byte_size());
+                _pipeline->update_graph_arguments(_graphInputDescriptors.at(zeroState->get_tensor_index()).idx,
+                                                  userBuffer,
+                                                  zeroState->get_state()->get_byte_size());

-                _pipeline->updateCommandList(_graphOutputDescriptors.at(zeroState->get_related_tensor_index()).idx,
-                                             userBuffer,
-                                             zeroState->get_state()->get_byte_size());
+                _pipeline->update_graph_arguments(_graphOutputDescriptors.at(zeroState->get_related_tensor_index()).idx,
+                                                  userBuffer,
+                                                  zeroState->get_state()->get_byte_size());

                 zeroState->reset_zero_tensor_updated_flag();

                 get_level_zero_input(zeroState->get_tensor_index()) = zeroState->get_state()._ptr;
                 _levelZeroOutputTensors.at(zeroState->get_related_tensor_index()) = zeroState->get_state()._ptr;
-
-                closePipeline = true;
             }
         }
     }
-
-    if (closePipeline) {
-        _pipeline->closeCommandList();
-    }
 }

 void ZeroInferRequest::infer() {
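Both memory-changed walkers lose the `closePipeline` accumulator: previously they recorded whether any argument had been patched and sealed the command list once at the end, whereas now each `update_graph_arguments` call is self-contained. Whether the pipeline defers or coalesces the close internally is not visible in this diff. A side-by-side sketch of the old and new loop shapes, with hypothetical stand-in types:

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical stand-ins for illustration only.
struct PipelineSketch {
    void updateCommandList(uint32_t, const void*, std::size_t) {}
    void closeCommandList() {}
    void update_graph_arguments(uint32_t argIndex, const void* data, std::size_t byteSize) {
        updateCommandList(argIndex, data, byteSize);
        closeCommandList();  // assumed: sealing now happens per call
    }
};

struct StateSketch {
    uint32_t inputIdx = 0;
    uint32_t outputIdx = 0;
    void* buffer = nullptr;
    std::size_t size = 0;
    bool updated = false;
};

// Old shape: accumulate a flag across the loop, seal the command list once.
void update_states_old(PipelineSketch& pipeline, std::vector<StateSketch>& states) {
    bool closePipeline = false;
    for (auto& state : states) {
        if (!state.updated) {
            continue;
        }
        pipeline.updateCommandList(state.inputIdx, state.buffer, state.size);
        pipeline.updateCommandList(state.outputIdx, state.buffer, state.size);
        closePipeline = true;
    }
    if (closePipeline) {
        pipeline.closeCommandList();
    }
}

// New shape: no flag and no trailing close; each update is self-contained.
void update_states_new(PipelineSketch& pipeline, std::vector<StateSketch>& states) {
    for (auto& state : states) {
        if (!state.updated) {
            continue;
        }
        pipeline.update_graph_arguments(state.inputIdx, state.buffer, state.size);
        pipeline.update_graph_arguments(state.outputIdx, state.buffer, state.size);
    }
}
```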
@@ -749,31 +718,9 @@ void ZeroInferRequest::check_network_precision(const ov::element::Type_t precision,
 }

 std::vector<ov::ProfilingInfo> ZeroInferRequest::get_profiling_info() const {
-    _logger.debug("InferRequest::get_profiling_info started");
-    const auto& compiledModel = *std::dynamic_pointer_cast<const ICompiledModel>(_compiledModel);
-    const auto& compilerConfig = compiledModel.get_config();
-    if (!compilerConfig.get<PERF_COUNT>() || !_config.get<PERF_COUNT>()) {
-        _logger.warning("InferRequest::get_profiling_info complete with empty {}.");
-        return {};
-    }
+    OPENVINO_ASSERT(_pipeline, "Profiling information isn't available before running an inference!");

-    auto compilerType = compilerConfig.get<COMPILER_TYPE>();
-    if (compilerType == ov::intel_npu::CompilerType::MLIR) {
-        // For plugin compiler retreive raw profiling data from backend and delegate
-        // processing to the compiler
-        auto profData = get_raw_profiling_data();
-        _logger.debug("InferRequest::get_profiling_info complete with compiler->process_profiling_output().");
-        return _graph->process_profiling_output(profData, compilerConfig);
-    } else {
-        auto proftype = _config.get<PROFILING_TYPE>();
-        if (proftype == ov::intel_npu::ProfilingType::INFER) {
-            _logger.debug("InferRequest::get_profiling_info complete with _npuProfiling->getNpuInferStatistics().");
-            return _npuProfiling->getNpuInferStatistics();
-        } else {  /// proftype = MODEL or undefined = fallback to model profiling
-            _logger.debug("InferRequest::get_profiling_info complete with _profilingQuery.getLayerStatistics().");
-            return _profilingQuery.getLayerStatistics();
-        }
-    }
+    return _pipeline->get_profiling_info();
 }

 std::shared_ptr<ov::ITensor> ZeroInferRequest::create_tensor(ov::element::Type type,
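`get_profiling_info()` now just guards on `_pipeline` and forwards; the `PERF_COUNT` and compiler-type dispatch presumably moved into `Pipeline::get_profiling_info()` along with the profiling objects. One caller-visible consequence, sketched below with stand-in types: querying profiling before the first inference now trips the assert instead of quietly returning an empty vector.

```cpp
#include <memory>
#include <stdexcept>
#include <vector>

// Hypothetical stand-ins; not the plugin's real classes.
struct ProfilingInfo {};
struct PipelineSketch {
    std::vector<ProfilingInfo> get_profiling_info() const { return {}; }
};

struct InferRequestSketch {
    std::unique_ptr<PipelineSketch> pipeline;  // created lazily, e.g. on first infer()

    std::vector<ProfilingInfo> get_profiling_info() const {
        if (!pipeline) {  // stands in for OPENVINO_ASSERT(_pipeline, ...)
            throw std::runtime_error("Profiling information isn't available before running an inference!");
        }
        return pipeline->get_profiling_info();  // pipeline owns the pool/query now
    }
};
```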
@@ -797,10 +744,6 @@ void ZeroInferRequest::add_state(const IODescriptor& descriptor, size_t tensorIndex) {
                                                            _config));
 }

-std::vector<uint8_t> ZeroInferRequest::get_raw_profiling_data() const {
-    return _profilingQuery.getData<uint8_t>();
-}
-
 std::shared_ptr<ov::ITensor>& ZeroInferRequest::get_level_zero_input(size_t index, size_t tensorNo) const {
     return _levelZeroInputTensors.at(index).at(tensorNo);
 }