
Commit e48c323

[NPU] Refactoring Pipeline object (#29679)
### Details:
- *Refactoring Pipeline object*

### Ticket:
- *CVS-164942*

---------

Signed-off-by: Bogdan Pereanu <bogdan.pereanu@intel.com>
1 parent 30bddee commit e48c323

6 files changed: +77 −130 lines changed

src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp (-5)

@@ -13,7 +13,6 @@
 #include "intel_npu/utils/zero/zero_utils.hpp"
 #include "intel_npu/utils/zero/zero_wrappers.hpp"
 #include "zero_pipeline.hpp"
-#include "zero_profiling.hpp"
 #include "zero_remote_tensor.hpp"
 #include "zero_tensor.hpp"

@@ -37,7 +36,6 @@ class ZeroInferRequest final : public SyncInferRequest {

 private:
     std::vector<ov::ProfilingInfo> get_profiling_info() const override;
-    std::vector<uint8_t> get_raw_profiling_data() const;

     /**
      * @brief Check the received tensor and set the Level Zero tensor accordingly

@@ -88,9 +86,6 @@ class ZeroInferRequest final : public SyncInferRequest {
     std::shared_ptr<const zeroMemory::HostMemAllocator> _inputAllocator;
     std::shared_ptr<const zeroMemory::HostMemAllocator> _outputAllocator;

-    zeroProfiling::ProfilingPool _profilingPool;
-    zeroProfiling::ProfilingQuery _profilingQuery;
-    std::shared_ptr<zeroProfiling::NpuInferProfiling> _npuProfiling;
     std::unique_ptr<Pipeline> _pipeline;

     bool _pipelineIsCreated = false;

src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp (+6 -8)

@@ -18,9 +18,6 @@ struct Pipeline {
     Pipeline(const Config& config,
              const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
              const std::shared_ptr<IGraph>& graph,
-             zeroProfiling::ProfilingPool& profiling_pool,
-             zeroProfiling::ProfilingQuery& profiling_query,
-             const std::shared_ptr<zeroProfiling::NpuInferProfiling>& npu_profiling,
              const std::vector<std::vector<std::shared_ptr<ov::ITensor>>>& input_tensors,
              const std::vector<std::shared_ptr<ov::ITensor>>& output_tensors);

@@ -32,17 +29,19 @@ struct Pipeline {
     void pull();
     void reset() const;

-    void updateCommandList(uint32_t arg_index, const void* arg_data, size_t byte_size);
-    void updateCommandListIndex(uint32_t arg_index, const void* arg_data, size_t command_list_index);
+    void update_graph_arguments(uint32_t arg_index, const void* arg_data, size_t byte_size);
+    void update_graph_arguments_batching(uint32_t arg_index, const void* arg_data, size_t batch_index);

-    void closeCommandList();
-    void closeCommandListIndex(size_t command_list_index);
+    std::vector<ov::ProfilingInfo> get_profiling_info() const;

 protected:
     std::shared_ptr<IGraph> _graph;
     const Config _config;
     const uint32_t _id;

+    std::unique_ptr<zeroProfiling::ProfilingQuery> _profiling_query;
+    std::shared_ptr<zeroProfiling::NpuInferProfiling> _npu_profiling;
+
     /**
      * @brief Indicates how many command lists will be used inside the pipeline.
      * @details Leveraging multiple command lists implies distributing the input/output buffers accross the batch axis

@@ -59,7 +58,6 @@ struct Pipeline {
     std::shared_ptr<EventPool> _event_pool;
     std::vector<std::shared_ptr<Event>> _events;
     bool _sync_output_with_fences = true;
-    std::shared_ptr<zeroProfiling::NpuInferProfiling> _npu_profiling;
     Logger _logger;
 };
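A minimal sketch of how a caller exercises the renamed Pipeline surface, based solely on the signatures above (the wrapper functions and variable names are hypothetical, not plugin code):

```cpp
// Sketch only. update_graph_arguments() replaces the former
// updateCommandList()/closeCommandList() pair, so callers no longer
// close command lists explicitly after mutating graph arguments.
void rebind_argument(Pipeline& pipeline, uint32_t arg_index, const std::shared_ptr<ov::ITensor>& tensor) {
    pipeline.update_graph_arguments(arg_index, tensor->data(), tensor->get_byte_size());
}

// With batching, each command list is updated per batch entry instead:
void rebind_batched_argument(Pipeline& pipeline, uint32_t arg_index, const void* data, size_t batch_index) {
    pipeline.update_graph_arguments_batching(arg_index, data, batch_index);
}
```

Profiling also moves behind the Pipeline: the profiling query and NPU profiling objects become Pipeline members, and callers retrieve results through `Pipeline::get_profiling_info()` instead of owning the query objects themselves.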

src/plugins/intel_npu/src/backend/include/zero_profiling.hpp (+3 -1)

@@ -50,7 +50,7 @@ struct ProfilingQuery {
           _index(index) {}
     ProfilingQuery(const ProfilingQuery&) = delete;
     ProfilingQuery& operator=(const ProfilingQuery&) = delete;
-    void create(const ze_graph_profiling_pool_handle_t& profiling_pool);
+    void create(const std::shared_ptr<ProfilingPool>& profiling_pool);
     ze_graph_profiling_query_handle_t getHandle() const {
         return _handle;
     }

@@ -67,6 +67,8 @@
     std::shared_ptr<ZeroInitStructsHolder> _init_structs;
     const uint32_t _index;

+    std::shared_ptr<ProfilingPool> _profiling_pool = nullptr;
+
     ze_graph_profiling_query_handle_t _handle = nullptr;
 };
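The `shared_ptr`-based `create()` means the query now co-owns its pool, so the pool can no longer be destroyed while a query still references its Level Zero handle. A hedged sketch of the new lifetime contract (the ProfilingPool constructor arguments are copied from the initializer removed in zero_infer_request.cpp below and may not match the post-refactoring signature exactly):

```cpp
// Sketch: ProfilingQuery stores the pool in its _profiling_pool member.
auto pool = std::make_shared<zeroProfiling::ProfilingPool>(init_structs, graph, zeroProfiling::POOL_SIZE);
auto query = std::make_unique<zeroProfiling::ProfilingQuery>(init_structs, /*index=*/0);

query->create(pool);  // copies the shared_ptr, extending the pool's lifetime
pool.reset();         // safe: the query's copy keeps the pool handle valid
```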

src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp (+25 -82)

@@ -78,17 +78,9 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&
       _graphInputDescriptors(_graph->get_input_descriptors()),
       _graphOutputDescriptors(_graph->get_output_descriptors()),
       _levelZeroInputTensors(_metadata.inputs.size(), std::vector<std::shared_ptr<ov::ITensor>>(1, nullptr)),
-      _levelZeroOutputTensors(_metadata.outputs.size(), nullptr),
-      _profilingPool(_initStructs, _graph, zeroProfiling::POOL_SIZE),
-      _profilingQuery(_initStructs, 0) {
+      _levelZeroOutputTensors(_metadata.outputs.size(), nullptr) {
     _logger.debug("ZeroInferRequest::ZeroInferRequest - SyncInferRequest");

-    auto proftype = config.get<PROFILING_TYPE>();
-    if (proftype == ov::intel_npu::ProfilingType::INFER) {
-        _logger.debug("ZeroInferRequest::ZeroInferRequest - profiling type == ov::intel_npu::ProfilingType::INFER");
-        _npuProfiling = std::make_shared<zeroProfiling::NpuInferProfiling>(_initStructs, _config.get<LOG_LEVEL>());
-    }
-
     _outputAllocator = std::make_shared<const zeroMemory::HostMemAllocator>(_initStructs);
     _inputAllocator =
         std::make_shared<const zeroMemory::HostMemAllocator>(_initStructs, ZE_HOST_MEM_ALLOC_FLAG_BIAS_WRITE_COMBINED);

@@ -195,14 +187,8 @@ void ZeroInferRequest::create_pipeline() {
     _logger.debug("ZeroInferRequest::create_pipeline - constructing pipeline");

     // Construct pipeline
-    _pipeline = std::make_unique<Pipeline>(_config,
-                                           _initStructs,
-                                           _graph,
-                                           _profilingPool,
-                                           _profilingQuery,
-                                           _npuProfiling,
-                                           _levelZeroInputTensors,
-                                           _levelZeroOutputTensors);
+    _pipeline =
+        std::make_unique<Pipeline>(_config, _initStructs, _graph, _levelZeroInputTensors, _levelZeroOutputTensors);

     _logger.debug("ZeroInferRequest::create_pipeline - SyncInferRequest completed");
 }

@@ -241,12 +227,11 @@ void ZeroInferRequest::set_tensor_data(const std::shared_ptr<ov::ITensor>& tenso

         OPENVINO_ASSERT(levelZeroTensors->data(), "Empty buffer");

-        OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "updateCommandList");
-        _pipeline->updateCommandList(
+        OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "update_graph_arguments");
+        _pipeline->update_graph_arguments(
             isInput ? _graph->get_input_descriptors().at(index).idx : _graph->get_output_descriptors().at(index).idx,
             levelZeroTensors->data(),
             levelZeroTensors->get_byte_size());
-        _pipeline->closeCommandList();
     }
 }

@@ -269,12 +254,11 @@ void ZeroInferRequest::set_remote_tensor_data(const std::shared_ptr<ZeroRemoteTe
         auto data = tensor->get_original_memory();
         OPENVINO_ASSERT(data, "Empty buffer");

-        OV_ITT_TASK_NEXT(ZERO_SET_REMOTE_TENSOR, "updateCommandList");
-        _pipeline->updateCommandList(
+        OV_ITT_TASK_NEXT(ZERO_SET_REMOTE_TENSOR, "update_graph_arguments");
+        _pipeline->update_graph_arguments(
             isInput ? _graph->get_input_descriptors().at(index).idx : _graph->get_output_descriptors().at(index).idx,
             data,
             tensor->get_byte_size());
-        _pipeline->closeCommandList();
     }
 }

@@ -384,12 +368,12 @@ void ZeroInferRequest::set_tensors(const ov::Output<const ov::Node>& port,
         }

         if (_pipelineIsCreated) {
-            OV_ITT_TASK_NEXT(SET_TENSORS, "updateCommandList");
-
             OPENVINO_ASSERT(data, "Empty buffer");
+            OV_ITT_TASK_NEXT(SET_TENSORS, "updateCommandList");

-            _pipeline->updateCommandListIndex(_graph->get_input_descriptors().at(foundPort.idx).idx, data, i);
-            _pipeline->closeCommandListIndex(i);
+            _pipeline->update_graph_arguments_batching(_graph->get_input_descriptors().at(foundPort.idx).idx,
+                                                       data,
+                                                       i);
         }
     }
 }

@@ -442,7 +426,6 @@ ov::SoPtr<ov::ITensor> ZeroInferRequest::get_tensor(const ov::Output<const ov::N
 }

 void ZeroInferRequest::update_pipeline_if_memory_changed() {
-    bool closePipeline = false;
     size_t ioIndex = 0;

     for (const auto& levelZeroTensor : _levelZeroInputTensors) {

@@ -459,10 +442,9 @@ void ZeroInferRequest::update_pipeline_if_memory_changed() {
             _logger.debug("Update input graph descriptor with the new tensor");
             OPENVINO_ASSERT(zeroTensor->data(), "Empty buffer");

-            _pipeline->updateCommandList(_graph->get_input_descriptors().at(ioIndex).idx,
-                                         zeroTensor->data(),
-                                         zeroTensor->get_byte_size());
-            closePipeline = true;
+            _pipeline->update_graph_arguments(_graph->get_input_descriptors().at(ioIndex).idx,
+                                              zeroTensor->data(),
+                                              zeroTensor->get_byte_size());

             if (!inputDescriptor.isStateInput) {
                 zeroTensor->reset_memory_flag();

@@ -487,25 +469,18 @@ void ZeroInferRequest::update_pipeline_if_memory_changed() {
             _logger.debug("Update output graph descriptor with the new tensor");
             OPENVINO_ASSERT(zeroTensor->data(), "Empty buffer");

-            _pipeline->updateCommandList(_graph->get_output_descriptors().at(ioIndex).idx,
-                                         zeroTensor->data(),
-                                         zeroTensor->get_byte_size());
-            closePipeline = true;
+            _pipeline->update_graph_arguments(_graph->get_output_descriptors().at(ioIndex).idx,
+                                              zeroTensor->data(),
+                                              zeroTensor->get_byte_size());

             zeroTensor->reset_memory_flag();
         }

         ++ioIndex;
     }
-
-    if (closePipeline) {
-        _pipeline->closeCommandList();
-    }
 }

 void ZeroInferRequest::update_states_if_memory_changed() {
-    bool closePipeline = false;
-
     for (const auto& variableState : _variableStates) {
         auto zeroState = std::dynamic_pointer_cast<ZeroVariableState>(variableState._ptr);

@@ -522,27 +497,21 @@ void ZeroInferRequest::update_states_if_memory_changed() {

                 void* userBuffer = !remoteTensor ? zeroState->get_state()->data() : remoteTensor->get_original_memory();

-                _pipeline->updateCommandList(_graphInputDescriptors.at(zeroState->get_tensor_index()).idx,
-                                             userBuffer,
-                                             zeroState->get_state()->get_byte_size());
+                _pipeline->update_graph_arguments(_graphInputDescriptors.at(zeroState->get_tensor_index()).idx,
+                                                  userBuffer,
+                                                  zeroState->get_state()->get_byte_size());

-                _pipeline->updateCommandList(_graphOutputDescriptors.at(zeroState->get_related_tensor_index()).idx,
-                                             userBuffer,
-                                             zeroState->get_state()->get_byte_size());
+                _pipeline->update_graph_arguments(_graphOutputDescriptors.at(zeroState->get_related_tensor_index()).idx,
+                                                  userBuffer,
+                                                  zeroState->get_state()->get_byte_size());

                 zeroState->reset_zero_tensor_updated_flag();

                 get_level_zero_input(zeroState->get_tensor_index()) = zeroState->get_state()._ptr;
                 _levelZeroOutputTensors.at(zeroState->get_related_tensor_index()) = zeroState->get_state()._ptr;
-
-                closePipeline = true;
             }
         }
     }
-
-    if (closePipeline) {
-        _pipeline->closeCommandList();
-    }
 }

 void ZeroInferRequest::infer() {

@@ -749,31 +718,9 @@ void ZeroInferRequest::check_network_precision(const ov::element::Type_t precisi
 }

 std::vector<ov::ProfilingInfo> ZeroInferRequest::get_profiling_info() const {
-    _logger.debug("InferRequest::get_profiling_info started");
-    const auto& compiledModel = *std::dynamic_pointer_cast<const ICompiledModel>(_compiledModel);
-    const auto& compilerConfig = compiledModel.get_config();
-    if (!compilerConfig.get<PERF_COUNT>() || !_config.get<PERF_COUNT>()) {
-        _logger.warning("InferRequest::get_profiling_info complete with empty {}.");
-        return {};
-    }
+    OPENVINO_ASSERT(_pipeline, "Profiling information isn't available before running an inference!");

-    auto compilerType = compilerConfig.get<COMPILER_TYPE>();
-    if (compilerType == ov::intel_npu::CompilerType::MLIR) {
-        // For plugin compiler retreive raw profiling data from backend and delegate
-        // processing to the compiler
-        auto profData = get_raw_profiling_data();
-        _logger.debug("InferRequest::get_profiling_info complete with compiler->process_profiling_output().");
-        return _graph->process_profiling_output(profData, compilerConfig);
-    } else {
-        auto proftype = _config.get<PROFILING_TYPE>();
-        if (proftype == ov::intel_npu::ProfilingType::INFER) {
-            _logger.debug("InferRequest::get_profiling_info complete with _npuProfiling->getNpuInferStatistics().");
-            return _npuProfiling->getNpuInferStatistics();
-        } else { /// proftype = MODEL or undefined = fallback to model profiling
-            _logger.debug("InferRequest::get_profiling_info complete with _profilingQuery.getLayerStatistics().");
-            return _profilingQuery.getLayerStatistics();
-        }
-    }
+    return _pipeline->get_profiling_info();
 }

 std::shared_ptr<ov::ITensor> ZeroInferRequest::create_tensor(ov::element::Type type,

@@ -797,10 +744,6 @@ void ZeroInferRequest::add_state(const IODescriptor& descriptor, size_t tensorIn
                                       _config));
 }

-std::vector<uint8_t> ZeroInferRequest::get_raw_profiling_data() const {
-    return _profilingQuery.getData<uint8_t>();
-}
-
 std::shared_ptr<ov::ITensor>& ZeroInferRequest::get_level_zero_input(size_t index, size_t tensorNo) const {
     return _levelZeroInputTensors.at(index).at(tensorNo);
 }
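The user-facing profiling flow is unchanged by this refactoring; only the internal ownership moved into the Pipeline. A minimal sketch through the public OpenVINO API (the model path and device name are placeholders; profiling still has to be enabled, e.g. via `ov::enable_profiling`):

```cpp
#include <iostream>
#include <openvino/openvino.hpp>

int main() {
    ov::Core core;
    // "model.xml" and "NPU" are placeholder values for this sketch.
    auto compiled = core.compile_model("model.xml", "NPU", ov::enable_profiling(true));
    auto request = compiled.create_infer_request();
    request.infer();

    // After this commit, the plugin asserts that the Pipeline exists (i.e. an
    // inference ran) before profiling data can be returned.
    for (const auto& info : request.get_profiling_info()) {
        std::cout << info.node_name << ": " << info.real_time.count() << " us\n";
    }
    return 0;
}
```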
