Skip to content

Commit 3c1f707

Browse files
committed
Add fix for new CIP optimization
1 parent 2f7ec86 commit 3c1f707

File tree

4 files changed

+55
-43
lines changed

4 files changed

+55
-43
lines changed

src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp

+40-36
Original file line numberDiff line numberDiff line change
@@ -13,74 +13,78 @@ namespace intel_npu {
1313

1414
class BlobContainer {
1515
public:
16-
virtual void* get_ptr() = 0;
16+
BlobContainer() = default;
1717

18-
virtual size_t size() const = 0;
18+
BlobContainer(std::vector<uint8_t> blob) : _blob(std::move(blob)) {}
1919

20-
virtual bool release_from_memory() = 0;
21-
22-
virtual std::vector<uint8_t> get_ownership_blob() = 0;
23-
24-
virtual ~BlobContainer() = default;
25-
};
26-
27-
class BlobContainerVector : public BlobContainer {
28-
public:
29-
BlobContainerVector(std::vector<uint8_t> blob) : _ownershipBlob(std::move(blob)) {}
30-
31-
void* get_ptr() override {
32-
return reinterpret_cast<void*>(_ownershipBlob.data());
20+
virtual const void* get_ptr() const {
21+
return _blob.data();
3322
}
3423

35-
size_t size() const override {
36-
return _ownershipBlob.size();
24+
virtual size_t size() const {
25+
return _blob.size();
3726
}
3827

39-
bool release_from_memory() override {
40-
_ownershipBlob.clear();
41-
_ownershipBlob.shrink_to_fit();
42-
return true;
28+
virtual bool release_from_memory() const {
29+
if (_shouldDeallocate) {
30+
_blob.clear();
31+
_blob.shrink_to_fit();
32+
return true;
33+
}
34+
_shouldDeallocate = true;
35+
return false;
4336
}
4437

45-
std::vector<uint8_t> get_ownership_blob() override {
46-
return _ownershipBlob;
38+
virtual const std::vector<uint8_t>& get_blob() const {
39+
// when underlying blob object was accessed,
40+
// prevent deallocation on next `release_from_memory` call
41+
_shouldDeallocate = false;
42+
return _blob;
4743
}
4844

45+
virtual ~BlobContainer() = default;
46+
47+
protected:
48+
mutable std::vector<uint8_t> _blob;
49+
4950
private:
50-
std::vector<uint8_t> _ownershipBlob;
51+
mutable bool _shouldDeallocate = true;
5152
};
5253

5354
class BlobContainerAlignedBuffer : public BlobContainer {
5455
public:
5556
BlobContainerAlignedBuffer(const std::shared_ptr<ov::AlignedBuffer>& blobSO,
5657
size_t ovHeaderOffset,
5758
uint64_t blobSize)
58-
: _blobSize(blobSize),
59+
: _size(blobSize),
5960
_ovHeaderOffset(ovHeaderOffset),
60-
_ownershipBlob(blobSO) {}
61+
_blobSO(blobSO) {}
6162

62-
void* get_ptr() override {
63-
return _ownershipBlob->get_ptr(_ovHeaderOffset);
63+
const void* get_ptr() const override {
64+
return _blobSO->get_ptr(_ovHeaderOffset);
6465
}
6566

6667
size_t size() const override {
67-
return _blobSize;
68+
return _size;
6869
}
6970

70-
bool release_from_memory() override {
71+
bool release_from_memory() const override {
72+
BlobContainer::release_from_memory();
7173
return false;
7274
}
7375

74-
std::vector<uint8_t> get_ownership_blob() override {
75-
std::vector<uint8_t> blob(_blobSize);
76-
blob.assign(reinterpret_cast<const uint8_t*>(this->get_ptr()), reinterpret_cast<const uint8_t*>(this->get_ptr()) + this->size());
77-
return blob;
76+
const std::vector<uint8_t>& get_blob() const override {
77+
BlobContainer::release_from_memory();
78+
_blob.resize(_size);
79+
_blob.assign(reinterpret_cast<const uint8_t*>(this->get_ptr()),
80+
reinterpret_cast<const uint8_t*>(this->get_ptr()) + _size);
81+
return _blob;
7882
}
7983

8084
private:
81-
uint64_t _blobSize;
85+
uint64_t _size;
8286
size_t _ovHeaderOffset;
83-
std::shared_ptr<ov::AlignedBuffer> _ownershipBlob;
87+
std::shared_ptr<ov::AlignedBuffer> _blobSO;
8488
};
8589

8690
} // namespace intel_npu

src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp

+3-2
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compile(const std::shared_ptr<con
8080

8181
_logger.debug("compile start");
8282
auto networkDesc = _compiler->compile(model, config);
83-
auto blobPtr = std::make_unique<BlobContainerVector>(std::move(networkDesc.compiledNetwork));
83+
auto blobPtr = std::make_unique<BlobContainer>(std::move(networkDesc.compiledNetwork));
8484
_logger.debug("compile end");
8585

8686
ze_graph_handle_t graphHandle = nullptr;
@@ -110,8 +110,9 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::parse(std::unique_ptr<BlobContain
110110
OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "parse");
111111

112112
_logger.debug("parse start");
113-
const auto& blob = blobPtr->get_ownership_blob();
113+
const auto& blob = blobPtr->get_blob();
114114
auto networkMeta = _compiler->parse(blob, config);
115+
blobPtr->release_from_memory();
115116
_logger.debug("parse end");
116117

117118
ze_graph_handle_t graphHandle = nullptr;

src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp

+9-2
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,15 @@ size_t PluginGraph::export_blob(std::ostream& stream) const {
5656

5757
std::vector<ov::ProfilingInfo> PluginGraph::process_profiling_output(const std::vector<uint8_t>& profData,
5858
const Config& config) const {
59-
const auto& blob = _blobPtr->get_ownership_blob();
60-
return _compiler->process_profiling_output(profData, blob, config);
59+
std::vector<ov::ProfilingInfo> profilingInfo;
60+
const auto& blob = _blobPtr->get_blob();
61+
try {
62+
profilingInfo = _compiler->process_profiling_output(profData, blob, config);
63+
} catch (const std::exception& ex) {
64+
_logger.error(ex.what());
65+
}
66+
_blobPtr->release_from_memory();
67+
return profilingInfo;
6168
}
6269

6370
void PluginGraph::set_argument_value(uint32_t argi, const void* argv) const {

src/plugins/intel_npu/src/plugin/src/plugin.cpp

+3-3
Original file line numberDiff line numberDiff line change
@@ -636,13 +636,13 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<
636636
auto localConfig = merge_configs(_globalConfig, localPropertiesMap);
637637
update_log_level(localPropertiesMap);
638638

639-
const auto set_cache_dir = localConfig.get<CACHE_DIR>();
639+
/* const auto set_cache_dir = localConfig.get<CACHE_DIR>();
640640
if (!set_cache_dir.empty()) {
641641
const auto compilerType = localConfig.get<COMPILER_TYPE>();
642642
if (compilerType == ov::intel_npu::CompilerType::MLIR) {
643643
OPENVINO_THROW("Option 'CACHE_DIR' is not supported with MLIR compiler type");
644644
}
645-
}
645+
} */
646646

647647
const auto platform = _backends->getCompilationPlatform(localConfig.get<PLATFORM>(), localConfig.get<DEVICE_ID>());
648648
auto device = _backends->getDevice(localConfig.get<DEVICE_ID>());
@@ -806,7 +806,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c
806806
}
807807
_logger.debug("Successfully read %zu bytes into blob.", graphSize);
808808

809-
blobPtr = std::make_unique<BlobContainerVector>(std::move(blob));
809+
blobPtr = std::make_unique<BlobContainer>(std::move(blob));
810810
} else {
811811
blobPtr = std::make_unique<BlobContainerAlignedBuffer>(modelBuffer, stream.tellg(), graphSize);
812812
}

0 commit comments

Comments
 (0)