Skip to content

Commit 0ffe5d6

Browse files
committed
Revert changes for CIP Optimization
1 parent 3748c0b commit 0ffe5d6

File tree

5 files changed

+88
-75
lines changed

5 files changed

+88
-75
lines changed

src/plugins/intel_npu/src/common/include/intel_npu/common/blob_container.hpp

+31-40
Original file line number | Diff line number | Diff line change
@@ -13,50 +13,50 @@ namespace intel_npu {
1313

1414
class BlobContainer {
1515
public:
16-
BlobContainer() = default;
16+
/**
17+
* @brief Returns the address at the beginning of the blob.
18+
*/
19+
virtual const void* get_ptr() const = 0;
1720

18-
BlobContainer(std::vector<uint8_t> blob) : _blob(std::move(blob)) {}
21+
/**
22+
* @brief Size of the blob.
23+
*/
24+
virtual size_t size() const = 0;
1925

20-
virtual const void* get_ptr() const {
21-
return _blob.data();
22-
}
26+
/**
27+
* @brief Returns true if the blob can be deallocated from memory, false otherwise.
28+
*/
29+
virtual bool release_from_memory() = 0;
2330

24-
virtual size_t size() const {
25-
return _blob.size();
26-
}
31+
virtual ~BlobContainer() = default;
32+
};
2733

28-
virtual bool release_from_memory() const {
29-
if (_shouldDeallocate) {
30-
_blob.clear();
31-
_blob.shrink_to_fit();
32-
return true;
33-
}
34-
_shouldDeallocate = true;
35-
return false;
36-
}
34+
class BlobContainerVector : public BlobContainer {
35+
public:
36+
BlobContainerVector(std::vector<uint8_t> blob) : _blob(std::move(blob)) {}
3737

38-
virtual const std::vector<uint8_t>& get_blob() const {
39-
// when underlying blob object was accessed,
40-
// prevent deallocation on next `release_from_memory` call
41-
_shouldDeallocate = false;
42-
return _blob;
38+
const void* get_ptr() const override {
39+
return reinterpret_cast<const void*>(_blob.data());
4340
}
4441

45-
virtual ~BlobContainer() = default;
42+
size_t size() const override {
43+
return _blob.size();
44+
}
4645

47-
protected:
48-
mutable std::vector<uint8_t> _blob;
46+
bool release_from_memory() override {
47+
_blob.clear();
48+
_blob.shrink_to_fit();
49+
return true;
50+
}
4951

5052
private:
51-
mutable bool _shouldDeallocate = true;
53+
std::vector<uint8_t> _blob;
5254
};
5355

5456
class BlobContainerAlignedBuffer : public BlobContainer {
5557
public:
56-
BlobContainerAlignedBuffer(const std::shared_ptr<ov::AlignedBuffer>& blobSO,
57-
size_t ovHeaderOffset,
58-
uint64_t blobSize)
59-
: _size(blobSize),
58+
BlobContainerAlignedBuffer(const std::shared_ptr<ov::AlignedBuffer>& blobSO, size_t ovHeaderOffset, uint64_t size)
59+
: _size(size),
6060
_ovHeaderOffset(ovHeaderOffset),
6161
_blobSO(blobSO) {}
6262

@@ -68,19 +68,10 @@ class BlobContainerAlignedBuffer : public BlobContainer {
6868
return _size;
6969
}
7070

71-
bool release_from_memory() const override {
72-
BlobContainer::release_from_memory();
71+
bool release_from_memory() override {
7372
return false;
7473
}
7574

76-
const std::vector<uint8_t>& get_blob() const override {
77-
BlobContainer::release_from_memory();
78-
_blob.resize(_size);
79-
_blob.assign(reinterpret_cast<const uint8_t*>(this->get_ptr()),
80-
reinterpret_cast<const uint8_t*>(this->get_ptr()) + _size);
81-
return _blob;
82-
}
83-
8475
private:
8576
uint64_t _size;
8677
size_t _ovHeaderOffset;

src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp

+8-4
Original file line number | Diff line number | Diff line change
@@ -80,7 +80,7 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compile(const std::shared_ptr<con
8080

8181
_logger.debug("compile start");
8282
auto networkDesc = _compiler->compile(model, config);
83-
auto blobPtr = std::make_unique<BlobContainer>(std::move(networkDesc.compiledNetwork));
83+
auto blobPtr = std::make_unique<BlobContainerVector>(std::move(networkDesc.compiledNetwork));
8484
_logger.debug("compile end");
8585

8686
ze_graph_handle_t graphHandle = nullptr;
@@ -110,9 +110,13 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::parse(std::unique_ptr<BlobContain
110110
OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "parse");
111111

112112
_logger.debug("parse start");
113-
const auto& blob = blobPtr->get_blob();
114-
auto networkMeta = _compiler->parse(blob, config);
115-
blobPtr->release_from_memory();
113+
// [Additional copy track number: E#153402]
114+
std::vector<uint8_t> network(blobPtr->size());
115+
network.assign(reinterpret_cast<const uint8_t*>(blobPtr->get_ptr()),
116+
reinterpret_cast<const uint8_t*>(blobPtr->get_ptr()) + blobPtr->size());
117+
auto networkMeta = _compiler->parse(network, config);
118+
network.clear();
119+
network.shrink_to_fit();
116120
_logger.debug("parse end");
117121

118122
ze_graph_handle_t graphHandle = nullptr;

src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp

+5-9
Original file line number | Diff line number | Diff line change
@@ -56,15 +56,11 @@ size_t PluginGraph::export_blob(std::ostream& stream) const {
5656

5757
std::vector<ov::ProfilingInfo> PluginGraph::process_profiling_output(const std::vector<uint8_t>& profData,
5858
const Config& config) const {
59-
std::vector<ov::ProfilingInfo> profilingInfo;
60-
const auto& blob = _blobPtr->get_blob();
61-
try {
62-
profilingInfo = _compiler->process_profiling_output(profData, blob, config);
63-
} catch (const std::exception& ex) {
64-
_logger.error(ex.what());
65-
}
66-
_blobPtr->release_from_memory();
67-
return profilingInfo;
59+
// [Additional copy track number: E#153402]
60+
std::vector<uint8_t> blob(_blobPtr->size());
61+
blob.assign(reinterpret_cast<const uint8_t*>(_blobPtr->get_ptr()),
62+
reinterpret_cast<const uint8_t*>(_blobPtr->get_ptr()) + _blobPtr->size());
63+
return _compiler->process_profiling_output(profData, blob, config);
6864
}
6965

7066
void PluginGraph::set_argument_value(uint32_t argi, const void* argv) const {

src/plugins/intel_npu/src/plugin/src/plugin.cpp

+3-3
Original file line number | Diff line number | Diff line change
@@ -636,13 +636,13 @@ std::shared_ptr<ov::ICompiledModel> Plugin::compile_model(const std::shared_ptr<
636636
auto localConfig = merge_configs(_globalConfig, localPropertiesMap);
637637
update_log_level(localPropertiesMap);
638638

639-
/* const auto set_cache_dir = localConfig.get<CACHE_DIR>();
639+
const auto set_cache_dir = localConfig.get<CACHE_DIR>();
640640
if (!set_cache_dir.empty()) {
641641
const auto compilerType = localConfig.get<COMPILER_TYPE>();
642642
if (compilerType == ov::intel_npu::CompilerType::MLIR) {
643643
OPENVINO_THROW("Option 'CACHE_DIR' is not supported with MLIR compiler type");
644644
}
645-
} */
645+
}
646646

647647
const auto platform = _backends->getCompilationPlatform(localConfig.get<PLATFORM>(), localConfig.get<DEVICE_ID>());
648648
auto device = _backends->getDevice(localConfig.get<DEVICE_ID>());
@@ -806,7 +806,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c
806806
}
807807
_logger.debug("Successfully read %zu bytes into blob.", graphSize);
808808

809-
blobPtr = std::make_unique<BlobContainer>(std::move(blob));
809+
blobPtr = std::make_unique<BlobContainerVector>(std::move(blob));
810810
} else {
811811
blobPtr = std::make_unique<BlobContainerAlignedBuffer>(modelBuffer, stream.tellg(), graphSize);
812812
}

src/plugins/intel_npu/tests/unit/npu/blob_container.cpp

+41-19
Original file line number | Diff line number | Diff line change
@@ -21,14 +21,22 @@
2121

2222
using namespace intel_npu;
2323

24-
using BlobContainerUnitTests = ::testing::Test;
25-
26-
namespace {
27-
const char* dummyBlobHeader = "blobwillstartafterspace correctblob!";
28-
const char* testCacheDir = "blob_container_test_cache_dir";
29-
const char* testFileName = "blob_container_test.blob";
24+
class BlobContainerUnitTests : public ::testing::Test {
25+
protected:
26+
void TearDown() override {
27+
ov::util::iterate_files(testCacheDir, [](const std::string& file, bool is_dir) {
28+
if (!is_dir) {
29+
ov::test::utils::removeFile(file);
30+
}
31+
});
32+
ov::test::utils::removeDir(testCacheDir);
33+
ov::test::utils::removeFile(testFileName);
34+
}
3035

31-
} // namespace
36+
const char* dummyBlobHeader = "blobwillstartafterspace ";
37+
const char* testCacheDir = "blob_container_test_cache_dir";
38+
const char* testFileName = "blob_container_test.blob";
39+
};
3240

3341
TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForCacheEnabled) {
3442
auto core = std::make_shared<ov::CoreImpl>();
@@ -59,18 +67,26 @@ TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForCacheEnabled) {
5967
auto inferRequest = compiledModel->create_infer_request();
6068
inferRequest->infer();
6169
OV_ASSERT_NO_THROW(auto profilingInfo = inferRequest->get_profiling_info());
70+
6271
auto outputFile =
63-
std::ofstream(std::filesystem::path(testCacheDir) / testFileName, std::ios::out | std::ios::binary);
72+
std::ofstream(ov::util::Path::path(testCacheDir) / testFileName, std::ios::out | std::ios::binary);
73+
std::ostringstream blobStream;
6474
OV_ASSERT_NO_THROW(compiledModel->export_model(outputFile));
75+
OV_ASSERT_NO_THROW(compiledModel->export_model(blobStream));
6576

6677
auto* compiledModelPtr = dynamic_cast<intel_npu::ICompiledModel*>(compiledModel._ptr.get());
6778
OPENVINO_ASSERT(compiledModelPtr != nullptr);
6879
const auto& blobContainer = compiledModelPtr->get_graph()->get_blob_container();
6980
auto* blobContainerAlignedBufferPtr =
7081
dynamic_cast<const intel_npu::BlobContainerAlignedBuffer*>(&blobContainer);
7182
OPENVINO_ASSERT(blobContainerAlignedBufferPtr != nullptr, "Cached blob should be memory mapped!");
83+
84+
// Expect output stream with metadata to be larger than actual blob size
85+
OPENVINO_ASSERT(outputFile.tellp() > 0 && blobContainer.size() > 0 &&
86+
static_cast<size_t>(outputFile.tellp()) > blobContainer.size());
87+
OPENVINO_ASSERT(blobStream.tellp() > 0 && blobContainer.size() > 0 &&
88+
static_cast<size_t>(blobStream.tellp()) > blobContainer.size());
7289
}
73-
ov::test::utils::removeDir(testCacheDir);
7490
}
7591

7692
TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForFStream) {
@@ -104,7 +120,6 @@ TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForFStream) {
104120
dynamic_cast<const intel_npu::BlobContainerAlignedBuffer*>(&blobContainer);
105121
OPENVINO_ASSERT(blobContainerAlignedBufferPtr == nullptr, "Cannot have memory mapped blob for std::fstream!");
106122
}
107-
ov::test::utils::removeFile(testFileName);
108123
}
109124

110125
TEST_F(BlobContainerUnitTests, isBlobContainerCorrectlyPickedForSStream) {
@@ -161,35 +176,42 @@ TEST_F(BlobContainerUnitTests, isBlobHeaderHandledCorrectly) {
161176
std::string parseDummyHeader;
162177
std::string blob;
163178
blobStream >> parseDummyHeader;
179+
blobStream.get();
164180

165-
EXPECT_THAT(parseDummyHeader, testing::HasSubstr("blobwillstartafterspace"));
166181
auto compiledModel =
167182
core->import_model(blobStream, ov::test::utils::DEVICE_NPU, {ov::intel_npu::defer_weights_load(true)});
168-
blobStream = {};
169183

170184
auto* compiledModelPtr = dynamic_cast<intel_npu::ICompiledModel*>(compiledModel._ptr.get());
171185
OPENVINO_ASSERT(compiledModelPtr != nullptr);
172186
const auto& blobContainer = compiledModelPtr->get_graph()->get_blob_container();
173187
blob.assign(reinterpret_cast<const char*>(blobContainer.get_ptr()), blobContainer.size());
174-
EXPECT_THAT(blob, testing::HasSubstr("correctblob!"));
188+
ASSERT_EQ(blobStream.str().substr(std::strlen(dummyBlobHeader), blobContainer.size()), blob);
175189
}
176190

177191
{
178192
std::string parseDummyHeader;
179193
std::string blob;
194+
std::string referenceBlob;
180195
auto inputFile = std::ifstream(testFileName, std::ios::in | std::ios::binary);
181-
blobStream >> parseDummyHeader;
196+
inputFile >> parseDummyHeader;
197+
inputFile.get();
198+
199+
std::streampos currentPos = inputFile.tellg();
200+
inputFile.seekg(0, std::ios::end);
201+
std::streampos endPos = inputFile.tellg();
202+
inputFile.seekg(currentPos, std::ios::beg);
203+
referenceBlob.resize(endPos - currentPos);
204+
inputFile.read(&referenceBlob[0], referenceBlob.size());
205+
inputFile.seekg(currentPos, std::ios::beg);
182206

183-
EXPECT_THAT(parseDummyHeader, testing::HasSubstr("blobwillstartafterspace"));
184207
auto compiledModel =
185-
core->import_model(blobStream, ov::test::utils::DEVICE_NPU, {ov::intel_npu::defer_weights_load(true)});
208+
core->import_model(inputFile, ov::test::utils::DEVICE_NPU, {ov::intel_npu::defer_weights_load(true)});
186209

187210
auto* compiledModelPtr = dynamic_cast<intel_npu::ICompiledModel*>(compiledModel._ptr.get());
188211
OPENVINO_ASSERT(compiledModelPtr != nullptr);
189212
const auto& blobContainer = compiledModelPtr->get_graph()->get_blob_container();
190213
blob.assign(reinterpret_cast<const char*>(blobContainer.get_ptr()), blobContainer.size());
191-
EXPECT_THAT(blob, testing::HasSubstr("correctblob!"));
214+
referenceBlob.resize(blobContainer.size()); // exclude metadata
215+
ASSERT_EQ(referenceBlob, blob);
192216
}
193-
194-
ov::test::utils::removeFile(testFileName);
195217
}

0 commit comments

Comments
 (0)