Skip to content

Commit 89e94a6

Browse files
committed
Add new Mallocator
1 parent 1405c53 commit 89e94a6

File tree

6 files changed

+125
-115
lines changed

6 files changed

+125
-115
lines changed

src/plugins/intel_npu/src/al/include/intel_npu/al/icompiler.hpp

+83-2
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,85 @@ struct NetworkDescription final {
151151
NetworkMetadata metadata;
152152
};
153153

154+
/**
 * @brief Minimal allocator usable with std::vector that works in one of two modes.
 *
 * - Owning mode (default constructor): storage is obtained with std::malloc() and released
 *   with std::free().
 * - View mode (pointer + size constructors): the allocator only remembers a caller-provided
 *   buffer and never frees it. The buffer is exposed through data() / size().
 *
 * The allocator is stateful: copies share the same recorded pointer and size.
 *
 * @tparam T Element type handed out by allocate().
 */
template <class T>
struct Mallocator {
    typedef T value_type;

    /// Creates an owning allocator with no storage recorded yet.
    Mallocator() noexcept : _allocatedByUs(true) {}

    /**
     * @brief Creates a non-owning view over a mutable buffer.
     * @param preAllocatedMem Start of the caller-owned buffer; it is never freed by this class.
     * @param size Number of T elements available in the buffer.
     */
    Mallocator(void* preAllocatedMem, std::size_t size)
        : _preAllocatedMem(preAllocatedMem),
          _constPreAllocatedMem(nullptr),
          _size(size),
          _allocatedByUs(false) {}

    /**
     * @brief Creates a non-owning view over a read-only buffer.
     * @param constPreAllocatedMem Start of the caller-owned buffer; it is never freed by this class.
     * @param size Number of T elements available in the buffer.
     */
    Mallocator(const void* constPreAllocatedMem, std::size_t size)
        : _preAllocatedMem(nullptr),
          _constPreAllocatedMem(constPreAllocatedMem),
          _size(size),
          _allocatedByUs(false) {}

    /**
     * @brief Rebinding constructor (used by containers for their internal helper types).
     *
     * Workaround for MSFT std::_Container_Proxy: the rebound allocator is always put in owning
     * mode so that helper objects are malloc'ed instead of being placed into the viewed buffer.
     */
    template <class U>
    constexpr Mallocator(const Mallocator<U>& other) noexcept
        : _preAllocatedMem(other._preAllocatedMem),
          _constPreAllocatedMem(other._constPreAllocatedMem),
          _size(other._size),
          _allocatedByUs(true) {}

    /**
     * @brief Returns storage for n elements.
     *
     * In view mode the recorded mutable buffer is returned as-is (the same pointer on every call).
     * In owning mode fresh memory is malloc'ed and the allocator records the pointer and n, so that
     * data() / size() describe the most recent allocation.
     *
     * @throws std::bad_array_new_length if n * sizeof(T) overflows.
     * @throws std::bad_alloc if malloc fails, or if in view mode there is no mutable buffer or the
     *         buffer holds fewer than n elements.
     */
    [[nodiscard]] T* allocate(std::size_t n) {
        if (n > std::numeric_limits<std::size_t>::max() / sizeof(T)) {
            throw std::bad_array_new_length();
        }

        if (!_allocatedByUs) {
            // A read-only or too-small view cannot serve as writable container storage.
            if (_preAllocatedMem == nullptr || n > _size) {
                throw std::bad_alloc();
            }
            return static_cast<T*>(_preAllocatedMem);
        }

        void* raw = std::malloc(n * sizeof(T));
        if (raw == nullptr && n != 0) {
            throw std::bad_alloc();
        }
        _preAllocatedMem = raw;
        _size = n;
        return static_cast<T*>(raw);
    }

    /**
     * @brief Releases storage previously returned by allocate().
     *
     * Does nothing in view mode. In owning mode the memory goes back through std::free(), matching
     * the std::malloc() in allocate().
     */
    void deallocate(T* p, std::size_t /*n*/) noexcept {
        if (!_allocatedByUs) {
            return;
        }
        // Compare before freeing: forget the recorded block so data() cannot return a dangling pointer.
        if (static_cast<void*>(p) == _preAllocatedMem) {
            _preAllocatedMem = nullptr;
            _size = 0;
        }
        std::free(p);
    }

    /**
     * @brief Returns the recorded buffer.
     * @return In owning mode the most recently allocated block (may be nullptr); in view mode the
     *         mutable buffer if set, otherwise the read-only buffer.
     * @throws std::bad_alloc in view mode when neither buffer pointer is set.
     */
    const T* data() const {
        if (_allocatedByUs || _preAllocatedMem != nullptr) {
            return static_cast<const T*>(_preAllocatedMem);
        }
        if (_constPreAllocatedMem != nullptr) {
            return static_cast<const T*>(_constPreAllocatedMem);
        }
        throw std::bad_alloc();
    }

    /**
     * @brief Element count of the recorded buffer.
     *
     * In owning mode this is the count passed to the latest allocate() call (i.e. the container's
     * capacity at that time), or 0 if nothing is currently recorded.
     */
    std::size_t size() const noexcept {
        return _size;
    }

private:
    void* _preAllocatedMem = nullptr;             // mutable view buffer, or last malloc'ed block
    const void* _constPreAllocatedMem = nullptr;  // read-only view buffer
    std::size_t _size = 0;                        // element count of the recorded buffer

    bool _allocatedByUs = true;  // true: malloc/free storage; false: non-owning view

    template <typename>
    friend struct Mallocator;
};
225+
226+
template<class T, class U>
227+
bool operator==(const Mallocator <T>&, const Mallocator <U>&) { return true; }
228+
229+
template<class T, class U>
230+
bool operator!=(const Mallocator <T>&, const Mallocator <U>&) { return false; }
231+
232+
154233
/**
155234
* @interface ICompiler
156235
* @brief An interface to be implemented by a concrete compiler to provide
@@ -203,8 +282,10 @@ class ICompiler : public std::enable_shared_from_this<ICompiler> {
203282
// Driver compiler can use this to release graphHandle, if we do not have executor
204283
virtual void release([[maybe_unused]] std::shared_ptr<const NetworkDescription> networkDescription){};
205284

206-
virtual std::vector<uint8_t> getCompiledNetwork(const NetworkDescription& networkDescription) {
207-
return networkDescription.compiledNetwork;
285+
/**
 * @brief Default implementation: exposes the blob stored in the network description.
 *
 * The returned vector is empty; the blob is reachable through get_allocator().data() and
 * get_allocator().size(), which refer to networkDescription.compiledNetwork without copying it.
 * NOTE(review): the view presumably must not outlive networkDescription - confirm with callers.
 */
virtual std::vector<uint8_t, Mallocator<uint8_t>> getCompiledNetwork(const NetworkDescription& networkDescription) {
    const auto& storedBlob = networkDescription.compiledNetwork;
    return std::vector<uint8_t, Mallocator<uint8_t>>(Mallocator<uint8_t>(storedBlob.data(), storedBlob.size()));
}
209290

210291
protected:

src/plugins/intel_npu/src/compiler/include/driver_compiler_adapter.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class LevelZeroCompilerAdapter final : public ICompiler {
3636

3737
void release(std::shared_ptr<const NetworkDescription> networkDescription) override;
3838

39-
CompiledNetwork getCompiledNetwork(const NetworkDescription& networkDescription) override;
39+
std::vector<uint8_t, Mallocator<uint8_t>> getCompiledNetwork(const NetworkDescription& networkDescription) override;
4040

4141
private:
4242
/**

src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp

+5-7
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ class LevelZeroCompilerInDriver final : public ICompiler {
105105

106106
void release(std::shared_ptr<const NetworkDescription> networkDescription) override;
107107

108-
std::vector<uint8_t> getCompiledNetwork(const NetworkDescription& networkDescription) override;
108+
std::vector<uint8_t, Mallocator<uint8_t>> getCompiledNetwork(const NetworkDescription& networkDescription) override;
109109

110110
private:
111111
NetworkMetadata getNetworkMeta(ze_graph_handle_t graphHandle) const;
@@ -129,14 +129,12 @@ class LevelZeroCompilerInDriver final : public ICompiler {
129129
std::vector<IODescriptor>& outputs) const;
130130

131131
template <typename T = TableExtension, typename std::enable_if_t<UseCopyForNativeBinary(T), bool> = true>
132-
void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
133-
ze_graph_handle_t graphHandle,
134-
std::vector<uint8_t>& blob) const;
132+
std::vector<uint8_t, Mallocator<uint8_t>> getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
133+
ze_graph_handle_t graphHandle) const;
135134

136135
template <typename T = TableExtension, typename std::enable_if_t<!UseCopyForNativeBinary(T), bool> = true>
137-
void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
138-
ze_graph_handle_t graphHandle,
139-
std::vector<uint8_t>& blob) const;
136+
std::vector<uint8_t, Mallocator<uint8_t>> getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
137+
ze_graph_handle_t graphHandle) const;
140138

141139
template <typename T = TableExtension, typename std::enable_if_t<SupportAPIGraphQueryNetworkV2(T), bool> = true>
142140
ze_result_t seriazlideIRModelAndQueryNetworkCreateV2(const std::shared_ptr<const ov::Model>& model,

src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ void LevelZeroCompilerAdapter::release(std::shared_ptr<const NetworkDescription>
115115
apiAdapter->release(std::move(networkDescription));
116116
}
117117

118-
CompiledNetwork LevelZeroCompilerAdapter::getCompiledNetwork(const NetworkDescription& networkDescription) {
118+
std::vector<uint8_t, Mallocator<uint8_t>> LevelZeroCompilerAdapter::getCompiledNetwork(const NetworkDescription& networkDescription) {
119119
_logger.info("getCompiledNetwork - using adapter to perform getCompiledNetwork(networkDescription)");
120120
return apiAdapter->getCompiledNetwork(networkDescription);
121121
}

src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp

+29-99
Original file line numberDiff line numberDiff line change
@@ -362,61 +362,14 @@ void LevelZeroCompilerInDriver<TableExtension>::release(std::shared_ptr<const Ne
362362
_logger.debug("release completed");
363363
}
364364

365-
/*
366-
FAILED: src/plugins/intel_npu/src/compiler/CMakeFiles/openvino_npu_driver_compiler_adapter.dir/src/zero_compiler_in_driver.cpp.obj
367-
C:\Work\mirceaau\ccache-4.8.2-windows-x86_64\ccache C:\PROGRA~2\MICROS~2\2019\BUILDT~1\VC\Tools\MSVC\1429~1.301\bin\Hostx64\x64\cl.exe /nologo /TP -DIN_OV_COMPONENT -DNPU_PLUGIN_DEVELOPER_BUILD -DOV_BUILD_POSTFIX=\"d\" -DOV_NATIVE_PARENT_PROJECT_ROOT_DIR=\"openvino\" -DOV_THREAD=OV_THREAD_TBB -DSNIPPETS_DEBUG_CAPS -DTBB_USE_DEBUG -D__TBB_NO_IMPLICIT_LINKAGE=1 -IC:\Work\mirceaau\openvino\src\plugins\intel_npu\src\compiler\include -IC:\Work\mirceaau\openvino\src\plugins\intel_npu\src\al\include -IC:\Work\mirceaau\openvino\src\plugins\intel_npu\src\utils\include -IC:\Work\mirceaau\openvino\src\inference\dev_api -IC:\Work\mirceaau\openvino\src\core\include -IC:\Work\mirceaau\openvino\src\frontends\common\include -IC:\Work\mirceaau\openvino\src\inference\include -IC:\Work\mirceaau\openvino\src\core\dev_api -IC:\Work\mirceaau\openvino\src\common\transformations\include -IC:\Work\mirceaau\openvino\src\common\low_precision_transformations\include -IC:\Work\mirceaau\openvino\src\common\itt\include -IC:\Work\mirceaau\openvino\src\common\util\include -IC:\Work\mirceaau\openvino\thirdparty\pugixml\src -IC:\Work\mirceaau\openvino\thirdparty\level_zero\level-zero\include -IC:\Work\mirceaau\openvino\src\plugins\intel_npu\thirdparty\level-zero-ext -IC:\Work\mirceaau\openvino\src\plugins\intel_npu\src\backend\include -external:IC:\Work\mirceaau\openvino\temp\tbb\include -external:W0 /DWIN32 /D_WINDOWS /GR /EHsc /D_CRT_SECURE_NO_WARNINGS /D_SCL_SECURE_NO_WARNINGS /EHsc /Gy /W3 /bigobj /MP /wd4251 /wd4275 /Z7 /Ob0 /Od /RTC1 -std:c++17 -MDd /d1trimfile:C:\Work\mirceaau\openvino\ /d1trimfile:C:/Work/mirceaau/openvino/ -WX /showIncludes /Fosrc\plugins\intel_npu\src\compiler\CMakeFiles\openvino_npu_driver_compiler_adapter.dir\src\zero_compiler_in_driver.cpp.obj /FdC:\Work\mirceaau\openvino\bin\intel64\Debug\ /FS -c C:\Work\mirceaau\openvino\src\plugins\intel_npu\src\compiler\src\zero_compiler_in_driver.cpp
368-
C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Tools\MSVC\14.29.30133\include\vector(701): error C2440: 'static_cast': cannot convert from 'intel_npu::driverCompilerAdapter::PreAllocatedAllocator<uint8_t>' to 'intel_npu::driverCompilerAdapter::PreAllocatedAllocator<U>'
369-
with
370-
[
371-
U=std::_Container_proxy
372-
]
373-
*/
374-
template <typename T>
375-
class PreAllocatedAllocator
376-
{
377-
public:
378-
using value_type = T;
379-
380-
// Constructor accepts a pointer to the pre-allocated memory block
381-
PreAllocatedAllocator(T* pre_allocated, std::size_t max_size)
382-
: memory(pre_allocated), max_size(max_size), offset(0) {}
383-
384-
// Allocator constructor and copy constructor
385-
PreAllocatedAllocator(const PreAllocatedAllocator<T>& other)
386-
: memory(other.memory), max_size(other.max_size), offset(other.offset) {}
387-
388-
T* allocate(std::size_t n) {
389-
if (offset + n > max_size) {
390-
throw std::bad_alloc();
391-
}
392-
T* ptr = memory + offset;
393-
offset += n;
394-
return ptr;
395-
}
396-
397-
void deallocate(T*, std::size_t) {
398-
// Deallocate does nothing since memory is externally managed
399-
}
400-
401-
template <typename U>
402-
struct rebind {
403-
using other = PreAllocatedAllocator<U>;
404-
};
405-
406-
private:
407-
T* memory;
408-
std::size_t max_size;
409-
std::size_t offset;
410-
411-
};
412-
413365
template <typename TableExtension>
414366
template <typename T, std::enable_if_t<UseCopyForNativeBinary(T), bool>>
415-
void LevelZeroCompilerInDriver<TableExtension>::getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
416-
ze_graph_handle_t graphHandle,
417-
std::vector<uint8_t>& blob) const {
418-
// Get blob size first
419-
size_t blobSize = -1;
367+
std::vector<uint8_t, Mallocator<uint8_t>> LevelZeroCompilerInDriver<TableExtension>::getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
368+
ze_graph_handle_t graphHandle) const {
369+
Mallocator<uint8_t> mal;
370+
std::vector<uint8_t, Mallocator<uint8_t>> blob(mal);
371+
size_t blobSize;
372+
420373
auto result = _graphDdiTableExt.pfnGetNativeBinary(graphHandle, &blobSize, nullptr);
421374
blob.resize(blobSize);
422375

@@ -442,71 +395,48 @@ void LevelZeroCompilerInDriver<TableExtension>::getNativeBinary(ze_graph_dditabl
442395
uint64_t(result),
443396
". ",
444397
getLatestBuildError());
398+
return std::move(blob);
445399
}
446400

447-
// Allocators of the same type are always equal
448-
template <typename T1, typename T2>
449-
bool operator==(const PreAllocatedAllocator<T1>&, const PreAllocatedAllocator<T2>&) { return true; }
450-
451-
template <typename T1, typename T2>
452-
bool operator!=(const PreAllocatedAllocator<T1>&, const PreAllocatedAllocator<T2>&) { return false; }
453-
454401
template <typename TableExtension>
455402
template <typename T, std::enable_if_t<!UseCopyForNativeBinary(T), bool>>
456-
void LevelZeroCompilerInDriver<TableExtension>::getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
457-
ze_graph_handle_t graphHandle,
458-
std::vector<uint8_t>& blob) const {
459-
// Get blob ptr and size
460-
uint8_t* blobPtr = nullptr;
461-
size_t blobSize = -1;
462-
463-
auto result = _graphDdiTableExt.pfnGetNativeBinary2(graphHandle, &blobSize, &blobPtr);
464-
465-
OPENVINO_ASSERT(result == ZE_RESULT_SUCCESS,
466-
"Failed to compile network. L0 pfnGetNativeBinary get blob size",
467-
" result: ",
468-
ze_result_to_string(result),
469-
", code 0x",
470-
std::hex,
471-
uint64_t(result),
472-
". ",
473-
getLatestBuildError());
474-
475-
// std::initializer_list<uint8_t> initializerListTmp(blobPtr, blobPtr + blobSize);
403+
std::vector<uint8_t, Mallocator<uint8_t>> LevelZeroCompilerInDriver<TableExtension>::getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
404+
ze_graph_handle_t graphHandle) const {
476405

477-
// std::vector<uint8_t> blobTmp = {initializerListTmp.begin(), initializerListTmp.end()};
406+
uint8_t* blobPtr;
407+
size_t blobSize;
478408

479-
// blob.swap(blobTmp);
409+
// Get blob ptr and size
410+
auto result = _graphDdiTableExt.pfnGetNativeBinary2(graphHandle, &blobSize, &blobPtr);
480411

481-
/*placement_memory_allocator<uint8_t> pl(blobPtr);
482-
std::vector<uint8_t, placement_memory_allocator<uint8_t>> tmpBlob(pl);
483-
tmpBlob.reserve(blobSize);
484-
tmpBlob.push_back(0);*/
485-
// blob.swap(tmpBlob);
486-
487-
PreAllocatedAllocator allocator(blobPtr, blobSize);
488-
489-
std::vector<uint8_t, PreAllocatedAllocator<uint8_t>> tmpBlob(allocator);
412+
OPENVINO_ASSERT(result == ZE_RESULT_SUCCESS,
413+
"Failed to compile network. L0 pfnGetNativeBinary get blob size",
414+
" result: ",
415+
ze_result_to_string(result),
416+
", code 0x",
417+
std::hex,
418+
uint64_t(result),
419+
". ",
420+
getLatestBuildError());
490421

491-
std::cout << tmpBlob[0] << " " << tmpBlob[1] << std::endl;
422+
Mallocator<uint8_t> mal(blobPtr, blobSize);
423+
return std::vector<uint8_t, Mallocator<uint8_t>>(mal);
492424
}
493425

494426
template <typename TableExtension>
495-
std::vector<uint8_t> LevelZeroCompilerInDriver<TableExtension>::getCompiledNetwork(
427+
std::vector<uint8_t, Mallocator<uint8_t>> LevelZeroCompilerInDriver<TableExtension>::getCompiledNetwork(
496428
const NetworkDescription& networkDescription) {
497429
if (networkDescription.metadata.graphHandle != nullptr && networkDescription.compiledNetwork.size() == 0) {
498430
_logger.info("LevelZeroCompilerInDriver getCompiledNetwork get blob from graphHandle");
499431
ze_graph_handle_t graphHandle = static_cast<ze_graph_handle_t>(networkDescription.metadata.graphHandle);
500432

501-
std::vector<uint8_t> blob;
502-
503-
getNativeBinary(_graphDdiTableExt, graphHandle, blob);
504-
505433
_logger.info("LevelZeroCompilerInDriver getCompiledNetwork returning blob");
506-
return std::move(blob);
434+
return getNativeBinary(_graphDdiTableExt, graphHandle);
507435
}
508436
_logger.info("return the blob from network description");
509-
return networkDescription.compiledNetwork;
437+
438+
Mallocator<uint8_t> mal(networkDescription.compiledNetwork.data(), networkDescription.compiledNetwork.size());
439+
return std::vector<uint8_t, Mallocator<uint8_t>>(mal);
510440
}
511441

512442
template <typename TableExtension>

src/plugins/intel_npu/src/plugin/src/compiled_model.cpp

+6-5
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,11 @@ constexpr std::string_view NO_EXECUTOR_FOR_INFERENCE =
2727
"Can't create infer request!\n"
2828
"Please make sure that the device is available. Only exports can be made.";
2929

30-
std::uint32_t hash(const std::vector<uint8_t>& blob) {
30+
/**
 * @brief Computes a 32-bit checksum over the blob exposed by the vector's allocator.
 *
 * The bytes are taken from get_allocator().data() / get_allocator().size(), not from the
 * vector's own elements. Both ends of the range are derived from that same pointer, because
 * blob.data() is unrelated to the allocator's buffer whenever the vector itself is empty.
 *
 * @param blob Vector whose allocator describes the bytes to hash.
 * @return Checksum seeded with 1171117 and updated as result = result * 129 + byte (mod 2^32).
 */
std::uint32_t hash(const std::vector<uint8_t, intel_npu::Mallocator<uint8_t>>& blob) {
    // get_allocator() returns a copy, so query it once and reuse it.
    auto allocator = blob.get_allocator();
    const uint8_t* const first = allocator.data();
    const uint8_t* const last = first + allocator.size();

    std::uint32_t result = 1171117u;
    for (const uint8_t* it = first; it != last; ++it) {
        result = ((result << 7) + result) + static_cast<uint32_t>(*it);
    }
    return result;
}
3637

@@ -140,14 +141,14 @@ std::shared_ptr<ov::ISyncInferRequest> CompiledModel::create_sync_infer_request(
140141
void CompiledModel::export_model(std::ostream& stream) const {
141142
_logger.debug("CompiledModel::export_model");
142143
const auto blob = _compiler->getCompiledNetwork(*_networkPtr);
143-
stream.write(reinterpret_cast<const char*>(blob.data()), blob.size());
144+
stream.write(reinterpret_cast<const char*>(blob.get_allocator().data()), blob.get_allocator().size());
144145

145146
if (!stream) {
146147
_logger.error("Write blob to stream failed. Blob is broken!");
147148
} else {
148149
if (_logger.level() >= ov::log::Level::INFO) {
149150
std::stringstream str;
150-
str << "Blob size: " << blob.size() << ", hash: " << std::hex << hash(blob);
151+
str << "Blob size: " << blob.get_allocator().size() << ", hash: " << std::hex << hash(blob);
151152
_logger.info(str.str().c_str());
152153
}
153154
_logger.info("Write blob to stream successfully.");

0 commit comments

Comments
 (0)