Skip to content

Commit 7bc796a

Browse files
committed Feb 21, 2025 ·
[CORE][CPU][GPU] Added global weights cache
1 parent 6aa2544 commit 7bc796a

23 files changed

+669
-20
lines changed
 
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
// Copyright (C) 2018-2024 Intel Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
//
4+
5+
#pragma once
6+
7+
#include "itensor_desc.hpp"
8+
#include "openvino/core/shape.hpp"
9+
10+
namespace ov {
11+
// namespace one_plugin {
12+
13+
/**
 * @brief Tensor descriptor that carries blocked-layout information as four ov::Shape values:
 * blocked dims, order, strides and offset padding to data.
 *
 * Derives virtually from ITensorDesc. Only declarations are present in this header; the
 * non-defaulted constructors and the getters are defined elsewhere.
 */
class OPENVINO_RUNTIME_API BlockedTensorDesc : public virtual ITensorDesc {
14+
public:
15+
// Shared-pointer aliases for mutable and const descriptors.
typedef std::shared_ptr<BlockedTensorDesc> Ptr;
16+
typedef std::shared_ptr<const BlockedTensorDesc> CPtr;
17+
18+
// NOTE(review): second `public:` specifier is redundant (access is already public here).
public:
19+
// Default construction leaves all four ov::Shape members default-constructed.
BlockedTensorDesc() = default;
20+
// Builds a descriptor from a single shape. How order/strides/padding are derived from it
// is decided by the out-of-line definition (not visible here).
// NOTE(review): not `explicit`, so ov::Shape converts implicitly to BlockedTensorDesc - confirm intended.
BlockedTensorDesc(const ov::Shape& shape);
21+
// Builds a descriptor from explicit blocked dims and order; strides and
// offset_padding_to_data default to an empty ov::Shape when omitted.
// NOTE(review): presumably empty values are replaced with computed defaults - confirm in the definition.
BlockedTensorDesc(const ov::Shape& blocked_dims,
22+
const ov::Shape& order,
23+
const ov::Shape& strides = {},
24+
const ov::Shape& offset_padding_to_data = {});
25+
~BlockedTensorDesc() override = default;
26+
27+
// Read-only accessors; each returns a const reference to an ov::Shape
// (presumably the member of the matching name - definitions are not visible here).
const ov::Shape& get_blocked_dims() const;
28+
const ov::Shape& get_order() const;
29+
const ov::Shape& get_strides() const;
30+
const ov::Shape& get_offset_padding_to_data() const;
31+
32+
protected:
33+
// NOTE(review): every member is `mutable`, so const member functions may modify them;
// confirm this is required (e.g. lazy computation) rather than accidental.
mutable ov::Shape m_blocked_dims;
34+
mutable ov::Shape m_order;
35+
mutable ov::Shape m_strides;
36+
mutable ov::Shape m_offset_padding_to_data;
37+
};
38+
39+
// } // namespace one_plugin
40+
} // namespace ov

‎src/inference/dev_api/openvino/runtime/iplugin.hpp

+5
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "openvino/runtime/iremote_context.hpp"
2222
#include "openvino/runtime/threading/executor_manager.hpp"
2323
#include "openvino/util/pp.hpp"
24+
#include "openvino/runtime/plugin_context.hpp"
2425

2526
namespace ov {
2627

@@ -212,6 +213,9 @@ class OPENVINO_RUNTIME_API IPlugin : public std::enable_shared_from_this<IPlugin
212213
*/
213214
const std::shared_ptr<ov::threading::ExecutorManager>& get_executor_manager() const;
214215

216+
/**
 * @brief Sets the plugin context
 * @param context A shared pointer to the ov::PluginContext to associate with this plugin
 *        (presumably stored in m_context; the definition is not visible in this header)
 */
void set_plugin_context(const std::shared_ptr<ov::PluginContext>& context);
217+
/**
 * @brief Gets the plugin context
 * @return A shared pointer to ov::PluginContext, returned by value
 *         (presumably a copy of m_context, which may be empty if no context was set - confirm in the definition)
 */
std::shared_ptr<ov::PluginContext> get_plugin_context() const;
218+
215219
virtual ~IPlugin() = default;
216220

217221
protected:
@@ -222,6 +226,7 @@ class OPENVINO_RUNTIME_API IPlugin : public std::enable_shared_from_this<IPlugin
222226
std::weak_ptr<ov::ICore> m_core; //!< A pointer to ICore interface
223227
std::shared_ptr<ov::threading::ExecutorManager> m_executor_manager; //!< A tasks execution manager
224228
ov::Version m_version; //!< Member contains plugin version
229+
std::shared_ptr<ov::PluginContext> m_context;  //!< A shared pointer to the plugin context
225230
};
226231

227232
/**
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
// Copyright (C) 2018-2024 Intel Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
//
4+
5+
#pragma once
6+
7+
#include "openvino/runtime/common.hpp"
8+
9+
// #include "itensor.hpp"
10+
// #include "shape.hpp"
11+
12+
namespace ov {
13+
// namespace one_plugin {
14+
15+
/**
 * @brief Base interface for tensor descriptors.
 *
 * As written, the only live member is the virtual defaulted destructor; the class also derives
 * from std::enable_shared_from_this<ITensorDesc>. Everything else in the body is commented-out
 * draft code and has no effect on the compiled interface.
 *
 * NOTE(review): the commented-out section references names that are not declared in this header
 * (ITensorDescType, ITensorDescPtr, VectorDims, LayoutType, dims2str) - decide whether to finish
 * or remove it before merging.
 * NOTE(review): std::enable_shared_from_this needs <memory>; presumably it arrives transitively
 * through "openvino/runtime/common.hpp" - confirm.
 */
class OPENVINO_RUNTIME_API ITensorDesc : public std::enable_shared_from_this<ITensorDesc> {
16+
public:
17+
// ITensorDescType getType() const {
18+
// return type;
19+
// }
20+
21+
// const ov::Shape& get_shape() const {
22+
// return shape;
23+
// }
24+
25+
// Virtual destructor so derived descriptors can be destroyed through an ITensorDesc pointer.
virtual ~ITensorDesc() = default;
26+
27+
// virtual ov::element::Type getPrecision() const = 0;
28+
29+
// virtual ITensorDescPtr clone() const = 0;
30+
31+
// /**
32+
// * @brief Returns the offset to the current memory block
33+
// *
34+
// * @return offset
35+
// */
36+
// virtual size_t getOffsetPadding() const = 0;
37+
38+
// /**
39+
// * @brief Clone descriptor with new dims.
40+
// * Throws an exception if relaxedCheck is false and some of the new dims conflicts with the internal shape (i.e.
41+
// its
42+
// * defined dims ,rank, upper bounds) or if internal shape and dims have different ranks
43+
// * @param dims new dims
44+
// * @param relaxedCheck flag which defined must we check dims with internal desc on compatibility
45+
// * @return ITensorDescPtr with new dims
46+
// */
47+
// ITensorDescPtr cloneWithNewDims(const VectorDims& dims, bool relaxedCheck = false) const {
48+
// if (relaxedCheck) {
49+
// if (getShape().getRank() != dims.size()) {
50+
// OPENVINO_THROW("ParameterMismatch: Can not clone with new dims, ranks mistmatch. Descriptor's rank:
51+
// ",
52+
// getShape().getRank(),
53+
// " is incompatible with provided rank of dimensions: ",
54+
// dims.size(),
55+
// ".");
56+
// }
57+
// } else if (!getShape().isCompatible(dims)) {
58+
// OPENVINO_THROW("ParameterMismatch: Can not clone with new dims. Descriptor's shape: ",
59+
// getShape().toString(),
60+
// " is incompatible with provided dimensions: ",
61+
// dims2str(dims),
62+
// ".");
63+
// }
64+
65+
// return cloneWithNewDimsImp(dims);
66+
// }
67+
68+
// virtual ITensorDescPtr cloneWithNewPrecision(const ov::element::Type prec) const = 0;
69+
70+
// virtual bool isCompatible(const ITensorDesc& rhs) const = 0;
71+
72+
// // Checks that all dimensions, offsets, strides, etc are defined (!= UNDEFINED_DIM)
73+
// bool isDefined() const {
74+
// if (descStatus::Unknown == status) {
75+
// status = isDefinedImp() ? descStatus::Defined : descStatus::Undefined;
76+
// }
77+
// return descStatus::Defined == status;
78+
// }
79+
80+
// virtual bool hasLayoutType(LayoutType layoutType) const = 0;
81+
82+
// virtual std::string serializeFormat() const = 0;
83+
84+
// // Get memory upper bound if possible. Can be undefined
85+
// virtual size_t getMaxMemSize() const = 0;
86+
87+
// /**
88+
// * @brief Get minimal required memory size in bytes.
89+
// * @return return minimal required memory size in bytes or UNDEFINED_SIZE in case undefined descriptor
90+
// */
91+
// size_t getCurrentMemSize() const {
92+
// size_t retVal = UNDEFINED_SIZE;
93+
// if (canComputeMemSize()) {
94+
// retVal = getCurrentMemSizeImp();
95+
// }
96+
// return retVal;
97+
// }
98+
99+
// bool hasDefinedMaxSize() const {
100+
// return getMaxMemSize() != ITensorDesc::UNDEFINED_SIZE;
101+
// }
102+
103+
// bool empty() const {
104+
// return type == Empty;
105+
// }
106+
107+
// template <typename T,
108+
// typename std::enable_if<!std::is_pointer<T>::value && !std::is_reference<T>::value, int>::type = 0,
109+
// typename std::enable_if<std::is_base_of<ITensorDesc, T>::value, int>::type = 0>
110+
// T* as() {
111+
// T* casted = dynamic_cast<T*>(this);
112+
// if (!casted)
113+
// OPENVINO_THROW("Cannot dynamically cast ITensorDesc");
114+
// return casted;
115+
// }
116+
117+
// template <typename T,
118+
// typename std::enable_if<!std::is_pointer<T>::value && !std::is_reference<T>::value, int>::type = 0,
119+
// typename std::enable_if<std::is_base_of<ITensorDesc, T>::value, int>::type = 0>
120+
// const T* as() const {
121+
// const T* casted = dynamic_cast<const T*>(this);
122+
// if (!casted)
123+
// OPENVINO_THROW("Cannot dynamically cast ITensorDesc");
124+
// return casted;
125+
// }
126+
127+
// static constexpr size_t UNDEFINED_SIZE = std::numeric_limits<size_t>::max();
128+
129+
// protected:
130+
// ITensorDesc() : type(ITensorDescType::Undef) {}
131+
// ITensorDesc(Shape shape, ITensorDescType type) : type(type), shape(std::move(shape)) {}
132+
133+
// ITensorDesc(const VectorDims& dims, ITensorDescType type) : type(type), shape(dims) {}
134+
135+
// virtual void setPrecision(ov::element::Type prc) = 0;
136+
137+
// virtual size_t getCurrentMemSizeImp() const = 0;
138+
139+
// // Get offset to the n'th element. Returns physical index of the element by the logical one considering padding,
140+
// // layout, blocking etc.
141+
// virtual size_t getElementOffset(size_t elemNumber) const = 0;
142+
143+
// virtual bool canComputeMemSizeZeroDims() const = 0;
144+
// virtual bool isDefinedImp() const = 0;
145+
146+
// bool canComputeMemSize() const {
147+
// return isDefined() || canComputeMemSizeZeroDims();
148+
// }
149+
150+
// virtual ITensorDescPtr cloneWithNewDimsImp(const VectorDims& dims) const = 0;
151+
152+
// ITensorDescType type;
153+
// Shape shape;
154+
155+
// mutable enum class descStatus : uint8_t {
156+
// Unknown,
157+
// Defined,
158+
// Undefined,
159+
// } status = descStatus::Unknown;
160+
};
161+
162+
// } // namespace one_plugin
163+
} // namespace ov
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// Copyright (C) 2018-2024 Intel Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
//
4+
5+
#pragma once
6+
7+
#include "openvino/runtime/tensor_cache.hpp"
8+
#include "openvino/runtime/common.hpp"
9+
10+
namespace ov {
11+
// namespace one_plugin {
12+
13+
/**
 * @brief Holder for per-plugin shared state; as declared here it wraps a single
 * WeightsCache::Ptr and exposes it through get_tensor_cache().
 */
class OPENVINO_RUNTIME_API PluginContext {
14+
public:
15+
// Shared-pointer aliases for mutable and const contexts.
typedef std::shared_ptr<PluginContext> Ptr;
16+
typedef std::shared_ptr<const PluginContext> CPtr;
17+
18+
// Takes the cache pointer by value; the definition is out of line
// (presumably it initializes m_tensor_cache - confirm).
// NOTE(review): not `explicit`, so WeightsCache::Ptr converts implicitly to PluginContext - confirm intended.
PluginContext(WeightsCache::Ptr tensor_cache);
19+
20+
// Returns a copy of the stored shared pointer (not a reference to it).
WeightsCache::Ptr get_tensor_cache() const {
21+
return m_tensor_cache;
22+
}
23+
24+
private:
25+
// Weights cache handed out by get_tensor_cache().
WeightsCache::Ptr m_tensor_cache;
26+
};
27+
28+
// } // namespace one_plugin
29+
} // namespace ov
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
// Copyright (C) 2018-2024 Intel Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
//
4+
5+
#pragma once
6+
7+
#include <atomic>
8+
#include <functional>
9+
#include <map>
10+
#include <memory>
11+
#include <mutex>
12+
#include <string>
13+
#include <unordered_map>
14+
15+
#include "blocked_tensor_desc.hpp"
16+
#include "openvino/op/constant.hpp"
17+
#include "openvino/runtime/tensor.hpp"
18+
#include "openvino/runtime/common.hpp"
19+
20+
namespace ov {
21+
// namespace one_plugin {
22+
23+
// Declaration only: produces a std::string id for the given Constant node. How the id is derived
// is defined elsewhere (presumably it is the key used with WeightsCache - confirm against the definition).
std::string OPENVINO_RUNTIME_API get_weights_id(const std::shared_ptr<ov::op::v0::Constant>& constant);
24+
25+
/**
 * @brief Cache of tensors keyed by a string id, backed by a std::multimap (so several entries
 * may share the same id).
 *
 * Only TensorInfo's constructor is defined inline; get/findOrCreate/replaceWith are declared here
 * and defined elsewhere.
 *
 * NOTE(review): no mutex or other synchronization member is declared in this class, so nothing
 * visible here makes it safe for concurrent use - confirm how callers serialize access.
 */
class OPENVINO_RUNTIME_API WeightsCache {
26+
public:
27+
typedef std::shared_ptr<WeightsCache> Ptr;
28+
29+
// Cache entry: an ov::Tensor paired with a shared pointer to a const BlockedTensorDesc.
struct TensorInfo {
30+
typedef std::shared_ptr<TensorInfo> Ptr;
31+
32+
// Both arguments are taken by value and moved into the members.
// NOTE(review): uses std::move but <utility> is not among this header's visible includes;
// presumably it is available transitively - confirm.
TensorInfo(ov::Tensor tensor, BlockedTensorDesc::CPtr tensor_desc) : m_tensor(std::move(tensor)), m_tensor_desc(std::move(tensor_desc)) {}
33+
34+
ov::Tensor m_tensor;
35+
BlockedTensorDesc::CPtr m_tensor_desc;
36+
};
37+
38+
// Looks up an entry by id and returns a const_iterator into the multimap.
// The result for a missing id is not visible here (presumably m_storage.end() - confirm).
// NOTE(review): `id` is passed by value, which copies the string on every call.
std::multimap<std::string, TensorInfo>::const_iterator get(std::string id) const;
39+
// Returns a const_iterator for `id`; `create` is a factory yielding a TensorInfo::Ptr
// (presumably invoked only when no suitable entry exists - confirm in the definition).
std::multimap<std::string, TensorInfo>::const_iterator findOrCreate(std::string id, std::function<TensorInfo::Ptr(void)> create);
40+
// Takes an iterator position and a TensorInfo::Ptr and returns a const_iterator
// (presumably to the entry that now holds `item` - confirm in the definition).
std::multimap<std::string, WeightsCache::TensorInfo>::const_iterator replaceWith(
41+
std::multimap<std::string, WeightsCache::TensorInfo>::const_iterator pos, WeightsCache::TensorInfo::Ptr item);
42+
43+
// TODO: make private
// NOTE(review): while public, callers can modify the storage directly, bypassing the methods above.
44+
std::multimap<std::string, TensorInfo> m_storage;
45+
// NOTE(review): this `private:` section is empty.
private:
46+
};
47+
48+
// class WeightsCache {
49+
// using ITensorPtr = std::shared_ptr<ITensor>;
50+
51+
// struct TensorInfo {
52+
// typedef std::shared_ptr<TensorInfo> Ptr;
53+
54+
// TensorInfo(ITensorPtr tensor, bool valid) : shared_tensor(tensor), valid(valid) {}
55+
56+
// // std::mutex guard;
57+
// // std::atomic<bool> valid;
58+
// std::shared_ptr<ITensor> m_tensor;
59+
// std::shared_ptr<ITensor> m_tensor;
60+
// };
61+
62+
// public:
63+
// typedef std::shared_ptr<WeightsCache> Ptr;
64+
65+
// class SharedTensor {
66+
// public:
67+
// typedef std::shared_ptr<SharedTensor> Ptr;
68+
69+
// SharedTensor(std::unique_lock<std::mutex>&& lock, const TensorInfo::Ptr& memory, ITensorPtr newPtr =
70+
// nullptr);
71+
72+
// operator ITensorPtr() const;
73+
// bool isValid() const;
74+
// void valid(bool b);
75+
76+
// private:
77+
// std::unique_lock<std::mutex> lock;
78+
// TensorInfo::Ptr memory;
79+
// ITensorPtr newPtr;
80+
// };
81+
82+
// SharedTensor::Ptr findOrCreate(const std::string& key, std::function<ITensorPtr(void)> create, bool valid =
83+
// true);
84+
85+
// SharedTensor::Ptr get(const std::string& key) const;
86+
87+
// protected:
88+
// // mutable std::mutex guard;
89+
// std::unordered_map<size_t, TensorInfo::Ptr> m_storage;
90+
// };
91+
92+
// } // namespace one_plugin
93+
} // namespace ov

0 commit comments

Comments
 (0)