Skip to content

Commit f788aac

Browse files
[CPU] New plugin config impl
1 parent fcb9c9d commit f788aac

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

41 files changed

+818
-1118
lines changed

src/inference/dev_api/openvino/runtime/plugin_config.hpp

+7-5
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,12 @@ class OPENVINO_RUNTIME_API PluginConfig {
181181
m_options_map.emplace(name, ptr);
182182
}
183183

184+
// TODO: move to protected
185+
template <typename T, PropertyMutability mutability>
186+
bool is_set_by_user(const ov::Property<T, mutability>& property) const {
187+
return m_user_properties.find(property.name()) != m_user_properties.end();
188+
}
189+
184190
protected:
185191
template <typename OptionType>
186192
class GlobalOptionInitializer {
@@ -203,11 +209,7 @@ class OPENVINO_RUNTIME_API PluginConfig {
203209
void apply_config_options(std::string_view device_name, std::filesystem::path config_path = "");
204210
virtual void finalize_impl(const IRemoteContext* context) {}
205211

206-
template <typename T, PropertyMutability mutability>
207-
bool is_set_by_user(const ov::Property<T, mutability>& property) const {
208-
return m_user_properties.find(property.name()) != m_user_properties.end();
209-
}
210-
212+
protected:
211213
ConfigOptionBase* get_option_ptr(const std::string& name) const {
212214
auto it = m_options_map.find(name);
213215
OPENVINO_ASSERT(it != m_options_map.end(), "Option not found: ", name);

src/inference/include/openvino/runtime/intel_cpu/properties.hpp

+48-2
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,60 @@ namespace ov {
2626
*/
2727
namespace intel_cpu {
2828

29+
struct DenormalsOptimization {
30+
enum class Mode {
31+
DEFAULT,
32+
ON,
33+
OFF
34+
};
35+
36+
DenormalsOptimization() {};
37+
DenormalsOptimization(Mode mode) : m_mode(mode) {};
38+
DenormalsOptimization(bool mode) { m_mode = mode ? Mode::ON : Mode::OFF; }
39+
operator bool() const { return m_mode == Mode::ON; }
40+
41+
Mode m_mode = Mode::DEFAULT;
42+
};
43+
44+
/** @cond INTERNAL */
45+
inline std::ostream& operator<<(std::ostream& os, const DenormalsOptimization& value) {
46+
switch (value.m_mode) {
47+
case DenormalsOptimization::Mode::DEFAULT:
48+
return os << "DEFAULT";
49+
case DenormalsOptimization::Mode::ON:
50+
return os << "ON";
51+
case DenormalsOptimization::Mode::OFF:
52+
return os << "OFF";
53+
default:
54+
OPENVINO_THROW("Unsupported denormals optimization mode: ");
55+
}
56+
}
57+
58+
inline std::istream& operator>>(std::istream& is, DenormalsOptimization& value) {
59+
std::string str;
60+
is >> str;
61+
if (str == "DEFAULT") {
62+
value = DenormalsOptimization::Mode::DEFAULT;
63+
} else if (str == "ON") {
64+
value = DenormalsOptimization::Mode::ON;
65+
} else if (str == "OFF") {
66+
value = DenormalsOptimization::Mode::OFF;
67+
} else {
68+
OPENVINO_THROW("Could not read denormals optimization mode from str: ", str);
69+
}
70+
return is;
71+
}
72+
/** @endcond */
73+
2974
/**
3075
 * @brief This property defines whether to perform denormals optimization.
3176
* @ingroup ov_runtime_cpu_prop_cpp_api
3277
*
3378
* Computation with denormals is very time consuming. FTZ(Flushing denormals to zero) and DAZ(Denormals as zero)
3479
* could significantly improve the performance, but it does not comply with IEEE standard. In most cases, this behavior
3580
* has little impact on model accuracy. Users could enable this optimization if no or acceptable accuracy drop is seen.
36-
* The following code enables denormals optimization
81+
* By default OV runtime doesn't change master thread settings.
82+
* The following code explicitly enables denormals optimization
3783
*
3884
* @code
3985
* ie.set_property(ov::denormals_optimization(true)); // enable denormals optimization
@@ -45,7 +91,7 @@ namespace intel_cpu {
4591
* ie.set_property(ov::denormals_optimization(false)); // disable denormals optimization
4692
* @endcode
4793
*/
48-
static constexpr Property<bool> denormals_optimization{"CPU_DENORMALS_OPTIMIZATION"};
94+
static constexpr Property<DenormalsOptimization> denormals_optimization{"CPU_DENORMALS_OPTIMIZATION"};
4995

5096
/**
5197
* @brief This property defines threshold for sparse weights decompression feature activation

src/plugins/intel_cpu/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ if(WIN32)
160160
endif()
161161

162162
if(ENABLE_CPU_DEBUG_CAPS)
163+
add_definitions(-DENABLE_DEBUG_CAPS)
163164
add_definitions(-DCPU_DEBUG_CAPS)
164165
endif()
165166

src/plugins/intel_cpu/src/compiled_model.cpp

+32-104
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include "transformations/transformation_pipeline.h"
2828
#include "transformations/utils/utils.hpp"
2929
#include "utils/serialize.hpp"
30+
#include "utils/denormals.hpp"
3031

3132
#if defined(OV_CPU_WITH_ACL)
3233
# include "nodes/executors/acl/acl_ie_scheduler.hpp"
@@ -55,15 +56,17 @@ CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
5556
m_cfg{std::move(cfg)},
5657
m_name{model->get_name()},
5758
m_loaded_from_cache(loaded_from_cache),
58-
m_sub_memory_manager(std::move(sub_memory_manager)) {
59+
m_sub_memory_manager(std::move(sub_memory_manager)),
60+
m_model_name(model->get_friendly_name()) {
5961
m_mutex = std::make_shared<std::mutex>();
6062
const auto& core = m_plugin->get_core();
6163
if (!core) {
6264
OPENVINO_THROW("Unable to get API version. Core is unavailable");
6365
}
6466

67+
6568
IStreamsExecutor::Config executor_config;
66-
if (m_cfg.exclusiveAsyncRequests) {
69+
if (m_cfg.get_exclusive_async_requests()) {
6770
// special case when all InferRequests are muxed into a single queue
6871
m_task_executor = m_plugin->get_executor_manager()->get_executor("CPU");
6972
} else {
@@ -146,6 +149,19 @@ CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
146149
}
147150
}
148151

152+
static bool set_denormals_optimization(const ov::intel_cpu::DenormalsOptimization& value){
153+
if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::sse41)) {
154+
if (value.m_mode == DenormalsOptimization::Mode::ON) {
155+
flush_to_zero(true);
156+
return denormals_as_zero(true);
157+
} else if (value.m_mode == DenormalsOptimization::Mode::OFF) {
158+
flush_to_zero(false);
159+
denormals_as_zero(false);
160+
}
161+
}
162+
return false;
163+
}
164+
149165
CompiledModel::GraphGuard::Lock CompiledModel::get_graph() const {
150166
int streamId = 0;
151167
int socketId = 0;
@@ -162,11 +178,15 @@ CompiledModel::GraphGuard::Lock CompiledModel::get_graph() const {
162178
GraphContext::Ptr ctx;
163179
{
164180
std::lock_guard<std::mutex> lock{*m_mutex.get()};
165-
auto isQuantizedFlag = (m_cfg.lpTransformsMode == Config::On) &&
181+
auto isQuantizedFlag = (m_cfg.get_enable_lp_transformations()) &&
166182
ov::pass::low_precision::LowPrecision::isFunctionQuantized(m_model);
183+
// SSE runtime check is needed for some ATOM machine, which is x86-64 but w/o SSE
184+
185+
bool denormalsAsZero = set_denormals_optimization(m_cfg.get_denormals_optimization());
167186
ctx = std::make_shared<GraphContext>(m_cfg,
168187
m_socketWeights[socketId],
169188
isQuantizedFlag,
189+
denormalsAsZero,
170190
streamsExecutor,
171191
m_sub_memory_manager);
172192
}
@@ -221,25 +241,6 @@ std::shared_ptr<const ov::Model> CompiledModel::get_runtime_model() const {
221241
}
222242

223243
ov::Any CompiledModel::get_property(const std::string& name) const {
224-
if (m_graphs.empty()) {
225-
OPENVINO_THROW("No graph was found");
226-
}
227-
228-
if (name == ov::loaded_from_cache) {
229-
return m_loaded_from_cache;
230-
}
231-
232-
Config engConfig = get_graph()._graph.getConfig();
233-
auto option = engConfig._config.find(name);
234-
if (option != engConfig._config.end()) {
235-
return option->second;
236-
}
237-
238-
// @todo Can't we just use local copy (_cfg) instead?
239-
auto graphLock = get_graph();
240-
const auto& graph = graphLock._graph;
241-
const auto& config = graph.getConfig();
242-
243244
auto RO_property = [](const std::string& propertyName) {
244245
return ov::PropertyName(propertyName, ov::PropertyMutability::RO);
245246
};
@@ -277,98 +278,25 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
277278
}
278279

279280
if (name == ov::model_name) {
280-
// @todo Does not seem ok to 'dump()' the whole graph everytime in order to get a name
281-
const std::string modelName = graph.dump()->get_friendly_name();
282-
return decltype(ov::model_name)::value_type(modelName);
281+
return decltype(ov::model_name)::value_type {m_model_name};
282+
}
283+
if (name == ov::loaded_from_cache) {
284+
return decltype(ov::loaded_from_cache)::value_type {m_loaded_from_cache};
283285
}
284286
if (name == ov::optimal_number_of_infer_requests) {
285-
const auto streams = config.streamExecutorConfig.get_streams();
286-
return static_cast<decltype(ov::optimal_number_of_infer_requests)::value_type>(
287+
const auto streams = m_cfg.streamExecutorConfig.get_streams();
288+
return decltype(ov::optimal_number_of_infer_requests)::value_type(
287289
streams > 0 ? streams : 1); // ov::optimal_number_of_infer_requests has no negative values
288290
}
289-
if (name == ov::num_streams) {
290-
const auto streams = config.streamExecutorConfig.get_streams();
291-
return decltype(ov::num_streams)::value_type(
292-
streams); // ov::num_streams has special negative values (AUTO = -1, NUMA = -2)
293-
}
294-
if (name == ov::inference_num_threads) {
295-
const auto num_threads = config.streamExecutorConfig.get_threads();
296-
return static_cast<decltype(ov::inference_num_threads)::value_type>(num_threads);
297-
}
298-
if (name == ov::enable_profiling.name()) {
299-
const bool perfCount = config.collectPerfCounters;
300-
return static_cast<decltype(ov::enable_profiling)::value_type>(perfCount);
301-
}
302-
if (name == ov::hint::inference_precision) {
303-
return decltype(ov::hint::inference_precision)::value_type(config.inferencePrecision);
304-
}
305-
if (name == ov::hint::performance_mode) {
306-
return static_cast<decltype(ov::hint::performance_mode)::value_type>(config.hintPerfMode);
307-
}
308-
if (name == ov::log::level) {
309-
return static_cast<decltype(ov::log::level)::value_type>(config.logLevel);
310-
}
311-
if (name == ov::hint::enable_cpu_pinning.name()) {
312-
const bool use_pin = config.enableCpuPinning;
313-
return static_cast<decltype(ov::hint::enable_cpu_pinning)::value_type>(use_pin);
314-
}
315-
if (name == ov::hint::enable_cpu_reservation.name()) {
316-
const bool use_reserve = config.enableCpuReservation;
317-
return static_cast<decltype(ov::hint::enable_cpu_reservation)::value_type>(use_reserve);
318-
}
319-
if (name == ov::hint::scheduling_core_type) {
320-
const auto stream_mode = config.schedulingCoreType;
321-
return stream_mode;
322-
}
323-
if (name == ov::hint::model_distribution_policy) {
324-
const auto& distribution_policy = config.modelDistributionPolicy;
325-
return distribution_policy;
326-
}
327-
if (name == ov::hint::enable_hyper_threading.name()) {
328-
const bool use_ht = config.enableHyperThreading;
329-
return static_cast<decltype(ov::hint::enable_hyper_threading)::value_type>(use_ht);
330-
}
331-
if (name == ov::hint::execution_mode) {
332-
return config.executionMode;
333-
}
334-
if (name == ov::hint::num_requests) {
335-
return static_cast<decltype(ov::hint::num_requests)::value_type>(config.hintNumRequests);
336-
}
337291
if (name == ov::execution_devices) {
338292
return decltype(ov::execution_devices)::value_type{m_plugin->get_device_name()};
339293
}
340-
if (name == ov::intel_cpu::denormals_optimization) {
341-
return static_cast<decltype(ov::intel_cpu::denormals_optimization)::value_type>(
342-
config.denormalsOptMode == Config::DenormalsOptMode::DO_On);
343-
}
344-
if (name == ov::intel_cpu::sparse_weights_decompression_rate) {
345-
return static_cast<decltype(ov::intel_cpu::sparse_weights_decompression_rate)::value_type>(
346-
config.fcSparseWeiDecompressionRate);
347-
}
348-
if (name == ov::hint::dynamic_quantization_group_size) {
349-
return static_cast<decltype(ov::hint::dynamic_quantization_group_size)::value_type>(
350-
config.fcDynamicQuantizationGroupSize);
351-
}
352-
if (name == ov::hint::kv_cache_precision) {
353-
return decltype(ov::hint::kv_cache_precision)::value_type(config.kvCachePrecision);
354-
}
355-
if (name == ov::key_cache_precision) {
356-
return decltype(ov::key_cache_precision)::value_type(config.keyCachePrecision);
357-
}
358-
if (name == ov::value_cache_precision) {
359-
return decltype(ov::value_cache_precision)::value_type(config.valueCachePrecision);
360-
}
361-
if (name == ov::key_cache_group_size) {
362-
return static_cast<decltype(ov::key_cache_group_size)::value_type>(config.keyCacheGroupSize);
363-
}
364-
if (name == ov::value_cache_group_size) {
365-
return static_cast<decltype(ov::value_cache_group_size)::value_type>(config.valueCacheGroupSize);
366-
}
367-
OPENVINO_THROW("Unsupported property: ", name);
294+
295+
return m_cfg.get_property(name, OptionVisibility::RELEASE);
368296
}
369297

370298
void CompiledModel::export_model(std::ostream& modelStream) const {
371-
ModelSerializer serializer(modelStream, m_cfg.cacheEncrypt);
299+
ModelSerializer serializer(modelStream, m_cfg.get_cache_encryption_callbacks().encrypt);
372300
serializer << m_model;
373301
}
374302

src/plugins/intel_cpu/src/compiled_model.h

+2
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,8 @@ class CompiledModel : public ov::ICompiledModel {
100100
std::vector<std::shared_ptr<CompiledModel>> m_sub_compiled_models;
101101
std::shared_ptr<SubMemoryManager> m_sub_memory_manager = nullptr;
102102
bool m_has_sub_compiled_models = false;
103+
104+
std::string m_model_name;
103105
};
104106

105107
// This class provides safe access to the internal CompiledModel structures and helps to decouple SyncInferRequest and

0 commit comments

Comments
 (0)