27
27
#include " transformations/transformation_pipeline.h"
28
28
#include " transformations/utils/utils.hpp"
29
29
#include " utils/serialize.hpp"
30
+ #include " utils/denormals.hpp"
30
31
31
32
#if defined(OV_CPU_WITH_ACL)
32
33
# include " nodes/executors/acl/acl_ie_scheduler.hpp"
@@ -55,15 +56,17 @@ CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
55
56
m_cfg{std::move (cfg)},
56
57
m_name{model->get_name ()},
57
58
m_loaded_from_cache (loaded_from_cache),
58
- m_sub_memory_manager (std::move(sub_memory_manager)) {
59
+ m_sub_memory_manager (std::move(sub_memory_manager)),
60
+ m_model_name (model->get_friendly_name ()) {
59
61
m_mutex = std::make_shared<std::mutex>();
60
62
const auto & core = m_plugin->get_core ();
61
63
if (!core) {
62
64
OPENVINO_THROW (" Unable to get API version. Core is unavailable" );
63
65
}
64
66
67
+
65
68
IStreamsExecutor::Config executor_config;
66
- if (m_cfg.exclusiveAsyncRequests ) {
69
+ if (m_cfg.get_exclusive_async_requests () ) {
67
70
// special case when all InferRequests are muxed into a single queue
68
71
m_task_executor = m_plugin->get_executor_manager ()->get_executor (" CPU" );
69
72
} else {
@@ -146,6 +149,19 @@ CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
146
149
}
147
150
}
148
151
152
+ static bool set_denormals_optimization (const ov::intel_cpu::DenormalsOptimization& value){
153
+ if (dnnl::impl::cpu::x64::mayiuse (dnnl::impl::cpu::x64::sse41)) {
154
+ if (value.m_mode == DenormalsOptimization::Mode::ON) {
155
+ flush_to_zero (true );
156
+ return denormals_as_zero (true );
157
+ } else if (value.m_mode == DenormalsOptimization::Mode::OFF) {
158
+ flush_to_zero (false );
159
+ denormals_as_zero (false );
160
+ }
161
+ }
162
+ return false ;
163
+ }
164
+
149
165
CompiledModel::GraphGuard::Lock CompiledModel::get_graph () const {
150
166
int streamId = 0 ;
151
167
int socketId = 0 ;
@@ -162,11 +178,15 @@ CompiledModel::GraphGuard::Lock CompiledModel::get_graph() const {
162
178
GraphContext::Ptr ctx;
163
179
{
164
180
std::lock_guard<std::mutex> lock{*m_mutex.get ()};
165
- auto isQuantizedFlag = (m_cfg.lpTransformsMode == Config::On ) &&
181
+ auto isQuantizedFlag = (m_cfg.get_enable_lp_transformations () ) &&
166
182
ov::pass::low_precision::LowPrecision::isFunctionQuantized (m_model);
183
+ // An SSE runtime check is needed for some Atom machines, which are x86-64 but lack SSE support
184
+
185
+ bool denormalsAsZero = set_denormals_optimization (m_cfg.get_denormals_optimization ());
167
186
ctx = std::make_shared<GraphContext>(m_cfg,
168
187
m_socketWeights[socketId],
169
188
isQuantizedFlag,
189
+ denormalsAsZero,
170
190
streamsExecutor,
171
191
m_sub_memory_manager);
172
192
}
@@ -221,25 +241,6 @@ std::shared_ptr<const ov::Model> CompiledModel::get_runtime_model() const {
221
241
}
222
242
223
243
ov::Any CompiledModel::get_property (const std::string& name) const {
224
- if (m_graphs.empty ()) {
225
- OPENVINO_THROW (" No graph was found" );
226
- }
227
-
228
- if (name == ov::loaded_from_cache) {
229
- return m_loaded_from_cache;
230
- }
231
-
232
- Config engConfig = get_graph ()._graph .getConfig ();
233
- auto option = engConfig._config .find (name);
234
- if (option != engConfig._config .end ()) {
235
- return option->second ;
236
- }
237
-
238
- // @todo Can't we just use local copy (_cfg) instead?
239
- auto graphLock = get_graph ();
240
- const auto & graph = graphLock._graph ;
241
- const auto & config = graph.getConfig ();
242
-
243
244
auto RO_property = [](const std::string& propertyName) {
244
245
return ov::PropertyName (propertyName, ov::PropertyMutability::RO);
245
246
};
@@ -277,98 +278,25 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
277
278
}
278
279
279
280
if (name == ov::model_name) {
280
- // @todo Does not seem ok to 'dump()' the whole graph everytime in order to get a name
281
- const std::string modelName = graph.dump ()->get_friendly_name ();
282
- return decltype (ov::model_name)::value_type (modelName);
281
+ return decltype (ov::model_name)::value_type {m_model_name};
282
+ }
283
+ if (name == ov::loaded_from_cache) {
284
+ return decltype (ov::loaded_from_cache)::value_type {m_loaded_from_cache};
283
285
}
284
286
if (name == ov::optimal_number_of_infer_requests) {
285
- const auto streams = config .streamExecutorConfig .get_streams ();
286
- return static_cast < decltype (ov::optimal_number_of_infer_requests)::value_type> (
287
+ const auto streams = m_cfg .streamExecutorConfig .get_streams ();
288
+ return decltype (ov::optimal_number_of_infer_requests)::value_type (
287
289
streams > 0 ? streams : 1 ); // ov::optimal_number_of_infer_requests has no negative values
288
290
}
289
- if (name == ov::num_streams) {
290
- const auto streams = config.streamExecutorConfig .get_streams ();
291
- return decltype (ov::num_streams)::value_type (
292
- streams); // ov::num_streams has special negative values (AUTO = -1, NUMA = -2)
293
- }
294
- if (name == ov::inference_num_threads) {
295
- const auto num_threads = config.streamExecutorConfig .get_threads ();
296
- return static_cast <decltype (ov::inference_num_threads)::value_type>(num_threads);
297
- }
298
- if (name == ov::enable_profiling.name ()) {
299
- const bool perfCount = config.collectPerfCounters ;
300
- return static_cast <decltype (ov::enable_profiling)::value_type>(perfCount);
301
- }
302
- if (name == ov::hint::inference_precision) {
303
- return decltype (ov::hint::inference_precision)::value_type (config.inferencePrecision );
304
- }
305
- if (name == ov::hint::performance_mode) {
306
- return static_cast <decltype (ov::hint::performance_mode)::value_type>(config.hintPerfMode );
307
- }
308
- if (name == ov::log ::level) {
309
- return static_cast <decltype (ov::log ::level)::value_type>(config.logLevel );
310
- }
311
- if (name == ov::hint::enable_cpu_pinning.name ()) {
312
- const bool use_pin = config.enableCpuPinning ;
313
- return static_cast <decltype (ov::hint::enable_cpu_pinning)::value_type>(use_pin);
314
- }
315
- if (name == ov::hint::enable_cpu_reservation.name ()) {
316
- const bool use_reserve = config.enableCpuReservation ;
317
- return static_cast <decltype (ov::hint::enable_cpu_reservation)::value_type>(use_reserve);
318
- }
319
- if (name == ov::hint::scheduling_core_type) {
320
- const auto stream_mode = config.schedulingCoreType ;
321
- return stream_mode;
322
- }
323
- if (name == ov::hint::model_distribution_policy) {
324
- const auto & distribution_policy = config.modelDistributionPolicy ;
325
- return distribution_policy;
326
- }
327
- if (name == ov::hint::enable_hyper_threading.name ()) {
328
- const bool use_ht = config.enableHyperThreading ;
329
- return static_cast <decltype (ov::hint::enable_hyper_threading)::value_type>(use_ht);
330
- }
331
- if (name == ov::hint::execution_mode) {
332
- return config.executionMode ;
333
- }
334
- if (name == ov::hint::num_requests) {
335
- return static_cast <decltype (ov::hint::num_requests)::value_type>(config.hintNumRequests );
336
- }
337
291
if (name == ov::execution_devices) {
338
292
return decltype (ov::execution_devices)::value_type{m_plugin->get_device_name ()};
339
293
}
340
- if (name == ov::intel_cpu::denormals_optimization) {
341
- return static_cast <decltype (ov::intel_cpu::denormals_optimization)::value_type>(
342
- config.denormalsOptMode == Config::DenormalsOptMode::DO_On);
343
- }
344
- if (name == ov::intel_cpu::sparse_weights_decompression_rate) {
345
- return static_cast <decltype (ov::intel_cpu::sparse_weights_decompression_rate)::value_type>(
346
- config.fcSparseWeiDecompressionRate );
347
- }
348
- if (name == ov::hint::dynamic_quantization_group_size) {
349
- return static_cast <decltype (ov::hint::dynamic_quantization_group_size)::value_type>(
350
- config.fcDynamicQuantizationGroupSize );
351
- }
352
- if (name == ov::hint::kv_cache_precision) {
353
- return decltype (ov::hint::kv_cache_precision)::value_type (config.kvCachePrecision );
354
- }
355
- if (name == ov::key_cache_precision) {
356
- return decltype (ov::key_cache_precision)::value_type (config.keyCachePrecision );
357
- }
358
- if (name == ov::value_cache_precision) {
359
- return decltype (ov::value_cache_precision)::value_type (config.valueCachePrecision );
360
- }
361
- if (name == ov::key_cache_group_size) {
362
- return static_cast <decltype (ov::key_cache_group_size)::value_type>(config.keyCacheGroupSize );
363
- }
364
- if (name == ov::value_cache_group_size) {
365
- return static_cast <decltype (ov::value_cache_group_size)::value_type>(config.valueCacheGroupSize );
366
- }
367
- OPENVINO_THROW (" Unsupported property: " , name);
294
+
295
+ return m_cfg.get_property (name, OptionVisibility::RELEASE);
368
296
}
369
297
370
298
void CompiledModel::export_model (std::ostream& modelStream) const {
371
- ModelSerializer serializer (modelStream, m_cfg.cacheEncrypt );
299
+ ModelSerializer serializer (modelStream, m_cfg.get_cache_encryption_callbacks (). encrypt );
372
300
serializer << m_model;
373
301
}
374
302
0 commit comments