dmitry-gorokhov
diff --git a/‎src/bindings/python/src/pyopenvino/core/properties/properties.cpp
+31 b/‎src/bindings/python/src/pyopenvino/core/properties/properties.cpp
+31
diff --git a/‎src/inference/dev_api/openvino/runtime/plugin_config.hpp
+1-1 b/‎src/inference/dev_api/openvino/runtime/plugin_config.hpp
+1-1
diff --git a/‎src/inference/dev_api/openvino/runtime/threading/cpu_streams_executor.hpp
+2 b/‎src/inference/dev_api/openvino/runtime/threading/cpu_streams_executor.hpp
+2
diff --git a/‎src/inference/src/dev/plugin_config.cpp
+1-1 b/‎src/inference/src/dev/plugin_config.cpp
+1-1
diff --git a/‎src/inference/src/dev/threading/cpu_streams_executor.cpp
+4 b/‎src/inference/src/dev/threading/cpu_streams_executor.cpp
+4
diff --git a/‎src/inference/tests/unit/config_test.cpp
+1-1 b/‎src/inference/tests/unit/config_test.cpp
+1-1
diff --git a/‎src/plugins/intel_cpu/src/compiled_model.cpp
+23-5 b/‎src/plugins/intel_cpu/src/compiled_model.cpp
+23-5
diff --git a/‎src/plugins/intel_cpu/src/compiled_model.h
+1 b/‎src/plugins/intel_cpu/src/compiled_model.h
+1
diff --git a/‎src/plugins/intel_cpu/src/config.cpp
+57-108 b/‎src/plugins/intel_cpu/src/config.cpp
+57-108
@@ -338,4 +338,35 @@ void regmodule_properties(py::module m) {
     wrap_property_RW(m_intel_npu, ov::intel_npu::defer_weights_load, "defer_weights_load");
     wrap_property_RW(m_intel_npu, ov::intel_npu::compiler_dynamic_quantization, "compiler_dynamic_quantization");
     wrap_property_RW(m_intel_npu, ov::intel_npu::qdq_optimization, "qdq_optimization");
+
+    // Submodule denormals_optimization
+    py::module m_denormals_optimization_ =
+        m_properties.def_submodule("denormals_optimization", "openvino.properties.denormals_optimization submodule that simulates ov::intel_cpu::DenormalsOptimization");
+
+    py::class_<ov::intel_cpu::DenormalsOptimization, std::shared_ptr<ov::intel_cpu::DenormalsOptimization>> cls_do(m_denormals_optimization_, "DenormalsOptimization");
+
+    // DenormalsOptimization() {};
+    // DenormalsOptimization(Mode mode) : m_mode(mode) {};
+    // DenormalsOptimization(bool mode) { m_mode = mode ? Mode::ON : Mode::OFF; }
+    // operator bool() const { return m_mode == Mode::ON; }
+
+
+    cls_do.def(py::init<>());
+    cls_do.def(py::init<const bool>());
+
+    // // Covers static constexpr Num AUTO{-1};
+    // cls_num.attr("AUTO") = ov::streams::AUTO;
+    // // Covers static constexpr Num NUMA{-2};
+    // cls_num.attr("NUMA") = ov::streams::NUMA;
+
+    cls_do.def("to_bool", [](ov::intel_cpu::DenormalsOptimization& self) {
+        return self.m_mode == ov::intel_cpu::DenormalsOptimization::Mode::ON;
+    });
+
+    // Submodule denormals_optimization - properties RW
+    wrap_property_RW(m_denormals_optimization_, ov::intel_cpu::denormals_optimization, "denormals_optimization");
+    // Extra scenarios for ov::streams::num
+    // m_streams.def("num", [](const int32_t value) {
+    //     return ov::streams::num(ov::streams::Num(value));
+    // });
 }
@@ -201,7 +201,7 @@ class OPENVINO_RUNTIME_API PluginConfig {
     virtual void apply_model_specific_options(const IRemoteContext* context, const ov::Model& model) {}
     void apply_env_options();
     void apply_config_options(std::string_view device_name, std::filesystem::path config_path = "");
-    virtual void finalize_impl(const IRemoteContext* context, const ov::Model* model) {}
+    virtual void finalize_impl(const IRemoteContext* context) {}
 
     template <typename T, PropertyMutability mutability>
     bool is_set_by_user(const ov::Property<T, mutability>& property) const {
 
@@ -51,6 +51,8 @@ class OPENVINO_RUNTIME_API CPUStreamsExecutor : public IStreamsExecutor {
 
     int get_streams_num();
 
+    int get_threads_per_stream();
+
     int get_numa_node_id() override;
 
     int get_socket_id() override;
 
@@ -116,7 +116,7 @@ void PluginConfig::finalize(const IRemoteContext* context, const ov::Model* mode
         option->set_any(value);
     }
 
-    finalize_impl(context, model);
+    finalize_impl(context);
 
 #ifdef ENABLE_DEBUG_CAPS
     apply_env_options();
 
@@ -474,6 +474,10 @@ int CPUStreamsExecutor::get_streams_num() {
     return _impl->_config.get_streams();
 }
 
+// Reports the threads-per-stream value held by this executor's configuration.
+int CPUStreamsExecutor::get_threads_per_stream() {
+    const auto threads_per_stream = _impl->_config.get_threads_per_stream();
+    return threads_per_stream;
+}
+
 int CPUStreamsExecutor::get_numa_node_id() {
     if (!_impl->_streams.find_thread_id()) {
         return 0;
 
@@ -141,7 +141,7 @@ struct NotEmptyTestConfig : public ov::PluginConfig {
         return supported_properties;
     }
 
-    void finalize_impl(const IRemoteContext* context, const ov::Model* model) override {
+    void finalize_impl(const IRemoteContext* context) override {
         if (!is_set_by_user(low_level_property)) {
             m_low_level_property.value = m_high_level_property.value;
         }
 
@@ -56,6 +56,7 @@ CompiledModel::~CompiledModel() {
 CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
                              const std::shared_ptr<const ov::IPlugin>& plugin,
                              Config cfg,
+                             ov::threading::IStreamsExecutor::Config streamExecutorConfig,
                              const bool loaded_from_cache,
                              std::shared_ptr<SubMemoryManager> sub_memory_manager)
     : ov::ICompiledModel::ICompiledModel(model, plugin),
@@ -84,10 +85,10 @@ CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
                                                                              ov::hint::SchedulingCoreType::ANY_CORE,
                                                                              false,
                                                                              true}
-                                                          : m_cfg.get_stream_executor_config();
+                                                          : streamExecutorConfig;
         m_task_executor = m_plugin->get_executor_manager()->get_idle_cpu_streams_executor(executor_config);
     }
-    if (0 != m_cfg.get_stream_executor_config().get_streams()) {
+    if (0 != streamExecutorConfig.get_streams()) {
         m_callback_executor = m_plugin->get_executor_manager()->get_idle_cpu_streams_executor(
             IStreamsExecutor::Config{"CPUCallbackExecutor", 1, 0});
     } else {
@@ -136,9 +137,26 @@ CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
         m_sub_memory_manager = std::make_shared<SubMemoryManager>(m_cfg.get_num_sub_streams());
         message->set_num_sub_streams(m_cfg.get_num_sub_streams());
         for (int i = 0; i < m_cfg.get_num_sub_streams(); i++) {
-            auto sub_cfg = m_cfg.clone(i, true);
+            auto sub_cfg = m_cfg.clone(1);
+
+            auto streams_info_table = sub_cfg.get_stream_info_table();
+            std::vector<std::vector<int>> sub_streams_table;
+            sub_streams_table.push_back(streams_info_table[i + 1]);
+            sub_streams_table[0][NUMBER_OF_STREAMS] = 1;
+            auto subStreamExecutorConfig =
+                ov::threading::IStreamsExecutor::Config{
+                    "CPUStreamsExecutor",
+                    1,
+                    1,
+                    ov::hint::SchedulingCoreType::ANY_CORE,
+                    false,
+                    true,
+                    true,
+                    std::move(sub_streams_table),
+                    sub_cfg.get_stream_rank_table()[i]};
+
             m_sub_compiled_models.push_back(
-                std::make_shared<CompiledModel>(model, plugin, sub_cfg, loaded_from_cache, m_sub_memory_manager));
+                std::make_shared<CompiledModel>(model, plugin, sub_cfg, subStreamExecutorConfig, loaded_from_cache, m_sub_memory_manager));
         }
     }
 }
@@ -278,7 +296,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
         return decltype(ov::loaded_from_cache)::value_type {m_loaded_from_cache};
     }
     if (name == ov::optimal_number_of_infer_requests) {
-        const auto streams = m_cfg.get_stream_executor_config().get_streams();
+        const auto streams = m_cfg.get_num_streams().num;
         return decltype(ov::optimal_number_of_infer_requests)::value_type(
             streams > 0 ? streams : 1);  // ov::optimal_number_of_infer_requests has no negative values
     }
 
@@ -35,6 +35,7 @@ class CompiledModel : public ov::ICompiledModel {
     CompiledModel(const std::shared_ptr<ov::Model>& model,
                   const std::shared_ptr<const ov::IPlugin>& plugin,
                   Config cfg,
+                  ov::threading::IStreamsExecutor::Config streamExecutorConfig,
                   const bool loaded_from_cache,
                   std::shared_ptr<SubMemoryManager> sub_memory_manager = nullptr);
 
 
@@ -62,9 +62,10 @@ Config::Config(const Config& other) : Config() {
         m_options_map.at(kv.first)->set_any(kv.second->get_any());
     }
 
-    m_stream_executor_config = other.m_stream_executor_config;
+    // m_stream_executor_config = other.m_stream_executor_config;
     m_model_prefer_threads = other.m_model_prefer_threads;
-    m_streams_rank_table = other.m_streams_rank_table;
+    m_stream_rank_table = other.m_stream_rank_table;
+    m_stream_info_table = other.m_stream_info_table;
     m_num_sub_streams = other.m_num_sub_streams;
     m_proc_type_table = other.m_proc_type_table;
     m_numa_node_id = other.m_numa_node_id;
@@ -77,9 +78,10 @@ Config& Config::operator=(const Config& other) {
         m_options_map.at(kv.first)->set_any(kv.second->get_any());
     }
 
-    m_stream_executor_config = other.m_stream_executor_config;
+    // m_stream_executor_config = other.m_stream_executor_config;
     m_model_prefer_threads = other.m_model_prefer_threads;
-    m_streams_rank_table = other.m_streams_rank_table;
+    m_stream_rank_table = other.m_stream_rank_table;
+    m_stream_info_table = other.m_stream_info_table;
     m_num_sub_streams = other.m_num_sub_streams;
     m_proc_type_table = other.m_proc_type_table;
     m_numa_node_id = other.m_numa_node_id;
@@ -94,26 +96,9 @@ Config Config::clone() const {
 }
 
 
-Config Config::clone(int sub_stream_idx, bool enable_node_split) const {
+// Returns a copy of this config (made with the copy constructor) whose
+// m_num_sub_streams is overridden with the given value.
+//
+// @param num_sub_streams  value stored into the copy's m_num_sub_streams.
+// @return the adjusted copy; *this is left untouched.
+Config Config::clone(int num_sub_streams) const {
     Config new_config = *this;
-
-    new_config.m_num_sub_streams = 1;
-    auto streams_info_table = new_config.m_stream_executor_config.get_streams_info_table();
-    std::vector<std::vector<int>> sub_streams_table;
-    sub_streams_table.push_back(streams_info_table[sub_stream_idx + 1]);
-    sub_streams_table[0][NUMBER_OF_STREAMS] = 1;
-    new_config.m_stream_executor_config =
-        ov::threading::IStreamsExecutor::Config{
-            "CPUStreamsExecutor",
-            1,
-            1,
-            ov::hint::SchedulingCoreType::ANY_CORE,
-            false,
-            true,
-            true,
-            std::move(sub_streams_table),
-            new_config.m_streams_rank_table[sub_stream_idx]};
-
+    new_config.m_num_sub_streams = num_sub_streams;
     return new_config;
 }
 
@@ -152,9 +137,9 @@ void Config::apply_cpu_rt_info(const ov::RTMap& rt_info) {
     }
 }
 
-void Config::finalize_impl(const IRemoteContext* context, const ov::Model* model) {
+void Config::finalize_impl(const IRemoteContext* context) {
     apply_hints();
-    apply_threading_properties(model);
+    apply_threading_properties();
 
     if (!m_cache_encryption_callbacks.value.encrypt || !m_cache_encryption_callbacks.value.decrypt) {
         m_cache_encryption_callbacks.value.encrypt = codec_xor_str;
@@ -236,12 +221,10 @@ void Config::apply_execution_hints() {
         m_value_cache_precision = m_kv_cache_precision;
     }
 
-    if (!hasHardwareSupport(m_inference_precision)) {
+    if (!hasHardwareSupport(m_inference_precision) && m_inference_precision != ov::element::dynamic) {
         m_inference_precision = ov::element::f32;
     }
 
-
-
 #if defined(__APPLE__)
     m_enable_cpu_reservation = false;
 #endif
@@ -254,91 +237,26 @@ void Config::apply_model_specific_options(const IRemoteContext* context, const o
     if (!is_set_by_user(ov::intel_cpu::model_type)) {
         m_model_type = getModelType(model.shared_from_this());
     }
+
+    if (-1 == m_model_prefer_threads) {
+        m_model_prefer_threads = calc_model_prefer_threads(get_default_num_streams(), get_default_proc_type_table(), model.shared_from_this());
+    }
 }
 
+// No-op in this revision: the previous body consisted solely of commented-out
+// performance-hint handling, all of which is removed here.
 void Config::apply_performance_hints() {
-    // if (is_set_by_user(ov::hint::performance_mode)) {
-    //     const auto mode = get_property(ov::hint::performance_mode);
-    //     if (!is_set_by_user(ov::num_streams)) {
-    //         if (mode == ov::hint::PerformanceMode::LATENCY) {
-    //             set_property(ov::num_streams(1));
-    //         } else if (mode == ov::hint::PerformanceMode::THROUGHPUT) {
-    //             set_property(ov::num_streams(ov::streams::AUTO));
-    //         }
-    //     }
-    // }
-
-    // if (get_property(ov::num_streams) == ov::streams::AUTO) {
-    //     int32_t n_streams = std::max<int32_t>(info.num_ccs, 2);
-    //     set_property(ov::num_streams(n_streams));
-    // }
-
-    // if (get_property(ov::internal::exclusive_async_requests)) {
-    //     set_property(ov::num_streams(1));
-    // }
-
-    // // Allow kernels reuse only for single-stream scenarios
-    // if (get_property(ov::intel_gpu::hint::enable_kernels_reuse)) {
-    //     if (get_property(ov::num_streams) != 1) {
-    //         set_property(ov::intel_gpu::hint::enable_kernels_reuse(false));
-    //     }
-    // }
 }
 
-void Config::apply_threading_properties(const ov::Model* model) {
-#if defined(OV_CPU_WITH_SHL)
-    // TODO: multi-stream execution is unsafe when SHL is used:
-    //       The library uses global static variables as flags and counters.
-    streams = 1;
-#else
-    // int streams_set
-    int streams = get_num_streams();
-    if (get_exclusive_async_requests()) {
-        streams = 1;
-    } else if (streams == ov::streams::NUMA) {
-        streams = ov::get_num_numa_nodes();
-    } else if (streams == ov::streams::AUTO) {
-        // bare minimum of streams (that evenly divides available number of cores)
-        streams = ov::threading::IStreamsExecutor::Config::get_default_num_streams();
-    }
-#endif
-
-    // if (is_set_by_user(ov::num_streams) && streams_set > 0) {
-    //     streams = streams_set;
-    // } else if (get_performance_mode() == ov::hint::PerformanceMode::LATENCY) {
-    //     streams = 1;
-    // } else if (get_performance_mode() == ov::hint::PerformanceMode::THROUGHPUT) {
-    //     streams = 0;
-    // } else {
-    //     streams = streams_set == 1 ? 0 : streams_set;
-    // }
-
-    if (!(0 == streams && is_set_by_user(ov::num_streams))) {
-        std::lock_guard<std::mutex> lock{ov::threading::_streams_executor_mutex};
-        m_proc_type_table = get_proc_type_table();
-        auto stream_info_table = generate_stream_info(streams, model);
-
-        // ???
-        auto threadsPerStream = m_stream_executor_config.get_threads_per_stream();
-
-        m_stream_executor_config = ov::threading::IStreamsExecutor::Config{"CPUStreamsExecutor",
-                                                                       streams,
-                                                                       threadsPerStream,
-                                                                       ov::hint::SchedulingCoreType::ANY_CORE,
-                                                                       get_enable_cpu_reservation(),
-                                                                       get_enable_cpu_pinning(),
-                                                                       true,
-                                                                       std::move(stream_info_table),
-                                                                       {},
-                                                                       false};
-    } else {
-        m_stream_executor_config = ov::threading::IStreamsExecutor::Config{"CPUStreamsExecutor", streams};
+// Resolves the stream count via get_default_num_streams() and stores it in
+// m_num_streams. m_proc_type_table and m_stream_info_table are refreshed
+// unless the resolved count is 0 and ov::num_streams was set by the user.
+// NOTE(review): the removed implementation held
+// ov::threading::_streams_executor_mutex across generate_stream_info(); here
+// the mutex is only taken inside get_default_proc_type_table() - confirm the
+// narrower locking is intended.
+void Config::apply_threading_properties() {
+    auto streams = get_default_num_streams();
+    if (0 != streams || !is_set_by_user(ov::num_streams)) {
+        m_proc_type_table = get_default_proc_type_table();
+        m_stream_info_table = generate_stream_info(streams);
     }
 
     m_num_streams = ov::streams::Num(streams);
 }
 
-std::vector<std::vector<int>> Config::generate_stream_info(int streams, const ov::Model* model) {
+std::vector<std::vector<int>> Config::generate_stream_info(int streams) {
 #if defined(__APPLE__)
     // CPUStreamExecutor doesn't support CPU reservation on Mac
     config.set_user_property(ov::hint::enable_cpu_reservation(false));
@@ -354,10 +272,6 @@ std::vector<std::vector<int>> Config::generate_stream_info(int streams, const ov
                                             ov::util::to_string(get_performance_mode()),
                                             m_proc_type_table);
 
-    if (-1 == m_model_prefer_threads && model) {
-        m_model_prefer_threads = calc_model_prefer_threads(streams, m_proc_type_table, model->shared_from_this());
-    }
-
     if (m_proc_type_table.size() > 1) {
         const auto cur_numa_node_id = m_numa_node_id < 0 ? get_current_numa_node_id() : m_numa_node_id;
         sort_table_by_numa_node_id(cur_numa_node_id, m_proc_type_table);
@@ -379,7 +293,7 @@ std::vector<std::vector<int>> Config::generate_stream_info(int streams, const ov
 
     auto modelDistributionPolicy = get_model_distribution_policy();
     if (modelDistributionPolicy.find(ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL) != modelDistributionPolicy.end()) {
-        m_streams_rank_table = get_streams_rank_table(streams_info_table, 1, m_num_sub_streams);
+        m_stream_rank_table = get_streams_rank_table(streams_info_table, 1, m_num_sub_streams);
     }
 
     m_enable_cpu_pinning = check_cpu_pinning(get_enable_cpu_pinning(),
@@ -390,4 +304,39 @@ std::vector<std::vector<int>> Config::generate_stream_info(int streams, const ov
     return streams_info_table;
 }
 
+// Resolves the stream count from the ov::num_streams property:
+//  - 1 when exclusive async requests are enabled,
+//  - ov::get_num_numa_nodes() for ov::streams::NUMA,
+//  - IStreamsExecutor::Config::get_default_num_streams() for ov::streams::AUTO,
+//  - otherwise the property value unchanged.
+// Builds with OV_CPU_WITH_SHL always yield 1.
+//
+// @return the resolved stream count.
+int Config::get_default_num_streams() {
+#if defined(OV_CPU_WITH_SHL)
+    // TODO: multi-stream execution is unsafe when SHL is used:
+    //       The library uses global static variables as flags and counters.
+    // Must be declared here: the declaration in the #else branch is compiled
+    // out in SHL builds, so a bare assignment would not compile.
+    int streams = 1;
+#else
+    // int streams_set
+    auto streams = get_property(ov::num_streams.name()).as<int>();
+    if (get_exclusive_async_requests()) {
+        streams = 1;
+    } else if (streams == ov::streams::NUMA) {
+        streams = ov::get_num_numa_nodes();
+    } else if (streams == ov::streams::AUTO) {
+        // bare minimum of streams (that evenly divides available number of cores)
+        streams = ov::threading::IStreamsExecutor::Config::get_default_num_streams();
+    }
+#endif
+    // if (is_set_by_user(ov::num_streams) && streams_set > 0) {
+    //     streams = streams_set;
+    // } else if (get_performance_mode() == ov::hint::PerformanceMode::LATENCY) {
+    //     streams = 1;
+    // } else if (get_performance_mode() == ov::hint::PerformanceMode::THROUGHPUT) {
+    //     streams = 0;
+    // } else {
+    //     streams = streams_set == 1 ? 0 : streams_set;
+    // }
+
+    return streams;
+}
+
+// Fetches the table returned by get_proc_type_table() while holding
+// ov::threading::_streams_executor_mutex for the duration of the call.
+std::vector<std::vector<int>> Config::get_default_proc_type_table() {
+    const std::lock_guard<std::mutex> guard(ov::threading::_streams_executor_mutex);
+    auto proc_type_table = get_proc_type_table();
+    return proc_type_table;
+}
+
 }  // namespace ov::intel_cpu
Original file line number	Diff line number	Diff line change
`@@ -116,7 +116,7 @@ void PluginConfig::finalize(const IRemoteContext* context, const ov::Model* mode`
`116`	`116`	`option->set_any(value);`
`117`	`117`	`}`
`118`	`118`
`119`		`- finalize_impl(context, model);`
	`119`	`+ finalize_impl(context);`
`120`	`120`
`121`	`121`	`#ifdef ENABLE_DEBUG_CAPS`
`122`	`122`	`apply_env_options();`
Original file line number	Diff line number	Diff line change
`@@ -141,7 +141,7 @@ struct NotEmptyTestConfig : public ov::PluginConfig {`
`141`	`141`	`return supported_properties;`
`142`	`142`	`}`
`143`	`143`
`144`		`- void finalize_impl(const IRemoteContext* context, const ov::Model* model) override {`
	`144`	`+ void finalize_impl(const IRemoteContext* context) override {`
`145`	`145`	`if (!is_set_by_user(low_level_property)) {`
`146`	`146`	`m_low_level_property.value = m_high_level_property.value;`
`147`	`147`	`}`