dmitry-gorokhov
diff --git a/‎src/bindings/python/src/pyopenvino/core/properties/properties.cpp
+31 b/‎src/bindings/python/src/pyopenvino/core/properties/properties.cpp
+31
diff --git a/‎src/inference/dev_api/openvino/runtime/threading/cpu_streams_executor.hpp
+2 b/‎src/inference/dev_api/openvino/runtime/threading/cpu_streams_executor.hpp
+2
diff --git a/‎src/inference/src/dev/threading/cpu_streams_executor.cpp
+4 b/‎src/inference/src/dev/threading/cpu_streams_executor.cpp
+4
diff --git a/‎src/plugins/intel_cpu/src/compiled_model.cpp
+22-4 b/‎src/plugins/intel_cpu/src/compiled_model.cpp
+22-4
diff --git a/‎src/plugins/intel_cpu/src/compiled_model.h
+1 b/‎src/plugins/intel_cpu/src/compiled_model.h
+1
diff --git a/‎src/plugins/intel_cpu/src/config.cpp
+42-41 b/‎src/plugins/intel_cpu/src/config.cpp
+42-41
diff --git a/‎src/plugins/intel_cpu/src/config.h
+14-5 b/‎src/plugins/intel_cpu/src/config.h
+14-5
diff --git a/‎src/plugins/intel_cpu/src/nodes/qkv_proj.cpp
+1-1 b/‎src/plugins/intel_cpu/src/nodes/qkv_proj.cpp
+1-1
@@ -338,4 +338,35 @@ void regmodule_properties(py::module m) {
     wrap_property_RW(m_intel_npu, ov::intel_npu::defer_weights_load, "defer_weights_load");
     wrap_property_RW(m_intel_npu, ov::intel_npu::compiler_dynamic_quantization, "compiler_dynamic_quantization");
     wrap_property_RW(m_intel_npu, ov::intel_npu::qdq_optimization, "qdq_optimization");
+
+    // Submodule denormals_optimization
+    py::module m_denormals_optimization_ =
+        m_properties.def_submodule("denormals_optimization", "openvino.properties.denormals_optimization submodule that simulates ov::intel_cpu::DenormalsOptimization");
+
+    py::class_<ov::intel_cpu::DenormalsOptimization, std::shared_ptr<ov::intel_cpu::DenormalsOptimization>> cls_do(m_denormals_optimization_, "DenormalsOptimization");
+
+    // DenormalsOptimization() {};
+    // DenormalsOptimization(Mode mode) : m_mode(mode) {};
+    // DenormalsOptimization(bool mode) { m_mode = mode ? Mode::ON : Mode::OFF; }
+    // operator bool() const { return m_mode == Mode::ON; }
+
+
+    cls_do.def(py::init<>());
+    cls_do.def(py::init<const bool>());
+
+    // // Covers static constexpr Num AUTO{-1};
+    // cls_num.attr("AUTO") = ov::streams::AUTO;
+    // // Covers static constexpr Num NUMA{-2};
+    // cls_num.attr("NUMA") = ov::streams::NUMA;
+
+    cls_do.def("to_bool", [](ov::intel_cpu::DenormalsOptimization& self) {
+        return self.m_mode == ov::intel_cpu::DenormalsOptimization::Mode::ON;
+    });
+
+    // Submodule denormals_optimization - properties RW
+    wrap_property_RW(m_denormals_optimization_, ov::intel_cpu::denormals_optimization, "denormals_optimization");
+    // Extra scenarios for ov::streams::num
+    // m_streams.def("num", [](const int32_t value) {
+    //     return ov::streams::num(ov::streams::Num(value));
+    // });
 }
@@ -51,6 +51,8 @@ class OPENVINO_RUNTIME_API CPUStreamsExecutor : public IStreamsExecutor {
 
     int get_streams_num();
 
+    int get_threads_per_stream();
+
     int get_numa_node_id() override;
 
     int get_socket_id() override;
 
@@ -474,6 +474,10 @@ int CPUStreamsExecutor::get_streams_num() {
     return _impl->_config.get_streams();
 }
 
+int CPUStreamsExecutor::get_threads_per_stream() {  // Reports the threads-per-stream value held by this executor's stored config.
+    return _impl->_config.get_threads_per_stream();
+}
+
 int CPUStreamsExecutor::get_numa_node_id() {
     if (!_impl->_streams.find_thread_id()) {
         return 0;
 
@@ -56,6 +56,7 @@ CompiledModel::~CompiledModel() {
 CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
                              const std::shared_ptr<const ov::IPlugin>& plugin,
                              Config cfg,
+                             ov::threading::IStreamsExecutor::Config streamExecutorConfig,
                              const bool loaded_from_cache,
                              std::shared_ptr<SubMemoryManager> sub_memory_manager)
     : ov::ICompiledModel::ICompiledModel(model, plugin),
@@ -84,10 +85,10 @@ CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
                                                                              ov::hint::SchedulingCoreType::ANY_CORE,
                                                                              false,
                                                                              true}
-                                                          : m_cfg.get_stream_executor_config();
+                                                          : streamExecutorConfig;
         m_task_executor = m_plugin->get_executor_manager()->get_idle_cpu_streams_executor(executor_config);
     }
-    if (0 != m_cfg.get_stream_executor_config().get_streams()) {
+    if (0 != streamExecutorConfig.get_streams()) {
         m_callback_executor = m_plugin->get_executor_manager()->get_idle_cpu_streams_executor(
             IStreamsExecutor::Config{"CPUCallbackExecutor", 1, 0});
     } else {
@@ -137,8 +138,25 @@ CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
         message->set_num_sub_streams(m_cfg.get_num_sub_streams());
         for (int i = 0; i < m_cfg.get_num_sub_streams(); i++) {
             auto sub_cfg = m_cfg.clone(i, true);
+
+            auto streams_info_table = sub_cfg.get_stream_info_table();
+            std::vector<std::vector<int>> sub_streams_table;
+            sub_streams_table.push_back(streams_info_table[i + 1]);
+            sub_streams_table[0][NUMBER_OF_STREAMS] = 1;
+            auto subStreamExecutorConfig =
+                ov::threading::IStreamsExecutor::Config{
+                    "CPUStreamsExecutor",
+                    1,
+                    1,
+                    ov::hint::SchedulingCoreType::ANY_CORE,
+                    false,
+                    true,
+                    true,
+                    std::move(sub_streams_table),
+                    sub_cfg.get_stream_rank_table()[i]};
+
             m_sub_compiled_models.push_back(
-                std::make_shared<CompiledModel>(model, plugin, sub_cfg, loaded_from_cache, m_sub_memory_manager));
+                std::make_shared<CompiledModel>(model, plugin, sub_cfg, subStreamExecutorConfig, loaded_from_cache, m_sub_memory_manager));
         }
     }
 }
@@ -278,7 +296,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
         return decltype(ov::loaded_from_cache)::value_type {m_loaded_from_cache};
     }
     if (name == ov::optimal_number_of_infer_requests) {
-        const auto streams = m_cfg.get_stream_executor_config().get_streams();
+        const auto streams = m_cfg.get_num_streams().num;
         return decltype(ov::optimal_number_of_infer_requests)::value_type(
             streams > 0 ? streams : 1);  // ov::optimal_number_of_infer_requests has no negative values
     }
 
@@ -35,6 +35,7 @@ class CompiledModel : public ov::ICompiledModel {
     CompiledModel(const std::shared_ptr<ov::Model>& model,
                   const std::shared_ptr<const ov::IPlugin>& plugin,
                   Config cfg,
+                  ov::threading::IStreamsExecutor::Config streamExecutorConfig,
                   const bool loaded_from_cache,
                   std::shared_ptr<SubMemoryManager> sub_memory_manager = nullptr);
 
 
@@ -62,9 +62,10 @@ Config::Config(const Config& other) : Config() {
         m_options_map.at(kv.first)->set_any(kv.second->get_any());
     }
 
-    m_stream_executor_config = other.m_stream_executor_config;
+    // m_stream_executor_config = other.m_stream_executor_config;
     m_model_prefer_threads = other.m_model_prefer_threads;
-    m_streams_rank_table = other.m_streams_rank_table;
+    m_stream_rank_table = other.m_stream_rank_table;
+    m_stream_info_table = other.m_stream_info_table;
     m_num_sub_streams = other.m_num_sub_streams;
     m_proc_type_table = other.m_proc_type_table;
     m_numa_node_id = other.m_numa_node_id;
@@ -77,9 +78,10 @@ Config& Config::operator=(const Config& other) {
         m_options_map.at(kv.first)->set_any(kv.second->get_any());
     }
 
-    m_stream_executor_config = other.m_stream_executor_config;
+    // m_stream_executor_config = other.m_stream_executor_config;
     m_model_prefer_threads = other.m_model_prefer_threads;
-    m_streams_rank_table = other.m_streams_rank_table;
+    m_stream_rank_table = other.m_stream_rank_table;
+    m_stream_info_table = other.m_stream_info_table;
     m_num_sub_streams = other.m_num_sub_streams;
     m_proc_type_table = other.m_proc_type_table;
     m_numa_node_id = other.m_numa_node_id;
@@ -98,21 +100,21 @@ Config Config::clone(int sub_stream_idx, bool enable_node_split) const {
     Config new_config = *this;
 
     new_config.m_num_sub_streams = 1;
-    auto streams_info_table = new_config.m_stream_executor_config.get_streams_info_table();
-    std::vector<std::vector<int>> sub_streams_table;
-    sub_streams_table.push_back(streams_info_table[sub_stream_idx + 1]);
-    sub_streams_table[0][NUMBER_OF_STREAMS] = 1;
-    new_config.m_stream_executor_config =
-        ov::threading::IStreamsExecutor::Config{
-            "CPUStreamsExecutor",
-            1,
-            1,
-            ov::hint::SchedulingCoreType::ANY_CORE,
-            false,
-            true,
-            true,
-            std::move(sub_streams_table),
-            new_config.m_streams_rank_table[sub_stream_idx]};
+    // auto streams_info_table = new_config.m_stream_executor_config.get_streams_info_table();
+    // std::vector<std::vector<int>> sub_streams_table;
+    // sub_streams_table.push_back(streams_info_table[sub_stream_idx + 1]);
+    // sub_streams_table[0][NUMBER_OF_STREAMS] = 1;
+    // new_config.m_stream_executor_config =
+    //     ov::threading::IStreamsExecutor::Config{
+    //         "CPUStreamsExecutor",
+    //         1,
+    //         1,
+    //         ov::hint::SchedulingCoreType::ANY_CORE,
+    //         false,
+    //         true,
+    //         true,
+    //         std::move(sub_streams_table),
+    //         new_config.m_streams_rank_table[sub_stream_idx]};
 
     return new_config;
 }
@@ -236,12 +238,10 @@ void Config::apply_execution_hints() {
         m_value_cache_precision = m_kv_cache_precision;
     }
 
-    if (!hasHardwareSupport(m_inference_precision)) {
+    if (!hasHardwareSupport(m_inference_precision) && m_inference_precision != ov::element::dynamic) {
         m_inference_precision = ov::element::f32;
     }
 
-
-
 #if defined(__APPLE__)
     m_enable_cpu_reservation = false;
 #endif
@@ -313,29 +313,30 @@ void Config::apply_threading_properties(const ov::Model* model) {
     //     streams = streams_set == 1 ? 0 : streams_set;
     // }
 
-    if (!(0 == streams && is_set_by_user(ov::num_streams))) {
+    if (0 != streams || !is_set_by_user(ov::num_streams)) {
         std::lock_guard<std::mutex> lock{ov::threading::_streams_executor_mutex};
         m_proc_type_table = get_proc_type_table();
-        auto stream_info_table = generate_stream_info(streams, model);
-
-        // ???
-        auto threadsPerStream = m_stream_executor_config.get_threads_per_stream();
-
-        m_stream_executor_config = ov::threading::IStreamsExecutor::Config{"CPUStreamsExecutor",
-                                                                       streams,
-                                                                       threadsPerStream,
-                                                                       ov::hint::SchedulingCoreType::ANY_CORE,
-                                                                       get_enable_cpu_reservation(),
-                                                                       get_enable_cpu_pinning(),
-                                                                       true,
-                                                                       std::move(stream_info_table),
-                                                                       {},
-                                                                       false};
-    } else {
-        m_stream_executor_config = ov::threading::IStreamsExecutor::Config{"CPUStreamsExecutor", streams};
+        m_stream_info_table = generate_stream_info(streams, model);
     }
 
     m_num_streams = ov::streams::Num(streams);
+
+        // ???
+        // auto threadsPerStream = m_stream_executor_config.get_threads_per_stream();
+
+        // m_stream_executor_config = ov::threading::IStreamsExecutor::Config{"CPUStreamsExecutor",
+        //                                                                streams,
+        //                                                                threadsPerStream,
+        //                                                                ov::hint::SchedulingCoreType::ANY_CORE,
+        //                                                                get_enable_cpu_reservation(),
+        //                                                                get_enable_cpu_pinning(),
+        //                                                                true,
+        //                                                                std::move(stream_info_table),
+        //                                                                {},
+        //                                                                false};
+    // } else {
+    //     // m_stream_executor_config = ov::threading::IStreamsExecutor::Config{"CPUStreamsExecutor", streams};
+    // }
 }
 
 std::vector<std::vector<int>> Config::generate_stream_info(int streams, const ov::Model* model) {
@@ -379,7 +380,7 @@ std::vector<std::vector<int>> Config::generate_stream_info(int streams, const ov
 
     auto modelDistributionPolicy = get_model_distribution_policy();
     if (modelDistributionPolicy.find(ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL) != modelDistributionPolicy.end()) {
-        m_streams_rank_table = get_streams_rank_table(streams_info_table, 1, m_num_sub_streams);
+        m_stream_rank_table = get_streams_rank_table(streams_info_table, 1, m_num_sub_streams);
     }
 
     m_enable_cpu_pinning = check_cpu_pinning(get_enable_cpu_pinning(),
 
@@ -49,18 +49,27 @@ struct Config : public ov::PluginConfig {
         return m_model_prefer_threads;
     }
 
-    const ov::threading::IStreamsExecutor::Config& get_stream_executor_config() const {
-        return m_stream_executor_config;
-    }
+    // const ov::threading::IStreamsExecutor::Config& get_stream_executor_config() const {
+    //     return m_stream_executor_config;
+    // }
 
     int get_num_sub_streams() const {  // Accessor for m_num_sub_streams (member is default-initialized to 0).
         return m_num_sub_streams;
     }
 
+    const std::vector<std::vector<int>>& get_stream_rank_table() const {  // Read-only reference to m_stream_rank_table; no copy is made.
+        return m_stream_rank_table;
+    }
+
+    const std::vector<std::vector<int>>& get_stream_info_table() const {  // Read-only reference to m_stream_info_table; no copy is made.
+        return m_stream_info_table;
+    }
+
 private:
-    ov::threading::IStreamsExecutor::Config m_stream_executor_config;
+    // ov::threading::IStreamsExecutor::Config m_stream_executor_config;
     int m_model_prefer_threads = -1;
-    std::vector<std::vector<int>> m_streams_rank_table = {};
+    std::vector<std::vector<int>> m_stream_rank_table = {};
+    std::vector<std::vector<int>> m_stream_info_table = {};
     int m_num_sub_streams = 0;
     std::vector<std::vector<int>> m_proc_type_table = {};
     int m_numa_node_id = -1;
 
@@ -336,7 +336,7 @@ QKVProjection::QKVProjection(const std::shared_ptr<ov::Node>& op, const GraphCon
     std::string errorMessage;
 
     const auto& config = context->getConfig();
-    size_t concurrency = config.get_stream_executor_config().get_threads_per_stream();
+    size_t concurrency = context->getCPUStreamExecutor()->get_threads_per_stream();
     if (concurrency == 0) {
         concurrency = parallel_get_max_threads();
     }
Original file line number	Diff line number	Diff line change
`@@ -336,7 +336,7 @@ QKVProjection::QKVProjection(const std::shared_ptr<ov::Node>& op, const GraphCon`
`336`	`336`	`std::string errorMessage;`
`337`	`337`
`338`	`338`	`const auto& config = context->getConfig();`
`339`		`- size_t concurrency = config.get_stream_executor_config().get_threads_per_stream();`
	`339`	`+ size_t concurrency = context->getCPUStreamExecutor()->get_threads_per_stream();`
`340`	`340`	`if (concurrency == 0) {`
`341`	`341`	`concurrency = parallel_get_max_threads();`
`342`	`342`	`}`