Skip to content

Commit ba4c284

Browse files
wip
1 parent b1257b6 commit ba4c284

File tree

13 files changed

+202
-69
lines changed

13 files changed

+202
-69
lines changed

src/bindings/python/src/pyopenvino/core/properties/properties.cpp

+31
Original file line numberDiff line numberDiff line change
@@ -338,4 +338,35 @@ void regmodule_properties(py::module m) {
338338
wrap_property_RW(m_intel_npu, ov::intel_npu::defer_weights_load, "defer_weights_load");
339339
wrap_property_RW(m_intel_npu, ov::intel_npu::compiler_dynamic_quantization, "compiler_dynamic_quantization");
340340
wrap_property_RW(m_intel_npu, ov::intel_npu::qdq_optimization, "qdq_optimization");
341+
342+
// Submodule denormals_optimization
343+
py::module m_denormals_optimization_ =
344+
m_properties.def_submodule("denormals_optimization", "openvino.properties.denormals_optimization submodule that simulates ov::intel_cpu::DenormalsOptimization");
345+
346+
py::class_<ov::intel_cpu::DenormalsOptimization, std::shared_ptr<ov::intel_cpu::DenormalsOptimization>> cls_do(m_denormals_optimization_, "DenormalsOptimization");
347+
348+
// DenormalsOptimization() {};
349+
// DenormalsOptimization(Mode mode) : m_mode(mode) {};
350+
// DenormalsOptimization(bool mode) { m_mode = mode ? Mode::ON : Mode::OFF; }
351+
// operator bool() const { return m_mode == Mode::ON; }
352+
353+
354+
cls_do.def(py::init<>());
355+
cls_do.def(py::init<const bool>());
356+
357+
// // Covers static constexpr Num AUTO{-1};
358+
// cls_num.attr("AUTO") = ov::streams::AUTO;
359+
// // Covers static constexpr Num NUMA{-2};
360+
// cls_num.attr("NUMA") = ov::streams::NUMA;
361+
362+
cls_do.def("to_bool", [](ov::intel_cpu::DenormalsOptimization& self) {
363+
return self.m_mode == ov::intel_cpu::DenormalsOptimization::Mode::ON;
364+
});
365+
366+
// Submodule denormals_optimization - properties RW
367+
wrap_property_RW(m_denormals_optimization_, ov::intel_cpu::denormals_optimization, "denormals_optimization");
368+
// Extra scenarios for ov::streams::num
369+
// m_streams.def("num", [](const int32_t value) {
370+
// return ov::streams::num(ov::streams::Num(value));
371+
// });
341372
}

src/inference/dev_api/openvino/runtime/threading/cpu_streams_executor.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ class OPENVINO_RUNTIME_API CPUStreamsExecutor : public IStreamsExecutor {
5151

5252
int get_streams_num();
5353

54+
int get_threads_per_stream();
55+
5456
int get_numa_node_id() override;
5557

5658
int get_socket_id() override;

src/inference/src/dev/threading/cpu_streams_executor.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,10 @@ int CPUStreamsExecutor::get_streams_num() {
474474
return _impl->_config.get_streams();
475475
}
476476

477+
// Returns the threads-per-stream value reported by the executor's stored
// configuration (_impl->_config); no computation is done here.
int CPUStreamsExecutor::get_threads_per_stream() {
478+
return _impl->_config.get_threads_per_stream();
479+
}
480+
477481
int CPUStreamsExecutor::get_numa_node_id() {
478482
if (!_impl->_streams.find_thread_id()) {
479483
return 0;

src/plugins/intel_cpu/src/compiled_model.cpp

+22-4
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ CompiledModel::~CompiledModel() {
5656
CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
5757
const std::shared_ptr<const ov::IPlugin>& plugin,
5858
Config cfg,
59+
ov::threading::IStreamsExecutor::Config streamExecutorConfig,
5960
const bool loaded_from_cache,
6061
std::shared_ptr<SubMemoryManager> sub_memory_manager)
6162
: ov::ICompiledModel::ICompiledModel(model, plugin),
@@ -84,10 +85,10 @@ CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
8485
ov::hint::SchedulingCoreType::ANY_CORE,
8586
false,
8687
true}
87-
: m_cfg.get_stream_executor_config();
88+
: streamExecutorConfig;
8889
m_task_executor = m_plugin->get_executor_manager()->get_idle_cpu_streams_executor(executor_config);
8990
}
90-
if (0 != m_cfg.get_stream_executor_config().get_streams()) {
91+
if (0 != streamExecutorConfig.get_streams()) {
9192
m_callback_executor = m_plugin->get_executor_manager()->get_idle_cpu_streams_executor(
9293
IStreamsExecutor::Config{"CPUCallbackExecutor", 1, 0});
9394
} else {
@@ -137,8 +138,25 @@ CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
137138
message->set_num_sub_streams(m_cfg.get_num_sub_streams());
138139
for (int i = 0; i < m_cfg.get_num_sub_streams(); i++) {
139140
auto sub_cfg = m_cfg.clone(i, true);
141+
142+
auto streams_info_table = sub_cfg.get_stream_info_table();
143+
std::vector<std::vector<int>> sub_streams_table;
144+
sub_streams_table.push_back(streams_info_table[i + 1]);
145+
sub_streams_table[0][NUMBER_OF_STREAMS] = 1;
146+
auto subStreamExecutorConfig =
147+
ov::threading::IStreamsExecutor::Config{
148+
"CPUStreamsExecutor",
149+
1,
150+
1,
151+
ov::hint::SchedulingCoreType::ANY_CORE,
152+
false,
153+
true,
154+
true,
155+
std::move(sub_streams_table),
156+
sub_cfg.get_stream_rank_table()[i]};
157+
140158
m_sub_compiled_models.push_back(
141-
std::make_shared<CompiledModel>(model, plugin, sub_cfg, loaded_from_cache, m_sub_memory_manager));
159+
std::make_shared<CompiledModel>(model, plugin, sub_cfg, subStreamExecutorConfig, loaded_from_cache, m_sub_memory_manager));
142160
}
143161
}
144162
}
@@ -278,7 +296,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
278296
return decltype(ov::loaded_from_cache)::value_type {m_loaded_from_cache};
279297
}
280298
if (name == ov::optimal_number_of_infer_requests) {
281-
const auto streams = m_cfg.get_stream_executor_config().get_streams();
299+
const auto streams = m_cfg.get_num_streams().num;
282300
return decltype(ov::optimal_number_of_infer_requests)::value_type(
283301
streams > 0 ? streams : 1); // ov::optimal_number_of_infer_requests has no negative values
284302
}

src/plugins/intel_cpu/src/compiled_model.h

+1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ class CompiledModel : public ov::ICompiledModel {
3535
CompiledModel(const std::shared_ptr<ov::Model>& model,
3636
const std::shared_ptr<const ov::IPlugin>& plugin,
3737
Config cfg,
38+
ov::threading::IStreamsExecutor::Config streamExecutorConfig,
3839
const bool loaded_from_cache,
3940
std::shared_ptr<SubMemoryManager> sub_memory_manager = nullptr);
4041

src/plugins/intel_cpu/src/config.cpp

+42-41
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,10 @@ Config::Config(const Config& other) : Config() {
6262
m_options_map.at(kv.first)->set_any(kv.second->get_any());
6363
}
6464

65-
m_stream_executor_config = other.m_stream_executor_config;
65+
// m_stream_executor_config = other.m_stream_executor_config;
6666
m_model_prefer_threads = other.m_model_prefer_threads;
67-
m_streams_rank_table = other.m_streams_rank_table;
67+
m_stream_rank_table = other.m_stream_rank_table;
68+
m_stream_info_table = other.m_stream_info_table;
6869
m_num_sub_streams = other.m_num_sub_streams;
6970
m_proc_type_table = other.m_proc_type_table;
7071
m_numa_node_id = other.m_numa_node_id;
@@ -77,9 +78,10 @@ Config& Config::operator=(const Config& other) {
7778
m_options_map.at(kv.first)->set_any(kv.second->get_any());
7879
}
7980

80-
m_stream_executor_config = other.m_stream_executor_config;
81+
// m_stream_executor_config = other.m_stream_executor_config;
8182
m_model_prefer_threads = other.m_model_prefer_threads;
82-
m_streams_rank_table = other.m_streams_rank_table;
83+
m_stream_rank_table = other.m_stream_rank_table;
84+
m_stream_info_table = other.m_stream_info_table;
8385
m_num_sub_streams = other.m_num_sub_streams;
8486
m_proc_type_table = other.m_proc_type_table;
8587
m_numa_node_id = other.m_numa_node_id;
@@ -98,21 +100,21 @@ Config Config::clone(int sub_stream_idx, bool enable_node_split) const {
98100
Config new_config = *this;
99101

100102
new_config.m_num_sub_streams = 1;
101-
auto streams_info_table = new_config.m_stream_executor_config.get_streams_info_table();
102-
std::vector<std::vector<int>> sub_streams_table;
103-
sub_streams_table.push_back(streams_info_table[sub_stream_idx + 1]);
104-
sub_streams_table[0][NUMBER_OF_STREAMS] = 1;
105-
new_config.m_stream_executor_config =
106-
ov::threading::IStreamsExecutor::Config{
107-
"CPUStreamsExecutor",
108-
1,
109-
1,
110-
ov::hint::SchedulingCoreType::ANY_CORE,
111-
false,
112-
true,
113-
true,
114-
std::move(sub_streams_table),
115-
new_config.m_streams_rank_table[sub_stream_idx]};
103+
// auto streams_info_table = new_config.m_stream_executor_config.get_streams_info_table();
104+
// std::vector<std::vector<int>> sub_streams_table;
105+
// sub_streams_table.push_back(streams_info_table[sub_stream_idx + 1]);
106+
// sub_streams_table[0][NUMBER_OF_STREAMS] = 1;
107+
// new_config.m_stream_executor_config =
108+
// ov::threading::IStreamsExecutor::Config{
109+
// "CPUStreamsExecutor",
110+
// 1,
111+
// 1,
112+
// ov::hint::SchedulingCoreType::ANY_CORE,
113+
// false,
114+
// true,
115+
// true,
116+
// std::move(sub_streams_table),
117+
// new_config.m_streams_rank_table[sub_stream_idx]};
116118

117119
return new_config;
118120
}
@@ -236,12 +238,10 @@ void Config::apply_execution_hints() {
236238
m_value_cache_precision = m_kv_cache_precision;
237239
}
238240

239-
if (!hasHardwareSupport(m_inference_precision)) {
241+
if (!hasHardwareSupport(m_inference_precision) && m_inference_precision != ov::element::dynamic) {
240242
m_inference_precision = ov::element::f32;
241243
}
242244

243-
244-
245245
#if defined(__APPLE__)
246246
m_enable_cpu_reservation = false;
247247
#endif
@@ -313,29 +313,30 @@ void Config::apply_threading_properties(const ov::Model* model) {
313313
// streams = streams_set == 1 ? 0 : streams_set;
314314
// }
315315

316-
if (!(0 == streams && is_set_by_user(ov::num_streams))) {
316+
if (0 != streams || !is_set_by_user(ov::num_streams)) {
317317
std::lock_guard<std::mutex> lock{ov::threading::_streams_executor_mutex};
318318
m_proc_type_table = get_proc_type_table();
319-
auto stream_info_table = generate_stream_info(streams, model);
320-
321-
// ???
322-
auto threadsPerStream = m_stream_executor_config.get_threads_per_stream();
323-
324-
m_stream_executor_config = ov::threading::IStreamsExecutor::Config{"CPUStreamsExecutor",
325-
streams,
326-
threadsPerStream,
327-
ov::hint::SchedulingCoreType::ANY_CORE,
328-
get_enable_cpu_reservation(),
329-
get_enable_cpu_pinning(),
330-
true,
331-
std::move(stream_info_table),
332-
{},
333-
false};
334-
} else {
335-
m_stream_executor_config = ov::threading::IStreamsExecutor::Config{"CPUStreamsExecutor", streams};
319+
m_stream_info_table = generate_stream_info(streams, model);
336320
}
337321

338322
m_num_streams = ov::streams::Num(streams);
323+
324+
// ???
325+
// auto threadsPerStream = m_stream_executor_config.get_threads_per_stream();
326+
327+
// m_stream_executor_config = ov::threading::IStreamsExecutor::Config{"CPUStreamsExecutor",
328+
// streams,
329+
// threadsPerStream,
330+
// ov::hint::SchedulingCoreType::ANY_CORE,
331+
// get_enable_cpu_reservation(),
332+
// get_enable_cpu_pinning(),
333+
// true,
334+
// std::move(stream_info_table),
335+
// {},
336+
// false};
337+
// } else {
338+
// // m_stream_executor_config = ov::threading::IStreamsExecutor::Config{"CPUStreamsExecutor", streams};
339+
// }
339340
}
340341

341342
std::vector<std::vector<int>> Config::generate_stream_info(int streams, const ov::Model* model) {
@@ -379,7 +380,7 @@ std::vector<std::vector<int>> Config::generate_stream_info(int streams, const ov
379380

380381
auto modelDistributionPolicy = get_model_distribution_policy();
381382
if (modelDistributionPolicy.find(ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL) != modelDistributionPolicy.end()) {
382-
m_streams_rank_table = get_streams_rank_table(streams_info_table, 1, m_num_sub_streams);
383+
m_stream_rank_table = get_streams_rank_table(streams_info_table, 1, m_num_sub_streams);
383384
}
384385

385386
m_enable_cpu_pinning = check_cpu_pinning(get_enable_cpu_pinning(),

src/plugins/intel_cpu/src/config.h

+14-5
Original file line numberDiff line numberDiff line change
@@ -49,18 +49,27 @@ struct Config : public ov::PluginConfig {
4949
return m_model_prefer_threads;
5050
}
5151

52-
const ov::threading::IStreamsExecutor::Config& get_stream_executor_config() const {
53-
return m_stream_executor_config;
54-
}
52+
// const ov::threading::IStreamsExecutor::Config& get_stream_executor_config() const {
53+
// return m_stream_executor_config;
54+
// }
5555

5656
int get_num_sub_streams() const {
5757
return m_num_sub_streams;
5858
}
5959

60+
// Read-only access to m_stream_rank_table. The returned reference aliases the
// member, so it is only valid while this Config object is alive.
const std::vector<std::vector<int>>& get_stream_rank_table() const {
61+
return m_stream_rank_table;
62+
}
63+
64+
// Read-only access to m_stream_info_table. The returned reference aliases the
// member, so it is only valid while this Config object is alive.
const std::vector<std::vector<int>>& get_stream_info_table() const {
64+
return m_stream_info_table;
65+
}
67+
6068
private:
61-
ov::threading::IStreamsExecutor::Config m_stream_executor_config;
69+
// ov::threading::IStreamsExecutor::Config m_stream_executor_config;
6270
int m_model_prefer_threads = -1;
63-
std::vector<std::vector<int>> m_streams_rank_table = {};
71+
std::vector<std::vector<int>> m_stream_rank_table = {};
72+
std::vector<std::vector<int>> m_stream_info_table = {};
6473
int m_num_sub_streams = 0;
6574
std::vector<std::vector<int>> m_proc_type_table = {};
6675
int m_numa_node_id = -1;

src/plugins/intel_cpu/src/nodes/qkv_proj.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -336,7 +336,7 @@ QKVProjection::QKVProjection(const std::shared_ptr<ov::Node>& op, const GraphCon
336336
std::string errorMessage;
337337

338338
const auto& config = context->getConfig();
339-
size_t concurrency = config.get_stream_executor_config().get_threads_per_stream();
339+
size_t concurrency = context->getCPUStreamExecutor()->get_threads_per_stream();
340340
if (concurrency == 0) {
341341
concurrency = parallel_get_max_threads();
342342
}

0 commit comments

Comments
 (0)