Skip to content

Commit e21e176

Browse files
wip
1 parent 119ef51 commit e21e176

File tree

19 files changed

+236
-154
lines changed

19 files changed

+236
-154
lines changed

src/bindings/python/src/pyopenvino/core/properties/properties.cpp

+31
Original file line numberDiff line numberDiff line change
@@ -338,4 +338,35 @@ void regmodule_properties(py::module m) {
338338
wrap_property_RW(m_intel_npu, ov::intel_npu::defer_weights_load, "defer_weights_load");
339339
wrap_property_RW(m_intel_npu, ov::intel_npu::compiler_dynamic_quantization, "compiler_dynamic_quantization");
340340
wrap_property_RW(m_intel_npu, ov::intel_npu::qdq_optimization, "qdq_optimization");
341+
342+
// Submodule denormals_optimization
343+
py::module m_denormals_optimization_ =
344+
m_properties.def_submodule("denormals_optimization", "openvino.properties.denormals_optimization submodule that simulates ov::intel_cpu::DenormalsOptimization");
345+
346+
py::class_<ov::intel_cpu::DenormalsOptimization, std::shared_ptr<ov::intel_cpu::DenormalsOptimization>> cls_do(m_denormals_optimization_, "DenormalsOptimization");
347+
348+
// DenormalsOptimization() {};
349+
// DenormalsOptimization(Mode mode) : m_mode(mode) {};
350+
// DenormalsOptimization(bool mode) { m_mode = mode ? Mode::ON : Mode::OFF; }
351+
// operator bool() const { return m_mode == Mode::ON; }
352+
353+
354+
cls_do.def(py::init<>());
355+
cls_do.def(py::init<const bool>());
356+
357+
// // Covers static constexpr Num AUTO{-1};
358+
// cls_num.attr("AUTO") = ov::streams::AUTO;
359+
// // Covers static constexpr Num NUMA{-2};
360+
// cls_num.attr("NUMA") = ov::streams::NUMA;
361+
362+
cls_do.def("to_bool", [](ov::intel_cpu::DenormalsOptimization& self) {
363+
return self.m_mode == ov::intel_cpu::DenormalsOptimization::Mode::ON;
364+
});
365+
366+
// Submodule denormals_optimization - properties RW
367+
wrap_property_RW(m_denormals_optimization_, ov::intel_cpu::denormals_optimization, "denormals_optimization");
368+
// Extra scenarios for ov::streams::num
369+
// m_streams.def("num", [](const int32_t value) {
370+
// return ov::streams::num(ov::streams::Num(value));
371+
// });
341372
}

src/inference/dev_api/openvino/runtime/plugin_config.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ class OPENVINO_RUNTIME_API PluginConfig {
201201
virtual void apply_model_specific_options(const IRemoteContext* context, const ov::Model& model) {}
202202
void apply_env_options();
203203
void apply_config_options(std::string_view device_name, std::filesystem::path config_path = "");
204-
virtual void finalize_impl(const IRemoteContext* context, const ov::Model* model) {}
204+
// Customization hook invoked by PluginConfig::finalize() with the remote context it was given.
// The base implementation does nothing; derived configs override it to resolve their own options.
virtual void finalize_impl(const IRemoteContext* context) {}
205205

206206
template <typename T, PropertyMutability mutability>
207207
bool is_set_by_user(const ov::Property<T, mutability>& property) const {

src/inference/dev_api/openvino/runtime/threading/cpu_streams_executor.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ class OPENVINO_RUNTIME_API CPUStreamsExecutor : public IStreamsExecutor {
5151

5252
int get_streams_num();
5353

54+
int get_threads_per_stream();
55+
5456
int get_numa_node_id() override;
5557

5658
int get_socket_id() override;

src/inference/src/dev/plugin_config.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ void PluginConfig::finalize(const IRemoteContext* context, const ov::Model* mode
116116
option->set_any(value);
117117
}
118118

119-
finalize_impl(context, model);
119+
finalize_impl(context);
120120

121121
#ifdef ENABLE_DEBUG_CAPS
122122
apply_env_options();

src/inference/src/dev/threading/cpu_streams_executor.cpp

+4
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,10 @@ int CPUStreamsExecutor::get_streams_num() {
474474
return _impl->_config.get_streams();
475475
}
476476

477+
int CPUStreamsExecutor::get_threads_per_stream() {
478+
return _impl->_config.get_threads_per_stream();
479+
}
480+
477481
int CPUStreamsExecutor::get_numa_node_id() {
478482
if (!_impl->_streams.find_thread_id()) {
479483
return 0;

src/inference/tests/unit/config_test.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ struct NotEmptyTestConfig : public ov::PluginConfig {
141141
return supported_properties;
142142
}
143143

144-
void finalize_impl(const IRemoteContext* context, const ov::Model* model) override {
144+
void finalize_impl(const IRemoteContext* context) override {
145145
if (!is_set_by_user(low_level_property)) {
146146
m_low_level_property.value = m_high_level_property.value;
147147
}

src/plugins/intel_cpu/src/compiled_model.cpp

+23-5
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ CompiledModel::~CompiledModel() {
5656
CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
5757
const std::shared_ptr<const ov::IPlugin>& plugin,
5858
Config cfg,
59+
ov::threading::IStreamsExecutor::Config streamExecutorConfig,
5960
const bool loaded_from_cache,
6061
std::shared_ptr<SubMemoryManager> sub_memory_manager)
6162
: ov::ICompiledModel::ICompiledModel(model, plugin),
@@ -84,10 +85,10 @@ CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
8485
ov::hint::SchedulingCoreType::ANY_CORE,
8586
false,
8687
true}
87-
: m_cfg.get_stream_executor_config();
88+
: streamExecutorConfig;
8889
m_task_executor = m_plugin->get_executor_manager()->get_idle_cpu_streams_executor(executor_config);
8990
}
90-
if (0 != m_cfg.get_stream_executor_config().get_streams()) {
91+
if (0 != streamExecutorConfig.get_streams()) {
9192
m_callback_executor = m_plugin->get_executor_manager()->get_idle_cpu_streams_executor(
9293
IStreamsExecutor::Config{"CPUCallbackExecutor", 1, 0});
9394
} else {
@@ -136,9 +137,26 @@ CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
136137
m_sub_memory_manager = std::make_shared<SubMemoryManager>(m_cfg.get_num_sub_streams());
137138
message->set_num_sub_streams(m_cfg.get_num_sub_streams());
138139
for (int i = 0; i < m_cfg.get_num_sub_streams(); i++) {
139-
auto sub_cfg = m_cfg.clone(i, true);
140+
auto sub_cfg = m_cfg.clone(1);
141+
142+
auto streams_info_table = sub_cfg.get_stream_info_table();
143+
std::vector<std::vector<int>> sub_streams_table;
144+
sub_streams_table.push_back(streams_info_table[i + 1]);
145+
sub_streams_table[0][NUMBER_OF_STREAMS] = 1;
146+
auto subStreamExecutorConfig =
147+
ov::threading::IStreamsExecutor::Config{
148+
"CPUStreamsExecutor",
149+
1,
150+
1,
151+
ov::hint::SchedulingCoreType::ANY_CORE,
152+
false,
153+
true,
154+
true,
155+
std::move(sub_streams_table),
156+
sub_cfg.get_stream_rank_table()[i]};
157+
140158
m_sub_compiled_models.push_back(
141-
std::make_shared<CompiledModel>(model, plugin, sub_cfg, loaded_from_cache, m_sub_memory_manager));
159+
std::make_shared<CompiledModel>(model, plugin, sub_cfg, subStreamExecutorConfig, loaded_from_cache, m_sub_memory_manager));
142160
}
143161
}
144162
}
@@ -278,7 +296,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
278296
return decltype(ov::loaded_from_cache)::value_type {m_loaded_from_cache};
279297
}
280298
if (name == ov::optimal_number_of_infer_requests) {
281-
const auto streams = m_cfg.get_stream_executor_config().get_streams();
299+
const auto streams = m_cfg.get_num_streams().num;
282300
return decltype(ov::optimal_number_of_infer_requests)::value_type(
283301
streams > 0 ? streams : 1); // ov::optimal_number_of_infer_requests has no negative values
284302
}

src/plugins/intel_cpu/src/compiled_model.h

+1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ class CompiledModel : public ov::ICompiledModel {
3535
CompiledModel(const std::shared_ptr<ov::Model>& model,
3636
const std::shared_ptr<const ov::IPlugin>& plugin,
3737
Config cfg,
38+
ov::threading::IStreamsExecutor::Config streamExecutorConfig,
3839
const bool loaded_from_cache,
3940
std::shared_ptr<SubMemoryManager> sub_memory_manager = nullptr);
4041

src/plugins/intel_cpu/src/config.cpp

+57-108
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,10 @@ Config::Config(const Config& other) : Config() {
6262
m_options_map.at(kv.first)->set_any(kv.second->get_any());
6363
}
6464

65-
m_stream_executor_config = other.m_stream_executor_config;
65+
// m_stream_executor_config = other.m_stream_executor_config;
6666
m_model_prefer_threads = other.m_model_prefer_threads;
67-
m_streams_rank_table = other.m_streams_rank_table;
67+
m_stream_rank_table = other.m_stream_rank_table;
68+
m_stream_info_table = other.m_stream_info_table;
6869
m_num_sub_streams = other.m_num_sub_streams;
6970
m_proc_type_table = other.m_proc_type_table;
7071
m_numa_node_id = other.m_numa_node_id;
@@ -77,9 +78,10 @@ Config& Config::operator=(const Config& other) {
7778
m_options_map.at(kv.first)->set_any(kv.second->get_any());
7879
}
7980

80-
m_stream_executor_config = other.m_stream_executor_config;
81+
// m_stream_executor_config = other.m_stream_executor_config;
8182
m_model_prefer_threads = other.m_model_prefer_threads;
82-
m_streams_rank_table = other.m_streams_rank_table;
83+
m_stream_rank_table = other.m_stream_rank_table;
84+
m_stream_info_table = other.m_stream_info_table;
8385
m_num_sub_streams = other.m_num_sub_streams;
8486
m_proc_type_table = other.m_proc_type_table;
8587
m_numa_node_id = other.m_numa_node_id;
@@ -94,26 +96,9 @@ Config Config::clone() const {
9496
}
9597

9698

97-
Config Config::clone(int sub_stream_idx, bool enable_node_split) const {
99+
Config Config::clone(int num_sub_streamst) const {
98100
Config new_config = *this;
99-
100-
new_config.m_num_sub_streams = 1;
101-
auto streams_info_table = new_config.m_stream_executor_config.get_streams_info_table();
102-
std::vector<std::vector<int>> sub_streams_table;
103-
sub_streams_table.push_back(streams_info_table[sub_stream_idx + 1]);
104-
sub_streams_table[0][NUMBER_OF_STREAMS] = 1;
105-
new_config.m_stream_executor_config =
106-
ov::threading::IStreamsExecutor::Config{
107-
"CPUStreamsExecutor",
108-
1,
109-
1,
110-
ov::hint::SchedulingCoreType::ANY_CORE,
111-
false,
112-
true,
113-
true,
114-
std::move(sub_streams_table),
115-
new_config.m_streams_rank_table[sub_stream_idx]};
116-
101+
new_config.m_num_sub_streams = num_sub_streamst;
117102
return new_config;
118103
}
119104

@@ -152,9 +137,9 @@ void Config::apply_cpu_rt_info(const ov::RTMap& rt_info) {
152137
}
153138
}
154139

155-
void Config::finalize_impl(const IRemoteContext* context, const ov::Model* model) {
140+
void Config::finalize_impl(const IRemoteContext* context) {
156141
apply_hints();
157-
apply_threading_properties(model);
142+
apply_threading_properties();
158143

159144
if (!m_cache_encryption_callbacks.value.encrypt || !m_cache_encryption_callbacks.value.decrypt) {
160145
m_cache_encryption_callbacks.value.encrypt = codec_xor_str;
@@ -236,12 +221,10 @@ void Config::apply_execution_hints() {
236221
m_value_cache_precision = m_kv_cache_precision;
237222
}
238223

239-
if (!hasHardwareSupport(m_inference_precision)) {
224+
if (!hasHardwareSupport(m_inference_precision) && m_inference_precision != ov::element::dynamic) {
240225
m_inference_precision = ov::element::f32;
241226
}
242227

243-
244-
245228
#if defined(__APPLE__)
246229
m_enable_cpu_reservation = false;
247230
#endif
@@ -254,91 +237,26 @@ void Config::apply_model_specific_options(const IRemoteContext* context, const o
254237
if (!is_set_by_user(ov::intel_cpu::model_type)) {
255238
m_model_type = getModelType(model.shared_from_this());
256239
}
240+
241+
if (-1 == m_model_prefer_threads) {
242+
m_model_prefer_threads = calc_model_prefer_threads(get_default_num_streams(), get_default_proc_type_table(), model.shared_from_this());
243+
}
257244
}
258245

259246
void Config::apply_performance_hints() {
260-
// if (is_set_by_user(ov::hint::performance_mode)) {
261-
// const auto mode = get_property(ov::hint::performance_mode);
262-
// if (!is_set_by_user(ov::num_streams)) {
263-
// if (mode == ov::hint::PerformanceMode::LATENCY) {
264-
// set_property(ov::num_streams(1));
265-
// } else if (mode == ov::hint::PerformanceMode::THROUGHPUT) {
266-
// set_property(ov::num_streams(ov::streams::AUTO));
267-
// }
268-
// }
269-
// }
270-
271-
// if (get_property(ov::num_streams) == ov::streams::AUTO) {
272-
// int32_t n_streams = std::max<int32_t>(info.num_ccs, 2);
273-
// set_property(ov::num_streams(n_streams));
274-
// }
275-
276-
// if (get_property(ov::internal::exclusive_async_requests)) {
277-
// set_property(ov::num_streams(1));
278-
// }
279-
280-
// // Allow kernels reuse only for single-stream scenarios
281-
// if (get_property(ov::intel_gpu::hint::enable_kernels_reuse)) {
282-
// if (get_property(ov::num_streams) != 1) {
283-
// set_property(ov::intel_gpu::hint::enable_kernels_reuse(false));
284-
// }
285-
// }
286247
}
287248

288-
void Config::apply_threading_properties(const ov::Model* model) {
289-
#if defined(OV_CPU_WITH_SHL)
290-
// TODO: multi-stream execution is unsafe when SHL is used:
291-
// The library uses global static variables as flags and counters.
292-
streams = 1;
293-
#else
294-
// int streams_set
295-
int streams = get_num_streams();
296-
if (get_exclusive_async_requests()) {
297-
streams = 1;
298-
} else if (streams == ov::streams::NUMA) {
299-
streams = ov::get_num_numa_nodes();
300-
} else if (streams == ov::streams::AUTO) {
301-
// bare minimum of streams (that evenly divides available number of cores)
302-
streams = ov::threading::IStreamsExecutor::Config::get_default_num_streams();
303-
}
304-
#endif
305-
306-
// if (is_set_by_user(ov::num_streams) && streams_set > 0) {
307-
// streams = streams_set;
308-
// } else if (get_performance_mode() == ov::hint::PerformanceMode::LATENCY) {
309-
// streams = 1;
310-
// } else if (get_performance_mode() == ov::hint::PerformanceMode::THROUGHPUT) {
311-
// streams = 0;
312-
// } else {
313-
// streams = streams_set == 1 ? 0 : streams_set;
314-
// }
315-
316-
if (!(0 == streams && is_set_by_user(ov::num_streams))) {
317-
std::lock_guard<std::mutex> lock{ov::threading::_streams_executor_mutex};
318-
m_proc_type_table = get_proc_type_table();
319-
auto stream_info_table = generate_stream_info(streams, model);
320-
321-
// ???
322-
auto threadsPerStream = m_stream_executor_config.get_threads_per_stream();
323-
324-
m_stream_executor_config = ov::threading::IStreamsExecutor::Config{"CPUStreamsExecutor",
325-
streams,
326-
threadsPerStream,
327-
ov::hint::SchedulingCoreType::ANY_CORE,
328-
get_enable_cpu_reservation(),
329-
get_enable_cpu_pinning(),
330-
true,
331-
std::move(stream_info_table),
332-
{},
333-
false};
334-
} else {
335-
m_stream_executor_config = ov::threading::IStreamsExecutor::Config{"CPUStreamsExecutor", streams};
249+
void Config::apply_threading_properties() {
250+
auto streams = get_default_num_streams();
251+
if (0 != streams || !is_set_by_user(ov::num_streams)) {
252+
m_proc_type_table = get_default_proc_type_table();
253+
m_stream_info_table = generate_stream_info(streams);
336254
}
337255

338256
m_num_streams = ov::streams::Num(streams);
339257
}
340258

341-
std::vector<std::vector<int>> Config::generate_stream_info(int streams, const ov::Model* model) {
259+
std::vector<std::vector<int>> Config::generate_stream_info(int streams) {
342260
#if defined(__APPLE__)
343261
// CPUStreamExecutor doesn't support CPU reservation on Mac
344262
config.set_user_property(ov::hint::enable_cpu_reservation(false));
@@ -354,10 +272,6 @@ std::vector<std::vector<int>> Config::generate_stream_info(int streams, const ov
354272
ov::util::to_string(get_performance_mode()),
355273
m_proc_type_table);
356274

357-
if (-1 == m_model_prefer_threads && model) {
358-
m_model_prefer_threads = calc_model_prefer_threads(streams, m_proc_type_table, model->shared_from_this());
359-
}
360-
361275
if (m_proc_type_table.size() > 1) {
362276
const auto cur_numa_node_id = m_numa_node_id < 0 ? get_current_numa_node_id() : m_numa_node_id;
363277
sort_table_by_numa_node_id(cur_numa_node_id, m_proc_type_table);
@@ -379,7 +293,7 @@ std::vector<std::vector<int>> Config::generate_stream_info(int streams, const ov
379293

380294
auto modelDistributionPolicy = get_model_distribution_policy();
381295
if (modelDistributionPolicy.find(ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL) != modelDistributionPolicy.end()) {
382-
m_streams_rank_table = get_streams_rank_table(streams_info_table, 1, m_num_sub_streams);
296+
m_stream_rank_table = get_streams_rank_table(streams_info_table, 1, m_num_sub_streams);
383297
}
384298

385299
m_enable_cpu_pinning = check_cpu_pinning(get_enable_cpu_pinning(),
@@ -390,4 +304,39 @@ std::vector<std::vector<int>> Config::generate_stream_info(int streams, const ov
390304
return streams_info_table;
391305
}
392306

307+
int Config::get_default_num_streams() {
308+
#if defined(OV_CPU_WITH_SHL)
309+
// TODO: multi-stream execution is unsafe when SHL is used:
310+
// The library uses global static variables as flags and counters.
311+
streams = 1;
312+
#else
313+
// int streams_set
314+
auto streams = get_property(ov::num_streams.name()).as<int>();
315+
if (get_exclusive_async_requests()) {
316+
streams = 1;
317+
} else if (streams == ov::streams::NUMA) {
318+
streams = ov::get_num_numa_nodes();
319+
} else if (streams == ov::streams::AUTO) {
320+
// bare minimum of streams (that evenly divides available number of cores)
321+
streams = ov::threading::IStreamsExecutor::Config::get_default_num_streams();
322+
}
323+
#endif
324+
// if (is_set_by_user(ov::num_streams) && streams_set > 0) {
325+
// streams = streams_set;
326+
// } else if (get_performance_mode() == ov::hint::PerformanceMode::LATENCY) {
327+
// streams = 1;
328+
// } else if (get_performance_mode() == ov::hint::PerformanceMode::THROUGHPUT) {
329+
// streams = 0;
330+
// } else {
331+
// streams = streams_set == 1 ? 0 : streams_set;
332+
// }
333+
334+
return streams;
335+
}
336+
337+
std::vector<std::vector<int>> Config::get_default_proc_type_table() {
338+
std::lock_guard<std::mutex> lock{ov::threading::_streams_executor_mutex};
339+
return get_proc_type_table();
340+
}
341+
393342
} // namespace ov::intel_cpu

0 commit comments

Comments
 (0)