@@ -62,9 +62,10 @@ Config::Config(const Config& other) : Config() {
62
62
m_options_map.at (kv.first )->set_any (kv.second ->get_any ());
63
63
}
64
64
65
- m_stream_executor_config = other.m_stream_executor_config ;
65
+ // m_stream_executor_config = other.m_stream_executor_config;
66
66
m_model_prefer_threads = other.m_model_prefer_threads ;
67
- m_streams_rank_table = other.m_streams_rank_table ;
67
+ m_stream_rank_table = other.m_stream_rank_table ;
68
+ m_stream_info_table = other.m_stream_info_table ;
68
69
m_num_sub_streams = other.m_num_sub_streams ;
69
70
m_proc_type_table = other.m_proc_type_table ;
70
71
m_numa_node_id = other.m_numa_node_id ;
@@ -77,9 +78,10 @@ Config& Config::operator=(const Config& other) {
77
78
m_options_map.at (kv.first )->set_any (kv.second ->get_any ());
78
79
}
79
80
80
- m_stream_executor_config = other.m_stream_executor_config ;
81
+ // m_stream_executor_config = other.m_stream_executor_config;
81
82
m_model_prefer_threads = other.m_model_prefer_threads ;
82
- m_streams_rank_table = other.m_streams_rank_table ;
83
+ m_stream_rank_table = other.m_stream_rank_table ;
84
+ m_stream_info_table = other.m_stream_info_table ;
83
85
m_num_sub_streams = other.m_num_sub_streams ;
84
86
m_proc_type_table = other.m_proc_type_table ;
85
87
m_numa_node_id = other.m_numa_node_id ;
@@ -94,26 +96,9 @@ Config Config::clone() const {
94
96
}
95
97
96
98
97
- Config Config::clone (int sub_stream_idx, bool enable_node_split ) const {
99
+ Config Config::clone (int num_sub_streamst ) const {
98
100
Config new_config = *this ;
99
-
100
- new_config.m_num_sub_streams = 1 ;
101
- auto streams_info_table = new_config.m_stream_executor_config .get_streams_info_table ();
102
- std::vector<std::vector<int >> sub_streams_table;
103
- sub_streams_table.push_back (streams_info_table[sub_stream_idx + 1 ]);
104
- sub_streams_table[0 ][NUMBER_OF_STREAMS] = 1 ;
105
- new_config.m_stream_executor_config =
106
- ov::threading::IStreamsExecutor::Config{
107
- " CPUStreamsExecutor" ,
108
- 1 ,
109
- 1 ,
110
- ov::hint::SchedulingCoreType::ANY_CORE,
111
- false ,
112
- true ,
113
- true ,
114
- std::move (sub_streams_table),
115
- new_config.m_streams_rank_table [sub_stream_idx]};
116
-
101
+ new_config.m_num_sub_streams = num_sub_streamst;
117
102
return new_config;
118
103
}
119
104
@@ -152,9 +137,9 @@ void Config::apply_cpu_rt_info(const ov::RTMap& rt_info) {
152
137
}
153
138
}
154
139
155
- void Config::finalize_impl (const IRemoteContext* context, const ov::Model* model ) {
140
+ void Config::finalize_impl (const IRemoteContext* context) {
156
141
apply_hints ();
157
- apply_threading_properties (model );
142
+ apply_threading_properties ();
158
143
159
144
if (!m_cache_encryption_callbacks.value .encrypt || !m_cache_encryption_callbacks.value .decrypt ) {
160
145
m_cache_encryption_callbacks.value .encrypt = codec_xor_str;
@@ -236,12 +221,10 @@ void Config::apply_execution_hints() {
236
221
m_value_cache_precision = m_kv_cache_precision;
237
222
}
238
223
239
- if (!hasHardwareSupport (m_inference_precision)) {
224
+ if (!hasHardwareSupport (m_inference_precision) && m_inference_precision != ov::element::dynamic ) {
240
225
m_inference_precision = ov::element::f32;
241
226
}
242
227
243
-
244
-
245
228
#if defined(__APPLE__)
246
229
m_enable_cpu_reservation = false ;
247
230
#endif
@@ -254,91 +237,26 @@ void Config::apply_model_specific_options(const IRemoteContext* context, const o
254
237
if (!is_set_by_user (ov::intel_cpu::model_type)) {
255
238
m_model_type = getModelType (model.shared_from_this ());
256
239
}
240
+
241
+ if (-1 == m_model_prefer_threads) {
242
+ m_model_prefer_threads = calc_model_prefer_threads (get_default_num_streams (), get_default_proc_type_table (), model.shared_from_this ());
243
+ }
257
244
}
258
245
259
246
// Currently a no-op: the body contains no statements, so calling this function
// leaves the configuration untouched.
// NOTE(review): presumably kept as a hook for performance-hint handling —
// confirm whether it is still needed by the caller.
void Config::apply_performance_hints() {}
287
248
288
- void Config::apply_threading_properties (const ov::Model* model) {
289
- #if defined(OV_CPU_WITH_SHL)
290
- // TODO: multi-stream execution is unsafe when SHL is used:
291
- // The library uses global static variables as flags and counters.
292
- streams = 1 ;
293
- #else
294
- // int streams_set
295
- int streams = get_num_streams ();
296
- if (get_exclusive_async_requests ()) {
297
- streams = 1 ;
298
- } else if (streams == ov::streams::NUMA) {
299
- streams = ov::get_num_numa_nodes ();
300
- } else if (streams == ov::streams::AUTO) {
301
- // bare minimum of streams (that evenly divides available number of cores)
302
- streams = ov::threading::IStreamsExecutor::Config::get_default_num_streams ();
303
- }
304
- #endif
305
-
306
- // if (is_set_by_user(ov::num_streams) && streams_set > 0) {
307
- // streams = streams_set;
308
- // } else if (get_performance_mode() == ov::hint::PerformanceMode::LATENCY) {
309
- // streams = 1;
310
- // } else if (get_performance_mode() == ov::hint::PerformanceMode::THROUGHPUT) {
311
- // streams = 0;
312
- // } else {
313
- // streams = streams_set == 1 ? 0 : streams_set;
314
- // }
315
-
316
- if (!(0 == streams && is_set_by_user (ov::num_streams))) {
317
- std::lock_guard<std::mutex> lock{ov::threading::_streams_executor_mutex};
318
- m_proc_type_table = get_proc_type_table ();
319
- auto stream_info_table = generate_stream_info (streams, model);
320
-
321
- // ???
322
- auto threadsPerStream = m_stream_executor_config.get_threads_per_stream ();
323
-
324
- m_stream_executor_config = ov::threading::IStreamsExecutor::Config{" CPUStreamsExecutor" ,
325
- streams,
326
- threadsPerStream,
327
- ov::hint::SchedulingCoreType::ANY_CORE,
328
- get_enable_cpu_reservation (),
329
- get_enable_cpu_pinning (),
330
- true ,
331
- std::move (stream_info_table),
332
- {},
333
- false };
334
- } else {
335
- m_stream_executor_config = ov::threading::IStreamsExecutor::Config{" CPUStreamsExecutor" , streams};
249
+ void Config::apply_threading_properties () {
250
+ auto streams = get_default_num_streams ();
251
+ if (0 != streams || !is_set_by_user (ov::num_streams)) {
252
+ m_proc_type_table = get_default_proc_type_table ();
253
+ m_stream_info_table = generate_stream_info (streams);
336
254
}
337
255
338
256
m_num_streams = ov::streams::Num (streams);
339
257
}
340
258
341
- std::vector<std::vector<int >> Config::generate_stream_info (int streams, const ov::Model* model ) {
259
+ std::vector<std::vector<int >> Config::generate_stream_info (int streams) {
342
260
#if defined(__APPLE__)
343
261
// CPUStreamExecutor doesn't support CPU reservation on Mac
344
262
config.set_user_property (ov::hint::enable_cpu_reservation (false ));
@@ -354,10 +272,6 @@ std::vector<std::vector<int>> Config::generate_stream_info(int streams, const ov
354
272
ov::util::to_string (get_performance_mode ()),
355
273
m_proc_type_table);
356
274
357
- if (-1 == m_model_prefer_threads && model) {
358
- m_model_prefer_threads = calc_model_prefer_threads (streams, m_proc_type_table, model->shared_from_this ());
359
- }
360
-
361
275
if (m_proc_type_table.size () > 1 ) {
362
276
const auto cur_numa_node_id = m_numa_node_id < 0 ? get_current_numa_node_id () : m_numa_node_id;
363
277
sort_table_by_numa_node_id (cur_numa_node_id, m_proc_type_table);
@@ -379,7 +293,7 @@ std::vector<std::vector<int>> Config::generate_stream_info(int streams, const ov
379
293
380
294
auto modelDistributionPolicy = get_model_distribution_policy ();
381
295
if (modelDistributionPolicy.find (ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL) != modelDistributionPolicy.end ()) {
382
- m_streams_rank_table = get_streams_rank_table (streams_info_table, 1 , m_num_sub_streams);
296
+ m_stream_rank_table = get_streams_rank_table (streams_info_table, 1 , m_num_sub_streams);
383
297
}
384
298
385
299
m_enable_cpu_pinning = check_cpu_pinning (get_enable_cpu_pinning (),
@@ -390,4 +304,39 @@ std::vector<std::vector<int>> Config::generate_stream_info(int streams, const ov
390
304
return streams_info_table;
391
305
}
392
306
307
+ int Config::get_default_num_streams () {
308
+ #if defined(OV_CPU_WITH_SHL)
309
+ // TODO: multi-stream execution is unsafe when SHL is used:
310
+ // The library uses global static variables as flags and counters.
311
+ streams = 1 ;
312
+ #else
313
+ // int streams_set
314
+ auto streams = get_property (ov::num_streams.name ()).as <int >();
315
+ if (get_exclusive_async_requests ()) {
316
+ streams = 1 ;
317
+ } else if (streams == ov::streams::NUMA) {
318
+ streams = ov::get_num_numa_nodes ();
319
+ } else if (streams == ov::streams::AUTO) {
320
+ // bare minimum of streams (that evenly divides available number of cores)
321
+ streams = ov::threading::IStreamsExecutor::Config::get_default_num_streams ();
322
+ }
323
+ #endif
324
+ // if (is_set_by_user(ov::num_streams) && streams_set > 0) {
325
+ // streams = streams_set;
326
+ // } else if (get_performance_mode() == ov::hint::PerformanceMode::LATENCY) {
327
+ // streams = 1;
328
+ // } else if (get_performance_mode() == ov::hint::PerformanceMode::THROUGHPUT) {
329
+ // streams = 0;
330
+ // } else {
331
+ // streams = streams_set == 1 ? 0 : streams_set;
332
+ // }
333
+
334
+ return streams;
335
+ }
336
+
337
+ std::vector<std::vector<int >> Config::get_default_proc_type_table () {
338
+ std::lock_guard<std::mutex> lock{ov::threading::_streams_executor_mutex};
339
+ return get_proc_type_table ();
340
+ }
341
+
393
342
} // namespace ov::intel_cpu
0 commit comments