@@ -62,9 +62,10 @@ Config::Config(const Config& other) : Config() {
62
62
m_options_map.at (kv.first )->set_any (kv.second ->get_any ());
63
63
}
64
64
65
- m_stream_executor_config = other.m_stream_executor_config ;
65
+ // m_stream_executor_config = other.m_stream_executor_config;
66
66
m_model_prefer_threads = other.m_model_prefer_threads ;
67
- m_streams_rank_table = other.m_streams_rank_table ;
67
+ m_stream_rank_table = other.m_stream_rank_table ;
68
+ m_stream_info_table = other.m_stream_info_table ;
68
69
m_num_sub_streams = other.m_num_sub_streams ;
69
70
m_proc_type_table = other.m_proc_type_table ;
70
71
m_numa_node_id = other.m_numa_node_id ;
@@ -77,9 +78,10 @@ Config& Config::operator=(const Config& other) {
77
78
m_options_map.at (kv.first )->set_any (kv.second ->get_any ());
78
79
}
79
80
80
- m_stream_executor_config = other.m_stream_executor_config ;
81
+ // m_stream_executor_config = other.m_stream_executor_config;
81
82
m_model_prefer_threads = other.m_model_prefer_threads ;
82
- m_streams_rank_table = other.m_streams_rank_table ;
83
+ m_stream_rank_table = other.m_stream_rank_table ;
84
+ m_stream_info_table = other.m_stream_info_table ;
83
85
m_num_sub_streams = other.m_num_sub_streams ;
84
86
m_proc_type_table = other.m_proc_type_table ;
85
87
m_numa_node_id = other.m_numa_node_id ;
@@ -98,21 +100,21 @@ Config Config::clone(int sub_stream_idx, bool enable_node_split) const {
98
100
Config new_config = *this ;
99
101
100
102
new_config.m_num_sub_streams = 1 ;
101
- auto streams_info_table = new_config.m_stream_executor_config .get_streams_info_table ();
102
- std::vector<std::vector<int >> sub_streams_table;
103
- sub_streams_table.push_back (streams_info_table[sub_stream_idx + 1 ]);
104
- sub_streams_table[0 ][NUMBER_OF_STREAMS] = 1 ;
105
- new_config.m_stream_executor_config =
106
- ov::threading::IStreamsExecutor::Config{
107
- " CPUStreamsExecutor" ,
108
- 1 ,
109
- 1 ,
110
- ov::hint::SchedulingCoreType::ANY_CORE,
111
- false ,
112
- true ,
113
- true ,
114
- std::move (sub_streams_table),
115
- new_config.m_streams_rank_table [sub_stream_idx]};
103
+ // auto streams_info_table = new_config.m_stream_executor_config.get_streams_info_table();
104
+ // std::vector<std::vector<int>> sub_streams_table;
105
+ // sub_streams_table.push_back(streams_info_table[sub_stream_idx + 1]);
106
+ // sub_streams_table[0][NUMBER_OF_STREAMS] = 1;
107
+ // new_config.m_stream_executor_config =
108
+ // ov::threading::IStreamsExecutor::Config{
109
+ // "CPUStreamsExecutor",
110
+ // 1,
111
+ // 1,
112
+ // ov::hint::SchedulingCoreType::ANY_CORE,
113
+ // false,
114
+ // true,
115
+ // true,
116
+ // std::move(sub_streams_table),
117
+ // new_config.m_streams_rank_table[sub_stream_idx]};
116
118
117
119
return new_config;
118
120
}
@@ -236,12 +238,10 @@ void Config::apply_execution_hints() {
236
238
m_value_cache_precision = m_kv_cache_precision;
237
239
}
238
240
239
- if (!hasHardwareSupport (m_inference_precision)) {
241
+ if (!hasHardwareSupport (m_inference_precision) && m_inference_precision != ov::element::dynamic ) {
240
242
m_inference_precision = ov::element::f32;
241
243
}
242
244
243
-
244
-
245
245
#if defined(__APPLE__)
246
246
m_enable_cpu_reservation = false ;
247
247
#endif
@@ -313,29 +313,30 @@ void Config::apply_threading_properties(const ov::Model* model) {
313
313
// streams = streams_set == 1 ? 0 : streams_set;
314
314
// }
315
315
316
- if (!( 0 == streams && is_set_by_user (ov::num_streams) )) {
316
+ if (0 != streams || ! is_set_by_user (ov::num_streams)) {
317
317
std::lock_guard<std::mutex> lock{ov::threading::_streams_executor_mutex};
318
318
m_proc_type_table = get_proc_type_table ();
319
- auto stream_info_table = generate_stream_info (streams, model);
320
-
321
- // ???
322
- auto threadsPerStream = m_stream_executor_config.get_threads_per_stream ();
323
-
324
- m_stream_executor_config = ov::threading::IStreamsExecutor::Config{" CPUStreamsExecutor" ,
325
- streams,
326
- threadsPerStream,
327
- ov::hint::SchedulingCoreType::ANY_CORE,
328
- get_enable_cpu_reservation (),
329
- get_enable_cpu_pinning (),
330
- true ,
331
- std::move (stream_info_table),
332
- {},
333
- false };
334
- } else {
335
- m_stream_executor_config = ov::threading::IStreamsExecutor::Config{" CPUStreamsExecutor" , streams};
319
+ m_stream_info_table = generate_stream_info (streams, model);
336
320
}
337
321
338
322
m_num_streams = ov::streams::Num (streams);
323
+
324
+ // ???
325
+ // auto threadsPerStream = m_stream_executor_config.get_threads_per_stream();
326
+
327
+ // m_stream_executor_config = ov::threading::IStreamsExecutor::Config{"CPUStreamsExecutor",
328
+ // streams,
329
+ // threadsPerStream,
330
+ // ov::hint::SchedulingCoreType::ANY_CORE,
331
+ // get_enable_cpu_reservation(),
332
+ // get_enable_cpu_pinning(),
333
+ // true,
334
+ // std::move(stream_info_table),
335
+ // {},
336
+ // false};
337
+ // } else {
338
+ // // m_stream_executor_config = ov::threading::IStreamsExecutor::Config{"CPUStreamsExecutor", streams};
339
+ // }
339
340
}
340
341
341
342
std::vector<std::vector<int >> Config::generate_stream_info (int streams, const ov::Model* model) {
@@ -379,7 +380,7 @@ std::vector<std::vector<int>> Config::generate_stream_info(int streams, const ov
379
380
380
381
auto modelDistributionPolicy = get_model_distribution_policy ();
381
382
if (modelDistributionPolicy.find (ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL) != modelDistributionPolicy.end ()) {
382
- m_streams_rank_table = get_streams_rank_table (streams_info_table, 1 , m_num_sub_streams);
383
+ m_stream_rank_table = get_streams_rank_table (streams_info_table, 1 , m_num_sub_streams);
383
384
}
384
385
385
386
m_enable_cpu_pinning = check_cpu_pinning (get_enable_cpu_pinning (),
0 commit comments