@@ -79,7 +79,6 @@ void enable_npuw_dq_if_allowed(ov::AnyMap& config,
79
79
const std::shared_ptr<ov::Model>& model) {
80
80
if (allow_to_enable_npuw_dq (model)) {
81
81
config[" NPUW_DQ" ] = " YES" ;
82
- pop_option (config, " NPUW_ONLINE_AVOID" );
83
82
}
84
83
}
85
84
@@ -213,33 +212,34 @@ void merge_config_with(ov::AnyMap& lhs, const ov::AnyMap& rhs) {
213
212
}
214
213
}
215
214
216
- ov::AnyMap get_default_prefill_config ( const std::shared_ptr<ov::Model>& model ) {
215
+ ov::AnyMap get_baseline_common_config ( ) {
217
216
ov::AnyMap config = {
218
- { " NPU_USE_NPUW" , " YES" },
217
+ { " NPU_COMPILATION_MODE_PARAMS" , " compute-layers-with-higher-precision=Sqrt,Power,ReduceMean,Add" },
218
+ { " NPU_USE_NPUW" , " YES" },
219
219
{ " NPUW_FOLD" , " YES" },
220
220
{ " NPUW_DCOFF_TYPE" , " f16" },
221
- { " NPUW_DCOFF_SCALE" , " YES" },
222
- { " NPUW_WEIGHTS_BANK" , " shared" },
223
- { " NPU_COMPILATION_MODE_PARAMS" , " compute-layers-with-higher-precision=Sqrt,Power,ReduceMean,Add" },
224
- { " NPUW_ONLINE_AVOID" , " P:RMSNorm/NPU" }
221
+ { " NPUW_DCOFF_SCALE" , " YES" },
222
+ { " NPUW_WEIGHTS_BANK" , " shared" },
223
+ { " NPUW_PMM" , " NO" }
225
224
};
225
+ return config;
226
+ }
227
+
228
+ ov::AnyMap get_default_common_config (const std::shared_ptr<ov::Model>& model) {
229
+ auto config = get_baseline_common_config ();
230
+ config.emplace (" NPUW_WEIGHTS_BANK_ALLOC" , " CPU" );
226
231
enable_npuw_dq_if_allowed (config, model);
227
232
return config;
228
233
}
229
234
235
+ ov::AnyMap get_default_prefill_config (const std::shared_ptr<ov::Model>& model) {
236
+ return get_default_common_config (model);
237
+ }
238
+
230
239
ov::AnyMap get_default_generate_config (const std::shared_ptr<ov::Model>& model) {
231
- ov::AnyMap config = {
232
- { " NPU_USE_NPUW" , " YES" },
233
- { " NPUW_FOLD" , " YES" },
234
- { " NPUW_DCOFF_TYPE" , " f16" },
235
- { " NPUW_DCOFF_SCALE" , " YES" },
236
- { " NPU_COMPILATION_MODE_PARAMS" , " compute-layers-with-higher-precision=Sqrt,Power,ReduceMean,Add" },
237
- { " NPUW_PARALLEL_COMPILE" , " YES" },
238
- { " NPUW_FUNCALL_ASYNC" , " YES" },
239
- { " NPUW_WEIGHTS_BANK" , " shared" },
240
- { " NPUW_ONLINE_AVOID" , " P:RMSNorm/NPU" }
241
- };
242
- enable_npuw_dq_if_allowed (config, model);
240
+ auto config = get_default_common_config (model);
241
+ config.emplace (" NPUW_FUNCALL_ASYNC" , " YES" );
242
+ config.emplace (" NPUW_PARALLEL_COMPILE" , " YES" );
243
243
return config;
244
244
}
245
245
0 commit comments