Skip to content

Commit be23fc6

Browse files
StaticLLMPipeline: Update config (openvinotoolkit#969)
1 parent d3bfaa5 commit be23fc6

File tree

1 file changed

+19
-19
lines changed

1 file changed

+19
-19
lines changed

src/cpp/src/llm_pipeline_static.cpp

+19-19
Original file line number | Diff line number | Diff line change
@@ -79,7 +79,6 @@ void enable_npuw_dq_if_allowed(ov::AnyMap& config,
7979
const std::shared_ptr<ov::Model>& model) {
8080
if (allow_to_enable_npuw_dq(model)) {
8181
config["NPUW_DQ"] = "YES";
82-
pop_option(config, "NPUW_ONLINE_AVOID");
8382
}
8483
}
8584

@@ -213,33 +212,34 @@ void merge_config_with(ov::AnyMap& lhs, const ov::AnyMap& rhs) {
213212
}
214213
}
215214

216-
ov::AnyMap get_default_prefill_config(const std::shared_ptr<ov::Model>& model) {
215+
ov::AnyMap get_baseline_common_config() {
217216
ov::AnyMap config = {
218-
{ "NPU_USE_NPUW", "YES" },
217+
{ "NPU_COMPILATION_MODE_PARAMS", "compute-layers-with-higher-precision=Sqrt,Power,ReduceMean,Add" },
218+
{ "NPU_USE_NPUW", "YES" },
219219
{ "NPUW_FOLD", "YES" },
220220
{ "NPUW_DCOFF_TYPE", "f16" },
221-
{ "NPUW_DCOFF_SCALE", "YES" },
222-
{ "NPUW_WEIGHTS_BANK", "shared" },
223-
{ "NPU_COMPILATION_MODE_PARAMS", "compute-layers-with-higher-precision=Sqrt,Power,ReduceMean,Add" },
224-
{ "NPUW_ONLINE_AVOID", "P:RMSNorm/NPU" }
221+
{ "NPUW_DCOFF_SCALE", "YES"},
222+
{ "NPUW_WEIGHTS_BANK", "shared" },
223+
{ "NPUW_PMM", "NO" }
225224
};
225+
return config;
226+
}
227+
228+
ov::AnyMap get_default_common_config(const std::shared_ptr<ov::Model>& model) {
229+
auto config = get_baseline_common_config();
230+
config.emplace("NPUW_WEIGHTS_BANK_ALLOC", "CPU");
226231
enable_npuw_dq_if_allowed(config, model);
227232
return config;
228233
}
229234

235+
ov::AnyMap get_default_prefill_config(const std::shared_ptr<ov::Model>& model) {
236+
return get_default_common_config(model);
237+
}
238+
230239
ov::AnyMap get_default_generate_config(const std::shared_ptr<ov::Model>& model) {
231-
ov::AnyMap config = {
232-
{ "NPU_USE_NPUW", "YES" },
233-
{ "NPUW_FOLD", "YES" },
234-
{ "NPUW_DCOFF_TYPE", "f16" },
235-
{ "NPUW_DCOFF_SCALE", "YES" },
236-
{ "NPU_COMPILATION_MODE_PARAMS", "compute-layers-with-higher-precision=Sqrt,Power,ReduceMean,Add" },
237-
{ "NPUW_PARALLEL_COMPILE", "YES" },
238-
{ "NPUW_FUNCALL_ASYNC", "YES" },
239-
{ "NPUW_WEIGHTS_BANK", "shared" },
240-
{ "NPUW_ONLINE_AVOID", "P:RMSNorm/NPU" }
241-
};
242-
enable_npuw_dq_if_allowed(config, model);
240+
auto config = get_default_common_config(model);
241+
config.emplace("NPUW_FUNCALL_ASYNC", "YES");
242+
config.emplace("NPUW_PARALLEL_COMPILE", "YES");
243243
return config;
244244
}
245245

0 commit comments

Comments
 (0)