Skip to content

Commit e2fa0d0

Browse files
authored
StaticLLMPipeline: Introduced NPUW_UNFOLD_IREQS for hint FAST_COMPILE (openvinotoolkit#1275)
E-149055
1 parent 1feb067 commit e2fa0d0

File tree

1 file changed

+18
-4
lines changed

1 file changed

+18
-4
lines changed

src/cpp/src/llm_pipeline_static.cpp

+18-4
Original file line numberDiff line numberDiff line change
@@ -233,15 +233,26 @@ enum class GenerateHint {
233233
BEST_PERF
234234
};
235235

236+
// Returns the textual name of a GenerateHint value.
//   GenerateHint::FAST_COMPILE -> "FAST_COMPILE"
//   GenerateHint::BEST_PERF    -> "BEST_PERF"
// Any other value falls into the default branch and is reported through
// OPENVINO_THROW instead of producing a string.
// These names are the single source of truth for the hint spelling: the
// parser str_to_hint() compares its input against the strings returned here.
std::string to_string(GenerateHint h) {
237+
switch(h) {
238+
case GenerateHint::FAST_COMPILE :
239+
return "FAST_COMPILE";
240+
case GenerateHint::BEST_PERF :
241+
return "BEST_PERF";
242+
default:
243+
OPENVINO_THROW("Unsupported value for type GenerateHint provided");
244+
}
245+
}
246+
236247
// Parses the value of the "GENERATE_HINT" option into a GenerateHint.
// The input must match, exactly (std::string operator==), the name that
// to_string() yields for FAST_COMPILE or BEST_PERF; the matching enumerator
// is returned. Any other input is rejected through OPENVINO_THROW with a
// message that echoes the offending string and lists both accepted names.
// NOTE(review): this span is a rendered diff - a line that follows an "NNN-"
// gutter marker is the pre-change text and the line that follows the next
// "NNN+" marker is its replacement; only the "+" variant is current code.
GenerateHint str_to_hint(const std::string& str) {
237-
if (str == "FAST_COMPILE") {
248+
if (str == to_string(GenerateHint::FAST_COMPILE)) {
238249
return GenerateHint::FAST_COMPILE;
239250
}
240-
if (str == "BEST_PERF") {
251+
if (str == to_string(GenerateHint::BEST_PERF)) {
241252
return GenerateHint::BEST_PERF;
242253
}
243254
OPENVINO_THROW("Unsupported \"GENERATE_HINT\" provided: " +
244-
str + ". Please select either \"FAST_COMPILE\" or \"BEST_PERF\".");
255+
str + ". Please select either \"" + to_string(GenerateHint::BEST_PERF) + "\" or \"" + to_string(GenerateHint::FAST_COMPILE) +"\".");
245256
}
246257

247258
std::shared_ptr<ov::Model> cvt_kvcache_to_fp16(const std::shared_ptr<ov::Model>& model) {
@@ -534,6 +545,9 @@ ov::AnyMap get_default_generate_config(const std::shared_ptr<ov::Model>& model,
534545
if (npudesc.has_value() && npudesc->arch == "4000") {
535546
config.emplace("NPU_DPU_GROUPS", 4);
536547
}
548+
if (hint == GenerateHint::FAST_COMPILE) {
549+
config.emplace("NPUW_UNFOLD_IREQS", "YES");
550+
}
537551
if (npudesc.has_value() && npudesc->compiler_dq) {
538552
config.emplace("NPUW_DQ_FULL", "NO");
539553
}
@@ -727,7 +741,7 @@ void StaticLLMPipeline::setupAndCompileModels(
727741
properties, "PREFILL_CONFIG", get_default_prefill_config(prefill_model, npudesc)
728742
);
729743
// NB: GENERATE_HINT is only applicable for default generate config!
730-
auto generate_hint = str_to_hint(pop_or_default<std::string>(properties, "GENERATE_HINT", "FAST_COMPILE"));
744+
auto generate_hint = str_to_hint(pop_or_default<std::string>(properties, "GENERATE_HINT", to_string(GenerateHint::FAST_COMPILE)));
731745
auto generate_config = pop_or_default(
732746
properties, "GENERATE_CONFIG", get_default_generate_config(kvcache_model, npudesc, generate_hint)
733747
);

0 commit comments

Comments
 (0)