Skip to content

Commit f4dc9c0

Browse files
Drop model_path
1 parent 9f1530b commit f4dc9c0

7 files changed

+14
-31
lines changed

src/cpp/src/llm_pipeline_stateful.cpp

+3-7
Original file line numberDiff line numberDiff line change
@@ -39,17 +39,15 @@ StatefulLLMPipeline::StatefulLLMPipeline(
3939
tokenizer,
4040
device,
4141
properties,
42-
utils::from_config_json_if_exists(models_path),
43-
models_path
42+
utils::from_config_json_if_exists(models_path)
4443
} {}
4544

4645
StatefulLLMPipeline::StatefulLLMPipeline(
4746
const std::shared_ptr<ov::Model>& model,
4847
const ov::genai::Tokenizer& tokenizer,
4948
const std::string& device,
5049
const ov::AnyMap& properties,
51-
const ov::genai::GenerationConfig& generation_config,
52-
const std::filesystem::path& models_path)
50+
const ov::genai::GenerationConfig& generation_config)
5351
: LLMPipelineImplBase(tokenizer, generation_config), m_sampler(m_tokenizer) {
5452
utils::apply_slice_before_matmul_transformation(model);
5553
auto kv_pos = ov::genai::utils::get_kv_axes_pos(model);
@@ -70,9 +68,7 @@ StatefulLLMPipeline::StatefulLLMPipeline(
7068
ov::CompiledModel compiled_model;
7169
if (m_is_npu) {
7270
utils::KVDesc kv_desc;
73-
std::tie(compiled_model, kv_desc) = utils::compile_decoder_for_npu(
74-
model, *filtered_properties, kv_pos, models_path / "openvino_model.xml"
75-
);
71+
std::tie(compiled_model, kv_desc) = utils::compile_decoder_for_npu(model, *filtered_properties, kv_pos);
7672
m_max_kv_cache_size = kv_desc.max_prompt_len + kv_desc.min_response_len;
7773
} else {
7874
compiled_model = utils::singleton_core().compile_model(model, device, *filtered_properties);

src/cpp/src/llm_pipeline_stateful.hpp

+1-2
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,7 @@ class StatefulLLMPipeline final : public LLMPipelineImplBase {
5050
const ov::genai::Tokenizer& tokenizer,
5151
const std::string& device,
5252
const ov::AnyMap& config,
53-
const ov::genai::GenerationConfig& generation_config,
54-
const std::filesystem::path& models_path = {}
53+
const ov::genai::GenerationConfig& generation_config
5554
);
5655

5756
StatefulLLMPipeline(

src/cpp/src/llm_pipeline_static.cpp

+5-11
Original file line numberDiff line numberDiff line change
@@ -102,23 +102,19 @@ StatefulLLMPipeline::StatefulLLMPipeline(
102102
): StatefulLLMPipeline(
103103
genai::utils::singleton_core().read_model(models_path / "openvino_model.xml", {}, config),
104104
tokenizer, config,
105-
utils::from_config_json_if_exists(models_path),
106-
models_path
105+
utils::from_config_json_if_exists(models_path)
107106
) {
108107
}
109108

110109
StatefulLLMPipeline::StatefulLLMPipeline(
111110
const std::shared_ptr<ov::Model>& model,
112111
const ov::genai::Tokenizer& tokenizer,
113112
const ov::AnyMap& properties,
114-
const ov::genai::GenerationConfig& generation_config,
115-
const std::filesystem::path& models_path
113+
const ov::genai::GenerationConfig& generation_config
116114
) : LLMPipelineImplBase(tokenizer, generation_config),
117115
m_sampler(m_tokenizer) {
118116
auto kv_pos = ov::genai::utils::get_kv_axes_pos(model);
119-
auto [compiled, kv_desc] = utils::compile_decoder_for_npu(
120-
model, properties, kv_pos, models_path / "openvino_model.xml"
121-
);
117+
auto [compiled, kv_desc] = utils::compile_decoder_for_npu(model, properties, kv_pos);
122118
m_max_prompt_len = kv_desc.max_prompt_len;
123119
m_kvcache_total = kv_desc.max_prompt_len + kv_desc.min_response_len;
124120
m_request = compiled.create_infer_request();
@@ -358,16 +354,14 @@ LLMPipelineFactory::create(const std::filesystem::path& models_path,
358354
std::unique_ptr<LLMPipelineImplBase> LLMPipelineFactory::create(const std::shared_ptr<ov::Model>& model,
359355
const ov::genai::Tokenizer& tokenizer,
360356
const ov::AnyMap& properties,
361-
const ov::genai::GenerationConfig& generation_config,
362-
const std::filesystem::path& models_path) {
357+
const ov::genai::GenerationConfig& generation_config) {
363358
auto properties_copy = properties;
364359
const auto pipeline_mode = str_to_pipeline(utils::pop_or_default(properties_copy, "STATIC_PIPELINE", std::string("STATEFUL")));
365360
if (pipeline_mode == StaticPipelineKind::STATEFUL) {
366361
return std::make_unique<ov::genai::static_llm::StatefulLLMPipeline>(model,
367362
tokenizer,
368363
properties_copy,
369-
generation_config,
370-
models_path);
364+
generation_config);
371365
}
372366
OPENVINO_ASSERT(false);
373367
}

src/cpp/src/llm_pipeline_static.hpp

+2-4
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,7 @@ struct LLMPipelineFactory {
2323
static std::unique_ptr<LLMPipelineImplBase> create(const std::shared_ptr<ov::Model>& model,
2424
const ov::genai::Tokenizer& tokenizer,
2525
const ov::AnyMap& properties,
26-
const ov::genai::GenerationConfig& generation_config,
27-
const std::filesystem::path& models_path = {});
26+
const ov::genai::GenerationConfig& generation_config);
2827
};
2928

3029
class StatefulLLMPipeline : public LLMPipelineImplBase {
@@ -39,8 +38,7 @@ class StatefulLLMPipeline : public LLMPipelineImplBase {
3938
const std::shared_ptr<ov::Model>& model,
4039
const ov::genai::Tokenizer& tokenizer,
4140
const ov::AnyMap& properties,
42-
const ov::genai::GenerationConfig& generation_config,
43-
const std::filesystem::path& path = {}
41+
const ov::genai::GenerationConfig& generation_config
4442
);
4543

4644
DecodedResults generate(

src/cpp/src/utils.cpp

+1-2
Original file line numberDiff line numberDiff line change
@@ -418,8 +418,7 @@ void print_compiled_model_properties(ov::CompiledModel& compiled_Model, const ch
418418
std::pair<ov::CompiledModel, KVDesc>
419419
compile_decoder_for_npu(const std::shared_ptr<ov::Model>& model,
420420
const ov::AnyMap& config,
421-
const KVAxesPosition& kv_pos,
422-
const std::filesystem::path& model_path) {
421+
const KVAxesPosition& kv_pos) {
423422
ov::CompiledModel compiled;
424423
ov::AnyMap properties = config;
425424
KVDesc kv_desc;

src/cpp/src/utils.hpp

+1-2
Original file line numberDiff line numberDiff line change
@@ -137,8 +137,7 @@ struct KVDesc {
137137

138138
std::pair<ov::CompiledModel, KVDesc> compile_decoder_for_npu(const std::shared_ptr<ov::Model>& model,
139139
const ov::AnyMap& config,
140-
const KVAxesPosition& kv_pos,
141-
const std::filesystem::path& path = {});
140+
const KVAxesPosition& kv_pos);
142141

143142
/// @brief SharedOptional is a wrapper around a reference to an existing object and an optional shared alternative value.
144143
/// The difference from std::optional is that the default state is not empty and contains a reference to an existing object outside the class.

src/cpp/src/visual_language/pipeline.cpp

+1-3
Original file line numberDiff line numberDiff line change
@@ -80,9 +80,7 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{
8080
if (m_is_npu) {
8181
embedder_device = "CPU";
8282
utils::KVDesc kv_desc;
83-
std::tie(compiled_language_model, kv_desc) = utils::compile_decoder_for_npu(
84-
language_model, lm_properties, kv_pos, language_model_path
85-
);
83+
std::tie(compiled_language_model, kv_desc) = utils::compile_decoder_for_npu(language_model, lm_properties, kv_pos);
8684
m_max_kv_cache_size = kv_desc.max_prompt_len + kv_desc.min_response_len;
8785
} else {
8886
compiled_language_model = utils::singleton_core().compile_model(language_model, device, lm_properties);

0 commit comments

Comments (0)