Skip to content

Commit 0f672a8

Browse files
authored
Split core_plugin and compile_plugin configs (#908)
Fast fix for CVS-153281. To be reverted once CVS-153906 is fixed on the core side.
1 parent 616ab12 commit 0f672a8

File tree

4 files changed

+44
-17
lines changed

4 files changed

+44
-17
lines changed

src/cpp/src/llm_pipeline.cpp

+6-4
Original file line numberDiff line numberDiff line change
@@ -99,14 +99,16 @@ class StatefulLLMPipeline final : public LLMPipelineImplBase {
9999
m_generation_config.adapters = adapters_iter->second.as<AdapterConfig>();
100100
auto filtered_plugin_config = plugin_config;
101101
filtered_plugin_config.erase(ov::genai::adapters.name());
102-
core.set_property(device, filtered_plugin_config);
102+
auto [core_plugin_config, compile_plugin_config] = ov::genai::utils::split_core_complile_config(filtered_plugin_config);
103+
core.set_property(core_plugin_config);
103104
auto model = core.read_model(model_path / "openvino_model.xml");
104105
m_adapter_controller = AdapterController(model, m_generation_config.adapters, "base_model.model.model.", device); // TODO: Make the prefix name configurable
105-
m_model_runner = core.compile_model(model, device).create_infer_request();
106+
m_model_runner = core.compile_model(model, device, compile_plugin_config).create_infer_request();
106107
m_adapter_controller->apply(m_model_runner, m_generation_config.adapters);
107108
} else {
108-
core.set_property(device, plugin_config);
109-
m_model_runner = core.compile_model(model_path / "openvino_model.xml", device).create_infer_request();
109+
auto [core_plugin_config, compile_plugin_config] = ov::genai::utils::split_core_complile_config(plugin_config);
110+
core.set_property(core_plugin_config);
111+
m_model_runner = core.compile_model(model_path / "openvino_model.xml", device, compile_plugin_config).create_infer_request();
110112
}
111113

112114
// If eos_token_id was not provided, take value

src/cpp/src/utils.cpp

+28-9
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// SPDX-License-Identifier: Apache-2.0
33

44
#include "utils.hpp"
5+
56
#include <fstream>
67

78
namespace ov {
@@ -42,7 +43,7 @@ int64_t argmax(const ov::Tensor& logits, const size_t batch_idx) {
4243
size_t batch_offset = batch_idx * logits.get_shape()[1] * vocab_size;
4344
size_t sequence_offset = (logits.get_shape()[1] - 1) * vocab_size;
4445
const float* logits_data = logits.data<const float>() + batch_offset + sequence_offset;
45-
46+
4647
int64_t out_token = std::max_element(logits_data, logits_data + vocab_size) - logits_data;
4748
float max_logit = logits_data[out_token];
4849

@@ -52,16 +53,14 @@ int64_t argmax(const ov::Tensor& logits, const size_t batch_idx) {
5253
/**
5354
* Initializes position ids based on attention mask and starting position
5455
*/
55-
void initialize_position_ids(ov::Tensor& position_ids,
56-
const ov::Tensor& attention_mask,
57-
int64_t start_pos) {
58-
OPENVINO_ASSERT(position_ids.get_element_type() == ov::element::i64,
56+
void initialize_position_ids(ov::Tensor& position_ids, const ov::Tensor& attention_mask, int64_t start_pos) {
57+
OPENVINO_ASSERT(position_ids.get_element_type() == ov::element::i64,
5958
"position_ids tensor element type should be an i64");
60-
OPENVINO_ASSERT(position_ids.get_shape().size() == 2,
59+
OPENVINO_ASSERT(position_ids.get_shape().size() == 2,
6160
"position_ids tensor should of rank 2 with shape [batch_size, seq_len]");
62-
OPENVINO_ASSERT(attention_mask.get_element_type() == ov::element::i64,
61+
OPENVINO_ASSERT(attention_mask.get_element_type() == ov::element::i64,
6362
"attention_mask tensor element type should be an i64");
64-
OPENVINO_ASSERT(attention_mask.get_shape().size() == 2,
63+
OPENVINO_ASSERT(attention_mask.get_shape().size() == 2,
6564
"attention_mask tensor should of rank 2 with shape [batch_size, seq_len]");
6665

6766
const size_t batch_size = attention_mask.get_shape()[0];
@@ -97,7 +96,6 @@ void initialize_beam_inputs(const ov::Tensor& input_ids, const ov::Tensor& atten
9796
std::fill_n(beam_idx.data<int32_t>(), input_shape.at(0), 0);
9897
}
9998

100-
10199
void set_attention_mask(ov::Tensor&& attention_mask, std::vector<int32_t> next_beams) {
102100
ov::Tensor original_mask{ov::element::i64, attention_mask.get_shape()};
103101
ov::Shape original_shape = original_mask.get_shape();
@@ -185,6 +183,27 @@ ov::genai::OptionalGenerationConfig get_config_from_map(const ov::AnyMap& config
185183
return std::nullopt;
186184
}
187185

186+
/**
187+
* Split config by core and compile configs
188+
* There are not supported by `core.compile` function plugin options like `ENABLE_MMAP`
189+
* Move this options to `core.set_property` config
190+
*/
191+
std::pair<ov::AnyMap, ov::AnyMap> split_core_complile_config(const ov::AnyMap& plugin_config) {
192+
const std::vector<std::string> unsupported_by_compile_options{"ENABLE_MMAP"};
193+
ov::AnyMap core_config;
194+
ov::AnyMap compile_config{plugin_config};
195+
196+
for (const auto option : unsupported_by_compile_options) {
197+
auto iter = plugin_config.find(option);
198+
if (iter != plugin_config.end()) {
199+
core_config[option] = iter->second;
200+
compile_config.erase(option);
201+
}
202+
}
203+
204+
return {core_config, compile_config};
205+
};
206+
188207
} // namespace utils
189208
} // namespace genai
190209
} // namespace ov

src/cpp/src/utils.hpp

+2
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ ov::genai::StreamerVariant get_streamer_from_map(const ov::AnyMap& config_map);
7070

7171
ov::genai::OptionalGenerationConfig get_config_from_map(const ov::AnyMap& config_map);
7272

73+
std::pair<ov::AnyMap, ov::AnyMap> split_core_complile_config(const ov::AnyMap& plugin_config);
74+
7375
} // namespace utils
7476
} // namespace genai
7577
} // namespace ov

src/cpp/src/whisper_pipeline.cpp

+8-4
Original file line numberDiff line numberDiff line change
@@ -57,12 +57,16 @@ class WhisperPipeline::Impl {
5757
m_tokenizer{tokenizer},
5858
m_feature_extractor{(model_path / "preprocessor_config.json").string()} {
5959
ov::Core core;
60-
core.set_property(device, plugin_config);
60+
auto [core_plugin_config, compile_plugin_config] = ov::genai::utils::split_core_complile_config(plugin_config);
61+
core.set_property(core_plugin_config);
6162

62-
m_models.encoder = core.compile_model(model_path / "openvino_encoder_model.xml", device).create_infer_request();
63-
m_models.decoder = core.compile_model(model_path / "openvino_decoder_model.xml", device).create_infer_request();
63+
m_models.encoder = core.compile_model(model_path / "openvino_encoder_model.xml", device, compile_plugin_config)
64+
.create_infer_request();
65+
m_models.decoder = core.compile_model(model_path / "openvino_decoder_model.xml", device, compile_plugin_config)
66+
.create_infer_request();
6467
m_models.decoder_with_past =
65-
core.compile_model(model_path / "openvino_decoder_with_past_model.xml", device).create_infer_request();
68+
core.compile_model(model_path / "openvino_decoder_with_past_model.xml", device, compile_plugin_config)
69+
.create_infer_request();
6670

6771
// If eos_token_id was not provided, take value
6872
if (m_generation_config.eos_token_id == -1) {

0 commit comments

Comments (0)