
Commit 3c2f4a9

CB constructor from ModelsMap (openvinotoolkit#1863)
Co-authored-by: Ilya Lavrenov <ilya.lavrenov@intel.com>
1 parent 6f44b5c commit 3c2f4a9

5 files changed: +105 −20 lines changed

src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp

+22
@@ -119,6 +119,28 @@ class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline {
         const ov::genai::GenerationConfig& generation_config = {}
     );

+    /**
+     * @brief Constructs a ContinuousBatchingPipeline from models map.
+     *
+     * @param models_map A map where key is model name (e.g. "vision_embeddings", "text_embeddings", "language", "resampler")
+     * and value is a pair of model IR as string and weights as tensor.
+     * @param tokenizer A manually initialized ov::genai::Tokenizer.
+     * @param scheduler_config Configuration for the scheduler.
+     * @param device The device to run the pipeline on (e.g., CPU, GPU).
+     * @param embedder_config_dir_path Optional path to a directory containing embedder config.
+     * @param properties Optional properties for the pipeline.
+     * @param generation_config Optional generation configuration for the pipeline.
+     */
+    ContinuousBatchingPipeline(
+        const ModelsMap& models_map,
+        const ov::genai::Tokenizer& tokenizer,
+        const SchedulerConfig& scheduler_config,
+        const std::string& device,
+        std::optional<std::filesystem::path> embedder_config_dir_path = std::nullopt,
+        const ov::AnyMap& properties = {},
+        const ov::genai::GenerationConfig& generation_config = {}
+    );

     ov::genai::Tokenizer get_tokenizer() const;

     ov::genai::GenerationConfig get_config() const;
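
For context, a minimal usage sketch of the constructor declared above (not part of the commit). The directory path and the read_file/read_weights helpers are illustrative assumptions standing in for however a caller obtains the IR text and weights tensor; only the constructor signature itself comes from this change.

// Hedged usage sketch of the new ModelsMap constructor; helpers and paths
// below are illustrative assumptions, not part of this commit.
#include <filesystem>
#include <fstream>
#include <sstream>
#include <string>

#include "openvino/genai/continuous_batching_pipeline.hpp"

// Illustrative helper: read a model IR (.xml) into a string.
static std::string read_file(const std::filesystem::path& p) {
    std::ifstream f(p);
    std::stringstream ss;
    ss << f.rdbuf();
    return ss.str();
}

// Illustrative helper: read a weights file (.bin) into a u8 ov::Tensor.
static ov::Tensor read_weights(const std::filesystem::path& p) {
    size_t size = std::filesystem::file_size(p);
    ov::Tensor weights(ov::element::u8, ov::Shape{size});
    std::ifstream f(p, std::ios::binary);
    f.read(static_cast<char*>(weights.data()), static_cast<std::streamsize>(size));
    return weights;
}

int main() {
    std::filesystem::path dir = "path/to/exported/vlm";  // placeholder path

    // Keys follow the names documented above; only the models a particular
    // VLM actually ships need to be present.
    ov::genai::ModelsMap models_map;
    for (const std::string& name : {"language", "text_embeddings", "vision_embeddings", "resampler"}) {
        models_map[name] = {read_file(dir / ("openvino_" + name + "_model.xml")),
                            read_weights(dir / ("openvino_" + name + "_model.bin"))};
    }

    ov::genai::ContinuousBatchingPipeline pipe(
        models_map,
        ov::genai::Tokenizer(dir),       // manually initialized tokenizer
        ov::genai::SchedulerConfig{},
        "CPU",
        dir);                            // embedder_config_dir_path
    return 0;
}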

src/cpp/include/openvino/genai/llm_pipeline.hpp

+5
@@ -26,6 +26,11 @@ using OptionalGenerationConfig = std::optional<GenerationConfig>;
 using EncodedInputs = std::variant<ov::Tensor, TokenizedInputs>;
 using StringInputs = std::variant<std::string, std::vector<std::string>>;

+/// @brief A map of models for VLMPipeline constructor.
+/// Key is model name (e.g. "vision_embeddings", "text_embeddings", "language", "resampler")
+/// and value is a pair of model IR as string and weights as tensor.
+using ModelsMap = std::map<std::string, std::pair<std::string, ov::Tensor>>;
+
 /**
  * @brief Structure to store resulting batched tokens and scores for each batch sequence.
  * The first num_return_sequences elements correspond to the first batch element.
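
A minimal sketch of how a ModelsMap entry is consumed, mirroring the read_model(model_str, weights) call in the pipeline source further below; the function name is illustrative and a populated models_map is assumed.

// Each map value pairs the model IR (XML text) with its weights tensor,
// so an entry can be turned into an ov::Model without touching the disk.
#include <map>
#include <memory>
#include <string>
#include <utility>

#include "openvino/runtime/core.hpp"

using ModelsMap = std::map<std::string, std::pair<std::string, ov::Tensor>>;

// Illustrative consumer: read the "language" model out of the map.
std::shared_ptr<ov::Model> read_language_model(const ModelsMap& models_map) {
    const auto& [model_str, weights] = models_map.at("language");
    ov::Core core;
    return core.read_model(model_str, weights);
}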

src/cpp/include/openvino/genai/visual_language/pipeline.hpp

-5
@@ -19,11 +19,6 @@ class OPENVINO_GENAI_EXPORTS VLMDecodedResults : public DecodedResults{
     VLMPerfMetrics perf_metrics;
 };

-/// @brief A map of models for VLMPipeline constructor.
-/// Key is model name (e.g. "vision_embeddings", "text_embeddings", "language", "resampler")
-/// and value is a pair of model IR as string and weights as tensor.
-using ModelsMap = std::map<std::string, std::pair<std::string, ov::Tensor>>;
-
 /// @brief A Visual language modeling pipeline class used to generate a
 /// response or run a chat given a prompt and an image.
 class OPENVINO_GENAI_EXPORTS VLMPipeline {

src/cpp/src/continuous_batching_pipeline.cpp

+71 −9
@@ -71,15 +71,21 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline( const std::filesystem::p
     auto tokenizer = ov::genai::Tokenizer(directory, tokenizer_properties);
     auto generation_config = utils::from_config_json_if_exists(directory);

+    std::shared_ptr<InputsEmbedder> embedder;
+    if (std::filesystem::exists(directory / "openvino_text_embeddings_model.xml")) {
+        embedder = std::make_shared<InputsEmbedder>(directory, device, properties);
+    }
+
     if (is_prompt_lookup_enabled) {
         OPENVINO_ASSERT(draft_model_desr.model == nullptr, "Speculative decoding and prompt lookup decoding are mutually exclusive");
+        OPENVINO_ASSERT(embedder == nullptr, "Prompt lookup decoding is not supported for models with embeddings");
         m_impl = std::make_shared<PromptLookupImpl>(model, tokenizer, scheduler_config, device, properties_without_draft_model, generation_config);
     } else if (draft_model_desr.model != nullptr) {
+        OPENVINO_ASSERT(embedder == nullptr, "Speculative decoding is not supported for models with embeddings");
         auto main_model_descr = ov::genai::ModelDesc(model, tokenizer, device, properties_without_draft_model, scheduler_config, generation_config);
         m_impl = std::make_shared<SpeculativeDecodingImpl>(main_model_descr, draft_model_desr);
-    } else if (std::filesystem::exists(directory / "openvino_text_embeddings_model.xml") ) {
-        auto inputs_embedder = std::make_shared<InputsEmbedder>(directory, device, properties);
-        m_impl = std::make_shared<ContinuousBatchingImpl>(model, inputs_embedder, tokenizer, scheduler_config, device, properties, generation_config);
+    } else if (embedder) {
+        m_impl = std::make_shared<ContinuousBatchingImpl>(model, embedder, tokenizer, scheduler_config, device, properties, generation_config);
     }
     else {
         m_impl = std::make_shared<ContinuousBatchingImpl>(model, tokenizer, scheduler_config, device, properties, generation_config);
@@ -112,16 +118,21 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline(
     }
     auto model = utils::singleton_core().read_model(model_path, {}, properties_without_draft_model);
     auto generation_config = utils::from_config_json_if_exists(directory);
+    std::shared_ptr<InputsEmbedder> embedder;
+    if (std::filesystem::exists(directory / "openvino_text_embeddings_model.xml")) {
+        embedder = std::make_shared<InputsEmbedder>(directory, device, properties);
+    }

     if (is_prompt_lookup_enabled) {
         OPENVINO_ASSERT(draft_model_desr.model == nullptr, "Speculative decoding and prompt lookup decoding are mutually exclusive");
+        OPENVINO_ASSERT(embedder == nullptr, "Prompt lookup decoding is not supported for models with embeddings");
         m_impl = std::make_shared<PromptLookupImpl>(model, tokenizer, scheduler_config, device, properties_without_draft_model, generation_config);
     } else if (draft_model_desr.model != nullptr) {
+        OPENVINO_ASSERT(embedder == nullptr, "Speculative decoding is not supported for models with embeddings");
         auto main_model_descr = ov::genai::ModelDesc(model, tokenizer, device, properties_without_draft_model, scheduler_config, generation_config);
         m_impl = std::make_shared<SpeculativeDecodingImpl>(main_model_descr, draft_model_desr);
-    } else if (std::filesystem::exists(directory / "openvino_text_embeddings_model.xml") ) {
-        auto inputs_embedder = std::make_shared<InputsEmbedder>(directory, device, properties);
-        m_impl = std::make_shared<ContinuousBatchingImpl>(model, inputs_embedder, tokenizer, scheduler_config, device, properties, generation_config);
+    } else if (embedder) {
+        m_impl = std::make_shared<ContinuousBatchingImpl>(model, embedder, tokenizer, scheduler_config, device, properties, generation_config);
     } else {
         m_impl = std::make_shared<ContinuousBatchingImpl>(model, tokenizer, scheduler_config, device, properties, generation_config);
     }
@@ -144,20 +155,71 @@ ContinuousBatchingPipeline::ContinuousBatchingPipeline(
     auto is_prompt_lookup_enabled = extract_prompt_lookup_from_config(properties_without_draft_model);
     auto model = utils::singleton_core().read_model(model_str, weights_tensor);
     auto rt_info = model->get_rt_info();
+    std::shared_ptr<InputsEmbedder> embedder = nullptr;
     std::filesystem::path directory = "";
     if (rt_info.find("__weights_path") != rt_info.end()) {
         std::string weights_path = rt_info.at("__weights_path").as<std::string>();
         directory = std::filesystem::path(weights_path).parent_path();
+        if (std::filesystem::exists(directory / "openvino_text_embeddings_model.xml")) {
+            embedder = std::make_shared<InputsEmbedder>(directory, device, properties);
+        }
+    }
+    if (is_prompt_lookup_enabled) {
+        OPENVINO_ASSERT(draft_model_desr.model == nullptr, "Speculative decoding and prompt lookup decoding are mutually exclusive");
+        OPENVINO_ASSERT(embedder == nullptr, "Prompt lookup decoding is not supported for models with embeddings");
+        m_impl = std::make_shared<PromptLookupImpl>(model, tokenizer, scheduler_config, device, properties_without_draft_model, generation_config);
+    } else if (draft_model_desr.model != nullptr) {
+        OPENVINO_ASSERT(embedder == nullptr, "Speculative decoding is not supported for models with embeddings");
+        auto main_model_descr = ov::genai::ModelDesc(model, tokenizer, device, properties_without_draft_model, scheduler_config, generation_config);
+        m_impl = std::make_shared<SpeculativeDecodingImpl>(main_model_descr, draft_model_desr);
+    } else if (embedder) {
+        m_impl = std::make_shared<ContinuousBatchingImpl>(model, embedder, tokenizer, scheduler_config, device, properties, generation_config);
+    } else {
+        m_impl = std::make_shared<ContinuousBatchingImpl>(model, tokenizer, scheduler_config, device, properties, generation_config);
+    }
+
+    m_impl->m_load_time_ms = get_load_time(start_time);
+}
+
+ContinuousBatchingPipeline::ContinuousBatchingPipeline(
+    const ModelsMap& models_map,
+    const ov::genai::Tokenizer& tokenizer,
+    const SchedulerConfig& scheduler_config,
+    const std::string& device,
+    std::optional<std::filesystem::path> embedder_config_dir_path,
+    const ov::AnyMap& properties,
+    const ov::genai::GenerationConfig& generation_config) {
+    auto start_time = std::chrono::steady_clock::now();
+
+    auto properties_without_draft_model = properties;
+    auto draft_model_desr = extract_draft_model_from_config(properties_without_draft_model);
+    auto is_prompt_lookup_enabled = extract_prompt_lookup_from_config(properties_without_draft_model);
+    auto model_pair = utils::get_model_weights_pair(models_map, "language");
+    auto model = utils::singleton_core().read_model(model_pair.first, model_pair.second);
+    auto rt_info = model->get_rt_info();
+    std::filesystem::path directory = "";
+    std::shared_ptr<InputsEmbedder> embedder = nullptr;
+    if (embedder_config_dir_path.has_value()) {
+        auto path = *embedder_config_dir_path;
+        embedder = std::make_shared<InputsEmbedder>(models_map, tokenizer, path, device, properties);
+    }
+    else if (rt_info.find("__weights_path") != rt_info.end()) {
+        std::string weights_path = rt_info.at("__weights_path").as<std::string>();
+        directory = std::filesystem::path(weights_path).parent_path();
+        if (std::filesystem::exists(directory / "openvino_text_embeddings_model.xml")) {
+            embedder = std::make_shared<InputsEmbedder>(directory, device, properties);
+        }
     }
     if (is_prompt_lookup_enabled) {
         OPENVINO_ASSERT(draft_model_desr.model == nullptr, "Speculative decoding and prompt lookup decoding are mutually exclusive");
+        OPENVINO_ASSERT(embedder == nullptr, "Prompt lookup decoding is not supported for models with embeddings");
         m_impl = std::make_shared<PromptLookupImpl>(model, tokenizer, scheduler_config, device, properties_without_draft_model, generation_config);
     } else if (draft_model_desr.model != nullptr) {
+        OPENVINO_ASSERT(embedder == nullptr, "Speculative decoding is not supported for models with embeddings");
         auto main_model_descr = ov::genai::ModelDesc(model, tokenizer, device, properties_without_draft_model, scheduler_config, generation_config);
         m_impl = std::make_shared<SpeculativeDecodingImpl>(main_model_descr, draft_model_desr);
-    } else if (std::filesystem::exists(directory / "openvino_text_embeddings_model.xml")) {
-        auto inputs_embedder = std::make_shared<InputsEmbedder>(directory, device, properties);
-        m_impl = std::make_shared<ContinuousBatchingImpl>(model, inputs_embedder, tokenizer, scheduler_config, device, properties, generation_config);
+    } else if (embedder) {
+        m_impl = std::make_shared<ContinuousBatchingImpl>(model, embedder, tokenizer, scheduler_config, device, properties, generation_config);
     } else {
         m_impl = std::make_shared<ContinuousBatchingImpl>(model, tokenizer, scheduler_config, device, properties, generation_config);
     }
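
All three constructors now share the same dispatch order (prompt lookup, then speculative decoding, then embeddings, then plain continuous batching), and the two new asserts make the embeddings path mutually exclusive with the other decoding modes. A hedged sketch of what that means for callers; the property key "prompt_lookup" is an assumption inferred from extract_prompt_lookup_from_config, and the path is a placeholder:

// Illustrative only: requesting prompt lookup for a model directory that
// ships openvino_text_embeddings_model.xml should now trip the new
// OPENVINO_ASSERT instead of silently picking one mode.
#include <exception>

#include "openvino/genai/continuous_batching_pipeline.hpp"

int main() {
    ov::genai::SchedulerConfig scheduler_config;
    try {
        ov::genai::ContinuousBatchingPipeline pipe(
            "path/to/vlm_model_dir",               // placeholder; contains the text embeddings IR
            scheduler_config,
            "CPU",
            ov::AnyMap{{"prompt_lookup", true}});  // assumed property key
    } catch (const std::exception& e) {
        // Expected message: "Prompt lookup decoding is not supported for
        // models with embeddings".
    }
    return 0;
}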

src/cpp/src/visual_language/continuous_batching_adapter.hpp

+7 −6
@@ -32,12 +32,13 @@ class ov::genai::VLMPipeline::VLMContinuousBatchingAdapter : public ov::genai::V
         const ov::AnyMap& properties,
         const ov::genai::GenerationConfig& generation_config
     ): m_impl{
-        "./",
-        scheduler_config,
-        device,
-        properties} {
-        // TODO: Implement the constructor of ContinuousBatchingPipeline from ModelsMap
-        OPENVINO_THROW("Not implemented.");
+        models_map,
+        tokenizer,
+        scheduler_config,
+        device,
+        config_dir_path,
+        properties,
+        generation_config} {
     }

     VLMDecodedResults generate(
