Commit 447e745

add set_chat_template
1 parent 08d133c commit 447e745

File tree

2 files changed, +68 -6 lines changed

src/cpp/include/openvino/genai/vlm_pipeline.hpp (+59 -1)
@@ -54,6 +54,14 @@ class OPENVINO_GENAI_EXPORTS VLMPipeline {
     std::string m_templated_chat_history;
     size_t image_id = 0; // Used to insert <image_id>i</image_id> per image (not a slice).

+    /// @brief Construct a pipeline from a folder containing the
+    /// tokenizer and model IRs.
+    /// @param model_dir A folder to read the tokenizer and model IRs from.
+    /// @param device Inference device. A tokenizer is always compiled
+    /// for CPU.
+    /// @param device_config A config to pass to ov::Core::set_property()
+    /// and ov::Core::compile_model().
+    /// @param core ov::Core instance to use.
     explicit VLMPipeline(
         const std::filesystem::path& model_dir,
         const std::string& device="CPU",
@@ -67,6 +75,14 @@ class OPENVINO_GENAI_EXPORTS VLMPipeline {
         core
     } {}

+    /// @brief Construct a pipeline from a folder containing model IRs
+    /// and from a Tokenizer instance.
+    /// @param model_dir A folder to read model IRs from.
+    /// @param tokenizer An instance of Tokenizer to use.
+    /// @param device Inference device.
+    /// @param device_config A config to pass to ov::Core::set_property()
+    /// and ov::Core::compile_model().
+    /// @param core ov::Core instance to use.
     VLMPipeline(
         const std::filesystem::path& model_dir,
         const ov::genai::Tokenizer& tokenizer,
@@ -78,16 +94,37 @@ class OPENVINO_GENAI_EXPORTS VLMPipeline {
     /// @brief Default destructor.
     ~VLMPipeline();

+    /// @brief Generate a response given a prompt and any number of
+    /// uint8 RGB images.
+    /// @param prompt A prompt to respond to.
+    /// @param rgbs Images to be prepended to the prompt.
+    /// @param generation_config A config to follow for text generation.
+    /// @param streamer A streamer to acquire intermediate results.
+    /// @return A string generated by the model.
     DecodedResults generate(
         const std::string& prompt,
-        const std::vector<ov::Tensor>& images,
+        const std::vector<ov::Tensor>& rgbs,
         const GenerationConfig& generation_config,
         const StreamerVariant& streamer
     );
+    /// @brief Generate a response given a prompt and a config.
+    /// @param prompt A prompt to respond to.
+    /// @param config_map A config that may contain a GenerationConfig,
+    /// values for its members, a StreamerVariant, and a single image
+    /// or multiple images.
+    /// @return A string generated by the model.
     DecodedResults generate(
         const std::string& prompt,
         const ov::AnyMap& config_map
     );
+    /// @brief Generate a response given a prompt and an arbitrary
+    /// number of ov::Property instances.
+    /// Example:
+    /// generate("text", image(std::move(rgb)), do_sample(true));
+    /// @param prompt A prompt to respond to.
+    /// @param ...properties ov::Property instances to be combined
+    /// into an ov::AnyMap.
+    /// @return A string generated by the model.
     template <typename... Properties>
     util::EnableIfAllStringAny<DecodedResults, Properties...> generate(
         const std::string& prompt,
@@ -97,9 +134,30 @@ class OPENVINO_GENAI_EXPORTS VLMPipeline {
             prompt, AnyMap{std::forward<Properties>(properties)...}
         );
     }
+    /// @brief Activate chat mode. Chat preserves the previous history
+    /// and applies the chat_template to input prompts. Calling
+    /// start_chat() again or finish_chat() drops the memorized
+    /// history. It's possible to disable chat_template application
+    /// by calling
+    /// set_chat_template("{% for message in messages %}{{ message['content'] }}{% endfor %}").
+    /// @param system_message Some chat_templates contain a system
+    /// role in addition to the user and assistant roles. Set a
+    /// message for that role.
     void start_chat(const std::string& system_message="");
+    /// @brief Deactivate chat mode.
     void finish_chat() {m_is_chat_conversation = false;}
+    /// @brief Set a custom chat template. Can be used to deactivate
+    /// chat_template application in chat mode if called with
+    /// "{% for message in messages %}{{ message['content'] }}{% endfor %}"
+    /// or to work around unsupported chat_template entries in a
+    /// model's default chat_template.
+    /// @param new_template A new template to override with.
+    void set_chat_template(const std::string& new_template);
+    /// @brief Extract the GenerationConfig used to get default values.
+    /// @return The default values used.
     GenerationConfig get_generation_config() const;
+    /// @brief Override default values for GenerationConfig.
+    /// @param new_config A config to override default values with.
     void set_generation_config(const GenerationConfig& new_config);
 private:
     class VLMPipelineImpl;
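
For orientation, a minimal usage sketch of the API declared above (not part of this commit). The model path and image shape are placeholders, and it assumes the image() and do_sample() property helpers referenced in the Doxygen example, as well as DecodedResults' stream operator, are available in ov::genai:

#include <iostream>

#include "openvino/genai/vlm_pipeline.hpp"

int main() {
    // Hypothetical folder containing the tokenizer and model IRs.
    ov::genai::VLMPipeline pipe("./MiniCPM-V-2_6/", "CPU");

    // A uint8 RGB image tensor; filling in real pixel data is omitted here.
    ov::Tensor rgb{ov::element::u8, {1, 448, 448, 3}};

    pipe.start_chat();
    // Property-based overload: the trailing arguments are folded into
    // an ov::AnyMap, matching the generate(prompt, config_map) overload.
    ov::genai::DecodedResults result = pipe.generate(
        "Describe this image.",
        ov::genai::image(std::move(rgb)),
        ov::genai::do_sample(false)
    );
    std::cout << result << '\n';
    pipe.finish_chat();
}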

src/cpp/src/vlm_pipeline.cpp (+9 -5)
@@ -333,15 +333,15 @@ ov::genai::VLMPipeline::~VLMPipeline() = default;

 DecodedResults VLMPipeline::generate(
     const std::string& prompt,
-    const std::vector<ov::Tensor>& images,
+    const std::vector<ov::Tensor>& rgbs,
     const GenerationConfig& generation_config,
     const StreamerVariant& streamer
 ) {
     std::string images_prompt;
     EncodedImage embeds;
-    if (!images.empty()) {
-        OPENVINO_ASSERT(1 == images.size(), "TODO: Only a single image allowed");
-        embeds = m_vision_encoder.encode(images.at(0));
+    if (!rgbs.empty()) {
+        OPENVINO_ASSERT(1 == rgbs.size(), "TODO: Only a single image allowed");
+        embeds = m_vision_encoder.encode(rgbs.at(0));
         if (m_vlm_config.use_image_id) {
             images_prompt = m_vlm_config.im_id_start + std::to_string(image_id) + m_vlm_config.im_id_end;
             ++image_id;
@@ -403,7 +403,7 @@ DecodedResults VLMPipeline::generate(
         m_vlm_config.hidden_size == inputs_embeds.get_shape().at(2),
         "Unexpected embedding size"
     );
-    if (!images.empty()) {
+    if (!rgbs.empty()) {
         int64_t* ids = input_ids.data<int64_t>();
         const ov::Tensor& resampled_source = resample(*this, embeds.resized_source, {embeds.resized_source_size});
         float* emb = resampled_source.data<float>();
@@ -575,6 +575,10 @@ void VLMPipeline::start_chat(const std::string& system_message) {
     m_templated_chat_history = m_tokenizer.apply_chat_template(m_history, add_generation_prompt);
 }

+void VLMPipeline::set_chat_template(const std::string& new_template) {
+    m_tokenizer.set_chat_template(new_template);
+}
+
 GenerationConfig VLMPipeline::get_generation_config() const {
     return m_generation_config;
 }
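
A short sketch of what the new setter enables, per the header comment (not part of this commit; the model path is a placeholder):

#include "openvino/genai/vlm_pipeline.hpp"

int main() {
    ov::genai::VLMPipeline pipe("./model_dir/", "CPU");  // hypothetical path

    // Pass-through template from the header docs: message contents are
    // concatenated verbatim, disabling chat_template application.
    pipe.set_chat_template(
        "{% for message in messages %}{{ message['content'] }}{% endfor %}"
    );

    pipe.start_chat();
    // Subsequent generate() calls build on the un-templated history.
    pipe.finish_chat();
}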
