@@ -70,6 +70,61 @@ const std::pair<std::string, std::string> chat_template_fallback_map[] = {
70
70
}
71
71
};
72
72
73
+ std::optional<std::string> remap_template (const std::string& chat_template) {
74
+ for (const auto & [known, fallback] : chat_template_fallback_map) {
75
+ if (chat_template == known) {
76
+ return fallback;
77
+ }
78
+ }
79
+ return std::nullopt;
80
+ }
81
+
82
+ void parse_if_exists (const std::filesystem::path& path, std::string& value) {
83
+ if (std::filesystem::exists (path)) {
84
+ ov::genai::utils::read_json_param (nlohmann::json::parse (std::ifstream{path}), " chat_template" , value);
85
+ }
86
+ }
87
+
88
+ template <typename T>
89
+ const T& find_or_fallback (const ov::AnyMap& rt_info, const char name[], const T& fallback) {
90
+ auto iter = rt_info.find (name);
91
+ if (rt_info.end () == iter) {
92
+ return fallback;
93
+ }
94
+ return iter->second .as <T>();
95
+ }
96
+
97
// Rewrite Jinja constructs that jinja2cpp cannot parse into supported
// equivalents, editing the template in place and returning it.
// Takes the string by rvalue reference: callers hand over ownership and
// the replacements mutate the buffer directly.
std::string patch_template(std::string&& chat_template) {
    // Replace what jinja2cpp doesn't support
    const std::pair<std::string, std::string> replace_str_map[] = {
        {"'}", "' }"},
        {"{'", "{ '"},
        {".strip()", ""},
        {"is not none", "is defined"},
        {"is none", "is undefined"},
        {"= none", "= undefined"},
        // Jinja2Cpp does not support Python-style slicing, e.g. [1:].
        // If chat template contains such slicing, we replace it with
        // a placeholder at the moment.
        {"messages[1:]", "slice(messages, 1)"},
    };

    for (const auto& [from, to] : replace_str_map) {
        size_t pos = 0;
        // Resume searching after each replacement so the substituted text
        // is never rescanned (avoids infinite loops when `to` contains `from`).
        while ((pos = chat_template.find(from, pos)) != std::string::npos) {
            chat_template.replace(pos, from.size(), to);
            pos += to.size();
        }
    }
    // In C++17 a named rvalue-reference parameter is an lvalue in a return
    // statement, so without std::move this would copy the whole template.
    return std::move(chat_template);
}
121
+
122
+ std::string remap_and_patch (const std::string& chat_template) {
123
+ return patch_template (
124
+ remap_template (chat_template).value_or (chat_template)
125
+ );
126
+ }
127
+
73
128
} // namespace
74
129
75
130
namespace ov {
@@ -195,11 +250,10 @@ class Tokenizer::TokenizerImpl {
195
250
read_special_tokens_map (models_path);
196
251
// Try to read tokenizer_config if some token ids or token str are not defined.
197
252
read_tokenizer_config_if_necessary (models_path);
253
+ parse_if_exists (models_path / " tokenizer_config.json" , m_chat_template);
254
+ parse_if_exists (models_path / " processor_config.json" , m_chat_template);
255
+ parse_if_exists (models_path / " chat_template.json" , m_chat_template);
198
256
setup_tokenizer (std::make_pair (ov_tokenizer, ov_detokenizer), properties);
199
- m_chat_template = chat_template_from_file_if_exists (models_path, " tokenizer_config.json" );
200
- if (m_chat_template.empty ()) {
201
- m_chat_template = chat_template_from_file_if_exists (models_path, " chat_template.json" );
202
- }
203
257
}
204
258
205
259
void setup_tokenizer (const std::pair<std::shared_ptr<ov::Model>, std::shared_ptr<ov::Model>>& models, const ov::AnyMap& properties) {
@@ -209,10 +263,8 @@ class Tokenizer::TokenizerImpl {
209
263
auto core = get_core_singleton ();
210
264
std::string device = " CPU" ; // only CPU is supported for now
211
265
212
- std::string version_str;
213
- utils::read_rt_info (ov_tokenizer != nullptr ? ov_tokenizer: ov_detokenizer , " openvino_tokenizers_version" , version_str);
214
- // Saving IR version was added only in 24.5, so if it's empty, then it's older than 24.5
215
- m_older_than_24_5 = version_str.empty ();
266
+ // Saving IR version was added only in 24.5, so if it's missing, then it's older than 24.5
267
+ m_older_than_24_5 = (ov_tokenizer ? ov_tokenizer: ov_detokenizer)->get_rt_info ().count (" openvino_tokenizers_version" ) == 0 ;
216
268
217
269
if (ov_tokenizer) {
218
270
ov::pass::Manager manager;
@@ -227,6 +279,18 @@ class Tokenizer::TokenizerImpl {
227
279
[this ]() -> ov::InferRequest {
228
280
return std::move (this ->m_tokenizer .create_infer_request ());
229
281
});
282
+
283
+ const ov::AnyMap& rt_info = ov_tokenizer->get_rt_info ();
284
+ m_pad_token_id = find_or_fallback (rt_info, " pad_token_id" , m_pad_token_id);
285
+ m_bos_token_id = find_or_fallback (rt_info, " bos_token_id" , m_bos_token_id);
286
+ m_eos_token_id = find_or_fallback (rt_info, " eos_token_id" , m_eos_token_id);
287
+
288
+ m_chat_template = find_or_fallback (rt_info, " chat_template" , m_chat_template);
289
+ std::optional<std::string> fallback = remap_template (m_chat_template);
290
+ m_chat_template = patch_template (fallback.value_or (m_chat_template));
291
+ if (!fallback.has_value ()) {
292
+ m_chat_template = find_or_fallback (rt_info, " simplified_chat_template" , m_chat_template);
293
+ }
230
294
}
231
295
232
296
if (ov_detokenizer) {
@@ -241,6 +305,14 @@ class Tokenizer::TokenizerImpl {
241
305
[this ]() -> ov::InferRequest {
242
306
return std::move (this ->m_detokenizer .create_infer_request ());
243
307
});
308
+
309
+ // Unset/-1 token causes exception in SentencePiece detokenization.
310
+ if (m_pad_token_id != -1 && m_pad_token.empty ())
311
+ m_pad_token = decode (std::vector{m_pad_token_id}, {ov::genai::add_special_tokens (true )});
312
+ if (m_bos_token_id != -1 && m_bos_token.empty ())
313
+ m_bos_token = decode (std::vector{m_bos_token_id}, {ov::genai::add_special_tokens (true )});
314
+ if (m_eos_token_id != -1 && m_eos_token.empty ())
315
+ m_eos_token = decode (std::vector{m_eos_token_id}, {ov::genai::add_special_tokens (true )});
244
316
}
245
317
246
318
// Initialize tokenizer's cache to save time later.
@@ -251,24 +323,6 @@ class Tokenizer::TokenizerImpl {
251
323
if (m_detokenizer) {
252
324
decode ({1 , 33 , 199 , 42 , 42 });
253
325
}
254
-
255
- if (m_tokenizer) {
256
- utils::read_rt_info (ov_tokenizer, " chat_template" , m_chat_template);
257
- utils::read_rt_info (ov_tokenizer, " pad_token_id" , m_pad_token_id);
258
- utils::read_rt_info (ov_tokenizer, " bos_token_id" , m_bos_token_id);
259
- utils::read_rt_info (ov_tokenizer, " eos_token_id" , m_eos_token_id);
260
- }
261
-
262
- m_chat_template = patch_chat_template (m_chat_template);
263
- if (m_detokenizer) {
264
- // Unset/-1 token causes exception in SentencePiece detokenization.
265
- if (m_pad_token_id != -1 && m_pad_token.empty ())
266
- m_pad_token = decode (std::vector{m_pad_token_id}, {ov::genai::add_special_tokens (true )});
267
- if (m_bos_token_id != -1 && m_bos_token.empty ())
268
- m_bos_token = decode (std::vector{m_bos_token_id}, {ov::genai::add_special_tokens (true )});
269
- if (m_eos_token_id != -1 && m_eos_token.empty ())
270
- m_eos_token = decode (std::vector{m_eos_token_id}, {ov::genai::add_special_tokens (true )});
271
- }
272
326
}
273
327
274
328
// load special tokens ids from config.json
@@ -495,53 +549,10 @@ class Tokenizer::TokenizerImpl {
495
549
return std::vector<std::string>(res_data, res_data + res.get_shape ()[0 ]);
496
550
}
497
551
498
- std::string patch_chat_template (std::string template_str) const {
499
- for (const auto & [chat_template, fallback] : chat_template_fallback_map) {
500
- if (template_str == chat_template) {
501
- return fallback;
502
- }
503
- }
504
-
505
- // Replace what jinja2cpp doesn't support
506
- std::pair<std::string, std::string> replace_str_map[] = {
507
- {" '}" , " ' }" },
508
- {" {'" , " { '" },
509
- {" .strip()" , " " },
510
- {" is not none" , " is defined" },
511
- {" is none" , " is undefined" },
512
- {" = none" , " = undefined" },
513
- // Jinja2Cpp does not support Python-style slicing, e.g. [1:].
514
- // If chat template contains such slicing, we replace it with
515
- // a placeholder at the moment.
516
- {" messages[1:]" , " slice(messages, 1)" },
517
- };
518
-
519
- for (const auto & [from, to] : replace_str_map) {
520
- size_t pos = 0 ;
521
- while ((pos = template_str.find (from, pos)) != std::string::npos) {
522
- template_str.replace (pos, from.size (), to);
523
- pos += to.size ();
524
- }
525
- }
526
- return template_str;
527
- }
528
-
529
- std::string chat_template_from_file_if_exists (const std::filesystem::path& path, const std::string& file_name) {
530
- auto tokenizer_config_file_path = path / file_name;
531
- if (!std::filesystem::exists (tokenizer_config_file_path))
532
- return " " ;
533
- std::ifstream file (tokenizer_config_file_path);
534
-
535
- std::string res;
536
- ov::genai::utils::read_json_param (nlohmann::json::parse (file), " chat_template" , res);
537
-
538
- return patch_chat_template (res);
539
- }
540
-
541
552
std::string apply_chat_template (ChatHistory history,
542
553
bool add_generation_prompt,
543
554
const std::string& chat_template) const {
544
- std::string chat_tpl = chat_template.empty () ? m_chat_template : patch_chat_template (chat_template);
555
+ std::string chat_tpl = chat_template.empty () ? m_chat_template : remap_and_patch (chat_template);
545
556
OPENVINO_ASSERT (!chat_tpl.empty (),
546
557
" Chat template wasn't found. This may indicate that the model wasn't trained for chat scenario."
547
558
" Please add 'chat_template' to tokenizer_config.json to use the model in chat scenario."
@@ -599,7 +610,7 @@ class Tokenizer::TokenizerImpl {
599
610
}
600
611
601
612
void set_chat_template (const std::string& chat_template) {
602
- m_chat_template = patch_chat_template (chat_template);
613
+ m_chat_template = remap_and_patch (chat_template);
603
614
}
604
615
605
616
std::string get_chat_template () {
0 commit comments