@@ -4,16 +4,17 @@
 #include <filesystem>
 #include <fstream>
 #include <memory>
-
 #include <jinja2cpp/template.h>
 #include <jinja2cpp/template_env.h>
 #include <jinja2cpp/user_callable.h>
 #include <jinja2cpp/generic_list.h>
 #include <jinja2cpp/generic_list_iterator.h>
 
+#include "openvino/pass/manager.hpp"
 #include "openvino/runtime/core.hpp"
 #include "openvino/genai/tokenizer.hpp"
 
+#include "make_combine_segments_stateful.hpp"
 #include "tokenizers_path.hpp"
 #include "circular_buffer_queue.hpp"
 #include "utils.hpp"
@@ -69,7 +70,10 @@ class Tokenizer::TokenizerImpl {
 
     std::unique_ptr<CircularBufferQueue<ov::InferRequest>> m_ireq_queue_tokenizer;
     std::unique_ptr<CircularBufferQueue<ov::InferRequest>> m_ireq_queue_detokenizer;
-
+    // To change the add_special_tokens mode we use a stateful subgraph;
+    // this flag caches the state value currently set in the CompiledModel.
+    bool m_add_special_tokens = true;
+
     int64_t m_pad_token_id = -1;
     int64_t m_bos_token_id = -1;
     int64_t m_eos_token_id = -1;
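The new flag mirrors the boolean stored in the compiled model's state, so a state write is only issued when the requested mode actually differs (see `set_state_if_necessary` below). The state value itself travels as a rank-0 boolean tensor; here is a minimal standalone sketch of such a scalar tensor, using only the public `ov::Tensor` API:

```cpp
#include "openvino/runtime/tensor.hpp"

int main() {
    // An empty shape {} means rank-0: the tensor holds exactly one element.
    ov::Tensor flag(ov::element::boolean, ov::Shape{});
    *flag.data<bool>() = false;         // write the scalar
    return *flag.data<bool>() ? 1 : 0;  // read it back
}
```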
@@ -80,6 +84,29 @@ class Tokenizer::TokenizerImpl {
 
     std::string m_chat_template = "";
 
+    void set_state_if_necessary(CircularBufferQueueElementGuard<ov::InferRequest>& infer_request_guard, bool add_special_tokens) {
+        // If the user requested an add_special_tokens mode different from the
+        // current one, the state variable has to be updated;
+        // if the requested mode matches the stored state, don't touch the states.
+        if (add_special_tokens == m_add_special_tokens) {
+            return;
+        }
+
+        // Rank-0 (scalar) boolean tensor holding the new flag value.
+        ov::Tensor add_special_tensor = ov::Tensor(ov::element::boolean, {});
+        *add_special_tensor.data<bool>() = add_special_tokens;
+
+        for (auto& state : infer_request_guard.get().query_state()) {
+            if (state.get_name().find(ov::genai::ADD_SPECIAL_TOKENS_VAR_ID) == std::string::npos) {
+                // Not the add_special_tokens flag state.
+                continue;
+            }
+            state.set_state(add_special_tensor);
+            break;
+        }
+        m_add_special_tokens = add_special_tokens;
+    }
+
     TokenizerImpl() = default;
 
     TokenizerImpl(std::filesystem::path tokenizer_path, const ov::AnyMap& plugin_config)
@@ -99,13 +126,18 @@ class Tokenizer::TokenizerImpl {
         read_tokenizer_config_if_necessary(tokenizer_path);
 
         auto device = "CPU";  // currently openvino_tokenizer supports only CPU
-        m_tokenizer = core.compile_model(tokenizer_path / "openvino_tokenizer.xml",
-                                         device, plugin_config);
+        auto ov_tokenizer = core.read_model(tokenizer_path / "openvino_tokenizer.xml");
+
+        ov::pass::Manager manager;
+        manager.register_pass<MakeCombineSegmentsSatateful>();
+        manager.run_passes(ov_tokenizer);
+
+        m_tokenizer = core.compile_model(ov_tokenizer, device, plugin_config);
         if (std::filesystem::exists(tokenizer_path / "openvino_detokenizer.xml")) {
-            m_detokenizer = core.compile_model(tokenizer_path / "openvino_detokenizer.xml",
-                                               device, plugin_config);
+            m_detokenizer = core.compile_model(tokenizer_path / "openvino_detokenizer.xml", device, plugin_config);
         }
 
+
         const size_t INFER_REQUEST_QUEUE_SIZE = m_tokenizer.get_property(ov::optimal_number_of_infer_requests);
         m_ireq_queue_tokenizer = std::make_unique<CircularBufferQueue<ov::InferRequest>>(
             INFER_REQUEST_QUEUE_SIZE,
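The `MakeCombineSegmentsSatateful` pass is defined in `make_combine_segments_stateful.hpp`, which is not part of this diff. A hedged sketch of what such a transformation presumably inserts: a boolean ReadValue/Assign pair registered under `ADD_SPECIAL_TOKENS_VAR_ID`, which is how `set_state_if_necessary` later finds the state by name. The helper below shows only the variable wiring; how its output gates the CombineSegments inputs is an assumption.

```cpp
#include <memory>
#include <string>

#include "openvino/core/model.hpp"
#include "openvino/op/assign.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/read_value.hpp"
#include "openvino/op/util/variable.hpp"

// Hypothetical helper: create a scalar boolean variable defaulting to `true`
// (matching m_add_special_tokens above) and register it on the model so that
// InferRequest::query_state() exposes it under `variable_id`.
std::shared_ptr<ov::Node> make_add_special_tokens_state(const std::shared_ptr<ov::Model>& model,
                                                        const std::string& variable_id) {
    auto variable = std::make_shared<ov::op::util::Variable>(
        ov::op::util::VariableInfo{ov::Shape{}, ov::element::boolean, variable_id});
    auto init = ov::op::v0::Constant::create(ov::element::boolean, ov::Shape{}, {true});
    auto read = std::make_shared<ov::op::v6::ReadValue>(init, variable);
    auto assign = std::make_shared<ov::op::v6::Assign>(read, variable);
    model->add_sinks({assign});
    model->add_variables({variable});
    // The real pass would route `read` into the CombineSegments subgraph so the
    // special-token segments are emitted only while the state reads true.
    return read;
}
```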
@@ -256,8 +288,12 @@ class Tokenizer::TokenizerImpl {
         get_id_from_str(m_eos_token, m_eos_token_id);
     }
 
-    TokenizedInputs encode(std::string prompt) {
+    TokenizedInputs encode(std::string prompt, const ov::AnyMap& tokenization_params = {}) {
+        bool add_special_tokens_flag = true;
+        ov::genai::utils::read_anymap_param(tokenization_params, add_special_tokens.name(), add_special_tokens_flag);
+
         CircularBufferQueueElementGuard<ov::InferRequest> infer_request_guard(this->m_ireq_queue_tokenizer.get());
+        set_state_if_necessary(infer_request_guard, add_special_tokens_flag);
         size_t batch_size = 1;
         infer_request_guard.get().set_input_tensor(ov::Tensor{ov::element::string, {batch_size}, &prompt});
         infer_request_guard.get().start_async();
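`ov::genai::utils::read_anymap_param` is an existing project utility that is not shown in this diff. The sketch below only illustrates its assumed semantics, namely look up a key and overwrite the out-parameter when present, otherwise keep the caller's default (`true` here):

```cpp
#include <string>

#include "openvino/core/any.hpp"

// Assumed behaviour only; the real helper lives in ov::genai::utils.
template <typename T>
void read_anymap_param_sketch(const ov::AnyMap& params, const std::string& name, T& out) {
    auto it = params.find(name);
    if (it != params.end()) {
        out = it->second.as<T>();  // key present: take the user-supplied value
    }
    // Key absent: `out` keeps the default the caller initialized it with.
}
```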
@@ -268,10 +304,15 @@ class Tokenizer::TokenizerImpl {
         );
     }
 
-    TokenizedInputs encode(std::vector<std::string>& prompts) {
+    TokenizedInputs encode(std::vector<std::string>& prompts, const ov::AnyMap& tokenization_params = {}) {
+
         TokenizedInputs unpadded;
         {
+            bool add_special_tokens_flag = true;
+            ov::genai::utils::read_anymap_param(tokenization_params, add_special_tokens.name(), add_special_tokens_flag);
+
             CircularBufferQueueElementGuard<ov::InferRequest> infer_request_guard(this->m_ireq_queue_tokenizer.get());
+            set_state_if_necessary(infer_request_guard, add_special_tokens_flag);
             infer_request_guard.get().set_input_tensor(ov::Tensor{ov::element::string, {prompts.size()}, prompts.data()});
             auto size_ = infer_request_guard.get().get_input_tensor().get_shape();
             infer_request_guard.get().start_async();
@@ -454,20 +495,20 @@ Tokenizer::Tokenizer(const std::string& tokenizer_path, const ov::AnyMap& plugin
     m_pimpl = std::make_shared<TokenizerImpl>(tokenizer_path, plugin_config);
 }
 
-TokenizedInputs Tokenizer::encode(const std::string prompt) {
-    return m_pimpl->encode(std::move(prompt));
+TokenizedInputs Tokenizer::encode(const std::string prompt, const ov::AnyMap& tokenization_params) {
+    return m_pimpl->encode(std::move(prompt), tokenization_params);
 }
 
-TokenizedInputs Tokenizer::encode(std::vector<std::string>& prompts) {
-    return m_pimpl->encode(prompts);
+TokenizedInputs Tokenizer::encode(std::vector<std::string>& prompts, const ov::AnyMap& tokenization_params) {
+    return m_pimpl->encode(prompts, tokenization_params);
 }
 
-TokenizedInputs Tokenizer::encode(std::vector<std::string>&& prompts) {
-    return m_pimpl->encode(prompts);
+TokenizedInputs Tokenizer::encode(std::vector<std::string>&& prompts, const ov::AnyMap& tokenization_params) {
+    return m_pimpl->encode(prompts, tokenization_params);
 }
 
-TokenizedInputs Tokenizer::encode(std::initializer_list<std::string>& text) {
-    return encode(std::vector<std::string>(text.begin(), text.end()));
+TokenizedInputs Tokenizer::encode(std::initializer_list<std::string>& text, const ov::AnyMap& tokenization_params) {
+    return encode(std::vector<std::string>(text.begin(), text.end()), tokenization_params);
 }
 
 std::string Tokenizer::decode(std::vector<int64_t> tokens) {
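With all four public overloads extended, per-call control over special tokens looks roughly like the sketch below. The string key is an assumption derived from `add_special_tokens.name()` in the implementation above, and the model path is a placeholder:

```cpp
#include "openvino/genai/tokenizer.hpp"

int main() {
    // Placeholder path: any directory containing openvino_tokenizer.xml.
    ov::genai::Tokenizer tokenizer("path/to/tokenizer_dir");

    // Default: the tokenizer model inserts special tokens (e.g. BOS).
    auto with_special = tokenizer.encode("Hello world");

    // Assumed key name for the add_special_tokens property; the compiled
    // model's state flips once and is then cached via m_add_special_tokens.
    auto without_special = tokenizer.encode("Hello world", {{"add_special_tokens", false}});
    return 0;
}
```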