@@ -188,18 +188,14 @@ C++ template for a streamer.
 class CustomStreamer : public ov::genai::StreamerBase {
 public:
     bool put(int64_t token) {
-        bool stop_flag = false;
-        /*
-        custom decoding/tokens processing code
-        tokens_cache.push_back(token);
-        std::string text = m_tokenizer.decode(tokens_cache);
-        ...
-        */
-        return stop_flag; // flag whether generation should be stopped, if true generation stops.
+        // Custom decoding/tokens processing logic.
+
+        // Returns a flag indicating whether generation should stop; if true, generation stops.
+        return false;
     };
 
     void end() {
-        /* custom finalization */
+        // Custom finalization logic.
     };
 };
 
@@ -208,7 +204,7 @@ int main(int argc, char* argv[]) {
 
     std::string model_path = argv[1];
     ov::genai::LLMPipeline pipe(model_path, "CPU");
-    std::cout << pipe.generate("The Sun is yellow because", ov::genai::streamer(custom_streamer), ov::genai::max_new_tokens(200));
+    std::cout << pipe.generate("The Sun is yellow because", ov::genai::max_new_tokens(15), ov::genai::streamer(custom_streamer));
 }
 ```
 
@@ -217,37 +213,21 @@ Python template for a streamer.
 import openvino_genai as ov_genai
 
 class CustomStreamer(ov_genai.StreamerBase):
-    def __init__(self, tokenizer):
+    def __init__(self):
         super().__init__()
-        self.tokenizer = tokenizer
-        # Initialize a cache to store tokens
-        self.tokens_cache = []
+        # Initialization logic.
 
     def put(self, token_id) -> bool:
-        # Process a token ID and determine if the generation should stop.
-        # Return a boolean flag indicating whether the generation should stop.
-        stop_flag = False
-
-        # Add the token to the cache and decode the tokens to get the text
-        self.tokens_cache.append(token_id)
-        text = self.tokenizer.decode(self.tokens_cache)
-
-        # Custom processing logic (if any)
-        # For example, you might want to stop generation if a certain condition is met
-        if some_condition:
-            stop_flag = True
-
-        return stop_flag
+        # Custom decoding/tokens processing logic.
+
+        # Returns a flag indicating whether generation should stop; if true, generation stops.
+        return False
 
     def end(self):
-        # Custom finalization logic (if any)
-        # For example, you might want to process the final text or clear the cache
-        final_text = self.tokenizer.decode(self.tokens_cache)
-        self.tokens_cache = []
-
+        pass  # Custom finalization logic.
 
 pipe = ov_genai.LLMPipeline(model_path, "CPU")
-custom_streamer = TextPrintStreamer(pipe.get_tokenizer())
+custom_streamer = CustomStreamer()
 
 pipe.generate("The Sun is yellow because", max_new_tokens=15, streamer=custom_streamer)
 ```
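
For reference, a concrete streamer in the spirit of the example this change removes might look like the sketch below: it caches token ids, decodes the accumulated text with the pipeline's tokenizer, prints the newly decoded suffix, and stops generation early. The `TextPrintStreamer` name and the token-cache/decode pattern come from the removed snippet; the full-stop stopping condition and the `printed_len` bookkeeping are illustrative assumptions, not part of the OpenVINO GenAI API.

```python
import openvino_genai as ov_genai

class TextPrintStreamer(ov_genai.StreamerBase):
    """Sketch: prints text as it is generated; stops at the first '.'."""

    def __init__(self, tokenizer):
        super().__init__()
        self.tokenizer = tokenizer  # ov_genai.Tokenizer from pipe.get_tokenizer()
        self.tokens_cache = []      # all token ids received so far
        self.printed_len = 0        # number of characters already printed

    def put(self, token_id) -> bool:
        # Naive but simple: re-decode the whole cache on every token. A production
        # streamer would also handle tokens that decode to partial characters.
        self.tokens_cache.append(token_id)
        text = self.tokenizer.decode(self.tokens_cache)
        print(text[self.printed_len:], end="", flush=True)
        self.printed_len = len(text)
        # Returning True stops generation (assumed condition: first full stop).
        return text.endswith(".")

    def end(self):
        # Finalization: terminate the output line and reset the state.
        print(flush=True)
        self.tokens_cache = []
        self.printed_len = 0

# model_path is assumed to point to an exported OpenVINO model, as in the template above.
pipe = ov_genai.LLMPipeline(model_path, "CPU")
custom_streamer = TextPrintStreamer(pipe.get_tokenizer())
pipe.generate("The Sun is yellow because", max_new_tokens=100, streamer=custom_streamer)
```

Re-decoding the full cache on each token keeps the sketch short, but its cost grows with sequence length; a real implementation would decode incrementally.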