@@ -438,7 +438,8 @@ class Tokenizer::TokenizerImpl {
438
438
set_state_if_necessary (infer_request_guard, tokenization_params);
439
439
size_t batch_size = 1 ;
440
440
infer_request_guard.get ().set_input_tensor (ov::Tensor{ov::element::string, {batch_size}, &prompt});
441
- infer_request_guard.get ().infer ();
441
+ infer_request_guard.get ().start_async ();
442
+ infer_request_guard.get ().wait ();
442
443
443
444
return get_copied_results (
444
445
infer_request_guard.get ().get_tensor (" input_ids" ),
@@ -456,7 +457,8 @@ class Tokenizer::TokenizerImpl {
456
457
set_state_if_necessary (infer_request_guard, tokenization_params);
457
458
infer_request_guard.get ().set_input_tensor (ov::Tensor{ov::element::string, {prompts.size ()}, prompts.data ()});
458
459
auto size_ = infer_request_guard.get ().get_input_tensor ().get_shape ();
459
- infer_request_guard.get ().infer ();
460
+ infer_request_guard.get ().start_async ();
461
+ infer_request_guard.get ().wait ();
460
462
461
463
unpadded = get_copied_results (
462
464
infer_request_guard.get ().get_tensor (" input_ids" ),
@@ -483,7 +485,8 @@ class Tokenizer::TokenizerImpl {
483
485
set_state_if_necessary (infer_request_guard, detokenization_params);
484
486
size_t batch_size = 1 ;
485
487
infer_request_guard.get ().set_input_tensor (ov::Tensor{ov::element::i64, {batch_size, tokens.size ()}, tokens.data ()});
486
- infer_request_guard.get ().infer ();
488
+ infer_request_guard.get ().start_async ();
489
+ infer_request_guard.get ().wait ();
487
490
return infer_request_guard.get ().get_output_tensor ().data <std::string>()[0 ];
488
491
}
489
492
@@ -495,7 +498,8 @@ class Tokenizer::TokenizerImpl {
495
498
CircularBufferQueueElementGuard<ov::InferRequest> infer_request_guard (this ->m_ireq_queue_detokenizer .get ());
496
499
set_state_if_necessary (infer_request_guard, detokenization_params);
497
500
infer_request_guard.get ().set_input_tensor (tokens);
498
- infer_request_guard.get ().infer ();
501
+ infer_request_guard.get ().start_async ();
502
+ infer_request_guard.get ().wait ();
499
503
500
504
auto res = infer_request_guard.get ().get_output_tensor ();
501
505
auto res_data = res.data <std::string>();
@@ -523,7 +527,8 @@ class Tokenizer::TokenizerImpl {
523
527
CircularBufferQueueElementGuard<ov::InferRequest> infer_request_guard (this ->m_ireq_queue_detokenizer .get ());
524
528
set_state_if_necessary (infer_request_guard, detokenization_params);
525
529
infer_request_guard.get ().set_input_tensor (tokens);
526
- infer_request_guard.get ().infer ();
530
+ infer_request_guard.get ().start_async ();
531
+ infer_request_guard.get ().wait ();
527
532
auto res = infer_request_guard.get ().get_output_tensor ();
528
533
auto res_data = res.data <std::string>();
529
534
return std::vector<std::string>(res_data, res_data + res.get_shape ()[0 ]);
0 commit comments