@@ -156,8 +156,8 @@ def parse(
             ),
         ]:
             add_steps()
-        self.pipeline.eos_token_id = getattr(self.original_tokenizer, "eos_token_id", None)
 
+        self.pipeline.eos_token_id = self.pipeline.get_eos_token_id(self.original_tokenizer)
         return self.pipeline
 
 normalizers_map: Dict[
@@ -522,8 +522,9 @@ def convert_sentencepiece_model_tokenizer(
     tokenizer = Model(outputs, [input_node], TOKENIZER_NAME)
     tokenizer.validate_nodes_and_infer_types()
 
-    if hf_tokenizer.eos_token_id is not None:
-        tokenizer.set_rt_info(hf_tokenizer.eos_token_id, EOS_TOKEN_ID_NAME)
+    eos_token_id = TokenizerPipeline.get_eos_token_id(hf_tokenizer)
+    if eos_token_id is not None:
+        tokenizer.set_rt_info(eos_token_id, EOS_TOKEN_ID_NAME)
 
     if not with_detokenizer:
         return tokenizer
@@ -537,8 +538,8 @@ def convert_sentencepiece_model_tokenizer(
         clean_up_tokenization_spaces=clean_up_tokenization_spaces,
     )
 
-    if hf_tokenizer.eos_token_id is not None:
-        detokenizer.set_rt_info(hf_tokenizer.eos_token_id, EOS_TOKEN_ID_NAME)
+    if eos_token_id is not None:
+        detokenizer.set_rt_info(eos_token_id, EOS_TOKEN_ID_NAME)
 
     return tokenizer, detokenizer
 
@@ -613,9 +614,9 @@ def convert_tiktoken_model_tokenizer(
     if clean_up_tokenization_spaces:
         pipeline.add_steps(RegexDecodingStep.clean_up_tokenization_spaces())
 
+    pipeline.eos_token_id = pipeline.get_eos_token_id(hf_tokenizer)
+
     if not with_detokenizer:
         return pipeline.get_tokenizer_ov_subgraph()
 
-    pipeline.eos_token_id = hf_tokenizer.eos_token_id
-
     return pipeline.get_tokenizer_ov_subgraph(), pipeline.get_detokenizer_ov_subgraph()
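Across all four hunks the change is the same: the scattered getattr(hf_tokenizer, "eos_token_id", None) reads are replaced by a single TokenizerPipeline.get_eos_token_id helper. The diff does not include the helper's body; below is a minimal sketch of what such a centralized lookup could look like, assuming it keeps the old attribute read and adds a fallback through the vocabulary. It is an illustration, not the repository's actual implementation.

# Hypothetical sketch of the centralized lookup; the real helper may differ.
from typing import Any, Optional


class TokenizerPipeline:
    eos_token_id: Optional[int] = None

    @staticmethod
    def get_eos_token_id(hf_tokenizer: Any) -> Optional[int]:
        # Keep the behavior of the removed getattr-based lookups.
        eos_token_id = getattr(hf_tokenizer, "eos_token_id", None)
        if eos_token_id is not None:
            return eos_token_id
        # Assumed fallback: resolve the eos token string through the vocab
        # for tokenizers that set eos_token but leave eos_token_id unset.
        eos_token = getattr(hf_tokenizer, "eos_token", None)
        if eos_token is not None and hasattr(hf_tokenizer, "convert_tokens_to_ids"):
            return hf_tokenizer.convert_tokens_to_ids(eos_token)
        return None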
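The set_rt_info calls store the id in the converted model's runtime info, so it can be read back at load time without the original Hugging Face tokenizer. A usage sketch with the OpenVINO Python API, assuming EOS_TOKEN_ID_NAME is the string "eos_token_id" and a converted tokenizer saved as openvino_tokenizer.xml (both names are assumptions):

import openvino as ov

core = ov.Core()
ov_tokenizer = core.read_model("openvino_tokenizer.xml")  # assumed filename

# "eos_token_id" is assumed to be the value of EOS_TOKEN_ID_NAME.
if ov_tokenizer.has_rt_info("eos_token_id"):
    eos_token_id = ov_tokenizer.get_rt_info("eos_token_id").value
    print(f"stored eos_token_id: {eos_token_id}")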