Skip to content

Commit fb1b35b

Browse files
Merge pull request #697 from apaniukov/ov-tokenizers-leftovers
OV Tokenizers Leftovers
2 parents e6fadb1 + 0029e91 commit fb1b35b

File tree

5 files changed: +52 -13 lines changed

optimum/commands/export/openvino.py

+17 -3
Original file line number | Diff line number | Diff line change
@@ -226,6 +226,9 @@ def run(self):
226226
)
227227
library_name = "transformers"
228228

229+
if self.args.convert_tokenizer:
230+
logger.warning("`--convert-tokenizer` option is deprecated. Tokenizer will be converted by default.")
231+
229232
if (
230233
library_name == "diffusers"
231234
and ov_config
@@ -261,10 +264,21 @@ def run(self):
261264
)
262265
model.save_pretrained(self.args.output)
263266

264-
else:
265-
if self.args.convert_tokenizer:
266-
logger.warning("`--convert-tokenizer` option is deprecated. Tokenizer will be converted by default.")
267+
if self.args.disable_convert_tokenizer:
268+
return
269+
270+
# avoid import when using other exporters (IPEX, INC)
271+
from ...exporters.openvino.convert import export_tokenizer
267272

273+
output = Path(self.args.output)
274+
tokenizer = getattr(model, "tokenizer", None)
275+
if tokenizer is not None:
276+
export_tokenizer(tokenizer, output / "tokenizer")
277+
278+
tokenizer_2 = getattr(model, "tokenizer_2", None)
279+
if tokenizer_2 is not None:
280+
export_tokenizer(tokenizer_2, output / "tokenizer_2")
281+
else:
268282
# TODO : add input shapes
269283
main_export(
270284
model_name_or_path=self.args.model,

optimum/exporters/openvino/__main__.py

+6 -3
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@
2424
from optimum.exporters import TasksManager
2525
from optimum.exporters.onnx.base import OnnxConfig
2626
from optimum.exporters.onnx.constants import SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED
27-
from optimum.exporters.openvino.convert import export_from_model, export_tokenizer
27+
from optimum.exporters.openvino.convert import export_from_model
2828
from optimum.intel.utils.import_utils import is_openvino_tokenizers_available, is_transformers_version
2929
from optimum.utils.save_utils import maybe_load_preprocessors
3030

@@ -355,6 +355,9 @@ class StoreAttr(object):
355355
**kwargs_shapes,
356356
)
357357

358+
# hide openvino import when using other exporters
359+
from optimum.exporters.openvino.convert import export_tokenizer
360+
358361
if convert_tokenizer and is_openvino_tokenizers_available():
359362
if library_name != "diffusers":
360363
tokenizer = next(
@@ -373,11 +376,11 @@ class StoreAttr(object):
373376
else:
374377
tokenizer = getattr(model, "tokenizer", None)
375378
if tokenizer is not None:
376-
export_tokenizer(tokenizer, output)
379+
export_tokenizer(tokenizer, output / "tokenizer")
377380

378381
tokenizer_2 = getattr(model, "tokenizer_2", None)
379382
if tokenizer_2 is not None:
380-
export_tokenizer(tokenizer_2, output, suffix="_2")
383+
export_tokenizer(tokenizer_2, output / "tokenizer_2")
381384
elif convert_tokenizer and not is_openvino_tokenizers_available():
382385
logger.warning("Tokenizer won't be converted.")
383386

optimum/exporters/openvino/convert.py

+6 -5
Original file line number | Diff line number | Diff line change
@@ -667,20 +667,21 @@ def export_tokenizer(
667667
output: Union[str, Path],
668668
suffix: Optional[str] = "",
669669
):
670-
from optimum.intel.openvino import OV_DETOKENIZER_NAME, OV_TOKENIZER_NAME # avoid circular imports
670+
# avoid circular imports
671+
from optimum.intel.openvino import OV_DETOKENIZER_NAME, OV_TOKENIZER_NAME
672+
from optimum.intel.openvino.utils import maybe_convert_tokenizer_to_fast
671673

672674
try:
673675
from openvino_tokenizers import convert_tokenizer
674676
except ModuleNotFoundError:
675-
# avoid this message before tokenizers are part of the openvino dependencies
676-
# logger.info(
677-
# "Run `pip install openvino-tokenizers[transformers]` to get OpenVINO tokenizer/detokenizer models."
678-
# )
679677
return
680678

681679
if not isinstance(output, Path):
682680
output = Path(output)
683681

682+
if output.exists():
683+
tokenizer = maybe_convert_tokenizer_to_fast(tokenizer, output)
684+
684685
try:
685686
converted = convert_tokenizer(tokenizer, with_detokenizer=True)
686687
except NotImplementedError:

optimum/intel/openvino/utils.py

+21 -0
Original file line number | Diff line number | Diff line change
@@ -17,10 +17,13 @@
1717
import logging
1818
import os
1919
from glob import glob
20+
from pathlib import Path
21+
from typing import Tuple, Union
2022

2123
import numpy as np
2224
from huggingface_hub import model_info
2325
from openvino.runtime import Core, Type, properties
26+
from transformers import AutoTokenizer, CLIPTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast
2427
from transformers.onnx.utils import ParameterFormat, compute_serialized_parameters_size
2528

2629

@@ -107,6 +110,24 @@
107110
}
108111

109112

113+
NEED_CONVERT_TO_FAST_TOKENIZER: Tuple[type(PreTrainedTokenizer)] = (CLIPTokenizer,)
114+
115+
116+
def maybe_convert_tokenizer_to_fast(
117+
hf_tokenizer: PreTrainedTokenizer, tokenizer_path: Path
118+
) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]:
119+
if isinstance(hf_tokenizer, PreTrainedTokenizerFast):
120+
return hf_tokenizer
121+
122+
if isinstance(hf_tokenizer, NEED_CONVERT_TO_FAST_TOKENIZER):
123+
try:
124+
return AutoTokenizer.from_pretrained(tokenizer_path)
125+
except Exception:
126+
return hf_tokenizer
127+
128+
return hf_tokenizer
129+
130+
110131
def use_external_data_format(num_parameters: int) -> bool:
111132
"""
112133
Returns whether or not the model requires using external data format for the ONNX export

tests/openvino/test_exporters_cli.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -74,8 +74,8 @@ class OVCLIExportTestCase(unittest.TestCase):
7474
"wav2vec2": 0, # no tokenizer
7575
"bert": 1, # no detokenizer
7676
"blenderbot": 2,
77-
"stable-diffusion": 0, # not supported
78-
"stable-diffusion-xl": 0, # not supported
77+
"stable-diffusion": 2,
78+
"stable-diffusion-xl": 4,
7979
}
8080

8181
SUPPORTED_SD_HYBRID_ARCHITECTURES = (

0 commit comments

Comments
 (0)