diff --git a/optimum/commands/export/openvino.py b/optimum/commands/export/openvino.py index 4fed3f6f88..025a40e057 100644 --- a/optimum/commands/export/openvino.py +++ b/optimum/commands/export/openvino.py @@ -226,6 +226,9 @@ def run(self): ) library_name = "transformers" + if self.args.convert_tokenizer: + logger.warning("`--convert-tokenizer` option is deprecated. Tokenizer will be converted by default.") + if ( library_name == "diffusers" and ov_config @@ -261,10 +264,21 @@ def run(self): ) model.save_pretrained(self.args.output) - else: - if self.args.convert_tokenizer: - logger.warning("`--convert-tokenizer` option is deprecated. Tokenizer will be converted by default.") + if self.args.disable_convert_tokenizer: + return + + # avoid import when using other exporters (IPEX, INC) + from ...exporters.openvino.convert import export_tokenizer + output = Path(self.args.output) + tokenizer = getattr(model, "tokenizer", None) + if tokenizer is not None: + export_tokenizer(tokenizer, output / "tokenizer") + + tokenizer_2 = getattr(model, "tokenizer_2", None) + if tokenizer_2 is not None: + export_tokenizer(tokenizer_2, output / "tokenizer_2") + else: # TODO : add input shapes main_export( model_name_or_path=self.args.model, diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py index 8908c430b3..31abd0f327 100644 --- a/optimum/exporters/openvino/__main__.py +++ b/optimum/exporters/openvino/__main__.py @@ -24,7 +24,7 @@ from optimum.exporters import TasksManager from optimum.exporters.onnx.base import OnnxConfig from optimum.exporters.onnx.constants import SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED -from optimum.exporters.openvino.convert import export_from_model, export_tokenizer +from optimum.exporters.openvino.convert import export_from_model from optimum.intel.utils.import_utils import is_openvino_tokenizers_available, is_transformers_version from optimum.utils.save_utils import maybe_load_preprocessors @@ -355,6 +355,9 @@ class StoreAttr(object): **kwargs_shapes, ) + # hide openvino import when using other exporters + from optimum.exporters.openvino.convert import export_tokenizer + if convert_tokenizer and is_openvino_tokenizers_available(): if library_name != "diffusers": tokenizer = next( @@ -373,11 +376,11 @@ class StoreAttr(object): else: tokenizer = getattr(model, "tokenizer", None) if tokenizer is not None: - export_tokenizer(tokenizer, output) + export_tokenizer(tokenizer, output / "tokenizer") tokenizer_2 = getattr(model, "tokenizer_2", None) if tokenizer_2 is not None: - export_tokenizer(tokenizer_2, output, suffix="_2") + export_tokenizer(tokenizer_2, output / "tokenizer_2") elif convert_tokenizer and not is_openvino_tokenizers_available(): logger.warning("Tokenizer won't be converted.") diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py index 6c86c2c2df..bb781a6904 100644 --- a/optimum/exporters/openvino/convert.py +++ b/optimum/exporters/openvino/convert.py @@ -667,20 +667,21 @@ def export_tokenizer( output: Union[str, Path], suffix: Optional[str] = "", ): - from optimum.intel.openvino import OV_DETOKENIZER_NAME, OV_TOKENIZER_NAME # avoid circular imports + # avoid circular imports + from optimum.intel.openvino import OV_DETOKENIZER_NAME, OV_TOKENIZER_NAME + from optimum.intel.openvino.utils import maybe_convert_tokenizer_to_fast try: from openvino_tokenizers import convert_tokenizer except ModuleNotFoundError: - # avoid this message before tokenizers are part of the openvino dependencies - # logger.info( - # "Run `pip install openvino-tokenizers[transformers]` to get OpenVINO tokenizer/detokenizer models." - # ) return if not isinstance(output, Path): output = Path(output) + if output.exists(): + tokenizer = maybe_convert_tokenizer_to_fast(tokenizer, output) + try: converted = convert_tokenizer(tokenizer, with_detokenizer=True) except NotImplementedError: diff --git a/optimum/intel/openvino/utils.py b/optimum/intel/openvino/utils.py index 4d1479f733..69a750fb65 100644 --- a/optimum/intel/openvino/utils.py +++ b/optimum/intel/openvino/utils.py @@ -17,10 +17,13 @@ import logging import os from glob import glob +from pathlib import Path +from typing import Tuple, Union import numpy as np from huggingface_hub import model_info from openvino.runtime import Core, Type, properties +from transformers import AutoTokenizer, CLIPTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast from transformers.onnx.utils import ParameterFormat, compute_serialized_parameters_size @@ -107,6 +110,24 @@ } +NEED_CONVERT_TO_FAST_TOKENIZER: Tuple[type(PreTrainedTokenizer)] = (CLIPTokenizer,) + + +def maybe_convert_tokenizer_to_fast( + hf_tokenizer: PreTrainedTokenizer, tokenizer_path: Path +) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]: + if isinstance(hf_tokenizer, PreTrainedTokenizerFast): + return hf_tokenizer + + if isinstance(hf_tokenizer, NEED_CONVERT_TO_FAST_TOKENIZER): + try: + return AutoTokenizer.from_pretrained(tokenizer_path) + except Exception: + return hf_tokenizer + + return hf_tokenizer + + def use_external_data_format(num_parameters: int) -> bool: """ Returns whether or not the model requires using external data format for the ONNX export diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py index 09fad5d773..cac79abaee 100644 --- a/tests/openvino/test_exporters_cli.py +++ b/tests/openvino/test_exporters_cli.py @@ -74,8 +74,8 @@ class OVCLIExportTestCase(unittest.TestCase): "wav2vec2": 0, # no tokenizer "bert": 1, # no detokenizer "blenderbot": 2, - "stable-diffusion": 0, # not supported - "stable-diffusion-xl": 0, # not supported + "stable-diffusion": 2, + "stable-diffusion-xl": 4, } SUPPORTED_SD_HYBRID_ARCHITECTURES = (