diff --git a/examples/openvino/stable-diffusion/requirements.txt b/examples/openvino/stable-diffusion/requirements.txt
index 8f8eb6a770..8dd237913a 100644
--- a/examples/openvino/stable-diffusion/requirements.txt
+++ b/examples/openvino/stable-diffusion/requirements.txt
@@ -2,4 +2,4 @@ accelerate
 diffusers
 torch~=1.13
 nncf @ git+https://github.com/openvinotoolkit/nncf.git
-tomesd @ git+https://github.com/AlexKoff88/tomesd/tree/openvino
+tomesd @ git+https://github.com/AlexKoff88/tomesd.git@openvino
diff --git a/examples/openvino/stable-diffusion/train_text_to_image_qat.py b/examples/openvino/stable-diffusion/train_text_to_image_qat.py
index 147490c586..9a4a6b7415 100644
--- a/examples/openvino/stable-diffusion/train_text_to_image_qat.py
+++ b/examples/openvino/stable-diffusion/train_text_to_image_qat.py
@@ -19,7 +19,6 @@
 import math
 import os
 import random
-import tempfile
 from copy import deepcopy
 from functools import partial
 from io import BytesIO
@@ -34,7 +33,7 @@
 import torch.utils.checkpoint
 from accelerate import Accelerator
 from accelerate.logging import get_logger
-from accelerate.utils import set_seed
+from accelerate.utils import ProjectConfiguration, set_seed
 from datasets import load_dataset
 from diffusers import DDIMScheduler, DDPMScheduler, DiffusionPipeline, LMSDiscreteScheduler, StableDiffusionPipeline
 from diffusers.optimization import get_scheduler
@@ -44,20 +43,12 @@
 from nncf.torch import create_compressed_model, register_default_init_args
 from nncf.torch.initialization import PTInitializingDataLoader
 from nncf.torch.layer_utils import CompressionParameter
-from openvino._offline_transformations import apply_moc_transformations, compress_quantize_weights_transformation
 from PIL import Image
 from requests.packages.urllib3.exceptions import InsecureRequestWarning
 from torchvision import transforms
 from tqdm import tqdm

-from optimum.exporters.onnx import export_models, get_stable_diffusion_models_for_export
-from optimum.intel import OVStableDiffusionPipeline
-from optimum.utils import (
-    DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER,
-    DIFFUSION_MODEL_UNET_SUBFOLDER,
-    DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER,
-    DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER,
-)
+from optimum.exporters.openvino import export_from_model


 requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
@@ -583,47 +574,6 @@ def get_noise_scheduler(args):
     return noise_scheduler


-def export_to_onnx(pipeline, save_dir):
-    unet = pipeline.unet
-    vae = pipeline.vae
-    text_encoder = pipeline.text_encoder
-
-    unet.eval().cpu()
-    vae.eval().cpu()
-    text_encoder.eval().cpu()
-
-    ONNX_WEIGHTS_NAME = "model.onnx"
-
-    output_names = [
-        os.path.join(DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER, ONNX_WEIGHTS_NAME),
-        os.path.join(DIFFUSION_MODEL_UNET_SUBFOLDER, ONNX_WEIGHTS_NAME),
-        os.path.join(DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER, ONNX_WEIGHTS_NAME),
-        os.path.join(DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER, ONNX_WEIGHTS_NAME),
-    ]
-
-    with torch.no_grad():
-        models_and_onnx_configs = get_stable_diffusion_models_for_export(pipeline)
-        pipeline.save_config(save_dir)
-        export_models(
-            models_and_onnx_configs=models_and_onnx_configs, output_dir=Path(save_dir), output_names=output_names
-        )
-
-
-def export_to_openvino(pipeline, onnx_dir, save_dir):
-    ov_pipe = OVStableDiffusionPipeline.from_pretrained(
-        model_id=onnx_dir,
-        from_onnx=True,
-        model_save_dir=save_dir,
-        tokenizer=pipeline.tokenizer,
-        scheduler=pipeline.scheduler,
-        feature_extractor=pipeline.feature_extractor,
-        compile=False,
-    )
-    apply_moc_transformations(ov_pipe.unet.model, cf=False)
-    compress_quantize_weights_transformation(ov_pipe.unet.model)
-    ov_pipe.save_pretrained(save_dir)
-
-
 class UnetInitDataset(torch.utils.data.Dataset):
     def __init__(self, data):
         super().__init__()
@@ -700,7 +650,7 @@ def get_nncf_config(pipeline, dataloader, args):
         "ignored_scopes": [
             "{re}.*__add___[0-2]",
             "{re}.*layer_norm_0",
-            "{re}.*Attention.*/bmm_0",
+            # "{re}.*Attention.*/bmm_0",
             "{re}.*__truediv__*",
             "{re}.*group_norm_0",
             "{re}.*mul___[0-2]",
@@ -771,11 +721,13 @@ def main():
     logging_dir = os.path.join(args.output_dir, args.logging_dir)

+    accelerator_project_config = ProjectConfiguration(project_dir=args.output_dir, logging_dir=logging_dir)
+
     accelerator = Accelerator(
         gradient_accumulation_steps=args.gradient_accumulation_steps,
         mixed_precision=args.mixed_precision,
         log_with=args.report_to,
-        logging_dir=logging_dir,
+        project_config=accelerator_project_config,
     )

     logging.basicConfig(
@@ -922,7 +874,7 @@ def tokenize_captions(examples, is_train=True):

     with accelerator.main_process_first():
         if args.max_train_samples is not None:
-            dataset["train"] = dataset["train"].shuffle(seed=42, buffer_size=args.max_train_samples)
+            dataset["train"] = dataset["train"].shuffle(seed=42).select(range(args.max_train_samples))
         # Set the training transforms
         train_dataset = dataset["train"]
@@ -1132,9 +1084,8 @@ def collate_fn(examples):
             feature_extractor=pipeline.feature_extractor,
         )

-        with tempfile.TemporaryDirectory() as tmpdirname:
-            export_to_onnx(export_pipeline, tmpdirname)
-            export_to_openvino(export_pipeline, tmpdirname, Path(args.output_dir) / "openvino")
+        save_directory = Path(args.output_dir) / "openvino"
+        export_from_model(export_pipeline, output=save_directory, task="stable-diffusion")


 if __name__ == "__main__":
diff --git a/optimum/exporters/openvino/__init__.py b/optimum/exporters/openvino/__init__.py
index 6fd7970a07..41b456abce 100644
--- a/optimum/exporters/openvino/__init__.py
+++ b/optimum/exporters/openvino/__init__.py
@@ -1,5 +1,5 @@
 from .__main__ import main_export
-from .convert import export, export_models, export_pytorch_via_onnx
+from .convert import export, export_from_model, export_models, export_pytorch_via_onnx
 from .stateful import ensure_stateful_is_available, patch_stateful
diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py
index 96d57ff3b1..18f650c2ad 100644
--- a/optimum/exporters/openvino/__main__.py
+++ b/optimum/exporters/openvino/__main__.py
@@ -13,27 +13,22 @@
 # limitations under the License.
import logging -import os from pathlib import Path from typing import Any, Callable, Dict, Optional, Union from requests.exceptions import ConnectionError as RequestsConnectionError -from transformers import AutoConfig, PreTrainedTokenizerBase +from transformers import AutoConfig, AutoTokenizer, PreTrainedTokenizerBase from optimum.exporters import TasksManager -from optimum.exporters.onnx import __main__ as optimum_main -from optimum.exporters.onnx.base import OnnxConfig, OnnxConfigWithPast -from optimum.utils import DEFAULT_DUMMY_SHAPES -from optimum.utils.save_utils import maybe_load_preprocessors, maybe_save_preprocessors +from optimum.exporters.onnx.base import OnnxConfig +from optimum.utils.save_utils import maybe_load_preprocessors from ...intel.utils.import_utils import ( - is_nncf_available, is_openvino_tokenizers_available, is_optimum_version, is_transformers_version, ) -from .convert import export_models, export_tokenizer -from .stateful import ensure_export_task_support_stateful +from .convert import export_from_model, export_tokenizer if is_optimum_version(">=", "1.16.0"): @@ -45,8 +40,6 @@ "whisper", ] -OV_XML_FILE_NAME = "openvino_model.xml" -_MAX_UNCOMPRESSED_SIZE = 1e9 logger = logging.getLogger(__name__) @@ -72,6 +65,7 @@ def main_export( compression_ratio: Optional[float] = None, stateful: bool = True, convert_tokenizer: bool = False, + library_name: Optional[str] = None, **kwargs_shapes, ): """ @@ -143,37 +137,68 @@ def main_export( >>> main_export("gpt2", output="gpt2_onnx/") ``` """ - if ( - compression_option is not None - and compression_option != "fp16" - and compression_option != "fp32" - and not is_nncf_available() - ): - raise ImportError( - f"Compression of the weights to {compression_option} requires nncf, please install it with `pip install nncf`" - ) - - model_kwargs = model_kwargs or {} - - output = Path(output) - if not output.exists(): - output.mkdir(parents=True) - original_task = task task = TasksManager.map_from_synonym(task) + framework = TasksManager.determine_framework(model_name_or_path, subfolder=subfolder, framework=framework) + library_name = TasksManager.infer_library_from_model( + model_name_or_path, subfolder=subfolder, library_name=library_name + ) + + if task == "auto": + try: + task = TasksManager.infer_task_from_model(model_name_or_path) + except KeyError as e: + raise KeyError( + f"The task could not be automatically inferred. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}" + ) + except RequestsConnectionError as e: + raise RequestsConnectionError( + f"The task could not be automatically inferred as this is available only for models hosted on the Hugging Face Hub. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. 
Detailed error: {e}" + ) + + if convert_tokenizer and not is_openvino_tokenizers_available(): + logger.warning( + "`convert_tokenizer` requires openvino-tokenizers, please install it with `pip install optimum-intel[openvino-tokenizers]`" + ) + convert_tokenizer = False - # Patch the modules to export of GPTQ models w/o GPU do_gptq_patching = False - try: - config = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=trust_remote_code) - model_type = config.model_type.replace("_", "-") - config_dict = config.to_dict() - quantization_config = config_dict.get("quantization_config", None) + custom_architecture = False + loading_kwargs = {} + if library_name == "transformers": + config = AutoConfig.from_pretrained( + model_name_or_path, + subfolder=subfolder, + revision=revision, + cache_dir=cache_dir, + use_auth_token=use_auth_token, + local_files_only=local_files_only, + force_download=force_download, + trust_remote_code=trust_remote_code, + ) + quantization_config = getattr(config, "quantization_config", None) do_gptq_patching = quantization_config and quantization_config["quant_method"] == "gptq" - except Exception: - model_type = None - pass + model_type = config.model_type.replace("_", "-") + + if model_type not in TasksManager._SUPPORTED_MODEL_TYPE: + custom_architecture = True + elif task not in TasksManager.get_supported_tasks_for_model_type( + model_type, exporter="onnx", library_name=library_name + ): + if original_task == "auto": + autodetected_message = " (auto-detected)" + else: + autodetected_message = "" + model_tasks = TasksManager.get_supported_tasks_for_model_type( + model_type, exporter="onnx", library_name=library_name + ) + raise ValueError( + f"Asked to export a {model_type} model for the task {task}{autodetected_message}, but the Optimum OpenVINO exporter only supports the tasks {', '.join(model_tasks.keys())} for {model_type}. Please use a supported task. Please open an issue at https://github.com/huggingface/optimum/issues if you would like the task {task} to be supported in the ONNX export for {model_type}." + ) + if is_transformers_version(">=", "4.36") and model_type in SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED: + loading_kwargs["attn_implementation"] = "eager" + # Patch the modules to export of GPTQ models w/o GPU if do_gptq_patching: import torch @@ -199,31 +224,6 @@ class StoreAttr(object): GPTQQuantizer.post_init_model = post_init_model - framework = TasksManager.determine_framework(model_name_or_path, subfolder=subfolder, framework=framework) - - # get the shapes to be used to generate dummy inputs - input_shapes = {} - for input_name in DEFAULT_DUMMY_SHAPES.keys(): - input_shapes[input_name] = ( - kwargs_shapes[input_name] if input_name in kwargs_shapes else DEFAULT_DUMMY_SHAPES[input_name] - ) - - if task == "auto": - try: - task = TasksManager.infer_task_from_model(model_name_or_path) - except KeyError as e: - raise KeyError( - f"The task could not be automatically inferred. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}" - ) - except RequestsConnectionError as e: - raise RequestsConnectionError( - f"The task could not be automatically inferred as this is available only for models hosted on the Hugging Face Hub. Please provide the argument --task with the relevant task from {', '.join(TasksManager.get_all_tasks())}. 
Detailed error: {e}" - ) - - loading_kwargs = {} - if is_transformers_version(">=", "4.36") and model_type in SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED: - loading_kwargs["attn_implementation"] = "eager" - model = TasksManager.get_model_from_task( task, model_name_or_path, @@ -236,40 +236,39 @@ class StoreAttr(object): trust_remote_code=trust_remote_code, framework=framework, device=device, + library_name=library_name, **loading_kwargs, ) - custom_architecture = False - is_stable_diffusion = "stable-diffusion" in task - model_type = "stable-diffusion" if is_stable_diffusion else model.config.model_type.replace("_", "-") - - if not is_stable_diffusion: - if model_type in TasksManager._UNSUPPORTED_CLI_MODEL_TYPE: - raise ValueError( - f"{model_type} is not supported yet. Only {TasksManager._SUPPORTED_CLI_MODEL_TYPE} are supported. " - f"If you want to support {model_type} please propose a PR or open up an issue." - ) - if model.config.model_type.replace("-", "_") not in TasksManager.get_supported_model_type_for_task( - task, exporter="onnx" - ): - custom_architecture = True + needs_pad_token_id = task == "text-classification" and getattr(model.config, "pad_token_id", None) is None - if custom_architecture and custom_onnx_configs is None: - raise ValueError( - "Trying to export a model with a custom architecture, but no custom onnx configuration was passed as `custom_onnx_configs`. Please refer to https://huggingface.co/docs/optimum/main/en/exporters/onnx/usage_guides/export_a_model#custom-export-of-transformers-models for an example on how to export custom models." - ) + if needs_pad_token_id: + if pad_token_id is not None: + model.config.pad_token_id = pad_token_id + else: + tok = AutoTokenizer.from_pretrained(model_name_or_path) + pad_token_id = getattr(tok, "pad_token_id", None) + if pad_token_id is None: + raise ValueError( + "Could not infer the pad token id, which is needed in this case, please provide it with the --pad_token_id argument" + ) + model.config.pad_token_id = pad_token_id - if custom_architecture and original_task == "auto": - raise ValueError( - f'Automatic task detection is not supported with custom architectures. Please specify the `task` argument. 
Suggestion: task="{task}" (or task="{task}-with-past" if the model is decoder-based and supports KV cache)' - ) + if "stable-diffusion" in task: + model_type = "stable-diffusion" + elif hasattr(model.config, "export_model_type"): + model_type = model.config.export_model_type.replace("_", "-") + else: + model_type = model.config.model_type.replace("_", "-") if ( not custom_architecture - and not is_stable_diffusion - and task + "-with-past" in TasksManager.get_supported_tasks_for_model_type(model_type, "onnx") + and library_name != "diffusers" + and task + "-with-past" + in TasksManager.get_supported_tasks_for_model_type(model_type, exporter="onnx", library_name=library_name) ): - if original_task == "auto": # Make -with-past the default if --task was not explicitely specified + # Make -with-past the default if --task was not explicitely specified + if original_task == "auto": task = task + "-with-past" else: logger.info( @@ -286,125 +285,48 @@ class StoreAttr(object): possible_synonyms = "" logger.info(f"Automatic task detection to {task}{possible_synonyms}.") - task_support_stateful = ensure_export_task_support_stateful(task) - stateful = stateful and task_support_stateful - preprocessors = maybe_load_preprocessors( model_name_or_path, subfolder=subfolder, trust_remote_code=trust_remote_code ) - onnx_config, models_and_onnx_configs = optimum_main._get_submodels_and_onnx_configs( + export_from_model( model=model, + output=output, task=task, - monolith=False, - custom_onnx_configs=custom_onnx_configs if custom_onnx_configs is not None else {}, - custom_architecture=custom_architecture, + compression_option=compression_option, + compression_ratio=compression_ratio, + stateful=stateful, + model_kwargs=model_kwargs, + custom_onnx_configs=custom_onnx_configs, fn_get_submodels=fn_get_submodels, preprocessors=preprocessors, - _variant="default", - legacy=False, + device=device, + **kwargs_shapes, ) - if compression_option is None: - num_parameters = model.num_parameters() if not is_stable_diffusion else model.unet.num_parameters() - if num_parameters >= _MAX_UNCOMPRESSED_SIZE: - if is_nncf_available(): - compression_option = "int8" - logger.info("The model weights will be quantized to int8.") - else: - logger.warning( - "The model will be converted with no weights quantization. Quantization of the weights to int8 requires nncf." - "please install it with `pip install nncf`" - ) - - if not is_stable_diffusion: - needs_pad_token_id = ( - isinstance(onnx_config, OnnxConfigWithPast) - and getattr(model.config, "pad_token_id", None) is None - and task in ["text-classification"] - ) - - tokenizer = next( - (preprocessor for preprocessor in preprocessors if isinstance(preprocessor, PreTrainedTokenizerBase)), None - ) + if convert_tokenizer: + if library_name != "diffusers": + tokenizer = next( + (preprocessor for preprocessor in preprocessors if isinstance(preprocessor, PreTrainedTokenizerBase)), + None, + ) - if needs_pad_token_id: - if pad_token_id is not None: - model.config.pad_token_id = pad_token_id - elif tokenizer is not None: + if tokenizer is not None: try: - model.config.pad_token_id = tokenizer.pad_token_id - except Exception: - raise ValueError( - "Could not infer the pad token id, which is needed in this case, please provide it with the --pad_token_id argument" + export_tokenizer(tokenizer, output) + except Exception as exception: + logger.warning( + "Could not load tokenizer using specified model ID or path. OpenVINO tokenizer/detokenizer " + f"models won't be generated. 
Exception: {exception}" ) - # Saving the model config and preprocessor as this is needed sometimes. - model.config.save_pretrained(output) - generation_config = getattr(model, "generation_config", None) - if generation_config is not None: - generation_config.save_pretrained(output) - maybe_save_preprocessors(model_name_or_path, output) - - if convert_tokenizer and tokenizer is not None and is_openvino_tokenizers_available(): - try: - export_tokenizer(tokenizer, output) - except Exception as exception: - logger.warning( - "Could not load tokenizer using specified model ID or path. OpenVINO tokenizer/detokenizer " - f"models won't be generated. Exception: {exception}" - ) - - if model.config.is_encoder_decoder and task.startswith("text-generation"): - raise ValueError( - f"model.config.is_encoder_decoder is True and task is `{task}`, which are incompatible. If the task was auto-inferred, please fill a bug report" - f"at https://github.com/huggingface/optimum, if --task was explicitely passed, make sure you selected the right task for the model," - f" referring to `optimum.exporters.tasks.TaskManager`'s `_TASKS_TO_AUTOMODELS`." - ) - - files_subpaths = ["openvino_" + model_name + ".xml" for model_name in models_and_onnx_configs.keys()] - else: - # save the subcomponent configuration - for model_name in models_and_onnx_configs: - subcomponent = models_and_onnx_configs[model_name][0] - if hasattr(subcomponent, "save_config"): - subcomponent.save_config(output / model_name) - elif hasattr(subcomponent, "config") and hasattr(subcomponent.config, "save_pretrained"): - subcomponent.config.save_pretrained(output / model_name) - - files_subpaths = [os.path.join(name_dir, OV_XML_FILE_NAME) for name_dir in models_and_onnx_configs] - - # Saving the additional components needed to perform inference. 
- model.scheduler.save_pretrained(output.joinpath("scheduler")) - - feature_extractor = getattr(model, "feature_extractor", None) - if feature_extractor is not None: - feature_extractor.save_pretrained(output.joinpath("feature_extractor")) - - tokenizer = getattr(model, "tokenizer", None) - if tokenizer is not None: - tokenizer.save_pretrained(output.joinpath("tokenizer")) - if convert_tokenizer and is_openvino_tokenizers_available(): + else: + tokenizer = getattr(model, "tokenizer", None) + if tokenizer is not None: export_tokenizer(tokenizer, output) - tokenizer_2 = getattr(model, "tokenizer_2", None) - if tokenizer_2 is not None: - tokenizer_2.save_pretrained(output.joinpath("tokenizer_2")) - if convert_tokenizer and is_openvino_tokenizers_available(): - export_tokenizer(tokenizer, output, suffix="_2") - - model.save_config(output) - - export_models( - models_and_onnx_configs=models_and_onnx_configs, - output_dir=output, - output_names=files_subpaths, - input_shapes=input_shapes, - device=device, - compression_option=compression_option, - compression_ratio=compression_ratio, - stateful=stateful, - model_kwargs=model_kwargs, - ) + tokenizer_2 = getattr(model, "tokenizer_2", None) + if tokenizer_2 is not None: + export_tokenizer(tokenizer_2, output, suffix="_2") # Unpatch modules after GPTQ export if do_gptq_patching: diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py index 0c8690c9ac..b642bd9eb6 100644 --- a/optimum/exporters/openvino/convert.py +++ b/optimum/exporters/openvino/convert.py @@ -16,8 +16,9 @@ import gc import inspect import logging +import os from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, Callable, Dict, List, Optional, Tuple, Union from transformers import T5Tokenizer, T5TokenizerFast from transformers.utils import is_tf_available, is_torch_available @@ -26,17 +27,19 @@ from openvino.runtime.exceptions import OVTypeError from openvino.runtime.utils.types import get_element_type from openvino.tools.ovc import convert_model +from optimum.exporters import TasksManager from optimum.exporters.onnx.base import OnnxConfig from optimum.exporters.onnx.convert import check_dummy_inputs_are_allowed from optimum.exporters.onnx.convert import export_pytorch as export_pytorch_to_onnx from optimum.exporters.onnx.convert import export_tensorflow as export_tensorflow_onnx -from optimum.exporters.onnx.model_patcher import DecoderModelPatcher -from optimum.utils import is_diffusers_available +from optimum.utils import DEFAULT_DUMMY_SHAPES, is_diffusers_available +from optimum.utils.save_utils import maybe_save_preprocessors from ...intel.utils.import_utils import is_nncf_available, is_optimum_version from .model_patcher import patch_model_with_bettertransformer -from .stateful import ensure_stateful_is_available, patch_stateful +from .stateful import ensure_export_task_support_stateful, ensure_stateful_is_available, patch_stateful from .utils import ( + _MAX_UNCOMPRESSED_SIZE, OV_XML_FILE_NAME, clear_class_registry, flattenize_inputs, @@ -45,6 +48,16 @@ ) +if is_optimum_version(">=", "1.16.99"): + from optimum.exporters.onnx.utils import _get_submodels_and_onnx_configs + +else: + from optimum.exporters.onnx.__main__ import _get_submodels_and_onnx_configs + + +UNSUPPORTED_TOKENIZER_CLASSES = (T5Tokenizer, T5TokenizerFast) + + logger = logging.getLogger(__name__) if is_torch_available(): @@ -357,62 +370,42 @@ def export_pytorch( dummy_inputs = tree_map( lambda value: value.to(device) if 
isinstance(value, torch.Tensor) else value, dummy_inputs ) - check_dummy_inputs_are_allowed(model, dummy_inputs) - inputs = config.ordered_inputs(model) - input_names = list(inputs.keys()) - output_names = list(config.outputs.keys()) - if hasattr(model, "forward"): - sig = inspect.signature(model.forward) - else: - sig = inspect.signature(model.call) + dummy_inputs = config.rename_ambiguous_inputs(dummy_inputs) dummy_inputs, dict_inputs = remove_none_from_dummy_inputs(dummy_inputs) - input_info = get_input_shapes(dummy_inputs, inputs) - custom_patcher = type(config).patch_model_for_export != OnnxConfig.patch_model_for_export - patch_model_forward = False - orig_forward = model.forward + try: # TorchScript used behind OpenVINO conversion. Optimum supports only return_dict=True models for patching, # while TorchScript do not support dictionary with values of mixed types (e.g. Tensor and None) in model input/output # To handle it, additional wrapper on patcher forward applied. - # model.config.torchscript = True can not be used for patching, because it overrides return_dict to Flase - if custom_patcher or dict_inputs: - patcher = config.patch_model_for_export(model, model_kwargs=model_kwargs) - # DecoderModelPatcher does not override model forward in optimum < 1.15 - if ( - isinstance(patcher, DecoderModelPatcher) and is_optimum_version("<", "1.15.0") - ) or patcher.orig_forward_name != "forward": - patch_model_forward = True - patched_forward = model.forward - else: - patched_forward = patcher.patched_forward - - @functools.wraps(patched_forward) - def ts_patched_forward(*args, **kwargs): - for i in range(len(dict_inputs)): - input_name = dict_inputs[i][0] - keys = dict_inputs[i][1] - tuple_input = kwargs[input_name] - input_dict = dict(zip(keys, tuple_input)) - kwargs[input_name] = input_dict - outputs = patched_forward(*args, **kwargs) - return tuple(outputs.values()) - - if not patch_model_forward: - patcher.patched_forward = ts_patched_forward - else: - model.forward = ts_patched_forward - with patcher: - ov_model = convert_model(model, example_input=dummy_inputs, input=input_info) - else: - model.config.torchscript = True - model.config.retun_dict = False + # model.config.torchscript = True can not be used for patching, because it overrides return_dict to False + patcher = config.patch_model_for_export(model, model_kwargs=model_kwargs) + patched_forward = patcher.patched_forward + + @functools.wraps(patched_forward) + def ts_patched_forward(*args, **kwargs): + for i in range(len(dict_inputs)): + input_name, keys = dict_inputs[i] + tuple_input = kwargs[input_name] + input_dict = dict(zip(keys, tuple_input)) + kwargs[input_name] = input_dict + outputs = patched_forward(*args, **kwargs) + return tuple(outputs.values()) + + patcher.patched_forward = ts_patched_forward + + with patcher: + check_dummy_inputs_are_allowed(model, dummy_inputs) + inputs = config.ordered_inputs(model) + input_names = list(inputs.keys()) + output_names = list(config.outputs.keys()) + input_info = get_input_shapes(dummy_inputs, inputs) + ov_model = convert_model(model, example_input=dummy_inputs, input=input_info) except Exception as ex: logger.warning(f"Export model to OpenVINO directly failed with: \n{ex}.\nModel will be exported to ONNX") - if patch_model_forward: - model.forward = orig_forward + if stateful: # cannot raise because stateful is enabled by default and it would break backward compatibility for models that couldn't convert to OV directly # TODO: Implement stateful for ONNX path as well, not doing it 
right now because of lack of validation @@ -432,9 +425,8 @@ def ts_patched_forward(*args, **kwargs): compression_option=compression_option, compression_ratio=compression_ratio, ) - # return original forward - if patch_model_forward: - model.forward = orig_forward + + sig = inspect.signature(model.forward) if hasattr(model, "forward") else inspect.signature(model.call) ordered_dummy_inputs = {param: dummy_inputs[param] for param in sig.parameters if param in dummy_inputs} ordered_input_names = list(inputs) flatten_inputs = flattenize_inputs(ordered_dummy_inputs.values()) @@ -449,7 +441,6 @@ def ts_patched_forward(*args, **kwargs): inp_data = flatten_inputs[idx] static_shape = PartialShape(inp_data.shape) dims = inputs[input_name] - for dim in dims: static_shape[dim] = -1 inp_tensor.get_node().set_partial_shape(static_shape) @@ -540,10 +531,176 @@ def export_models( return outputs -UNSUPPORTED_TOKENIZER_CLASSES = ( - T5Tokenizer, - T5TokenizerFast, -) +def export_from_model( + model: Union["PreTrainedModel", "TFPreTrainedModel"], + output: Union[str, Path], + task: Optional[str] = None, + compression_option: Optional[str] = None, + compression_ratio: Optional[float] = None, + stateful: bool = True, + opset: Optional[int] = None, + model_kwargs: Optional[Dict[str, Any]] = None, + custom_onnx_configs: Optional[Dict[str, "OnnxConfig"]] = None, + fn_get_submodels: Optional[Callable] = None, + preprocessors: List = None, + device: str = "cpu", + **kwargs_shapes, +): + if ( + compression_option is not None + and compression_option != "fp16" + and compression_option != "fp32" + and not is_nncf_available() + ): + raise ImportError( + f"Compression of the weights to {compression_option} requires nncf, please install it with `pip install nncf`" + ) + + model_kwargs = model_kwargs or {} + library_name = TasksManager._infer_library_from_model(model) + TasksManager.standardize_model_attributes(model, library_name) + + if hasattr(model.config, "export_model_type"): + model_type = model.config.export_model_type.replace("_", "-") + else: + model_type = model.config.model_type.replace("_", "-") + + custom_architecture = library_name == "transformers" and model_type not in TasksManager._SUPPORTED_MODEL_TYPE + + if task is not None: + task = TasksManager.map_from_synonym(task) + else: + try: + task = TasksManager._infer_task_from_model_or_model_class(model=model) + except (ValueError, KeyError) as e: + raise RuntimeError( + f"The model task could not be automatically inferred in `onnx_export_from_model`. Please provide the argument `task` with the relevant task from {', '.join(TasksManager.get_all_tasks())}. Detailed error: {e}" + ) + + if ( + not custom_architecture + and library_name != "diffusers" + and task + "-with-past" + in TasksManager.get_supported_tasks_for_model_type(model_type, "onnx", library_name=library_name) + ): + # -with-past is the default. + task = task + "-with-past" + + logger.info(f"Automatic task detection to: {task}.") + + stateful = stateful and ensure_export_task_support_stateful(task) + + # TODO: support onnx_config.py in the model repo + if custom_architecture and custom_onnx_configs is None: + raise ValueError( + f"Trying to export a {model_type} model, that is a custom or unsupported architecture, but no custom onnx configuration was passed as `custom_onnx_configs`. Please refer to https://huggingface.co/docs/optimum/main/en/exporters/onnx/usage_guides/export_a_model#custom-export-of-transformers-models for an example on how to export custom models. 
Please open an issue at https://github.com/huggingface/optimum/issues if you would like the model type {model_type} to be supported natively in the ONNX export." + ) + + if task.startswith("text-generation") and model.config.is_encoder_decoder: + raise ValueError( + f"model.config.is_encoder_decoder is True and task is `{task}`, which are incompatible. If the task was auto-inferred, please fill a bug report" + f"at https://github.com/huggingface/optimum, if --task was explicitely passed, make sure you selected the right task for the model," + f" referring to `optimum.exporters.tasks.TaskManager`'s `_TRANSFORMERS_TASKS_TO_MODEL_LOADERS`." + ) + if library_name != "diffusers" and model_type in TasksManager._UNSUPPORTED_CLI_MODEL_TYPE: + raise ValueError( + f"{model_type} is not supported yet. Only {list(TasksManager._SUPPORTED_CLI_MODEL_TYPE.keys())} are supported. " + f"If you want to support {model_type} please propose a PR or open up an issue." + ) + + output = Path(output) + if not output.exists(): + output.mkdir(parents=True) + + # Get the shapes to be used to generate dummy inputs + input_shapes = {} + for input_name in DEFAULT_DUMMY_SHAPES.keys(): + input_shapes[input_name] = ( + kwargs_shapes[input_name] if input_name in kwargs_shapes else DEFAULT_DUMMY_SHAPES[input_name] + ) + + onnx_config, models_and_onnx_configs = _get_submodels_and_onnx_configs( + model=model, + task=task, + monolith=False, + custom_onnx_configs=custom_onnx_configs if custom_onnx_configs is not None else {}, + custom_architecture=custom_architecture, + fn_get_submodels=fn_get_submodels, + preprocessors=preprocessors, + library_name=library_name, + model_kwargs=model_kwargs, + _variant="default", + legacy=False, + ) + + if compression_option is None: + if library_name == "diffusers": + num_parameters = model.unet.num_parameters() + else: + num_parameters = sum(param.numel() for param in list(model.parameters()) if param.requires_grad) + + if num_parameters >= _MAX_UNCOMPRESSED_SIZE: + if is_nncf_available(): + compression_option = "int8" + logger.info("The model weights will be quantized to int8.") + else: + logger.warning( + "The model will be converted with no weights quantization. Quantization of the weights to int8 requires nncf." + "please install it with `pip install nncf`" + ) + + if library_name != "diffusers": + # Saving the model config and preprocessor as this is needed sometimes. + model.config.save_pretrained(output) + generation_config = getattr(model, "generation_config", None) + if generation_config is not None: + generation_config.save_pretrained(output) + + model_name_or_path = model.config._name_or_path + maybe_save_preprocessors(model_name_or_path, output) + + files_subpaths = ["openvino_" + model_name + ".xml" for model_name in models_and_onnx_configs.keys()] + + else: + # save the subcomponent configuration + for model_name in models_and_onnx_configs: + subcomponent = models_and_onnx_configs[model_name][0] + if hasattr(subcomponent, "save_config"): + subcomponent.save_config(output / model_name) + elif hasattr(subcomponent, "config") and hasattr(subcomponent.config, "save_pretrained"): + subcomponent.config.save_pretrained(output / model_name) + + files_subpaths = [os.path.join(name_dir, OV_XML_FILE_NAME) for name_dir in models_and_onnx_configs] + + # Saving the additional components needed to perform inference. 
+ model.scheduler.save_pretrained(output.joinpath("scheduler")) + + feature_extractor = getattr(model, "feature_extractor", None) + if feature_extractor is not None: + feature_extractor.save_pretrained(output.joinpath("feature_extractor")) + + tokenizer = getattr(model, "tokenizer", None) + if tokenizer is not None: + tokenizer.save_pretrained(output.joinpath("tokenizer")) + + tokenizer_2 = getattr(model, "tokenizer_2", None) + if tokenizer_2 is not None: + tokenizer_2.save_pretrained(output.joinpath("tokenizer_2")) + + model.save_config(output) + + export_models( + models_and_onnx_configs=models_and_onnx_configs, + output_dir=output, + output_names=files_subpaths, + input_shapes=input_shapes, + device=device, + compression_option=compression_option, + compression_ratio=compression_ratio, + stateful=stateful, + opset=opset, + model_kwargs=model_kwargs, + ) def export_tokenizer( diff --git a/optimum/exporters/openvino/utils.py b/optimum/exporters/openvino/utils.py index f0d5366526..10909e443b 100644 --- a/optimum/exporters/openvino/utils.py +++ b/optimum/exporters/openvino/utils.py @@ -30,6 +30,7 @@ OV_XML_FILE_NAME = "openvino_model.xml" +_MAX_UNCOMPRESSED_SIZE = 1e9 def is_torch_model(model: Union["PreTrainedModel", "ModelMixin"]): diff --git a/optimum/intel/ipex/__init__.py b/optimum/intel/ipex/__init__.py index a9ecc351b9..83943176b2 100644 --- a/optimum/intel/ipex/__init__.py +++ b/optimum/intel/ipex/__init__.py @@ -1,3 +1,17 @@ +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from optimum.intel.ipex.modeling_base import ( IPEXModel, IPEXModelForAudioClassification, diff --git a/optimum/intel/openvino/__init__.py b/optimum/intel/openvino/__init__.py index 568b0ade04..e3f04dfc5b 100644 --- a/optimum/intel/openvino/__init__.py +++ b/optimum/intel/openvino/__init__.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import logging from ..utils.import_utils import is_diffusers_available, is_nncf_available diff --git a/optimum/intel/openvino/modeling.py b/optimum/intel/openvino/modeling.py index 9408483166..558cc3b904 100644 --- a/optimum/intel/openvino/modeling.py +++ b/optimum/intel/openvino/modeling.py @@ -11,9 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ import logging import os from pathlib import Path +from tempfile import TemporaryDirectory from typing import Optional, Union import numpy as np @@ -49,6 +51,7 @@ from optimum.exporters import TasksManager +from ...exporters.openvino import main_export from ..utils.import_utils import is_timm_available, is_timm_version from .modeling_base import OVBaseModel from .utils import _is_timm_ov_dir @@ -410,6 +413,48 @@ def forward( ) return BaseModelOutput(last_hidden_state=last_hidden_state) + @classmethod + def _from_transformers( + cls, + model_id: str, + config: PretrainedConfig, + use_auth_token: Optional[Union[bool, str]] = None, + revision: Optional[str] = None, + force_download: bool = False, + cache_dir: Optional[str] = None, + subfolder: str = "", + local_files_only: bool = False, + task: Optional[str] = None, + trust_remote_code: bool = False, + load_in_8bit: Optional[bool] = None, + load_in_4bit: Optional[bool] = None, + **kwargs, + ): + save_dir = TemporaryDirectory() + save_dir_path = Path(save_dir.name) + + # If load_in_8bit is not specified then compression_option should be set to None and will be set by default in main_export depending on the model size + compression_option = "fp32" if load_in_8bit is not None else None + + # OVModelForFeatureExtraction works with Transformers type of models, thus even sentence-transformers models are loaded as such. + main_export( + model_name_or_path=model_id, + output=save_dir_path, + task=task or cls.export_feature, + subfolder=subfolder, + revision=revision, + cache_dir=cache_dir, + use_auth_token=use_auth_token, + local_files_only=local_files_only, + force_download=force_download, + trust_remote_code=trust_remote_code, + compression_option=compression_option, + library_name="transformers", + ) + + config.save_pretrained(save_dir_path) + return cls._from_pretrained(model_id=save_dir_path, config=config, load_in_8bit=load_in_8bit, **kwargs) + MASKED_LM_EXAMPLE = r""" Example of masked language modeling using `transformers.pipelines`: diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py index 41f06936f8..867354a543 100644 --- a/optimum/intel/openvino/modeling_diffusion.py +++ b/optimum/intel/openvino/modeling_diffusion.py @@ -28,6 +28,7 @@ LMSDiscreteScheduler, PNDMScheduler, StableDiffusionPipeline, + StableDiffusionXLImg2ImgPipeline, StableDiffusionXLPipeline, ) from diffusers.schedulers.scheduling_utils import SCHEDULER_CONFIG_NAME @@ -903,6 +904,8 @@ def __call__( class OVStableDiffusionXLImg2ImgPipeline(OVStableDiffusionXLPipelineBase, StableDiffusionXLImg2ImgPipelineMixin): + auto_model_class = StableDiffusionXLImg2ImgPipeline + def __call__( self, prompt: Optional[Union[str, List[str]]] = None, diff --git a/optimum/intel/openvino/modeling_timm.py b/optimum/intel/openvino/modeling_timm.py index 2b20a6a746..a84f80c9f7 100644 --- a/optimum/intel/openvino/modeling_timm.py +++ b/optimum/intel/openvino/modeling_timm.py @@ -1,3 +1,17 @@ +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import os
 from collections import OrderedDict
 from typing import Dict, List, Optional, Union
diff --git a/optimum/intel/openvino/training_args.py b/optimum/intel/openvino/training_args.py
index 2e079665a8..4928d67717 100644
--- a/optimum/intel/openvino/training_args.py
+++ b/optimum/intel/openvino/training_args.py
@@ -1,3 +1,17 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 from dataclasses import dataclass, field

 from transformers import TrainingArguments
diff --git a/setup.py b/setup.py
index fbb27ee6bb..023ac84b6c 100644
--- a/setup.py
+++ b/setup.py
@@ -13,7 +13,7 @@

 INSTALL_REQUIRE = [
     "torch>=1.11",
-    "optimum>=1.14.0",
+    "optimum @ git+https://github.com/huggingface/optimum.git",  # TODO : 1.17.0
     "transformers>=4.26.0",
     "datasets>=1.4.0",
     "sentencepiece",
@@ -33,6 +33,7 @@
     "rjieba",
     "timm",
     "invisible-watermark>=0.2.0",
+    "auto-gptq",
 ]

 QUALITY_REQUIRE = ["black~=23.1", "ruff>=0.0.241"]
diff --git a/tests/openvino/test_export.py b/tests/openvino/test_export.py
new file mode 100644
index 0000000000..21bec021f8
--- /dev/null
+++ b/tests/openvino/test_export.py
@@ -0,0 +1,116 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import unittest
+from pathlib import Path
+from tempfile import TemporaryDirectory
+from typing import Optional
+
+from parameterized import parameterized
+from utils_tests import MODEL_NAMES
+
+from optimum.exporters.onnx.constants import SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED
+from optimum.exporters.openvino import export_from_model
+from optimum.exporters.tasks import TasksManager
+from optimum.intel import (
+    OVLatentConsistencyModelPipeline,
+    OVModelForAudioClassification,
+    OVModelForCausalLM,
+    OVModelForFeatureExtraction,
+    OVModelForImageClassification,
+    OVModelForMaskedLM,
+    OVModelForPix2Struct,
+    OVModelForQuestionAnswering,
+    OVModelForSeq2SeqLM,
+    OVModelForSequenceClassification,
+    OVModelForSpeechSeq2Seq,
+    OVModelForTokenClassification,
+    OVStableDiffusionPipeline,
+    OVStableDiffusionXLImg2ImgPipeline,
+    OVStableDiffusionXLPipeline,
+)
+from optimum.intel.openvino.modeling_base import OVBaseModel
+from optimum.utils.save_utils import maybe_load_preprocessors
+
+
+class ExportModelTest(unittest.TestCase):
+    SUPPORTED_ARCHITECTURES = {
+        "bert": OVModelForMaskedLM,
+        "pix2struct": OVModelForPix2Struct,
+        "t5": OVModelForSeq2SeqLM,
+        "bart": OVModelForSeq2SeqLM,
+        "gpt2": OVModelForCausalLM,
+        "distilbert": OVModelForQuestionAnswering,
+        "albert": OVModelForSequenceClassification,
+        "vit": OVModelForImageClassification,
+        "roberta": OVModelForTokenClassification,
+        "wav2vec2": OVModelForAudioClassification,
+        "whisper": OVModelForSpeechSeq2Seq,
+        "blenderbot": OVModelForFeatureExtraction,
+        "stable-diffusion": OVStableDiffusionPipeline,
+        "stable-diffusion-xl": OVStableDiffusionXLPipeline,
+        "stable-diffusion-xl-refiner": OVStableDiffusionXLImg2ImgPipeline,
+        "latent-consistency": OVLatentConsistencyModelPipeline,
+    }
+
+    def _openvino_export(
+        self,
+        model_type: str,
+        compression_option: Optional[str] = None,
+        stateful: bool = True,
+    ):
+        auto_model = self.SUPPORTED_ARCHITECTURES[model_type]
+        task = auto_model.export_feature
+        model_name = MODEL_NAMES[model_type]
+        library_name = TasksManager.infer_library_from_model(model_name)
+        loading_kwargs = {"attn_implementation": "eager"} if model_type in SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED else {}
+
+        if library_name == "timm":
+            model_class = TasksManager.get_model_class_for_task(task, library=library_name)
+            model = model_class(f"hf_hub:{model_name}", pretrained=True, exportable=True)
+            TasksManager.standardize_model_attributes(model_name, model, library_name=library_name)
+        else:
+            model = auto_model.auto_model_class.from_pretrained(model_name, **loading_kwargs)
+
+        if getattr(model.config, "model_type", None) == "pix2struct":
+            preprocessors = maybe_load_preprocessors(model_name)
+        else:
+            preprocessors = None
+
+        supported_tasks = (task, task + "-with-past") if "text-generation" in task else (task,)
+        for supported_task in supported_tasks:
+            with TemporaryDirectory() as tmpdirname:
+                export_from_model(
+                    model=model,
+                    output=Path(tmpdirname),
+                    task=supported_task,
+                    preprocessors=preprocessors,
+                    compression_option=compression_option,
+                    stateful=stateful,
+                )
+
+                use_cache = supported_task.endswith("-with-past")
+                ov_model = auto_model.from_pretrained(tmpdirname, use_cache=use_cache)
+                self.assertIsInstance(ov_model, OVBaseModel)
+
+                if "text-generation" in task:
+                    self.assertEqual(ov_model.use_cache, use_cache)
+
+                if task == "text-generation":
+                    self.assertEqual(ov_model.stateful, stateful and use_cache)
+
+    @parameterized.expand(SUPPORTED_ARCHITECTURES)
+    def test_export(self, model_type: str):
+        self._openvino_export(model_type)
diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index 77c323d74f..5f3208fd58 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -426,6 +426,7 @@ class OVModelForFeatureExtractionIntegrationTest(unittest.TestCase):
         "bert",
         "distilbert",
         "roberta",
+        "sentence-transformers-bert",
     )

     @parameterized.expand(SUPPORTED_ARCHITECTURES)
@@ -482,8 +483,9 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
         "gpt_neo",
         "gpt_neox",
         "llama",
-        # "marian", # TODO : enable it back with openvino 2023.3.0
-        # "mistral",
+        "llama_gptq",
+        "marian",
+        "mistral",
         "mpt",
         "opt",
         "pegasus",
@@ -494,6 +496,10 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     def test_compare_to_transformers(self, model_arch):
         model_id = MODEL_NAMES[model_arch]
+
+        if "gptq" in model_arch:
+            self.skipTest("GPTQ model loading unsupported with AutoModelForCausalLM")
+
         set_seed(SEED)
         ov_model = OVModelForCausalLM.from_pretrained(model_id, export=True, ov_config=F32_CONFIG)
         self.assertIsInstance(ov_model.config, PretrainedConfig)
@@ -819,7 +825,7 @@ class OVModelForSeq2SeqLMIntegrationTest(unittest.TestCase):
         "blenderbot-small",
         # "longt5",
         "m2m_100",
-        # "marian", # TODO : enable it back with openvino 2023.3.0
+        "marian",
         "mbart",
         "mt5",
         "pegasus",
@@ -827,7 +833,7 @@ class OVModelForSeq2SeqLMIntegrationTest(unittest.TestCase):
     )
     GENERATION_LENGTH = 100
-    SPEEDUP_CACHE = 1.2
+    SPEEDUP_CACHE = 1.1

     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     def test_compare_to_transformers(self, model_arch):
@@ -1031,7 +1037,7 @@ def test_load_vanilla_transformers_which_is_not_supported(self):
         with self.assertRaises(Exception) as context:
             _ = OVModelForCTC.from_pretrained(MODEL_NAMES["t5"], export=True)

-        self.assertIn("Unrecognized configuration class", str(context.exception))
+        self.assertIn("only supports the tasks", str(context.exception))

     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     def test_compare_to_transformers(self, model_arch):
@@ -1083,7 +1089,7 @@ def test_load_vanilla_transformers_which_is_not_supported(self):
         with self.assertRaises(Exception) as context:
             _ = OVModelForAudioXVector.from_pretrained(MODEL_NAMES["t5"], export=True)

-        self.assertIn("Unrecognized configuration class", str(context.exception))
+        self.assertIn("only supports the tasks", str(context.exception))

     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     def test_compare_to_transformers(self, model_arch):
@@ -1137,7 +1143,7 @@ def test_load_vanilla_transformers_which_is_not_supported(self):
         with self.assertRaises(Exception) as context:
             _ = OVModelForAudioFrameClassification.from_pretrained(MODEL_NAMES["t5"], export=True)

-        self.assertIn("Unrecognized configuration class", str(context.exception))
+        self.assertIn("only supports the tasks", str(context.exception))

     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     def test_compare_to_transformers(self, model_arch):
diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py
index 11f79a989c..8fabb34e38 100644
--- a/tests/openvino/utils_tests.py
+++ b/tests/openvino/utils_tests.py
@@ -49,6 +49,7 @@
     "levit": "hf-internal-testing/tiny-random-LevitModel",
     "longt5": "hf-internal-testing/tiny-random-longt5",
     "llama": "fxmarty/tiny-llama-fast-tokenizer",
+    "llama_gptq": "hf-internal-testing/TinyLlama-1.1B-Chat-v0.3-GPTQ",
     "m2m_100": "hf-internal-testing/tiny-random-m2m_100",
     "opt": "hf-internal-testing/tiny-random-OPTModel",
     "opt125m": "facebook/opt-125m",
@@ -69,6 +70,7 @@
     "roberta": "hf-internal-testing/tiny-random-roberta",
     "roformer": "hf-internal-testing/tiny-random-roformer",
     "segformer": "hf-internal-testing/tiny-random-SegformerModel",
+    "sentence-transformers-bert": "sentence-transformers-testing/stsb-bert-tiny-safetensors",
     "speech_to_text": "hf-internal-testing/tiny-random-Speech2TextModel",
     "squeezebert": "hf-internal-testing/tiny-random-squeezebert",
     "stable-diffusion": "hf-internal-testing/tiny-stable-diffusion-torch",