From 1b89624a38159620a2d24e8210e3459957e9200e Mon Sep 17 00:00:00 2001 From: "Feng, Jiqing" Date: Sun, 7 Jan 2024 21:16:32 -0800 Subject: [PATCH 01/30] define optimum-intel pipeline --- optimum/intel/pipelines/__init__.py | 410 ++++++++++++++++++++++++++++ 1 file changed, 410 insertions(+) create mode 100644 optimum/intel/pipelines/__init__.py diff --git a/optimum/intel/pipelines/__init__.py b/optimum/intel/pipelines/__init__.py new file mode 100644 index 0000000000..4303ddfb59 --- /dev/null +++ b/optimum/intel/pipelines/__init__.py @@ -0,0 +1,410 @@ +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import warnings +from pathlib import Path +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union + +from huggingface_hub import model_info +from transformers.configuration_utils import PretrainedConfig +from transformers.models.auto.configuration_auto import AutoConfig +from transformers.models.auto.tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer +from transformers.pipelines.base import ( + Pipeline, + PipelineRegistry, + get_default_model_and_revision, +) +from transformers.pipelines.text_generation import TextGenerationPipeline +from transformers.tokenization_utils import PreTrainedTokenizer +from transformers.utils import ( + CONFIG_NAME, + HUGGINGFACE_CO_RESOLVE_ENDPOINT, + cached_file, + extract_commit_hash, + is_offline_mode, + is_torch_available, + logging, +) + +from ..generation.modeling import TSModelForCausalLM + + +if is_torch_available(): + import torch + from transformers.models.auto.modeling_auto import AutoModelForCausalLM + + +if TYPE_CHECKING: + from transformers.modeling_utils import PreTrainedModel + from transformers.tokenization_utils_fast import PreTrainedTokenizerFast + + +logger = logging.get_logger(__name__) + + +# Register all the supported tasks here +TASK_ALIASES = { + "sentiment-analysis": "text-classification", +} +SUPPORTED_TASKS = { + "text-generation": { + "impl": TextGenerationPipeline, + "pt": (AutoModelForCausalLM,) if is_torch_available() else (), + "default": {"model": {"pt": ("gpt2", "6c0e608")}}, + "type": "text", + }, +} + + +PIPELINE_REGISTRY = PipelineRegistry(supported_tasks=SUPPORTED_TASKS, task_aliases=TASK_ALIASES) + + +def get_supported_tasks() -> List[str]: + """ + Returns a list of supported task strings. + """ + return PIPELINE_REGISTRY.get_supported_tasks() + + +def get_task(model: str, token: Optional[str] = None, **deprecated_kwargs) -> str: + use_auth_token = deprecated_kwargs.pop("use_auth_token", None) + if use_auth_token is not None: + warnings.warn( + "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.", + FutureWarning, + ) + if token is not None: + raise ValueError("`token` and `use_auth_token` are both specified. 
Please set only the argument `token`.") + token = use_auth_token + + if is_offline_mode(): + raise RuntimeError("You cannot infer task automatically within `pipeline` when using offline mode") + try: + info = model_info(model, token=token) + except Exception as e: + raise RuntimeError(f"Instantiating a pipeline without a task set raised an error: {e}") + if not info.pipeline_tag: + raise RuntimeError( + f"The model {model} does not seem to have a correct `pipeline_tag` set to infer the task automatically" + ) + if getattr(info, "library_name", "transformers") != "transformers": + raise RuntimeError(f"This model is meant to be used with {info.library_name} not with transformers") + task = info.pipeline_tag + return task + + +def check_task(task: str) -> Tuple[str, Dict, Any]: + """ + Checks an incoming task string, to validate it's correct and return the default Pipeline and Model classes, and + default models if they exist. + + Args: + task (`str`): + The task defining which pipeline will be returned. Currently accepted tasks are: + + - `"text-generation"` + + Returns: + (normalized_task: `str`, task_defaults: `dict`, task_options: (`tuple`, None)) The normalized task name + (removed alias and options). The actual dictionary required to initialize the pipeline and some extra task + options for parametrized tasks like "translation_XX_to_YY" + + + """ + return PIPELINE_REGISTRY.check_task(task) + + +def clean_custom_task(task_info): + import transformers + + if "impl" not in task_info: + raise RuntimeError("This model introduces a custom pipeline without specifying its implementation.") + pt_class_names = task_info.get("pt", ()) + if isinstance(pt_class_names, str): + pt_class_names = [pt_class_names] + task_info["pt"] = tuple(getattr(transformers, c) for c in pt_class_names) + return task_info, None + + +def pipeline( + task: str = None, + model: Optional[Union[str, "PreTrainedModel"]] = None, + config: Optional[Union[str, PretrainedConfig]] = None, + tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, + revision: Optional[str] = None, + use_fast: bool = True, + token: Optional[Union[str, bool]] = None, + device: Optional[Union[int, str, "torch.device"]] = None, + device_map=None, + torch_dtype=None, + trust_remote_code: Optional[bool] = None, + model_kwargs: Dict[str, Any] = None, + pipeline_class: Optional[Any] = None, + **kwargs, +) -> Pipeline: + """ + Utility factory method to build a [`Pipeline`]. + + Pipelines are made of: + + - A [tokenizer](tokenizer) in charge of mapping raw textual input to token. + - A [model](model) to make predictions from the inputs. + - Some (optional) post processing for enhancing model's output. + + Args: + task (`str`): + The task defining which pipeline will be returned. Currently accepted tasks are: + + - `"text-generation"`: will return a [`TextGenerationPipeline`]:. + + model (`str` or [`PreTrainedModel`], *optional*): + The model that will be used by the pipeline to make predictions. This can be a model identifier or an + actual instance of a pretrained model inheriting from [`PreTrainedModel`] (for PyTorch). + + If not provided, the default for the `task` will be loaded. + config (`str` or [`PretrainedConfig`], *optional*): + The configuration that will be used by the pipeline to instantiate the model. This can be a model + identifier or an actual pretrained model configuration inheriting from [`PretrainedConfig`]. + + If not provided, the default configuration file for the requested model will be used. 
That means that if + `model` is given, its default configuration will be used. However, if `model` is not supplied, this + `task`'s default model's config is used instead. + tokenizer (`str` or [`PreTrainedTokenizer`], *optional*): + The tokenizer that will be used by the pipeline to encode data for the model. This can be a model + identifier or an actual pretrained tokenizer inheriting from [`PreTrainedTokenizer`]. + + If not provided, the default tokenizer for the given `model` will be loaded (if it is a string). If `model` + is not specified or not a string, then the default tokenizer for `config` is loaded (if it is a string). + However, if `config` is also not given or not a string, then the default tokenizer for the given `task` + will be loaded. + revision (`str`, *optional*, defaults to `"main"`): + When passing a task name or a string model identifier: The specific model version to use. It can be a + branch name, a tag name, or a commit id, since we use a git-based system for storing models and other + artifacts on huggingface.co, so `revision` can be any identifier allowed by git. + use_fast (`bool`, *optional*, defaults to `True`): + Whether or not to use a Fast tokenizer if possible (a [`PreTrainedTokenizerFast`]). + use_auth_token (`str` or *bool*, *optional*): + The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated + when running `huggingface-cli login` (stored in `~/.huggingface`). + device (`int` or `str` or `torch.device`): + Defines the device (*e.g.*, `"cpu"`, `"cuda:1"`, `"mps"`, or a GPU ordinal rank like `1`) on which this + pipeline will be allocated. + device_map (`str` or `Dict[str, Union[int, str, torch.device]`, *optional*): + Sent directly as `model_kwargs` (just a simpler shortcut). When `accelerate` library is present, set + `device_map="auto"` to compute the most optimized `device_map` automatically (see + [here](https://huggingface.co/docs/accelerate/main/en/package_reference/big_modeling#accelerate.cpu_offload) + for more information). + + + + Do not use `device_map` AND `device` at the same time as they will conflict + + + + torch_dtype (`str` or `torch.dtype`, *optional*): + Sent directly as `model_kwargs` (just a simpler shortcut) to use the available precision for this model + (`torch.float16`, `torch.bfloat16`, ... or `"auto"`). + trust_remote_code (`bool`, *optional*, defaults to `False`): + Whether or not to allow for custom code defined on the Hub in their own modeling, configuration, + tokenization or even pipeline files. This option should only be set to `True` for repositories you trust + and in which you have read the code, as it will execute code present on the Hub on your local machine. + model_kwargs (`Dict[str, Any]`, *optional*): + Additional dictionary of keyword arguments passed along to the model's `from_pretrained(..., + **model_kwargs)` function. + kwargs (`Dict[str, Any]`, *optional*): + Additional keyword arguments passed along to the specific pipeline init (see the documentation for the + corresponding pipeline class for possible values). + + Returns: + [`Pipeline`]: A suitable pipeline for the task. + + Examples: + + ```python + >>> from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer + + >>> # Sentiment analysis pipeline + >>> analyzer = pipeline("sentiment-analysis") + + >>> # Question answering pipeline, specifying the checkpoint identifier + >>> oracle = pipeline( + ... 
"question-answering", model="distilbert-base-cased-distilled-squad", tokenizer="bert-base-cased" + ... ) + + >>> # Named entity recognition pipeline, passing in a specific model and tokenizer + >>> model = AutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english") + >>> tokenizer = AutoTokenizer.from_pretrained("bert-base-cased") + >>> recognizer = pipeline("ner", model=model, tokenizer=tokenizer) + ```""" + if model_kwargs is None: + model_kwargs = {} + + code_revision = kwargs.pop("code_revision", None) + commit_hash = kwargs.pop("_commit_hash", None) + + hub_kwargs = { + "revision": revision, + "token": token, + "trust_remote_code": trust_remote_code, + "_commit_hash": commit_hash, + } + + if task is None and model is None: + raise RuntimeError( + "Impossible to instantiate a pipeline without either a task or a model " + "being specified. " + "Please provide a task class or a model" + ) + + if task != "text-generation": + raise ValueError("Optimum-intel ipex optimization only supports text-generation task for now.") + + if model is None and tokenizer is not None: + raise RuntimeError( + "Impossible to instantiate a pipeline with tokenizer specified but not the model as the provided tokenizer" + " may not be compatible with the default model. Please provide a PreTrainedModel class or a" + " path/identifier to a pretrained model when providing tokenizer." + ) + + if isinstance(model, Path): + model = str(model) + + if commit_hash is None: + pretrained_model_name_or_path = None + if isinstance(config, str): + pretrained_model_name_or_path = config + elif config is None and isinstance(model, str): + pretrained_model_name_or_path = model + + if not isinstance(config, PretrainedConfig) and pretrained_model_name_or_path is not None: + # We make a call to the config file first (which may be absent) to get the commit hash as soon as possible + resolved_config_file = cached_file( + pretrained_model_name_or_path, + CONFIG_NAME, + _raise_exceptions_for_missing_entries=False, + _raise_exceptions_for_connection_errors=False, + **hub_kwargs, + ) + hub_kwargs["_commit_hash"] = extract_commit_hash(resolved_config_file, commit_hash) + else: + hub_kwargs["_commit_hash"] = getattr(config, "_commit_hash", None) + + # Config is the primordial information item. + # Instantiate config if needed + if isinstance(config, str): + config = AutoConfig.from_pretrained( + config, _from_pipeline=task, code_revision=code_revision, **hub_kwargs, **model_kwargs + ) + hub_kwargs["_commit_hash"] = config._commit_hash + elif config is None and isinstance(model, str): + config = AutoConfig.from_pretrained( + model, _from_pipeline=task, code_revision=code_revision, **hub_kwargs, **model_kwargs + ) + hub_kwargs["_commit_hash"] = config._commit_hash + + if task is None and model is not None: + if not isinstance(model, str): + raise RuntimeError( + "Inferring the task automatically requires to check the hub with a model_id defined as a `str`. " + f"{model} is not a valid model_id." 
+ ) + task = get_task(model, token) + + normalized_task, targeted_task, task_options = check_task(task) + if pipeline_class is None: + pipeline_class = targeted_task["impl"] + + # Use default model/config/tokenizer for the task if no model is provided + if model is None: + model, default_revision = get_default_model_and_revision(targeted_task, "pt", task_options) + revision = revision if revision is not None else default_revision + logger.warning( + f"No model was supplied, defaulted to {model} and revision" + f" {revision} ({HUGGINGFACE_CO_RESOLVE_ENDPOINT}/{model}).\n" + "Using a pipeline without specifying a model name and revision in production is not recommended." + ) + if config is None and isinstance(model, str): + config = AutoConfig.from_pretrained(model, _from_pipeline=task, **hub_kwargs, **model_kwargs) + hub_kwargs["_commit_hash"] = config._commit_hash + + if device_map is not None: + if "device_map" in model_kwargs: + raise ValueError( + 'You cannot use both `pipeline(... device_map=..., model_kwargs={"device_map":...})` as those' + " arguments might conflict, use only one.)" + ) + if device is not None: + logger.warning( + "Both `device` and `device_map` are specified. `device` will override `device_map`. You" + " will most likely encounter unexpected behavior. Please remove `device` and keep `device_map`." + ) + model_kwargs["device_map"] = device_map + if torch_dtype is not None: + if "torch_dtype" in model_kwargs: + raise ValueError( + 'You cannot use both `pipeline(... torch_dtype=..., model_kwargs={"torch_dtype":...})` as those' + " arguments might conflict, use only one.)" + ) + model_kwargs["torch_dtype"] = torch_dtype + + model_name = model if isinstance(model, str) else None + + # Load the correct model if possible + # Infer the framework from the model if not already defined + if isinstance(model, str): + model = TSModelForCausalLM.from_pretrained(model, config=config, export=True, **model_kwargs) + + model_config = model.config + hub_kwargs["_commit_hash"] = model.config._commit_hash + load_tokenizer = type(model_config) in TOKENIZER_MAPPING or model_config.tokenizer_class is not None + + if load_tokenizer: + # Try to infer tokenizer from model or config name (if provided as str) + if tokenizer is None: + if isinstance(model_name, str): + tokenizer = model_name + elif isinstance(config, str): + tokenizer = config + else: + # Impossible to guess what is the right tokenizer here + raise Exception( + "Impossible to guess which tokenizer to use. " + "Please provide a PreTrainedTokenizer class or a path/identifier to a pretrained tokenizer." 
+ ) + + # Instantiate tokenizer if needed + if isinstance(tokenizer, (str, tuple)): + if isinstance(tokenizer, tuple): + # For tuple we have (tokenizer name, {kwargs}) + use_fast = tokenizer[1].pop("use_fast", use_fast) + tokenizer_identifier = tokenizer[0] + tokenizer_kwargs = tokenizer[1] + else: + tokenizer_identifier = tokenizer + tokenizer_kwargs = model_kwargs.copy() + tokenizer_kwargs.pop("torch_dtype", None) + + tokenizer = AutoTokenizer.from_pretrained( + tokenizer_identifier, use_fast=use_fast, _from_pipeline=task, **hub_kwargs, **tokenizer_kwargs + ) + + if tokenizer is not None: + kwargs["tokenizer"] = tokenizer + + if torch_dtype is not None: + kwargs["torch_dtype"] = torch_dtype + + if device is not None: + kwargs["device"] = device + + return pipeline_class(model=model, framework="pt", task=task, **kwargs) From 2bf212208b98ca9c0eed509b556a3c74ceceb5f4 Mon Sep 17 00:00:00 2001 From: "Feng, Jiqing" Date: Sun, 7 Jan 2024 22:30:58 -0800 Subject: [PATCH 02/30] add tests and readme --- README.md | 40 +++++++++++++++++++++ optimum/intel/generation/modeling.py | 1 + optimum/intel/pipelines/__init__.py | 5 +++ tests/pipelines/test_pipelines.py | 53 ++++++++++++++++++++++++++++ 4 files changed, 99 insertions(+) create mode 100644 tests/pipelines/test_pipelines.py diff --git a/README.md b/README.md index 54d8371b5b..fbdc06a3cc 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,46 @@ where `extras` can be one or more of `neural-compressor`, `openvino`, `nncf`. # Quick tour +## IPEX +### pipeline +Hugging Face pipelines provide a simple yet powerful abstraction to quickly set up inference. If you already have a pipeline from transformers, you can unlock the performance benefits of Optimum-Intel by just changing one line. +```diff +import torch +- from transformers.pipelines import pipeline ++ from optimum.intel.pipelines import pipeline + +pipe = pipeline('text-generation', 'gpt2', torch_dtype=torch.bfloat16) +pipe("Describe a real-world application of AI in sustainable energy.") +``` + +### generate +If you want control over advanced features like quantization and token selection strategies, we recommend using the generate() API. Just like with pipelines, switching from existing transformers code is super simple. +```diff +import torch +from transformers import AutoTokenizer, AutoConfig +- from transformers import AutoModelForCausalLM ++ from optimum.intel.generation.modeling import TSModelForCausalLM + +name = 'gpt2' +config = AutoConfig.from_pretrained(name, trust_remote_code=True) + +model = TSModelForCausalLM.from_pretrained( + name, + config=config, + torch_dtype=torch.bfloat16, + export=True, +) + +tokenizer = AutoTokenizer.from_pretrained(name) +input_sentence = ["Answer the following yes/no question by reasoning step-by-step please. 
Can you write a whole Haiku in a single tweet?"] +model_inputs = tokenizer(input_sentence, return_tensors="pt") +generation_kwargs = dict(max_new_tokens=32, do_sample=False, num_beams=4, num_beam_groups=1, no_repeat_ngram_size=2, use_cache=True) + +generated_ids = model.generate(**model_inputs, **generation_kwargs) +output = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0] +print(output) +``` + ## Neural Compressor Dynamic quantization can be used through the Optimum command-line interface: diff --git a/optimum/intel/generation/modeling.py b/optimum/intel/generation/modeling.py index fd946ea607..b15cfe8328 100644 --- a/optimum/intel/generation/modeling.py +++ b/optimum/intel/generation/modeling.py @@ -428,5 +428,6 @@ def _from_transformers( force_download=force_download, cache_dir=cache_dir, local_files_only=local_files_only, + model_dtype=torch_dtype, **kwargs, ) diff --git a/optimum/intel/pipelines/__init__.py b/optimum/intel/pipelines/__init__.py index 4303ddfb59..61329264a1 100644 --- a/optimum/intel/pipelines/__init__.py +++ b/optimum/intel/pipelines/__init__.py @@ -31,6 +31,7 @@ HUGGINGFACE_CO_RESOLVE_ENDPOINT, cached_file, extract_commit_hash, + is_ipex_available, is_offline_mode, is_torch_available, logging, @@ -39,6 +40,10 @@ from ..generation.modeling import TSModelForCausalLM +if is_ipex_available(): + import intel_extension_for_pytorch + + if is_torch_available(): import torch from transformers.models.auto.modeling_auto import AutoModelForCausalLM diff --git a/tests/pipelines/test_pipelines.py b/tests/pipelines/test_pipelines.py new file mode 100644 index 0000000000..0c6382d29c --- /dev/null +++ b/tests/pipelines/test_pipelines.py @@ -0,0 +1,53 @@ +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import torch +from parameterized import parameterized +from transformers.pipelines import pipeline as transformers_pipeline + +from optimum.intel.generation.modeling import TSModelForCausalLM +from optimum.intel.pipelines import pipeline as ipex_pipeline + + +MODEL_NAMES = { + "bert": "hf-internal-testing/tiny-random-bert", + "distilbert": "hf-internal-testing/tiny-random-distilbert", + "roberta": "hf-internal-testing/tiny-random-roberta", + "bloom": "hf-internal-testing/tiny-random-bloom", + "gptj": "hf-internal-testing/tiny-random-gptj", + "gpt2": "hf-internal-testing/tiny-random-gpt2", + "gpt_neo": "hf-internal-testing/tiny-random-GPTNeoModel", + "gpt_neox": "hf-internal-testing/tiny-random-GPTNeoXForCausalLM", + "gpt_bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel", +} + + +class PipelinesIntegrationTest(unittest.TestCase): + TEXT_GENERATION_SUPPORTED_ARCHITECTURES = ("bloom", "gptj", "gpt2", "gpt_neo") + + @parameterized.expand(TEXT_GENERATION_SUPPORTED_ARCHITECTURES) + def test_text_generation_pipeline_inference(self, model_arch): + model_id = MODEL_NAMES[model_arch] + inputs = "DeepSpeed is a machine learning framework for deep neural networks and deep reinforcement learning. It is written in C++ and is available for Linux, Mac OS X," + transformers_text_generator = transformers_pipeline("text-generation", model_id) + ipex_text_generator = ipex_pipeline("text-generation", model_id) + with torch.inference_mode(): + transformers_output = transformers_text_generator(inputs) + with torch.inference_mode(): + ipex_output = ipex_text_generator(inputs) + self.assertTrue(isinstance(ipex_text_generator.model, TSModelForCausalLM)) + self.assertTrue(isinstance(ipex_text_generator.model.model, torch.jit.RecursiveScriptModule)) + self.assertEqual(transformers_output[0]["generated_text"], ipex_output[0]["generated_text"]) From db10723a9ec7f185c7c3c342efbd2f1cc51de433 Mon Sep 17 00:00:00 2001 From: "Feng, Jiqing" Date: Sun, 7 Jan 2024 22:33:25 -0800 Subject: [PATCH 03/30] fix pipelines example --- optimum/intel/pipelines/__init__.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/optimum/intel/pipelines/__init__.py b/optimum/intel/pipelines/__init__.py index 61329264a1..3ae08416fa 100644 --- a/optimum/intel/pipelines/__init__.py +++ b/optimum/intel/pipelines/__init__.py @@ -236,20 +236,11 @@ def pipeline( Examples: ```python - >>> from transformers import pipeline, AutoModelForTokenClassification, AutoTokenizer + >>> import torch + >>> from optimum.intel.pipelines import pipeline - >>> # Sentiment analysis pipeline - >>> analyzer = pipeline("sentiment-analysis") - - >>> # Question answering pipeline, specifying the checkpoint identifier - >>> oracle = pipeline( - ... "question-answering", model="distilbert-base-cased-distilled-squad", tokenizer="bert-base-cased" - ... 
) - - >>> # Named entity recognition pipeline, passing in a specific model and tokenizer - >>> model = AutoModelForTokenClassification.from_pretrained("dbmdz/bert-large-cased-finetuned-conll03-english") - >>> tokenizer = AutoTokenizer.from_pretrained("bert-base-cased") - >>> recognizer = pipeline("ner", model=model, tokenizer=tokenizer) + >>> pipe = pipeline('text-generation', 'gpt2', torch_dtype=torch.bfloat16) + >>> pipe("Describe a real-world application of AI in sustainable energy.") ```""" if model_kwargs is None: model_kwargs = {} From 24f26db991bfc90baf581609550c7c5d263780ac Mon Sep 17 00:00:00 2001 From: jiqing-feng Date: Tue, 9 Jan 2024 03:37:14 -0500 Subject: [PATCH 04/30] fix readme codestyle --- README.md | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index fbdc06a3cc..0506ea944f 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,7 @@ import torch - from transformers.pipelines import pipeline + from optimum.intel.pipelines import pipeline -pipe = pipeline('text-generation', 'gpt2', torch_dtype=torch.bfloat16) +pipe = pipeline("text-generation", "gpt2", torch_dtype=torch.bfloat16) pipe("Describe a real-world application of AI in sustainable energy.") ``` @@ -61,17 +61,14 @@ from transformers import AutoTokenizer, AutoConfig - from transformers import AutoModelForCausalLM + from optimum.intel.generation.modeling import TSModelForCausalLM -name = 'gpt2' -config = AutoConfig.from_pretrained(name, trust_remote_code=True) - +config = AutoConfig.from_pretrained("gpt2") model = TSModelForCausalLM.from_pretrained( - name, + "gpt2", config=config, torch_dtype=torch.bfloat16, export=True, ) - -tokenizer = AutoTokenizer.from_pretrained(name) +tokenizer = AutoTokenizer.from_pretrained("gpt2") input_sentence = ["Answer the following yes/no question by reasoning step-by-step please. 
Can you write a whole Haiku in a single tweet?"] model_inputs = tokenizer(input_sentence, return_tensors="pt") generation_kwargs = dict(max_new_tokens=32, do_sample=False, num_beams=4, num_beam_groups=1, no_repeat_ngram_size=2, use_cache=True) From 39b7804ef303b2e0dcc0f5003ed85f99bcd7af4f Mon Sep 17 00:00:00 2001 From: jiqing-feng Date: Tue, 9 Jan 2024 10:26:00 -0500 Subject: [PATCH 05/30] add _load_model in pipeline --- optimum/intel/pipelines/__init__.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/optimum/intel/pipelines/__init__.py b/optimum/intel/pipelines/__init__.py index 3ae08416fa..98615cd7d1 100644 --- a/optimum/intel/pipelines/__init__.py +++ b/optimum/intel/pipelines/__init__.py @@ -141,6 +141,11 @@ def clean_custom_task(task_info): return task_info, None +def _load_model(task, model, **kwargs): + if task == "text-generation": + return TSModelForCausalLM.from_pretrained(model, **kwargs) + + def pipeline( task: str = None, model: Optional[Union[str, "PreTrainedModel"]] = None, @@ -262,8 +267,8 @@ def pipeline( "Please provide a task class or a model" ) - if task != "text-generation": - raise ValueError("Optimum-intel ipex optimization only supports text-generation task for now.") + if task not in SUPPORTED_TASKS.keys(): + raise ValueError(f"Optimum-intel ipex optimization only supports {SUPPORTED_TASKS.keys()} task for now.") if model is None and tokenizer is not None: raise RuntimeError( @@ -358,7 +363,7 @@ def pipeline( # Load the correct model if possible # Infer the framework from the model if not already defined if isinstance(model, str): - model = TSModelForCausalLM.from_pretrained(model, config=config, export=True, **model_kwargs) + model = _load_model(task, model, config=config, export=True, **model_kwargs) model_config = model.config hub_kwargs["_commit_hash"] = model.config._commit_hash From d37ff185d698b33df49a90002fbc45abba3dc9ef Mon Sep 17 00:00:00 2001 From: jiqing-feng Date: Tue, 2 Apr 2024 10:50:11 -0400 Subject: [PATCH 06/30] update pipeline for optimum intel --- optimum/intel/pipelines/__init__.py | 412 +---------------------- optimum/intel/pipelines/pipeline_base.py | 326 ++++++++++++++++++ 2 files changed, 327 insertions(+), 411 deletions(-) create mode 100644 optimum/intel/pipelines/pipeline_base.py diff --git a/optimum/intel/pipelines/__init__.py b/optimum/intel/pipelines/__init__.py index 98615cd7d1..02eb06cb39 100644 --- a/optimum/intel/pipelines/__init__.py +++ b/optimum/intel/pipelines/__init__.py @@ -1,411 +1 @@ -# Copyright 2024 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-import warnings -from pathlib import Path -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union - -from huggingface_hub import model_info -from transformers.configuration_utils import PretrainedConfig -from transformers.models.auto.configuration_auto import AutoConfig -from transformers.models.auto.tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer -from transformers.pipelines.base import ( - Pipeline, - PipelineRegistry, - get_default_model_and_revision, -) -from transformers.pipelines.text_generation import TextGenerationPipeline -from transformers.tokenization_utils import PreTrainedTokenizer -from transformers.utils import ( - CONFIG_NAME, - HUGGINGFACE_CO_RESOLVE_ENDPOINT, - cached_file, - extract_commit_hash, - is_ipex_available, - is_offline_mode, - is_torch_available, - logging, -) - -from ..generation.modeling import TSModelForCausalLM - - -if is_ipex_available(): - import intel_extension_for_pytorch - - -if is_torch_available(): - import torch - from transformers.models.auto.modeling_auto import AutoModelForCausalLM - - -if TYPE_CHECKING: - from transformers.modeling_utils import PreTrainedModel - from transformers.tokenization_utils_fast import PreTrainedTokenizerFast - - -logger = logging.get_logger(__name__) - - -# Register all the supported tasks here -TASK_ALIASES = { - "sentiment-analysis": "text-classification", -} -SUPPORTED_TASKS = { - "text-generation": { - "impl": TextGenerationPipeline, - "pt": (AutoModelForCausalLM,) if is_torch_available() else (), - "default": {"model": {"pt": ("gpt2", "6c0e608")}}, - "type": "text", - }, -} - - -PIPELINE_REGISTRY = PipelineRegistry(supported_tasks=SUPPORTED_TASKS, task_aliases=TASK_ALIASES) - - -def get_supported_tasks() -> List[str]: - """ - Returns a list of supported task strings. - """ - return PIPELINE_REGISTRY.get_supported_tasks() - - -def get_task(model: str, token: Optional[str] = None, **deprecated_kwargs) -> str: - use_auth_token = deprecated_kwargs.pop("use_auth_token", None) - if use_auth_token is not None: - warnings.warn( - "The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.", - FutureWarning, - ) - if token is not None: - raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.") - token = use_auth_token - - if is_offline_mode(): - raise RuntimeError("You cannot infer task automatically within `pipeline` when using offline mode") - try: - info = model_info(model, token=token) - except Exception as e: - raise RuntimeError(f"Instantiating a pipeline without a task set raised an error: {e}") - if not info.pipeline_tag: - raise RuntimeError( - f"The model {model} does not seem to have a correct `pipeline_tag` set to infer the task automatically" - ) - if getattr(info, "library_name", "transformers") != "transformers": - raise RuntimeError(f"This model is meant to be used with {info.library_name} not with transformers") - task = info.pipeline_tag - return task - - -def check_task(task: str) -> Tuple[str, Dict, Any]: - """ - Checks an incoming task string, to validate it's correct and return the default Pipeline and Model classes, and - default models if they exist. - - Args: - task (`str`): - The task defining which pipeline will be returned. Currently accepted tasks are: - - - `"text-generation"` - - Returns: - (normalized_task: `str`, task_defaults: `dict`, task_options: (`tuple`, None)) The normalized task name - (removed alias and options). 
The actual dictionary required to initialize the pipeline and some extra task - options for parametrized tasks like "translation_XX_to_YY" - - - """ - return PIPELINE_REGISTRY.check_task(task) - - -def clean_custom_task(task_info): - import transformers - - if "impl" not in task_info: - raise RuntimeError("This model introduces a custom pipeline without specifying its implementation.") - pt_class_names = task_info.get("pt", ()) - if isinstance(pt_class_names, str): - pt_class_names = [pt_class_names] - task_info["pt"] = tuple(getattr(transformers, c) for c in pt_class_names) - return task_info, None - - -def _load_model(task, model, **kwargs): - if task == "text-generation": - return TSModelForCausalLM.from_pretrained(model, **kwargs) - - -def pipeline( - task: str = None, - model: Optional[Union[str, "PreTrainedModel"]] = None, - config: Optional[Union[str, PretrainedConfig]] = None, - tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, - revision: Optional[str] = None, - use_fast: bool = True, - token: Optional[Union[str, bool]] = None, - device: Optional[Union[int, str, "torch.device"]] = None, - device_map=None, - torch_dtype=None, - trust_remote_code: Optional[bool] = None, - model_kwargs: Dict[str, Any] = None, - pipeline_class: Optional[Any] = None, - **kwargs, -) -> Pipeline: - """ - Utility factory method to build a [`Pipeline`]. - - Pipelines are made of: - - - A [tokenizer](tokenizer) in charge of mapping raw textual input to token. - - A [model](model) to make predictions from the inputs. - - Some (optional) post processing for enhancing model's output. - - Args: - task (`str`): - The task defining which pipeline will be returned. Currently accepted tasks are: - - - `"text-generation"`: will return a [`TextGenerationPipeline`]:. - - model (`str` or [`PreTrainedModel`], *optional*): - The model that will be used by the pipeline to make predictions. This can be a model identifier or an - actual instance of a pretrained model inheriting from [`PreTrainedModel`] (for PyTorch). - - If not provided, the default for the `task` will be loaded. - config (`str` or [`PretrainedConfig`], *optional*): - The configuration that will be used by the pipeline to instantiate the model. This can be a model - identifier or an actual pretrained model configuration inheriting from [`PretrainedConfig`]. - - If not provided, the default configuration file for the requested model will be used. That means that if - `model` is given, its default configuration will be used. However, if `model` is not supplied, this - `task`'s default model's config is used instead. - tokenizer (`str` or [`PreTrainedTokenizer`], *optional*): - The tokenizer that will be used by the pipeline to encode data for the model. This can be a model - identifier or an actual pretrained tokenizer inheriting from [`PreTrainedTokenizer`]. - - If not provided, the default tokenizer for the given `model` will be loaded (if it is a string). If `model` - is not specified or not a string, then the default tokenizer for `config` is loaded (if it is a string). - However, if `config` is also not given or not a string, then the default tokenizer for the given `task` - will be loaded. - revision (`str`, *optional*, defaults to `"main"`): - When passing a task name or a string model identifier: The specific model version to use. 
It can be a - branch name, a tag name, or a commit id, since we use a git-based system for storing models and other - artifacts on huggingface.co, so `revision` can be any identifier allowed by git. - use_fast (`bool`, *optional*, defaults to `True`): - Whether or not to use a Fast tokenizer if possible (a [`PreTrainedTokenizerFast`]). - use_auth_token (`str` or *bool*, *optional*): - The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated - when running `huggingface-cli login` (stored in `~/.huggingface`). - device (`int` or `str` or `torch.device`): - Defines the device (*e.g.*, `"cpu"`, `"cuda:1"`, `"mps"`, or a GPU ordinal rank like `1`) on which this - pipeline will be allocated. - device_map (`str` or `Dict[str, Union[int, str, torch.device]`, *optional*): - Sent directly as `model_kwargs` (just a simpler shortcut). When `accelerate` library is present, set - `device_map="auto"` to compute the most optimized `device_map` automatically (see - [here](https://huggingface.co/docs/accelerate/main/en/package_reference/big_modeling#accelerate.cpu_offload) - for more information). - - - - Do not use `device_map` AND `device` at the same time as they will conflict - - - - torch_dtype (`str` or `torch.dtype`, *optional*): - Sent directly as `model_kwargs` (just a simpler shortcut) to use the available precision for this model - (`torch.float16`, `torch.bfloat16`, ... or `"auto"`). - trust_remote_code (`bool`, *optional*, defaults to `False`): - Whether or not to allow for custom code defined on the Hub in their own modeling, configuration, - tokenization or even pipeline files. This option should only be set to `True` for repositories you trust - and in which you have read the code, as it will execute code present on the Hub on your local machine. - model_kwargs (`Dict[str, Any]`, *optional*): - Additional dictionary of keyword arguments passed along to the model's `from_pretrained(..., - **model_kwargs)` function. - kwargs (`Dict[str, Any]`, *optional*): - Additional keyword arguments passed along to the specific pipeline init (see the documentation for the - corresponding pipeline class for possible values). - - Returns: - [`Pipeline`]: A suitable pipeline for the task. - - Examples: - - ```python - >>> import torch - >>> from optimum.intel.pipelines import pipeline - - >>> pipe = pipeline('text-generation', 'gpt2', torch_dtype=torch.bfloat16) - >>> pipe("Describe a real-world application of AI in sustainable energy.") - ```""" - if model_kwargs is None: - model_kwargs = {} - - code_revision = kwargs.pop("code_revision", None) - commit_hash = kwargs.pop("_commit_hash", None) - - hub_kwargs = { - "revision": revision, - "token": token, - "trust_remote_code": trust_remote_code, - "_commit_hash": commit_hash, - } - - if task is None and model is None: - raise RuntimeError( - "Impossible to instantiate a pipeline without either a task or a model " - "being specified. " - "Please provide a task class or a model" - ) - - if task not in SUPPORTED_TASKS.keys(): - raise ValueError(f"Optimum-intel ipex optimization only supports {SUPPORTED_TASKS.keys()} task for now.") - - if model is None and tokenizer is not None: - raise RuntimeError( - "Impossible to instantiate a pipeline with tokenizer specified but not the model as the provided tokenizer" - " may not be compatible with the default model. Please provide a PreTrainedModel class or a" - " path/identifier to a pretrained model when providing tokenizer." 
- ) - - if isinstance(model, Path): - model = str(model) - - if commit_hash is None: - pretrained_model_name_or_path = None - if isinstance(config, str): - pretrained_model_name_or_path = config - elif config is None and isinstance(model, str): - pretrained_model_name_or_path = model - - if not isinstance(config, PretrainedConfig) and pretrained_model_name_or_path is not None: - # We make a call to the config file first (which may be absent) to get the commit hash as soon as possible - resolved_config_file = cached_file( - pretrained_model_name_or_path, - CONFIG_NAME, - _raise_exceptions_for_missing_entries=False, - _raise_exceptions_for_connection_errors=False, - **hub_kwargs, - ) - hub_kwargs["_commit_hash"] = extract_commit_hash(resolved_config_file, commit_hash) - else: - hub_kwargs["_commit_hash"] = getattr(config, "_commit_hash", None) - - # Config is the primordial information item. - # Instantiate config if needed - if isinstance(config, str): - config = AutoConfig.from_pretrained( - config, _from_pipeline=task, code_revision=code_revision, **hub_kwargs, **model_kwargs - ) - hub_kwargs["_commit_hash"] = config._commit_hash - elif config is None and isinstance(model, str): - config = AutoConfig.from_pretrained( - model, _from_pipeline=task, code_revision=code_revision, **hub_kwargs, **model_kwargs - ) - hub_kwargs["_commit_hash"] = config._commit_hash - - if task is None and model is not None: - if not isinstance(model, str): - raise RuntimeError( - "Inferring the task automatically requires to check the hub with a model_id defined as a `str`. " - f"{model} is not a valid model_id." - ) - task = get_task(model, token) - - normalized_task, targeted_task, task_options = check_task(task) - if pipeline_class is None: - pipeline_class = targeted_task["impl"] - - # Use default model/config/tokenizer for the task if no model is provided - if model is None: - model, default_revision = get_default_model_and_revision(targeted_task, "pt", task_options) - revision = revision if revision is not None else default_revision - logger.warning( - f"No model was supplied, defaulted to {model} and revision" - f" {revision} ({HUGGINGFACE_CO_RESOLVE_ENDPOINT}/{model}).\n" - "Using a pipeline without specifying a model name and revision in production is not recommended." - ) - if config is None and isinstance(model, str): - config = AutoConfig.from_pretrained(model, _from_pipeline=task, **hub_kwargs, **model_kwargs) - hub_kwargs["_commit_hash"] = config._commit_hash - - if device_map is not None: - if "device_map" in model_kwargs: - raise ValueError( - 'You cannot use both `pipeline(... device_map=..., model_kwargs={"device_map":...})` as those' - " arguments might conflict, use only one.)" - ) - if device is not None: - logger.warning( - "Both `device` and `device_map` are specified. `device` will override `device_map`. You" - " will most likely encounter unexpected behavior. Please remove `device` and keep `device_map`." - ) - model_kwargs["device_map"] = device_map - if torch_dtype is not None: - if "torch_dtype" in model_kwargs: - raise ValueError( - 'You cannot use both `pipeline(... 
torch_dtype=..., model_kwargs={"torch_dtype":...})` as those' - " arguments might conflict, use only one.)" - ) - model_kwargs["torch_dtype"] = torch_dtype - - model_name = model if isinstance(model, str) else None - - # Load the correct model if possible - # Infer the framework from the model if not already defined - if isinstance(model, str): - model = _load_model(task, model, config=config, export=True, **model_kwargs) - - model_config = model.config - hub_kwargs["_commit_hash"] = model.config._commit_hash - load_tokenizer = type(model_config) in TOKENIZER_MAPPING or model_config.tokenizer_class is not None - - if load_tokenizer: - # Try to infer tokenizer from model or config name (if provided as str) - if tokenizer is None: - if isinstance(model_name, str): - tokenizer = model_name - elif isinstance(config, str): - tokenizer = config - else: - # Impossible to guess what is the right tokenizer here - raise Exception( - "Impossible to guess which tokenizer to use. " - "Please provide a PreTrainedTokenizer class or a path/identifier to a pretrained tokenizer." - ) - - # Instantiate tokenizer if needed - if isinstance(tokenizer, (str, tuple)): - if isinstance(tokenizer, tuple): - # For tuple we have (tokenizer name, {kwargs}) - use_fast = tokenizer[1].pop("use_fast", use_fast) - tokenizer_identifier = tokenizer[0] - tokenizer_kwargs = tokenizer[1] - else: - tokenizer_identifier = tokenizer - tokenizer_kwargs = model_kwargs.copy() - tokenizer_kwargs.pop("torch_dtype", None) - - tokenizer = AutoTokenizer.from_pretrained( - tokenizer_identifier, use_fast=use_fast, _from_pipeline=task, **hub_kwargs, **tokenizer_kwargs - ) - - if tokenizer is not None: - kwargs["tokenizer"] = tokenizer - - if torch_dtype is not None: - kwargs["torch_dtype"] = torch_dtype - - if device is not None: - kwargs["device"] = device - - return pipeline_class(model=model, framework="pt", task=task, **kwargs) +from .pipeline_base import pipeline diff --git a/optimum/intel/pipelines/pipeline_base.py b/optimum/intel/pipelines/pipeline_base.py new file mode 100644 index 0000000000..c6bca6a4bf --- /dev/null +++ b/optimum/intel/pipelines/pipeline_base.py @@ -0,0 +1,326 @@ +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from pathlib import Path +from typing import TYPE_CHECKING, Any, Dict, Optional, Union + +from transformers import pipeline as transformers_pipeline +from transformers.configuration_utils import PretrainedConfig +from transformers.models.auto.tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer +from transformers.pipelines import ( + AudioClassificationPipeline, + FillMaskPipeline, + ImageClassificationPipeline, + QuestionAnsweringPipeline, + TextClassificationPipeline, + TextGenerationPipeline, + TokenClassificationPipeline, +) +from transformers.pipelines.base import Pipeline +from transformers.tokenization_utils import PreTrainedTokenizer +from transformers.utils import ( + is_ipex_available, + is_torch_available, + logging, +) + + +if is_ipex_available(): + from ..ipex.modeling_base import ( + IPEXModel, + IPEXModelForAudioClassification, + IPEXModelForCausalLM, + IPEXModelForImageClassification, + IPEXModelForMaskedLM, + IPEXModelForQuestionAnswering, + IPEXModelForSequenceClassification, + IPEXModelForTokenClassification, + ) + + IPEX_SUPPORTED_TASKS = { + "text-generation": { + "impl": TextGenerationPipeline, + "class": (IPEXModelForCausalLM,), + "default": "gpt2", + "type": "text", + }, + "fill-mask": { + "impl": FillMaskPipeline, + "class": (IPEXModelForMaskedLM,), + "default": "bert-base-cased", + "type": "text", + }, + "question-answering": { + "impl": QuestionAnsweringPipeline, + "class": (IPEXModelForQuestionAnswering,), + "default": "distilbert-base-cased-distilled-squad", + "type": "text", + }, + "image-classification": { + "impl": ImageClassificationPipeline, + "class": (IPEXModelForImageClassification,), + "default": "google/vit-base-patch16-224", + "type": "image", + }, + "text-classification": { + "impl": TextClassificationPipeline, + "class": (IPEXModelForSequenceClassification,), + "default": "distilbert-base-uncased-finetuned-sst-2-english", + "type": "text", + }, + "token-classification": { + "impl": TokenClassificationPipeline, + "class": (IPEXModelForTokenClassification,), + "default": "dbmdz/bert-large-cased-finetuned-conll03-english", + "type": "text", + }, + "audio-classification": { + "impl": AudioClassificationPipeline, + "class": (IPEXModelForAudioClassification,), + "default": "superb/hubert-base-superb-ks", + "type": "audio", + }, + } + + +def load_ipex_model( + model, + targeted_task, + SUPPORTED_TASKS, + model_kwargs: Optional[Dict[str, Any]] = None, +): + if model_kwargs is None: + model_kwargs = {} + + if model is None: + model_id = SUPPORTED_TASKS[targeted_task]["default"] + model = SUPPORTED_TASKS[targeted_task]["class"][0].from_pretrained(model_id, export=True) + elif isinstance(model, str): + ipex_model_class = SUPPORTED_TASKS[targeted_task]["class"][0] + model = ipex_model_class.from_pretrained(model, export=True, **model_kwargs) + elif isinstance(model, IPEXModel): + pass + else: + raise ValueError( + f"""Model {model} is not supported. Please provide a valid model either as string or IPEXModel. 
+ You can also provide non model then a default one will be used""" + ) + + return model + + +MAPPING_LOADING_FUNC = { + "ipex": load_ipex_model, +} + + +if is_torch_available(): + import torch + + +if TYPE_CHECKING: + from transformers.modeling_utils import PreTrainedModel + from transformers.tokenization_utils_fast import PreTrainedTokenizerFast + + +logger = logging.get_logger(__name__) + + +def pipeline( + task: str = None, + model: Optional[Union[str, "PreTrainedModel"]] = None, + config: Optional[Union[str, PretrainedConfig]] = None, + tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, + accelerator: Optional[str] = "ipex", + use_fast: bool = True, + device: Optional[Union[int, str, "torch.device"]] = None, + torch_dtype=None, + model_kwargs: Dict[str, Any] = None, + **kwargs, +) -> Pipeline: + """ + Utility factory method to build a [`Pipeline`]. + + Pipelines are made of: + + - A [tokenizer](tokenizer) in charge of mapping raw textual input to token. + - A [model](model) to make predictions from the inputs. + - Some (optional) post processing for enhancing model's output. + + Args: + task (`str`): + The task defining which pipeline will be returned. Currently accepted tasks are: + + - `"text-generation"`: will return a [`TextGenerationPipeline`]:. + + model (`str` or [`PreTrainedModel`], *optional*): + The model that will be used by the pipeline to make predictions. This can be a model identifier or an + actual instance of a pretrained model inheriting from [`PreTrainedModel`] (for PyTorch). + + If not provided, the default for the `task` will be loaded. + config (`str` or [`PretrainedConfig`], *optional*): + The configuration that will be used by the pipeline to instantiate the model. This can be a model + identifier or an actual pretrained model configuration inheriting from [`PretrainedConfig`]. + + If not provided, the default configuration file for the requested model will be used. That means that if + `model` is given, its default configuration will be used. However, if `model` is not supplied, this + `task`'s default model's config is used instead. + tokenizer (`str` or [`PreTrainedTokenizer`], *optional*): + The tokenizer that will be used by the pipeline to encode data for the model. This can be a model + identifier or an actual pretrained tokenizer inheriting from [`PreTrainedTokenizer`]. + + If not provided, the default tokenizer for the given `model` will be loaded (if it is a string). If `model` + is not specified or not a string, then the default tokenizer for `config` is loaded (if it is a string). + However, if `config` is also not given or not a string, then the default tokenizer for the given `task` + will be loaded. + accelerator (`str`, *optional*, defaults to `"ipex"`): + The optimization backends, choose from ["ipex", "inc", "openvino"]. + revision (`str`, *optional*, defaults to `"main"`): + When passing a task name or a string model identifier: The specific model version to use. It can be a + branch name, a tag name, or a commit id, since we use a git-based system for storing models and other + artifacts on huggingface.co, so `revision` can be any identifier allowed by git. + use_fast (`bool`, *optional*, defaults to `True`): + Whether or not to use a Fast tokenizer if possible (a [`PreTrainedTokenizerFast`]). + use_auth_token (`str` or *bool*, *optional*): + The token to use as HTTP bearer authorization for remote files. 
If `True`, will use the token generated + when running `huggingface-cli login` (stored in `~/.huggingface`). + device (`int` or `str` or `torch.device`): + Defines the device (*e.g.*, `"cpu"`, `"cuda:1"`, `"mps"`, or a GPU ordinal rank like `1`) on which this + pipeline will be allocated. + device_map (`str` or `Dict[str, Union[int, str, torch.device]`, *optional*): + Sent directly as `model_kwargs` (just a simpler shortcut). When `accelerate` library is present, set + `device_map="auto"` to compute the most optimized `device_map` automatically (see + [here](https://huggingface.co/docs/accelerate/main/en/package_reference/big_modeling#accelerate.cpu_offload) + for more information). + + + + Do not use `device_map` AND `device` at the same time as they will conflict + + + + torch_dtype (`str` or `torch.dtype`, *optional*): + Sent directly as `model_kwargs` (just a simpler shortcut) to use the available precision for this model + (`torch.float16`, `torch.bfloat16`, ... or `"auto"`). + trust_remote_code (`bool`, *optional*, defaults to `False`): + Whether or not to allow for custom code defined on the Hub in their own modeling, configuration, + tokenization or even pipeline files. This option should only be set to `True` for repositories you trust + and in which you have read the code, as it will execute code present on the Hub on your local machine. + model_kwargs (`Dict[str, Any]`, *optional*): + Additional dictionary of keyword arguments passed along to the model's `from_pretrained(..., + **model_kwargs)` function. + kwargs (`Dict[str, Any]`, *optional*): + Additional keyword arguments passed along to the specific pipeline init (see the documentation for the + corresponding pipeline class for possible values). + + Returns: + [`Pipeline`]: A suitable pipeline for the task. + + Examples: + + ```python + >>> import torch + >>> from optimum.intel.pipelines import pipeline + + >>> pipe = pipeline('text-generation', 'gpt2', torch_dtype=torch.bfloat16) + >>> pipe("Describe a real-world application of AI in sustainable energy.") + ```""" + if model_kwargs is None: + model_kwargs = {} + + if task is None and model is None: + raise RuntimeError( + "Impossible to instantiate a pipeline without either a task or a model " + "being specified. " + "Please provide a task class or a model" + ) + + if model is None and tokenizer is not None: + raise RuntimeError( + "Impossible to instantiate a pipeline with tokenizer specified but not the model as the provided tokenizer" + " may not be compatible with the default model. Please provide a PreTrainedModel class or a" + " path/identifier to a pretrained model when providing tokenizer." + ) + + if accelerator not in MAPPING_LOADING_FUNC: + raise ValueError(f'Accelerator {accelerator} is not supported. Supported accelerator is "ipex".') + + if accelerator == "ipex": + if task not in list(IPEX_SUPPORTED_TASKS.keys()): + raise ValueError( + f"Task {task} is not supported for the ONNX Runtime pipeline. Supported tasks are { list(IPEX_SUPPORTED_TASKS.keys())}" + ) + + if isinstance(model, Path): + model = str(model) + + if torch_dtype is not None: + if "torch_dtype" in model_kwargs: + raise ValueError( + 'You cannot use both `pipeline(... 
torch_dtype=..., model_kwargs={"torch_dtype":...})` as those' + " arguments might conflict, use only one.)" + ) + model_kwargs["torch_dtype"] = torch_dtype + + model_name = model if isinstance(model, str) else None + + # Load the correct model if possible + # Infer the framework from the model if not already defined + model = MAPPING_LOADING_FUNC[accelerator](model, task, IPEX_SUPPORTED_TASKS, model_kwargs) + + model_config = model.config + load_tokenizer = type(model_config) in TOKENIZER_MAPPING or model_config.tokenizer_class is not None + + if load_tokenizer: + # Try to infer tokenizer from model or config name (if provided as str) + if tokenizer is None: + if isinstance(model_name, str): + tokenizer = model_name + elif isinstance(config, str): + tokenizer = config + else: + # Impossible to guess what is the right tokenizer here + raise Exception( + "Impossible to guess which tokenizer to use. " + "Please provide a PreTrainedTokenizer class or a path/identifier to a pretrained tokenizer." + ) + + # Instantiate tokenizer if needed + if isinstance(tokenizer, (str, tuple)): + if isinstance(tokenizer, tuple): + # For tuple we have (tokenizer name, {kwargs}) + use_fast = tokenizer[1].pop("use_fast", use_fast) + tokenizer_identifier = tokenizer[0] + tokenizer_kwargs = tokenizer[1] + else: + tokenizer_identifier = tokenizer + tokenizer_kwargs = model_kwargs.copy() + tokenizer_kwargs.pop("torch_dtype", None) + + tokenizer = AutoTokenizer.from_pretrained( + tokenizer_identifier, use_fast=use_fast, _from_pipeline=task, **tokenizer_kwargs + ) + + if torch_dtype is not None: + kwargs["torch_dtype"] = torch_dtype + + if device is not None: + kwargs["device"] = device + + return transformers_pipeline( + task, + model=model, + tokenizer=tokenizer, + use_fast=use_fast, + **kwargs, + ) From 688241726b6483f9ef45e934430a62571539dc26 Mon Sep 17 00:00:00 2001 From: jiqing-feng Date: Tue, 2 Apr 2024 10:52:39 -0400 Subject: [PATCH 07/30] update tests --- tests/pipelines/test_pipelines.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/pipelines/test_pipelines.py b/tests/pipelines/test_pipelines.py index 0c6382d29c..3ca7da9406 100644 --- a/tests/pipelines/test_pipelines.py +++ b/tests/pipelines/test_pipelines.py @@ -18,7 +18,7 @@ from parameterized import parameterized from transformers.pipelines import pipeline as transformers_pipeline -from optimum.intel.generation.modeling import TSModelForCausalLM +from optimum.intel.ipex.modeling_base import IPEXModelForCausalLM from optimum.intel.pipelines import pipeline as ipex_pipeline @@ -43,11 +43,11 @@ def test_text_generation_pipeline_inference(self, model_arch): model_id = MODEL_NAMES[model_arch] inputs = "DeepSpeed is a machine learning framework for deep neural networks and deep reinforcement learning. 
It is written in C++ and is available for Linux, Mac OS X," transformers_text_generator = transformers_pipeline("text-generation", model_id) - ipex_text_generator = ipex_pipeline("text-generation", model_id) + ipex_text_generator = ipex_pipeline("text-generation", model_id, accelerator="ipex") with torch.inference_mode(): transformers_output = transformers_text_generator(inputs) with torch.inference_mode(): ipex_output = ipex_text_generator(inputs) - self.assertTrue(isinstance(ipex_text_generator.model, TSModelForCausalLM)) + self.assertTrue(isinstance(ipex_text_generator.model, IPEXModelForCausalLM)) self.assertTrue(isinstance(ipex_text_generator.model.model, torch.jit.RecursiveScriptModule)) self.assertEqual(transformers_output[0]["generated_text"], ipex_output[0]["generated_text"]) From 64c546c599086c507bbdda2f06c402ccf70e6664 Mon Sep 17 00:00:00 2001 From: jiqing-feng Date: Tue, 2 Apr 2024 10:59:32 -0400 Subject: [PATCH 08/30] remove readme --- README.md | 37 ------------------------------------- 1 file changed, 37 deletions(-) diff --git a/README.md b/README.md index b7b6e82e86..c29a923745 100644 --- a/README.md +++ b/README.md @@ -44,43 +44,6 @@ where `extras` can be one or more of `ipex`, `neural-compressor`, `openvino`, `n # Quick tour -## IPEX -### pipeline -Hugging Face pipelines provide a simple yet powerful abstraction to quickly set up inference. If you already have a pipeline from transformers, you can unlock the performance benefits of Optimum-Intel by just changing one line. -```diff -import torch -- from transformers.pipelines import pipeline -+ from optimum.intel.pipelines import pipeline - -pipe = pipeline("text-generation", "gpt2", torch_dtype=torch.bfloat16) -pipe("Describe a real-world application of AI in sustainable energy.") -``` - -### generate -If you want control over advanced features like quantization and token selection strategies, we recommend using the generate() API. Just like with pipelines, switching from existing transformers code is super simple. -```diff -import torch -from transformers import AutoTokenizer, AutoConfig -- from transformers import AutoModelForCausalLM -+ from optimum.intel.generation.modeling import TSModelForCausalLM - -config = AutoConfig.from_pretrained("gpt2") -model = TSModelForCausalLM.from_pretrained( - "gpt2", - config=config, - torch_dtype=torch.bfloat16, - export=True, -) -tokenizer = AutoTokenizer.from_pretrained("gpt2") -input_sentence = ["Answer the following yes/no question by reasoning step-by-step please. 
Can you write a whole Haiku in a single tweet?"] -model_inputs = tokenizer(input_sentence, return_tensors="pt") -generation_kwargs = dict(max_new_tokens=32, do_sample=False, num_beams=4, num_beam_groups=1, no_repeat_ngram_size=2, use_cache=True) - -generated_ids = model.generate(**model_inputs, **generation_kwargs) -output = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0] -print(output) -``` - ## Neural Compressor Dynamic quantization can be used through the Optimum command-line interface: From 29ad8b2b53434e85d3e13f6b1f5617bdbc86ceba Mon Sep 17 00:00:00 2001 From: jiqing-feng <107918818+jiqing-feng@users.noreply.github.com> Date: Wed, 3 Apr 2024 09:16:52 +0800 Subject: [PATCH 09/30] Update optimum/intel/pipelines/__init__.py Co-authored-by: Ella Charlaix <80481427+echarlaix@users.noreply.github.com> --- optimum/intel/pipelines/__init__.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/optimum/intel/pipelines/__init__.py b/optimum/intel/pipelines/__init__.py index 02eb06cb39..40a1e3ca56 100644 --- a/optimum/intel/pipelines/__init__.py +++ b/optimum/intel/pipelines/__init__.py @@ -1 +1,15 @@ +# Copyright 2024 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + from .pipeline_base import pipeline From b5392c1e8e4732c47eb25e73b79f559c1ce14040 Mon Sep 17 00:00:00 2001 From: jiqing-feng Date: Sun, 7 Apr 2024 06:53:59 -0400 Subject: [PATCH 10/30] fix pipelines --- optimum/intel/ipex/inference.py | 4 + optimum/intel/ipex/modeling_base.py | 2 + optimum/intel/pipelines/pipeline_base.py | 158 ++++++++++---------- tests/{pipelines => ipex}/test_pipelines.py | 2 +- 4 files changed, 84 insertions(+), 82 deletions(-) rename tests/{pipelines => ipex}/test_pipelines.py (93%) diff --git a/optimum/intel/ipex/inference.py b/optimum/intel/ipex/inference.py index ccf2da9d80..a628ebe12e 100644 --- a/optimum/intel/ipex/inference.py +++ b/optimum/intel/ipex/inference.py @@ -97,6 +97,10 @@ def __init__( jit (`boolean = False`, *optional*): Enable jit to accelerate inference speed """ + logger.warning( + "`inference_mode` is deprecated and will be removed in v1.18.0. Use `pipeline` to load and export your model to TorchScript instead." 
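(A minimal usage sketch of the replacement this deprecation message points to; the model id and prompt are illustrative, reused from the pipeline docstring earlier in this series, and are not part of the patch.)

```python
# Recommended replacement for the deprecated `inference_mode` wrapper:
# the optimum-intel pipeline factory performs the IPEX/TorchScript export itself.
import torch

from optimum.intel.pipelines import pipeline

pipe = pipeline("text-generation", "gpt2", accelerator="ipex", torch_dtype=torch.bfloat16)
print(pipe("Describe a real-world application of AI in sustainable energy.")[0]["generated_text"])
```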
+ ) + if not is_ipex_available(): raise ImportError(IPEX_NOT_AVAILABLE_ERROR_MSG) diff --git a/optimum/intel/ipex/modeling_base.py b/optimum/intel/ipex/modeling_base.py index 0664a8e6ac..dfe48f5d4e 100644 --- a/optimum/intel/ipex/modeling_base.py +++ b/optimum/intel/ipex/modeling_base.py @@ -88,6 +88,8 @@ def ipex_jit_trace(model, task, use_cache): sample_inputs = prepare_jit_inputs(model, task, use_cache) model.config.return_dict = False + if "past_key_values" in sample_inputs.keys(): + model.config.use_cache = use_cache model = ipex.optimize(model.eval(), dtype=model.dtype, inplace=True) # Disable repack while jit tracing to reduce the memory diff --git a/optimum/intel/pipelines/pipeline_base.py b/optimum/intel/pipelines/pipeline_base.py index c6bca6a4bf..bdc3ea07d0 100644 --- a/optimum/intel/pipelines/pipeline_base.py +++ b/optimum/intel/pipelines/pipeline_base.py @@ -14,9 +14,10 @@ from pathlib import Path from typing import TYPE_CHECKING, Any, Dict, Optional, Union +from transformers import SequenceFeatureExtractor from transformers import pipeline as transformers_pipeline -from transformers.configuration_utils import PretrainedConfig -from transformers.models.auto.tokenization_auto import TOKENIZER_MAPPING, AutoTokenizer +from transformers.feature_extraction_utils import PreTrainedFeatureExtractor +from transformers.onnx.utils import get_preprocessor from transformers.pipelines import ( AudioClassificationPipeline, FillMaskPipeline, @@ -34,6 +35,8 @@ logging, ) +from optimum.utils.file_utils import find_files_matching_pattern + if is_ipex_available(): from ..ipex.modeling_base import ( @@ -96,27 +99,61 @@ def load_ipex_model( model, targeted_task, + load_tokenizer, + tokenizer, + load_feature_extractor, + feature_extractor, SUPPORTED_TASKS, model_kwargs: Optional[Dict[str, Any]] = None, ): if model_kwargs is None: model_kwargs = {} + ipex_model_class = SUPPORTED_TASKS[targeted_task]["class"][0] + if model is None: model_id = SUPPORTED_TASKS[targeted_task]["default"] - model = SUPPORTED_TASKS[targeted_task]["class"][0].from_pretrained(model_id, export=True) + model = ipex_model_class.from_pretrained(model_id, export=True) elif isinstance(model, str): - ipex_model_class = SUPPORTED_TASKS[targeted_task]["class"][0] - model = ipex_model_class.from_pretrained(model, export=True, **model_kwargs) + model_id = model + ipex_file = find_files_matching_pattern( + model, + ".+?.pt", + glob_pattern="**/*.pt", + subfolder=model_kwargs.pop("subfolder", None), + use_auth_token=model_kwargs.pop("token", None), + revision=model_kwargs.pop("revision", "main"), + ) + export = len(ipex_file) == 0 + model = ipex_model_class.from_pretrained(model, export=export, **model_kwargs) elif isinstance(model, IPEXModel): - pass + if tokenizer is None and load_tokenizer: + for preprocessor in model.preprocessors: + if isinstance(preprocessor, (PreTrainedTokenizer, PreTrainedTokenizerFast)): + tokenizer = preprocessor + break + if tokenizer is None: + raise ValueError( + "Could not automatically find a tokenizer for the IPEXModel, you must pass a tokenizer explictly" + ) + if feature_extractor is None and load_feature_extractor: + for preprocessor in model.preprocessors: + if isinstance(preprocessor, SequenceFeatureExtractor): + feature_extractor = preprocessor + break + if feature_extractor is None: + raise ValueError( + "Could not automatically find a feature extractor for the IPEXModel, you must pass a " + "feature_extractor explictly" + ) + model_id = None else: raise ValueError( f"""Model {model} is not 
supported. Please provide a valid model either as string or IPEXModel. You can also provide non model then a default one will be used""" ) - return model + return model, model_id, tokenizer, feature_extractor MAPPING_LOADING_FUNC = { @@ -125,7 +162,7 @@ def load_ipex_model( if is_torch_available(): - import torch + pass if TYPE_CHECKING: @@ -139,11 +176,10 @@ def load_ipex_model( def pipeline( task: str = None, model: Optional[Union[str, "PreTrainedModel"]] = None, - config: Optional[Union[str, PretrainedConfig]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, + feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, accelerator: Optional[str] = "ipex", use_fast: bool = True, - device: Optional[Union[int, str, "torch.device"]] = None, torch_dtype=None, model_kwargs: Dict[str, Any] = None, **kwargs, @@ -168,13 +204,6 @@ def pipeline( actual instance of a pretrained model inheriting from [`PreTrainedModel`] (for PyTorch). If not provided, the default for the `task` will be loaded. - config (`str` or [`PretrainedConfig`], *optional*): - The configuration that will be used by the pipeline to instantiate the model. This can be a model - identifier or an actual pretrained model configuration inheriting from [`PretrainedConfig`]. - - If not provided, the default configuration file for the requested model will be used. That means that if - `model` is given, its default configuration will be used. However, if `model` is not supplied, this - `task`'s default model's config is used instead. tokenizer (`str` or [`PreTrainedTokenizer`], *optional*): The tokenizer that will be used by the pipeline to encode data for the model. This can be a model identifier or an actual pretrained tokenizer inheriting from [`PreTrainedTokenizer`]. @@ -185,37 +214,11 @@ def pipeline( will be loaded. accelerator (`str`, *optional*, defaults to `"ipex"`): The optimization backends, choose from ["ipex", "inc", "openvino"]. - revision (`str`, *optional*, defaults to `"main"`): - When passing a task name or a string model identifier: The specific model version to use. It can be a - branch name, a tag name, or a commit id, since we use a git-based system for storing models and other - artifacts on huggingface.co, so `revision` can be any identifier allowed by git. use_fast (`bool`, *optional*, defaults to `True`): Whether or not to use a Fast tokenizer if possible (a [`PreTrainedTokenizerFast`]). - use_auth_token (`str` or *bool*, *optional*): - The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated - when running `huggingface-cli login` (stored in `~/.huggingface`). - device (`int` or `str` or `torch.device`): - Defines the device (*e.g.*, `"cpu"`, `"cuda:1"`, `"mps"`, or a GPU ordinal rank like `1`) on which this - pipeline will be allocated. - device_map (`str` or `Dict[str, Union[int, str, torch.device]`, *optional*): - Sent directly as `model_kwargs` (just a simpler shortcut). When `accelerate` library is present, set - `device_map="auto"` to compute the most optimized `device_map` automatically (see - [here](https://huggingface.co/docs/accelerate/main/en/package_reference/big_modeling#accelerate.cpu_offload) - for more information). 
- - - - Do not use `device_map` AND `device` at the same time as they will conflict - - - torch_dtype (`str` or `torch.dtype`, *optional*): Sent directly as `model_kwargs` (just a simpler shortcut) to use the available precision for this model (`torch.float16`, `torch.bfloat16`, ... or `"auto"`). - trust_remote_code (`bool`, *optional*, defaults to `False`): - Whether or not to allow for custom code defined on the Hub in their own modeling, configuration, - tokenization or even pipeline files. This option should only be set to `True` for repositories you trust - and in which you have read the code, as it will execute code present on the Hub on your local machine. model_kwargs (`Dict[str, Any]`, *optional*): Additional dictionary of keyword arguments passed along to the model's `from_pretrained(..., **model_kwargs)` function. @@ -261,6 +264,23 @@ def pipeline( f"Task {task} is not supported for the ONNX Runtime pipeline. Supported tasks are { list(IPEX_SUPPORTED_TASKS.keys())}" ) + supported_tasks = IPEX_SUPPORTED_TASKS if accelerator == "ipex" else None + + no_feature_extractor_tasks = set() + no_tokenizer_tasks = set() + for _task, values in supported_tasks.items(): + if values["type"] == "text": + no_feature_extractor_tasks.add(_task) + elif values["type"] in {"image", "video"}: + no_tokenizer_tasks.add(_task) + elif values["type"] in {"audio"}: + no_tokenizer_tasks.add(_task) + elif values["type"] not in ["multimodal", "audio", "video"]: + raise ValueError(f"SUPPORTED_TASK {_task} contains invalid type {values['type']}") + + load_tokenizer = False if task in no_tokenizer_tasks else True + load_feature_extractor = False if task in no_feature_extractor_tasks else True + if isinstance(model, Path): model = str(model) @@ -272,51 +292,27 @@ def pipeline( ) model_kwargs["torch_dtype"] = torch_dtype - model_name = model if isinstance(model, str) else None - # Load the correct model if possible # Infer the framework from the model if not already defined - model = MAPPING_LOADING_FUNC[accelerator](model, task, IPEX_SUPPORTED_TASKS, model_kwargs) - - model_config = model.config - load_tokenizer = type(model_config) in TOKENIZER_MAPPING or model_config.tokenizer_class is not None - - if load_tokenizer: - # Try to infer tokenizer from model or config name (if provided as str) - if tokenizer is None: - if isinstance(model_name, str): - tokenizer = model_name - elif isinstance(config, str): - tokenizer = config - else: - # Impossible to guess what is the right tokenizer here - raise Exception( - "Impossible to guess which tokenizer to use. " - "Please provide a PreTrainedTokenizer class or a path/identifier to a pretrained tokenizer." 
- ) + model, model_id, tokenizer, feature_extractor = MAPPING_LOADING_FUNC[accelerator]( + model, + task, + load_tokenizer, + tokenizer, + load_feature_extractor, + feature_extractor, + IPEX_SUPPORTED_TASKS, + model_kwargs, + ) - # Instantiate tokenizer if needed - if isinstance(tokenizer, (str, tuple)): - if isinstance(tokenizer, tuple): - # For tuple we have (tokenizer name, {kwargs}) - use_fast = tokenizer[1].pop("use_fast", use_fast) - tokenizer_identifier = tokenizer[0] - tokenizer_kwargs = tokenizer[1] - else: - tokenizer_identifier = tokenizer - tokenizer_kwargs = model_kwargs.copy() - tokenizer_kwargs.pop("torch_dtype", None) - - tokenizer = AutoTokenizer.from_pretrained( - tokenizer_identifier, use_fast=use_fast, _from_pipeline=task, **tokenizer_kwargs - ) + if tokenizer is None and load_tokenizer: + tokenizer = get_preprocessor(model_id) + if feature_extractor is None and load_feature_extractor: + feature_extractor = get_preprocessor(model_id) if torch_dtype is not None: kwargs["torch_dtype"] = torch_dtype - if device is not None: - kwargs["device"] = device - return transformers_pipeline( task, model=model, diff --git a/tests/pipelines/test_pipelines.py b/tests/ipex/test_pipelines.py similarity index 93% rename from tests/pipelines/test_pipelines.py rename to tests/ipex/test_pipelines.py index 3ca7da9406..04b24eca7f 100644 --- a/tests/pipelines/test_pipelines.py +++ b/tests/ipex/test_pipelines.py @@ -41,7 +41,7 @@ class PipelinesIntegrationTest(unittest.TestCase): @parameterized.expand(TEXT_GENERATION_SUPPORTED_ARCHITECTURES) def test_text_generation_pipeline_inference(self, model_arch): model_id = MODEL_NAMES[model_arch] - inputs = "DeepSpeed is a machine learning framework for deep neural networks and deep reinforcement learning. It is written in C++ and is available for Linux, Mac OS X," + inputs = "Describe a real-world application of AI." transformers_text_generator = transformers_pipeline("text-generation", model_id) ipex_text_generator = ipex_pipeline("text-generation", model_id, accelerator="ipex") with torch.inference_mode(): From f294f746ca7680b5464c904c56b325f068ae68d2 Mon Sep 17 00:00:00 2001 From: jiqing-feng Date: Sun, 7 Apr 2024 11:16:46 -0400 Subject: [PATCH 11/30] add all supported tasks testing --- optimum/intel/pipelines/pipeline_base.py | 73 ++----- tests/ipex/test_pipelines.py | 236 +++++++++++++++++++++-- 2 files changed, 245 insertions(+), 64 deletions(-) diff --git a/optimum/intel/pipelines/pipeline_base.py b/optimum/intel/pipelines/pipeline_base.py index bdc3ea07d0..e9f38f58fe 100644 --- a/optimum/intel/pipelines/pipeline_base.py +++ b/optimum/intel/pipelines/pipeline_base.py @@ -11,13 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
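(A hedged sketch of what the reworked loading path enables for tasks that rely on a feature extractor rather than a tokenizer; the checkpoint is the tiny test model used in the test suite below and stands in for any audio-classification model.)

```python
# Audio classification through the IPEX pipeline: the factory loads the model,
# detects that the task needs a feature extractor, and fetches it automatically.
import numpy as np

from optimum.intel.pipelines import pipeline

classifier = pipeline(
    "audio-classification",
    "anton-l/wav2vec2-random-tiny-classifier",  # tiny test checkpoint, illustrative only
    accelerator="ipex",
)
print(classifier([np.random.random(16000)]))  # one second of fake 16 kHz audio
```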
+ from pathlib import Path from typing import TYPE_CHECKING, Any, Dict, Optional, Union -from transformers import SequenceFeatureExtractor +from transformers import AutoConfig, AutoFeatureExtractor, AutoTokenizer from transformers import pipeline as transformers_pipeline from transformers.feature_extraction_utils import PreTrainedFeatureExtractor -from transformers.onnx.utils import get_preprocessor from transformers.pipelines import ( AudioClassificationPipeline, FillMaskPipeline, @@ -35,8 +35,6 @@ logging, ) -from optimum.utils.file_utils import find_files_matching_pattern - if is_ipex_available(): from ..ipex.modeling_base import ( @@ -99,13 +97,11 @@ def load_ipex_model( model, targeted_task, - load_tokenizer, - tokenizer, - load_feature_extractor, - feature_extractor, SUPPORTED_TASKS, model_kwargs: Optional[Dict[str, Any]] = None, + **kwargs, ): + export = kwargs.pop("export", True) if model_kwargs is None: model_kwargs = {} @@ -116,44 +112,25 @@ def load_ipex_model( model = ipex_model_class.from_pretrained(model_id, export=True) elif isinstance(model, str): model_id = model - ipex_file = find_files_matching_pattern( - model, - ".+?.pt", - glob_pattern="**/*.pt", - subfolder=model_kwargs.pop("subfolder", None), - use_auth_token=model_kwargs.pop("token", None), - revision=model_kwargs.pop("revision", "main"), - ) - export = len(ipex_file) == 0 + try: + config = AutoConfig.from_pretrained(model) + torchscript = getattr(config, "torchscript", None) + export = False if torchscript else export + except RuntimeError: + logger.warning( + "config file not found, please pass `export` to decide whether we should export this model. `export` defaullt to True" + ) + model = ipex_model_class.from_pretrained(model, export=export, **model_kwargs) elif isinstance(model, IPEXModel): - if tokenizer is None and load_tokenizer: - for preprocessor in model.preprocessors: - if isinstance(preprocessor, (PreTrainedTokenizer, PreTrainedTokenizerFast)): - tokenizer = preprocessor - break - if tokenizer is None: - raise ValueError( - "Could not automatically find a tokenizer for the IPEXModel, you must pass a tokenizer explictly" - ) - if feature_extractor is None and load_feature_extractor: - for preprocessor in model.preprocessors: - if isinstance(preprocessor, SequenceFeatureExtractor): - feature_extractor = preprocessor - break - if feature_extractor is None: - raise ValueError( - "Could not automatically find a feature extractor for the IPEXModel, you must pass a " - "feature_extractor explictly" - ) model_id = None else: raise ValueError( - f"""Model {model} is not supported. Please provide a valid model either as string or IPEXModel. + f"""Model {model} is not supported. Please provide a valid model name or path or a IPEXModel. 
You can also provide non model then a default one will be used""" ) - return model, model_id, tokenizer, feature_extractor + return model, model_id MAPPING_LOADING_FUNC = { @@ -294,21 +271,12 @@ def pipeline( # Load the correct model if possible # Infer the framework from the model if not already defined - model, model_id, tokenizer, feature_extractor = MAPPING_LOADING_FUNC[accelerator]( - model, - task, - load_tokenizer, - tokenizer, - load_feature_extractor, - feature_extractor, - IPEX_SUPPORTED_TASKS, - model_kwargs, - ) + model, model_id = MAPPING_LOADING_FUNC[accelerator](model, task, supported_tasks, model_kwargs, **kwargs) - if tokenizer is None and load_tokenizer: - tokenizer = get_preprocessor(model_id) - if feature_extractor is None and load_feature_extractor: - feature_extractor = get_preprocessor(model_id) + if load_tokenizer and model_id and tokenizer is None: + tokenizer = AutoTokenizer.from_pretrained(model_id) + if load_feature_extractor and model_id and feature_extractor is None: + feature_extractor = AutoFeatureExtractor.from_pretrained(model_id) if torch_dtype is not None: kwargs["torch_dtype"] = torch_dtype @@ -317,6 +285,7 @@ def pipeline( task, model=model, tokenizer=tokenizer, + feature_extractor=feature_extractor, use_fast=use_fast, **kwargs, ) diff --git a/tests/ipex/test_pipelines.py b/tests/ipex/test_pipelines.py index 04b24eca7f..89a27ab2c8 100644 --- a/tests/ipex/test_pipelines.py +++ b/tests/ipex/test_pipelines.py @@ -13,41 +13,253 @@ # limitations under the License. import unittest +from tempfile import TemporaryDirectory +import numpy as np import torch from parameterized import parameterized +from transformers import AutoTokenizer from transformers.pipelines import pipeline as transformers_pipeline -from optimum.intel.ipex.modeling_base import IPEXModelForCausalLM +from optimum.intel.ipex.modeling_base import ( + IPEXModelForAudioClassification, + IPEXModelForCausalLM, + IPEXModelForImageClassification, + IPEXModelForMaskedLM, + IPEXModelForQuestionAnswering, + IPEXModelForSequenceClassification, + IPEXModelForTokenClassification, +) from optimum.intel.pipelines import pipeline as ipex_pipeline MODEL_NAMES = { + "albert": "hf-internal-testing/tiny-random-albert", + "beit": "hf-internal-testing/tiny-random-BeitForImageClassification", "bert": "hf-internal-testing/tiny-random-bert", + "bart": "hf-internal-testing/tiny-random-bart", + "blenderbot-small": "hf-internal-testing/tiny-random-BlenderbotModel", + "blenderbot": "hf-internal-testing/tiny-random-BlenderbotModel", + "bloom": "hf-internal-testing/tiny-random-BloomModel", + "convbert": "hf-internal-testing/tiny-random-ConvBertForSequenceClassification", + "codegen": "hf-internal-testing/tiny-random-CodeGenForCausalLM", + "convnext": "hf-internal-testing/tiny-random-convnext", "distilbert": "hf-internal-testing/tiny-random-distilbert", - "roberta": "hf-internal-testing/tiny-random-roberta", - "bloom": "hf-internal-testing/tiny-random-bloom", - "gptj": "hf-internal-testing/tiny-random-gptj", + "electra": "hf-internal-testing/tiny-random-electra", + "flaubert": "hf-internal-testing/tiny-random-flaubert", + "gpt_bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel", "gpt2": "hf-internal-testing/tiny-random-gpt2", "gpt_neo": "hf-internal-testing/tiny-random-GPTNeoModel", "gpt_neox": "hf-internal-testing/tiny-random-GPTNeoXForCausalLM", - "gpt_bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel", + "gptj": "hf-internal-testing/tiny-random-GPTJModel", + "levit": 
"hf-internal-testing/tiny-random-LevitModel", + "llama": "fxmarty/tiny-llama-fast-tokenizer", + "llama2": "Jiqing/tiny_random_llama2", + "marian": "sshleifer/tiny-marian-en-de", + "mbart": "hf-internal-testing/tiny-random-mbart", + "mistral": "echarlaix/tiny-random-mistral", + "mobilenet_v1": "google/mobilenet_v1_0.75_192", + "mobilenet_v2": "hf-internal-testing/tiny-random-MobileNetV2Model", + "mobilevit": "hf-internal-testing/tiny-random-mobilevit", + "mpt": "hf-internal-testing/tiny-random-MptForCausalLM", + "mt5": "stas/mt5-tiny-random", + "opt": "hf-internal-testing/tiny-random-OPTModel", + "phi": "echarlaix/tiny-random-PhiForCausalLM", + "resnet": "hf-internal-testing/tiny-random-resnet", + "roberta": "hf-internal-testing/tiny-random-roberta", + "roformer": "hf-internal-testing/tiny-random-roformer", + "squeezebert": "hf-internal-testing/tiny-random-squeezebert", + "t5": "hf-internal-testing/tiny-random-t5", + "unispeech": "hf-internal-testing/tiny-random-unispeech", + "vit": "hf-internal-testing/tiny-random-vit", + "wav2vec2": "anton-l/wav2vec2-random-tiny-classifier", + "xlm": "hf-internal-testing/tiny-random-xlm", } class PipelinesIntegrationTest(unittest.TestCase): - TEXT_GENERATION_SUPPORTED_ARCHITECTURES = ("bloom", "gptj", "gpt2", "gpt_neo") + COMMON_SUPPORTED_ARCHITECTURES = ( + "albert", + "bert", + "distilbert", + "electra", + "flaubert", + "roberta", + "roformer", + "squeezebert", + "xlm", + ) + TEXT_GENERATION_SUPPORTED_ARCHITECTURES = ( + "bart", + "gpt_bigcode", + "blenderbot", + "blenderbot-small", + "bloom", + "codegen", + "gpt2", + "gpt_neo", + "gpt_neox", + "llama", + "llama2", + "mistral", + "mpt", + "opt", + ) + QUESTION_ANSWERING_SUPPORTED_ARCHITECTURES = ( + "bert", + "distilbert", + "roberta", + ) + AUDIO_CLASSIFICATION_SUPPORTED_ARCHITECTURES = ( + "unispeech", + "wav2vec2", + ) + IMAGE_CLASSIFICATION_SUPPORTED_ARCHITECTURES = ( + "beit", + "mobilenet_v1", + "mobilenet_v2", + "mobilevit", + "resnet", + "vit", + ) + + @parameterized.expand(COMMON_SUPPORTED_ARCHITECTURES) + def test_token_classification_pipeline_inference(self, model_arch): + model_id = MODEL_NAMES[model_arch] + transformers_generator = transformers_pipeline("token-classification", model_id) + ipex_generator = ipex_pipeline("token-classification", model_id, accelerator="ipex") + inputs = "Hello I'm Omar and I live in Zürich." 
+ with torch.inference_mode(): + transformers_output = transformers_generator(inputs) + with torch.inference_mode(): + ipex_output = ipex_generator(inputs) + self.assertEqual(len(transformers_output), len(ipex_output)) + self.assertTrue(isinstance(ipex_generator.model, IPEXModelForTokenClassification)) + self.assertTrue(isinstance(ipex_generator.model.model, torch.jit.RecursiveScriptModule)) + for i in range(len(transformers_output)): + self.assertAlmostEqual(transformers_output[i]["score"], ipex_output[i]["score"], delta=1e-4) + + @parameterized.expand(COMMON_SUPPORTED_ARCHITECTURES) + def test_sequence_classification_pipeline_inference(self, model_arch): + model_id = MODEL_NAMES[model_arch] + transformers_generator = transformers_pipeline("text-classification", model_id) + ipex_generator = ipex_pipeline("text-classification", model_id, accelerator="ipex") + inputs = "This restaurant is awesome" + with torch.inference_mode(): + transformers_output = transformers_generator(inputs) + with torch.inference_mode(): + ipex_output = ipex_generator(inputs) + self.assertTrue(isinstance(ipex_generator.model, IPEXModelForSequenceClassification)) + self.assertTrue(isinstance(ipex_generator.model.model, torch.jit.RecursiveScriptModule)) + self.assertEqual(transformers_output[0]["label"], ipex_output[0]["label"]) + self.assertAlmostEqual(transformers_output[0]["score"], ipex_output[0]["score"], delta=1e-4) + + @parameterized.expand(COMMON_SUPPORTED_ARCHITECTURES) + def test_fill_mask_pipeline_inference(self, model_arch): + model_id = MODEL_NAMES[model_arch] + inputs = "The Milky Way is a galaxy." + transformers_generator = transformers_pipeline("fill-mask", model_id) + ipex_generator = ipex_pipeline("fill-mask", model_id, accelerator="ipex") + mask_token = transformers_generator.tokenizer.mask_token + inputs = inputs.replace("", mask_token) + with torch.inference_mode(): + transformers_output = transformers_generator(inputs) + with torch.inference_mode(): + ipex_output = ipex_generator(inputs) + self.assertEqual(len(transformers_output), len(ipex_output)) + self.assertTrue(isinstance(ipex_generator.model, IPEXModelForMaskedLM)) + self.assertTrue(isinstance(ipex_generator.model.model, torch.jit.RecursiveScriptModule)) + for i in range(len(transformers_output)): + self.assertEqual(transformers_output[i]["token"], ipex_output[i]["token"]) + self.assertAlmostEqual(transformers_output[i]["score"], ipex_output[i]["score"], delta=1e-4) @parameterized.expand(TEXT_GENERATION_SUPPORTED_ARCHITECTURES) def test_text_generation_pipeline_inference(self, model_arch): model_id = MODEL_NAMES[model_arch] + transformers_generator = transformers_pipeline("text-generation", model_id) + ipex_generator = ipex_pipeline("text-generation", model_id, accelerator="ipex") inputs = "Describe a real-world application of AI." 
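(A usage-level sketch: generation arguments are forwarded to the underlying transformers text-generation pipeline, so the usual decoding knobs apply; the values below are illustrative.)

```python
from optimum.intel.pipelines import pipeline

generator = pipeline("text-generation", "hf-internal-testing/tiny-random-gpt2", accelerator="ipex")
print(generator("Describe a real-world application of AI.", max_new_tokens=32, do_sample=False))
```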
- transformers_text_generator = transformers_pipeline("text-generation", model_id) - ipex_text_generator = ipex_pipeline("text-generation", model_id, accelerator="ipex") with torch.inference_mode(): - transformers_output = transformers_text_generator(inputs) + transformers_output = transformers_generator(inputs) with torch.inference_mode(): - ipex_output = ipex_text_generator(inputs) - self.assertTrue(isinstance(ipex_text_generator.model, IPEXModelForCausalLM)) - self.assertTrue(isinstance(ipex_text_generator.model.model, torch.jit.RecursiveScriptModule)) + ipex_output = ipex_generator(inputs) + self.assertTrue(isinstance(ipex_generator.model, IPEXModelForCausalLM)) + self.assertTrue(isinstance(ipex_generator.model.model, torch.jit.RecursiveScriptModule)) self.assertEqual(transformers_output[0]["generated_text"], ipex_output[0]["generated_text"]) + + @parameterized.expand(QUESTION_ANSWERING_SUPPORTED_ARCHITECTURES) + def test_question_answering_pipeline_inference(self, model_arch): + model_id = MODEL_NAMES[model_arch] + transformers_generator = transformers_pipeline("question-answering", model_id) + ipex_generator = ipex_pipeline("question-answering", model_id, accelerator="ipex") + question = "How many programming languages does BLOOM support?" + context = "BLOOM has 176 billion parameters and can generate text in 46 languages natural languages and 13 programming languages." + with torch.inference_mode(): + transformers_output = transformers_generator(question=question, context=context) + with torch.inference_mode(): + ipex_output = ipex_generator(question=question, context=context) + self.assertTrue(isinstance(ipex_generator.model, IPEXModelForQuestionAnswering)) + self.assertTrue(isinstance(ipex_generator.model.model, torch.jit.RecursiveScriptModule)) + self.assertAlmostEqual(transformers_output["score"], ipex_output["score"], delta=1e-4) + self.assertEqual(transformers_output["start"], ipex_output["start"]) + self.assertEqual(transformers_output["end"], ipex_output["end"]) + + @parameterized.expand(AUDIO_CLASSIFICATION_SUPPORTED_ARCHITECTURES) + def test_audio_classification_pipeline_inference(self, model_arch): + model_id = MODEL_NAMES[model_arch] + transformers_generator = transformers_pipeline("audio-classification", model_id) + ipex_generator = ipex_pipeline("audio-classification", model_id, accelerator="ipex") + inputs = [np.random.random(16000)] + with torch.inference_mode(): + transformers_output = transformers_generator(inputs) + with torch.inference_mode(): + ipex_output = ipex_generator(inputs) + self.assertTrue(isinstance(ipex_generator.model, IPEXModelForAudioClassification)) + self.assertTrue(isinstance(ipex_generator.model.model, torch.jit.RecursiveScriptModule)) + self.assertAlmostEqual(transformers_output[0][0]["score"], ipex_output[0][0]["score"], delta=1e-2) + self.assertAlmostEqual(transformers_output[0][1]["score"], ipex_output[0][1]["score"], delta=1e-2) + + @parameterized.expand(IMAGE_CLASSIFICATION_SUPPORTED_ARCHITECTURES) + def test_image_classification_pipeline_inference(self, model_arch): + model_id = MODEL_NAMES[model_arch] + transformers_generator = transformers_pipeline("image-classification", model_id) + ipex_generator = ipex_pipeline("image-classification", model_id, accelerator="ipex") + inputs = "http://images.cocodataset.org/val2017/000000039769.jpg" + with torch.inference_mode(): + transformers_output = transformers_generator(inputs) + with torch.inference_mode(): + ipex_output = ipex_generator(inputs) + self.assertEqual(len(transformers_output), 
len(ipex_output)) + self.assertTrue(isinstance(ipex_generator.model, IPEXModelForImageClassification)) + self.assertTrue(isinstance(ipex_generator.model.model, torch.jit.RecursiveScriptModule)) + for i in range(len(transformers_output)): + self.assertEqual(transformers_output[i]["label"], ipex_output[i]["label"]) + self.assertAlmostEqual(transformers_output[i]["score"], ipex_output[i]["score"], delta=1e-4) + + @parameterized.expand(COMMON_SUPPORTED_ARCHITECTURES) + def test_pipeline_load_from_ipex_model(self, model_arch): + model_id = MODEL_NAMES[model_arch] + model = IPEXModelForSequenceClassification.from_pretrained(model_id, export=True) + tokenizer = AutoTokenizer.from_pretrained(model_id) + ipex_generator = ipex_pipeline("text-classification", model, tokenizer=tokenizer, accelerator="ipex") + inputs = "This restaurant is awesome" + with torch.inference_mode(): + ipex_output = ipex_generator(inputs) + self.assertTrue(isinstance(ipex_generator.model, IPEXModelForSequenceClassification)) + self.assertTrue(isinstance(ipex_generator.model.model, torch.jit.RecursiveScriptModule)) + self.assertGreaterEqual(ipex_output[0]["score"], 0.0) + + @parameterized.expand(COMMON_SUPPORTED_ARCHITECTURES) + def test_pipeline_load_from_jit_model(self, model_arch): + model_id = MODEL_NAMES[model_arch] + model = IPEXModelForSequenceClassification.from_pretrained(model_id, export=True) + save_dir = TemporaryDirectory().name + model.save_pretrained(save_dir) + tokenizer = AutoTokenizer.from_pretrained(model_id) + ipex_generator = ipex_pipeline("text-classification", save_dir, tokenizer=tokenizer, accelerator="ipex") + inputs = "This restaurant is awesome" + with torch.inference_mode(): + ipex_output = ipex_generator(inputs) + self.assertTrue(isinstance(ipex_generator.model, IPEXModelForSequenceClassification)) + self.assertTrue(isinstance(ipex_generator.model.model, torch.jit.RecursiveScriptModule)) + self.assertGreaterEqual(ipex_output[0]["score"], 0.0) From 7510036e30ded0f7b01701e2087ad24cf388beed Mon Sep 17 00:00:00 2001 From: jiqing-feng Date: Mon, 15 Apr 2024 09:44:47 -0400 Subject: [PATCH 12/30] add hub_kwargs and model_kwargs on tokenizer and feature_extractor --- optimum/intel/ipex/modeling_base.py | 2 -- optimum/intel/pipelines/pipeline_base.py | 15 ++++++++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/optimum/intel/ipex/modeling_base.py b/optimum/intel/ipex/modeling_base.py index dfe48f5d4e..0664a8e6ac 100644 --- a/optimum/intel/ipex/modeling_base.py +++ b/optimum/intel/ipex/modeling_base.py @@ -88,8 +88,6 @@ def ipex_jit_trace(model, task, use_cache): sample_inputs = prepare_jit_inputs(model, task, use_cache) model.config.return_dict = False - if "past_key_values" in sample_inputs.keys(): - model.config.use_cache = use_cache model = ipex.optimize(model.eval(), dtype=model.dtype, inplace=True) # Disable repack while jit tracing to reduce the memory diff --git a/optimum/intel/pipelines/pipeline_base.py b/optimum/intel/pipelines/pipeline_base.py index e9f38f58fe..d6dd9e2132 100644 --- a/optimum/intel/pipelines/pipeline_base.py +++ b/optimum/intel/pipelines/pipeline_base.py @@ -109,7 +109,7 @@ def load_ipex_model( if model is None: model_id = SUPPORTED_TASKS[targeted_task]["default"] - model = ipex_model_class.from_pretrained(model_id, export=True) + model = ipex_model_class.from_pretrained(model_id, export=True, **model_kwargs) elif isinstance(model, str): model_id = model try: @@ -258,6 +258,15 @@ def pipeline( load_tokenizer = False if task in no_tokenizer_tasks else True 
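(A hedged sketch of how the hub-related options introduced here surface to callers, using the parameter names from the final signature in this series; the revision value is illustrative.)

```python
from optimum.intel.pipelines import pipeline

classifier = pipeline(
    "text-classification",
    "hf-internal-testing/tiny-random-bert",
    accelerator="ipex",
    revision="main",          # forwarded to the model, tokenizer and feature extractor loaders
    trust_remote_code=False,  # likewise collected into `hub_kwargs`
)
print(classifier("This restaurant is awesome"))
```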
load_feature_extractor = False if task in no_feature_extractor_tasks else True + commit_hash = kwargs.pop("_commit_hash", None) + + hub_kwargs = { + "revision": kwargs.pop("revision", None), + "token": kwargs.pop("use_auth_token", None), + "trust_remote_code": kwargs.pop("trust_remote_code", None), + "_commit_hash": commit_hash, + } + if isinstance(model, Path): model = str(model) @@ -274,9 +283,9 @@ def pipeline( model, model_id = MAPPING_LOADING_FUNC[accelerator](model, task, supported_tasks, model_kwargs, **kwargs) if load_tokenizer and model_id and tokenizer is None: - tokenizer = AutoTokenizer.from_pretrained(model_id) + tokenizer = AutoTokenizer.from_pretrained(model_id, **hub_kwargs, **model_kwargs) if load_feature_extractor and model_id and feature_extractor is None: - feature_extractor = AutoFeatureExtractor.from_pretrained(model_id) + feature_extractor = AutoFeatureExtractor.from_pretrained(model_id, **hub_kwargs, **model_kwargs) if torch_dtype is not None: kwargs["torch_dtype"] = torch_dtype From 9e8ce0edbf36cc00c19f6b4cc5e51936d4853de5 Mon Sep 17 00:00:00 2001 From: jiqing-feng Date: Thu, 25 Apr 2024 05:31:48 -0400 Subject: [PATCH 13/30] add hub_kwargs and default pipeline tests --- optimum/exporters/openvino/model_patcher.py | 6 +++--- optimum/intel/pipelines/pipeline_base.py | 9 ++++++--- tests/ipex/test_pipelines.py | 15 +++++++++++++++ 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py index 3649c163c6..96df156cc6 100644 --- a/optimum/exporters/openvino/model_patcher.py +++ b/optimum/exporters/openvino/model_patcher.py @@ -327,9 +327,9 @@ def _llama_gemma_update_causal_mask(self, attention_mask, input_tensor, cache_po offset = 0 mask_shape = attention_mask.shape mask_slice = (attention_mask.eq(0.0)).to(dtype=dtype) * min_dtype - causal_mask[ - : mask_shape[0], : mask_shape[1], offset : mask_shape[2] + offset, : mask_shape[3] - ] = mask_slice + causal_mask[: mask_shape[0], : mask_shape[1], offset : mask_shape[2] + offset, : mask_shape[3]] = ( + mask_slice + ) if ( self.config._attn_implementation == "sdpa" diff --git a/optimum/intel/pipelines/pipeline_base.py b/optimum/intel/pipelines/pipeline_base.py index d6dd9e2132..5350e1e65d 100644 --- a/optimum/intel/pipelines/pipeline_base.py +++ b/optimum/intel/pipelines/pipeline_base.py @@ -99,6 +99,7 @@ def load_ipex_model( targeted_task, SUPPORTED_TASKS, model_kwargs: Optional[Dict[str, Any]] = None, + hub_kwargs: Optional[Dict[str, Any]] = None, **kwargs, ): export = kwargs.pop("export", True) @@ -109,7 +110,7 @@ def load_ipex_model( if model is None: model_id = SUPPORTED_TASKS[targeted_task]["default"] - model = ipex_model_class.from_pretrained(model_id, export=True, **model_kwargs) + model = ipex_model_class.from_pretrained(model_id, export=True, **model_kwargs, **hub_kwargs) elif isinstance(model, str): model_id = model try: @@ -121,7 +122,7 @@ def load_ipex_model( "config file not found, please pass `export` to decide whether we should export this model. 
`export` defaullt to True" ) - model = ipex_model_class.from_pretrained(model, export=export, **model_kwargs) + model = ipex_model_class.from_pretrained(model, export=export, **model_kwargs, **hub_kwargs) elif isinstance(model, IPEXModel): model_id = None else: @@ -280,7 +281,9 @@ def pipeline( # Load the correct model if possible # Infer the framework from the model if not already defined - model, model_id = MAPPING_LOADING_FUNC[accelerator](model, task, supported_tasks, model_kwargs, **kwargs) + model, model_id = MAPPING_LOADING_FUNC[accelerator]( + model, task, supported_tasks, model_kwargs, hub_kwargs, **kwargs + ) if load_tokenizer and model_id and tokenizer is None: tokenizer = AutoTokenizer.from_pretrained(model_id, **hub_kwargs, **model_kwargs) diff --git a/tests/ipex/test_pipelines.py b/tests/ipex/test_pipelines.py index 89a27ab2c8..ac1c31ef6c 100644 --- a/tests/ipex/test_pipelines.py +++ b/tests/ipex/test_pipelines.py @@ -22,6 +22,7 @@ from transformers.pipelines import pipeline as transformers_pipeline from optimum.intel.ipex.modeling_base import ( + IPEXModel, IPEXModelForAudioClassification, IPEXModelForCausalLM, IPEXModelForImageClassification, @@ -122,6 +123,15 @@ class PipelinesIntegrationTest(unittest.TestCase): "resnet", "vit", ) + SUPPORT_TASKS = ( + "text-generation", + "fill-mask", + "question-answering", + "image-classification", + "text-classification", + "token-classification", + "audio-classification", + ) @parameterized.expand(COMMON_SUPPORTED_ARCHITECTURES) def test_token_classification_pipeline_inference(self, model_arch): @@ -263,3 +273,8 @@ def test_pipeline_load_from_jit_model(self, model_arch): self.assertTrue(isinstance(ipex_generator.model, IPEXModelForSequenceClassification)) self.assertTrue(isinstance(ipex_generator.model.model, torch.jit.RecursiveScriptModule)) self.assertGreaterEqual(ipex_output[0]["score"], 0.0) + + @parameterized.expand(SUPPORT_TASKS) + def test_pipeline_with_default_model(self, task): + ipex_generator = ipex_pipeline(task, accelerator="ipex") + self.assertTrue(isinstance(ipex_generator.model, IPEXModel)) From 5013fe7df1dea102efbec140149e6dfdc355ff1d Mon Sep 17 00:00:00 2001 From: jiqing-feng Date: Sun, 28 Apr 2024 11:09:36 -0400 Subject: [PATCH 14/30] fix _from_transformers args --- optimum/intel/ipex/modeling_base.py | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/optimum/intel/ipex/modeling_base.py b/optimum/intel/ipex/modeling_base.py index 3961c1f3af..8fd5dacba3 100644 --- a/optimum/intel/ipex/modeling_base.py +++ b/optimum/intel/ipex/modeling_base.py @@ -151,35 +151,17 @@ def _from_transformers( model_id: str, config: PretrainedConfig, use_cache: bool = True, - use_auth_token: Optional[Union[bool, str]] = None, - revision: Optional[str] = None, - force_download: bool = False, - cache_dir: str = HUGGINGFACE_HUB_CACHE, - subfolder: str = "", - local_files_only: bool = False, - torch_dtype: Optional[Union[str, "torch.dtype"]] = None, - trust_remote_code: bool = False, + **model_kwargs, ): if is_torch_version("<", "2.1.0"): raise ImportError("`torch>=2.0.0` is needed to trace your model") task = cls.export_feature - model_kwargs = { - "revision": revision, - "use_auth_token": use_auth_token, - "cache_dir": cache_dir, - "subfolder": subfolder, - "local_files_only": local_files_only, - "force_download": force_download, - "torch_dtype": torch_dtype, - "trust_remote_code": trust_remote_code, - } - model = TasksManager.get_model_from_task(task, model_id, **model_kwargs) traced_model = 
ipex_jit_trace(model, task, use_cache) config.torchscript = True - config.torch_dtype = torch_dtype + config.torch_dtype = model_kwargs.get("torch_dtype", None) return cls(traced_model, config=config, model_save_dir=model_id, use_cache=use_cache, warmup=False) From a39112fd49ef70e28b1ca42c952c3c89a6c5e1ef Mon Sep 17 00:00:00 2001 From: jiqing-feng Date: Mon, 29 Apr 2024 11:30:44 -0400 Subject: [PATCH 15/30] rm default pipeline test --- tests/ipex/test_pipelines.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/tests/ipex/test_pipelines.py b/tests/ipex/test_pipelines.py index ac1c31ef6c..585219f00b 100644 --- a/tests/ipex/test_pipelines.py +++ b/tests/ipex/test_pipelines.py @@ -123,15 +123,6 @@ class PipelinesIntegrationTest(unittest.TestCase): "resnet", "vit", ) - SUPPORT_TASKS = ( - "text-generation", - "fill-mask", - "question-answering", - "image-classification", - "text-classification", - "token-classification", - "audio-classification", - ) @parameterized.expand(COMMON_SUPPORTED_ARCHITECTURES) def test_token_classification_pipeline_inference(self, model_arch): @@ -273,8 +264,3 @@ def test_pipeline_load_from_jit_model(self, model_arch): self.assertTrue(isinstance(ipex_generator.model, IPEXModelForSequenceClassification)) self.assertTrue(isinstance(ipex_generator.model.model, torch.jit.RecursiveScriptModule)) self.assertGreaterEqual(ipex_output[0]["score"], 0.0) - - @parameterized.expand(SUPPORT_TASKS) - def test_pipeline_with_default_model(self, task): - ipex_generator = ipex_pipeline(task, accelerator="ipex") - self.assertTrue(isinstance(ipex_generator.model, IPEXModel)) From f401b55d323bf893281989b673262335bb6be0b4 Mon Sep 17 00:00:00 2001 From: jiqing-feng <107918818+jiqing-feng@users.noreply.github.com> Date: Mon, 6 May 2024 08:53:15 +0800 Subject: [PATCH 16/30] Update optimum/intel/pipelines/pipeline_base.py Co-authored-by: Ella Charlaix <80481427+echarlaix@users.noreply.github.com> --- optimum/intel/pipelines/pipeline_base.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/optimum/intel/pipelines/pipeline_base.py b/optimum/intel/pipelines/pipeline_base.py index 5350e1e65d..5ebcfd72d6 100644 --- a/optimum/intel/pipelines/pipeline_base.py +++ b/optimum/intel/pipelines/pipeline_base.py @@ -139,9 +139,6 @@ def load_ipex_model( } -if is_torch_available(): - pass - if TYPE_CHECKING: from transformers.modeling_utils import PreTrainedModel From e784dd2ac65110318157630adb126f3082a3fb8f Mon Sep 17 00:00:00 2001 From: jiqing-feng <107918818+jiqing-feng@users.noreply.github.com> Date: Mon, 6 May 2024 08:53:33 +0800 Subject: [PATCH 17/30] Update optimum/intel/pipelines/pipeline_base.py Co-authored-by: Ella Charlaix <80481427+echarlaix@users.noreply.github.com> --- optimum/intel/pipelines/pipeline_base.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/optimum/intel/pipelines/pipeline_base.py b/optimum/intel/pipelines/pipeline_base.py index 5ebcfd72d6..4498a11b3d 100644 --- a/optimum/intel/pipelines/pipeline_base.py +++ b/optimum/intel/pipelines/pipeline_base.py @@ -98,8 +98,10 @@ def load_ipex_model( model, targeted_task, SUPPORTED_TASKS, + subfolder: str = "", + token: Optional[Union[bool, str]] = None, + revision: str = "main", model_kwargs: Optional[Dict[str, Any]] = None, - hub_kwargs: Optional[Dict[str, Any]] = None, **kwargs, ): export = kwargs.pop("export", True) From 6fb886398d3ccfb994b4485f8e0a20c033361424 Mon Sep 17 00:00:00 2001 From: jiqing-feng <107918818+jiqing-feng@users.noreply.github.com> Date: Mon, 6 May 2024 08:53:43 +0800 Subject: 
[PATCH 18/30] Update optimum/intel/pipelines/pipeline_base.py Co-authored-by: Ella Charlaix <80481427+echarlaix@users.noreply.github.com> --- optimum/intel/pipelines/pipeline_base.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/optimum/intel/pipelines/pipeline_base.py b/optimum/intel/pipelines/pipeline_base.py index 4498a11b3d..426137d9b5 100644 --- a/optimum/intel/pipelines/pipeline_base.py +++ b/optimum/intel/pipelines/pipeline_base.py @@ -155,10 +155,12 @@ def pipeline( model: Optional[Union[str, "PreTrainedModel"]] = None, tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None, feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None, - accelerator: Optional[str] = "ipex", use_fast: bool = True, - torch_dtype=None, - model_kwargs: Dict[str, Any] = None, + token: Optional[Union[str, bool]] = None, + accelerator: Optional[str] = "ort", + revision: Optional[str] = None, + trust_remote_code: Optional[bool] = None, + *model_kwargs, **kwargs, ) -> Pipeline: """ From 79ae3d95eea11094637b83852c4a7d61f398a1dc Mon Sep 17 00:00:00 2001 From: jiqing-feng <107918818+jiqing-feng@users.noreply.github.com> Date: Mon, 6 May 2024 09:21:28 +0800 Subject: [PATCH 19/30] Update optimum/intel/pipelines/pipeline_base.py Co-authored-by: Ella Charlaix <80481427+echarlaix@users.noreply.github.com> --- optimum/intel/pipelines/pipeline_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/optimum/intel/pipelines/pipeline_base.py b/optimum/intel/pipelines/pipeline_base.py index 426137d9b5..6c78800a96 100644 --- a/optimum/intel/pipelines/pipeline_base.py +++ b/optimum/intel/pipelines/pipeline_base.py @@ -257,8 +257,8 @@ def pipeline( elif values["type"] not in ["multimodal", "audio", "video"]: raise ValueError(f"SUPPORTED_TASK {_task} contains invalid type {values['type']}") - load_tokenizer = False if task in no_tokenizer_tasks else True - load_feature_extractor = False if task in no_feature_extractor_tasks else True + load_tokenizer = task not in no_tokenizer_tasks + load_feature_extractor = task not in no_feature_extractor_tasks commit_hash = kwargs.pop("_commit_hash", None) From cfbcf9f0b01d0b4d5357552e40568ff311c661a6 Mon Sep 17 00:00:00 2001 From: jiqing-feng <107918818+jiqing-feng@users.noreply.github.com> Date: Mon, 6 May 2024 09:21:49 +0800 Subject: [PATCH 20/30] Update optimum/intel/pipelines/pipeline_base.py Co-authored-by: Ella Charlaix <80481427+echarlaix@users.noreply.github.com> --- optimum/intel/pipelines/pipeline_base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/optimum/intel/pipelines/pipeline_base.py b/optimum/intel/pipelines/pipeline_base.py index 6c78800a96..e545149fd2 100644 --- a/optimum/intel/pipelines/pipeline_base.py +++ b/optimum/intel/pipelines/pipeline_base.py @@ -291,8 +291,6 @@ def pipeline( if load_feature_extractor and model_id and feature_extractor is None: feature_extractor = AutoFeatureExtractor.from_pretrained(model_id, **hub_kwargs, **model_kwargs) - if torch_dtype is not None: - kwargs["torch_dtype"] = torch_dtype return transformers_pipeline( task, @@ -300,5 +298,6 @@ def pipeline( tokenizer=tokenizer, feature_extractor=feature_extractor, use_fast=use_fast, + torch_dtype=torch_dtype, **kwargs, ) From 3760e1eade71172a8e3864ffed2e669e4c88284e Mon Sep 17 00:00:00 2001 From: jiqing-feng <107918818+jiqing-feng@users.noreply.github.com> Date: Mon, 6 May 2024 09:23:05 +0800 Subject: [PATCH 21/30] Update optimum/intel/pipelines/pipeline_base.py 
Co-authored-by: Ella Charlaix <80481427+echarlaix@users.noreply.github.com> --- optimum/intel/pipelines/pipeline_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/optimum/intel/pipelines/pipeline_base.py b/optimum/intel/pipelines/pipeline_base.py index e545149fd2..f99af4e9bf 100644 --- a/optimum/intel/pipelines/pipeline_base.py +++ b/optimum/intel/pipelines/pipeline_base.py @@ -286,9 +286,9 @@ def pipeline( model, task, supported_tasks, model_kwargs, hub_kwargs, **kwargs ) - if load_tokenizer and model_id and tokenizer is None: + if load_tokenizer and tokenizer is None: tokenizer = AutoTokenizer.from_pretrained(model_id, **hub_kwargs, **model_kwargs) - if load_feature_extractor and model_id and feature_extractor is None: + if load_feature_extractor and feature_extractor is None: feature_extractor = AutoFeatureExtractor.from_pretrained(model_id, **hub_kwargs, **model_kwargs) From 6d4726bfb61bc851fd21f53f22c66f73197cac64 Mon Sep 17 00:00:00 2001 From: jiqing-feng <107918818+jiqing-feng@users.noreply.github.com> Date: Mon, 6 May 2024 09:28:16 +0800 Subject: [PATCH 22/30] Update optimum/intel/pipelines/pipeline_base.py Co-authored-by: Ella Charlaix <80481427+echarlaix@users.noreply.github.com> --- optimum/intel/pipelines/pipeline_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum/intel/pipelines/pipeline_base.py b/optimum/intel/pipelines/pipeline_base.py index f99af4e9bf..f6b09216f6 100644 --- a/optimum/intel/pipelines/pipeline_base.py +++ b/optimum/intel/pipelines/pipeline_base.py @@ -240,7 +240,7 @@ def pipeline( if accelerator == "ipex": if task not in list(IPEX_SUPPORTED_TASKS.keys()): raise ValueError( - f"Task {task} is not supported for the ONNX Runtime pipeline. Supported tasks are { list(IPEX_SUPPORTED_TASKS.keys())}" + f"Task {task} is not supported for the IPEX pipeline. Supported tasks are { list(IPEX_SUPPORTED_TASKS.keys())}" ) supported_tasks = IPEX_SUPPORTED_TASKS if accelerator == "ipex" else None From 4effaa4d10ea346abca2ebabb1eac376cd1e10b0 Mon Sep 17 00:00:00 2001 From: jiqing-feng <107918818+jiqing-feng@users.noreply.github.com> Date: Mon, 6 May 2024 09:28:23 +0800 Subject: [PATCH 23/30] Update optimum/intel/pipelines/pipeline_base.py Co-authored-by: Ella Charlaix <80481427+echarlaix@users.noreply.github.com> --- optimum/intel/pipelines/pipeline_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum/intel/pipelines/pipeline_base.py b/optimum/intel/pipelines/pipeline_base.py index f6b09216f6..49545d8dd1 100644 --- a/optimum/intel/pipelines/pipeline_base.py +++ b/optimum/intel/pipelines/pipeline_base.py @@ -235,7 +235,7 @@ def pipeline( ) if accelerator not in MAPPING_LOADING_FUNC: - raise ValueError(f'Accelerator {accelerator} is not supported. Supported accelerator is "ipex".') + raise ValueError(f'Accelerator {accelerator} is not supported. 
Supported accelerator is {", ".join(MAPPING_LOADING_FUNC)}.') if accelerator == "ipex": if task not in list(IPEX_SUPPORTED_TASKS.keys()): From bf2ae084ddc5df123c91d4184e662aabcf76928e Mon Sep 17 00:00:00 2001 From: jiqing-feng Date: Mon, 6 May 2024 06:13:40 -0400 Subject: [PATCH 24/30] fix comments --- optimum/intel/ipex/modeling_base.py | 2 ++ optimum/intel/pipelines/pipeline_base.py | 45 +++++++++--------------- tests/ipex/test_pipelines.py | 1 - 3 files changed, 18 insertions(+), 30 deletions(-) diff --git a/optimum/intel/ipex/modeling_base.py b/optimum/intel/ipex/modeling_base.py index 0b688ab4b1..73aa7e8881 100644 --- a/optimum/intel/ipex/modeling_base.py +++ b/optimum/intel/ipex/modeling_base.py @@ -161,6 +161,7 @@ def _from_transformers( local_files_only: bool = False, torch_dtype: Optional[Union[str, "torch.dtype"]] = None, trust_remote_code: bool = False, + _commit_hash: str = None, ): if use_auth_token is not None: warnings.warn( @@ -186,6 +187,7 @@ def _from_transformers( "force_download": force_download, "torch_dtype": torch_dtype, "trust_remote_code": trust_remote_code, + "_commit_hash": _commit_hash, } model = TasksManager.get_model_from_task(task, model_id, **model_kwargs) diff --git a/optimum/intel/pipelines/pipeline_base.py b/optimum/intel/pipelines/pipeline_base.py index 49545d8dd1..d7c2d0d7a7 100644 --- a/optimum/intel/pipelines/pipeline_base.py +++ b/optimum/intel/pipelines/pipeline_base.py @@ -15,6 +15,7 @@ from pathlib import Path from typing import TYPE_CHECKING, Any, Dict, Optional, Union +import torch from transformers import AutoConfig, AutoFeatureExtractor, AutoTokenizer from transformers import pipeline as transformers_pipeline from transformers.feature_extraction_utils import PreTrainedFeatureExtractor @@ -31,7 +32,6 @@ from transformers.tokenization_utils import PreTrainedTokenizer from transformers.utils import ( is_ipex_available, - is_torch_available, logging, ) @@ -98,13 +98,9 @@ def load_ipex_model( model, targeted_task, SUPPORTED_TASKS, - subfolder: str = "", - token: Optional[Union[bool, str]] = None, - revision: str = "main", model_kwargs: Optional[Dict[str, Any]] = None, - **kwargs, + hub_kwargs: Optional[Dict[str, Any]] = None, ): - export = kwargs.pop("export", True) if model_kwargs is None: model_kwargs = {} @@ -118,15 +114,13 @@ def load_ipex_model( try: config = AutoConfig.from_pretrained(model) torchscript = getattr(config, "torchscript", None) - export = False if torchscript else export + export = False if torchscript else True except RuntimeError: - logger.warning( - "config file not found, please pass `export` to decide whether we should export this model. `export` defaullt to True" - ) - + logger.warning("We will use IPEXModel with export=True to export the model") + export = True model = ipex_model_class.from_pretrained(model, export=export, **model_kwargs, **hub_kwargs) elif isinstance(model, IPEXModel): - model_id = None + model_id = getattr(model.config, "name_or_path", None) else: raise ValueError( f"""Model {model} is not supported. Please provide a valid model name or path or a IPEXModel. 
@@ -141,7 +135,6 @@ def load_ipex_model( } - if TYPE_CHECKING: from transformers.modeling_utils import PreTrainedModel from transformers.tokenization_utils_fast import PreTrainedTokenizerFast @@ -160,8 +153,9 @@ def pipeline( accelerator: Optional[str] = "ort", revision: Optional[str] = None, trust_remote_code: Optional[bool] = None, - *model_kwargs, - **kwargs, + torch_dtype: Optional[Union[str, torch.dtype]] = None, + commit_hash: Optional[str] = None, + **model_kwargs, ) -> Pipeline: """ Utility factory method to build a [`Pipeline`]. @@ -201,9 +195,6 @@ def pipeline( model_kwargs (`Dict[str, Any]`, *optional*): Additional dictionary of keyword arguments passed along to the model's `from_pretrained(..., **model_kwargs)` function. - kwargs (`Dict[str, Any]`, *optional*): - Additional keyword arguments passed along to the specific pipeline init (see the documentation for the - corresponding pipeline class for possible values). Returns: [`Pipeline`]: A suitable pipeline for the task. @@ -235,7 +226,9 @@ def pipeline( ) if accelerator not in MAPPING_LOADING_FUNC: - raise ValueError(f'Accelerator {accelerator} is not supported. Supported accelerator is {", ".join(MAPPING_LOADING_FUNC)}.') + raise ValueError( + f'Accelerator {accelerator} is not supported. Supported accelerator is {", ".join(MAPPING_LOADING_FUNC)}.' + ) if accelerator == "ipex": if task not in list(IPEX_SUPPORTED_TASKS.keys()): @@ -260,12 +253,10 @@ def pipeline( load_tokenizer = task not in no_tokenizer_tasks load_feature_extractor = task not in no_feature_extractor_tasks - commit_hash = kwargs.pop("_commit_hash", None) - hub_kwargs = { - "revision": kwargs.pop("revision", None), - "token": kwargs.pop("use_auth_token", None), - "trust_remote_code": kwargs.pop("trust_remote_code", None), + "revision": revision, + "token": token, + "trust_remote_code": trust_remote_code, "_commit_hash": commit_hash, } @@ -282,16 +273,13 @@ def pipeline( # Load the correct model if possible # Infer the framework from the model if not already defined - model, model_id = MAPPING_LOADING_FUNC[accelerator]( - model, task, supported_tasks, model_kwargs, hub_kwargs, **kwargs - ) + model, model_id = MAPPING_LOADING_FUNC[accelerator](model, task, supported_tasks, model_kwargs, hub_kwargs) if load_tokenizer and tokenizer is None: tokenizer = AutoTokenizer.from_pretrained(model_id, **hub_kwargs, **model_kwargs) if load_feature_extractor and feature_extractor is None: feature_extractor = AutoFeatureExtractor.from_pretrained(model_id, **hub_kwargs, **model_kwargs) - return transformers_pipeline( task, model=model, @@ -299,5 +287,4 @@ def pipeline( feature_extractor=feature_extractor, use_fast=use_fast, torch_dtype=torch_dtype, - **kwargs, ) diff --git a/tests/ipex/test_pipelines.py b/tests/ipex/test_pipelines.py index 585219f00b..89a27ab2c8 100644 --- a/tests/ipex/test_pipelines.py +++ b/tests/ipex/test_pipelines.py @@ -22,7 +22,6 @@ from transformers.pipelines import pipeline as transformers_pipeline from optimum.intel.ipex.modeling_base import ( - IPEXModel, IPEXModelForAudioClassification, IPEXModelForCausalLM, IPEXModelForImageClassification, From 184a6106b81af2fc297750ccf7a0f2141ad81e3a Mon Sep 17 00:00:00 2001 From: Ella Charlaix <80481427+echarlaix@users.noreply.github.com> Date: Tue, 14 May 2024 16:18:43 +0200 Subject: [PATCH 25/30] Update optimum/exporters/openvino/model_patcher.py --- optimum/exporters/openvino/model_patcher.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/optimum/exporters/openvino/model_patcher.py 
index 7498a28d2e..f68e873d40 100644
--- a/optimum/exporters/openvino/model_patcher.py
+++ b/optimum/exporters/openvino/model_patcher.py
@@ -341,9 +341,9 @@ def _llama_gemma_update_causal_mask(self, attention_mask, input_tensor, cache_po
             offset = 0
         mask_shape = attention_mask.shape
         mask_slice = (attention_mask.eq(0.0)).to(dtype=dtype) * min_dtype
-        causal_mask[: mask_shape[0], : mask_shape[1], offset : mask_shape[2] + offset, : mask_shape[3]] = (
-            mask_slice
-        )
+        causal_mask[
+            : mask_shape[0], : mask_shape[1], offset : mask_shape[2] + offset, : mask_shape[3]
+        ] = mask_slice
 
     if (
         self.config._attn_implementation == "sdpa"

From abe8704a9a6c51a881ad8747914e863989169ae2 Mon Sep 17 00:00:00 2001
From: jiqing-feng <107918818+jiqing-feng@users.noreply.github.com>
Date: Wed, 15 May 2024 14:04:28 +0800
Subject: [PATCH 26/30] Update optimum/intel/ipex/modeling_base.py

Co-authored-by: Ella Charlaix <80481427+echarlaix@users.noreply.github.com>
---
 optimum/intel/ipex/modeling_base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/optimum/intel/ipex/modeling_base.py b/optimum/intel/ipex/modeling_base.py
index 73aa7e8881..d2963d55a1 100644
--- a/optimum/intel/ipex/modeling_base.py
+++ b/optimum/intel/ipex/modeling_base.py
@@ -194,7 +194,7 @@ def _from_transformers(
         traced_model = ipex_jit_trace(model, task, use_cache)
 
         config.torchscript = True
-        config.torch_dtype = model_kwargs.get("torch_dtype", None)
+        config.torch_dtype = torch_dtype
 
         return cls(traced_model, config=config, model_save_dir=model_id, use_cache=use_cache, warmup=False)
 

From aa4d4e6f8aee521aa78f9b7091bfb5bb6f66033f Mon Sep 17 00:00:00 2001
From: jiqing-feng <107918818+jiqing-feng@users.noreply.github.com>
Date: Wed, 15 May 2024 14:04:37 +0800
Subject: [PATCH 27/30] Update optimum/intel/pipelines/pipeline_base.py

Co-authored-by: Ella Charlaix <80481427+echarlaix@users.noreply.github.com>
---
 optimum/intel/pipelines/pipeline_base.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/optimum/intel/pipelines/pipeline_base.py b/optimum/intel/pipelines/pipeline_base.py
index d7c2d0d7a7..10e8e50ab4 100644
--- a/optimum/intel/pipelines/pipeline_base.py
+++ b/optimum/intel/pipelines/pipeline_base.py
@@ -113,8 +113,7 @@ def load_ipex_model(
         model_id = model
         try:
             config = AutoConfig.from_pretrained(model)
-            torchscript = getattr(config, "torchscript", None)
-            export = False if torchscript else True
+            export = not getattr(config, "torchscript", False)
         except RuntimeError:
             logger.warning("We will use IPEXModel with export=True to export the model")
             export = True

From ea756b0b110e1c102ec1485964e740a337cd69f4 Mon Sep 17 00:00:00 2001
From: jiqing-feng <107918818+jiqing-feng@users.noreply.github.com>
Date: Wed, 15 May 2024 14:04:46 +0800
Subject: [PATCH 28/30] Update optimum/intel/pipelines/pipeline_base.py

Co-authored-by: Ella Charlaix <80481427+echarlaix@users.noreply.github.com>
---
 optimum/intel/pipelines/pipeline_base.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/optimum/intel/pipelines/pipeline_base.py b/optimum/intel/pipelines/pipeline_base.py
index 10e8e50ab4..9abe37b4bd 100644
--- a/optimum/intel/pipelines/pipeline_base.py
+++ b/optimum/intel/pipelines/pipeline_base.py
@@ -92,7 +92,8 @@
             "type": "audio",
         },
     }
-
+else:
+    IPEX_SUPPORTED_TASKS = {}
 
 def load_ipex_model(
     model,

From 7f92191f6e4c94dfaa3cc0e8c6cf62b2bcb642b3 Mon Sep 17 00:00:00 2001
From: jiqing-feng <107918818+jiqing-feng@users.noreply.github.com>
Date: Wed, 15 May 2024 14:05:11 +0800
Subject: [PATCH 29/30] Update optimum/intel/pipelines/pipeline_base.py

Co-authored-by: Ella Charlaix <80481427+echarlaix@users.noreply.github.com>
---
 optimum/intel/pipelines/pipeline_base.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/optimum/intel/pipelines/pipeline_base.py b/optimum/intel/pipelines/pipeline_base.py
index 9abe37b4bd..7000b7ed8e 100644
--- a/optimum/intel/pipelines/pipeline_base.py
+++ b/optimum/intel/pipelines/pipeline_base.py
@@ -30,10 +30,8 @@
 )
 from transformers.pipelines.base import Pipeline
 from transformers.tokenization_utils import PreTrainedTokenizer
-from transformers.utils import (
-    is_ipex_available,
-    logging,
-)
+from transformers.utils import logging
+from optimum.intel.utils import is_ipex_available
 
 
 if is_ipex_available():

From 30aec8a23b0ad66bfb3f49e2224df50bfff709ad Mon Sep 17 00:00:00 2001
From: jiqing-feng
Date: Wed, 15 May 2024 10:08:32 -0400
Subject: [PATCH 30/30] fix style

---
 optimum/intel/pipelines/pipeline_base.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/optimum/intel/pipelines/pipeline_base.py b/optimum/intel/pipelines/pipeline_base.py
index 7000b7ed8e..65e6cfb782 100644
--- a/optimum/intel/pipelines/pipeline_base.py
+++ b/optimum/intel/pipelines/pipeline_base.py
@@ -31,6 +31,7 @@
 from transformers.pipelines.base import Pipeline
 from transformers.tokenization_utils import PreTrainedTokenizer
 from transformers.utils import logging
+
 from optimum.intel.utils import is_ipex_available
 
 
@@ -93,6 +94,7 @@
 else:
     IPEX_SUPPORTED_TASKS = {}
 
+
 def load_ipex_model(
     model,
     targeted_task,
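
As a quick illustration of the API this patch series converges on (not part of the patches themselves), the snippet below sketches how the optimum-intel pipeline factory could be called with the IPEX accelerator. It assumes `pipeline` is re-exported from `optimum.intel.pipelines` and that "text-generation" is registered in IPEX_SUPPORTED_TASKS; the model name and generation arguments are placeholders chosen for the example.

import torch

from optimum.intel.pipelines import pipeline  # assumed re-export of pipeline_base.pipeline

# "ipex" dispatches to load_ipex_model(), which exports the checkpoint through
# IPEXModel when the loaded config does not already carry a torchscript flag.
pipe = pipeline(
    "text-generation",
    model="gpt2",                # placeholder checkpoint for illustration
    accelerator="ipex",
    torch_dtype=torch.bfloat16,  # forwarded to the underlying from_pretrained call
)

print(pipe("Intel Extension for PyTorch makes", max_new_tokens=16)[0]["generated_text"])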