
Pipeline #501

Merged · 37 commits · May 15, 2024
Commits
1b89624
define optimum-intel pipeline
jiqing-feng Jan 8, 2024
2bf2122
add tests and readme
jiqing-feng Jan 8, 2024
db10723
fix pipelines example
jiqing-feng Jan 8, 2024
24f26db
fix readme codestyle
jiqing-feng Jan 9, 2024
8394d41
Merge branch 'huggingface:main' into pipeline
jiqing-feng Jan 9, 2024
39b7804
add _load_model in pipeline
jiqing-feng Jan 9, 2024
b0f21e9
Merge branch 'huggingface:main' into pipeline
jiqing-feng Mar 28, 2024
d37ff18
update pipeline for optimum intel
jiqing-feng Apr 2, 2024
6882417
update tests
jiqing-feng Apr 2, 2024
64c546c
remove readme
jiqing-feng Apr 2, 2024
4d69d40
Merge branch 'huggingface:main' into pipeline
jiqing-feng Apr 2, 2024
29ad8b2
Update optimum/intel/pipelines/__init__.py
jiqing-feng Apr 3, 2024
b5392c1
fix pipelines
jiqing-feng Apr 7, 2024
f294f74
add all supported tasks testing
jiqing-feng Apr 7, 2024
7510036
add hub_kwargs and model_kwargs on tokenizer and feature_extractor
jiqing-feng Apr 15, 2024
faba83f
Merge branch 'huggingface:main' into pipeline
jiqing-feng Apr 15, 2024
9e8ce0e
add hub_kwargs and default pipeline tests
jiqing-feng Apr 25, 2024
6056612
Merge branch 'huggingface:main' into pipeline
jiqing-feng Apr 28, 2024
5013fe7
fix _from_transformers args
jiqing-feng Apr 28, 2024
a39112f
rm default pipeline test
jiqing-feng Apr 29, 2024
f401b55
Update optimum/intel/pipelines/pipeline_base.py
jiqing-feng May 6, 2024
e784dd2
Update optimum/intel/pipelines/pipeline_base.py
jiqing-feng May 6, 2024
6fb8863
Update optimum/intel/pipelines/pipeline_base.py
jiqing-feng May 6, 2024
79ae3d9
Update optimum/intel/pipelines/pipeline_base.py
jiqing-feng May 6, 2024
cfbcf9f
Update optimum/intel/pipelines/pipeline_base.py
jiqing-feng May 6, 2024
3760e1e
Update optimum/intel/pipelines/pipeline_base.py
jiqing-feng May 6, 2024
112a9c2
Merge branch 'main' into pipeline
jiqing-feng May 6, 2024
6d4726b
Update optimum/intel/pipelines/pipeline_base.py
jiqing-feng May 6, 2024
4effaa4
Update optimum/intel/pipelines/pipeline_base.py
jiqing-feng May 6, 2024
bf2ae08
fix comments
jiqing-feng May 6, 2024
184a610
Update optimum/exporters/openvino/model_patcher.py
echarlaix May 14, 2024
abe8704
Update optimum/intel/ipex/modeling_base.py
jiqing-feng May 15, 2024
aa4d4e6
Update optimum/intel/pipelines/pipeline_base.py
jiqing-feng May 15, 2024
ea756b0
Update optimum/intel/pipelines/pipeline_base.py
jiqing-feng May 15, 2024
7f92191
Update optimum/intel/pipelines/pipeline_base.py
jiqing-feng May 15, 2024
332e863
Merge branch 'huggingface:main' into pipeline
jiqing-feng May 15, 2024
30aec8a
fix style
jiqing-feng May 15, 2024
6 changes: 3 additions & 3 deletions optimum/exporters/openvino/model_patcher.py
@@ -339,9 +339,9 @@ def _llama_gemma_update_causal_mask(self, attention_mask, input_tensor, cache_po
offset = 0
mask_shape = attention_mask.shape
mask_slice = (attention_mask.eq(0.0)).to(dtype=dtype) * min_dtype
causal_mask[
: mask_shape[0], : mask_shape[1], offset : mask_shape[2] + offset, : mask_shape[3]
] = mask_slice
causal_mask[: mask_shape[0], : mask_shape[1], offset : mask_shape[2] + offset, : mask_shape[3]] = (
mask_slice
)

if (
self.config._attn_implementation == "sdpa"
4 changes: 4 additions & 0 deletions optimum/intel/ipex/inference.py
@@ -97,6 +97,10 @@ def __init__(
jit (`boolean = False`, *optional*):
Enable jit to accelerate inference speed
"""
logger.warning(
"`inference_mode` is deprecated and will be removed in v1.18.0. Use `pipeline` to load and export your model to TorchScript instead."
)

if not is_ipex_available():
raise ImportError(IPEX_NOT_AVAILABLE_ERROR_MSG)

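The deprecation warning above points users at the new `pipeline` factory added by this PR. A minimal sketch of the suggested replacement, assuming optimum-intel is installed with IPEX support (the checkpoint, dtype, and prompt are illustrative, not part of the diff):

```python
# Sketch of the replacement suggested by the deprecation warning: the factory
# loads the model through the matching IPEXModel class, which handles the
# TorchScript export that `inference_mode` used to wrap implicitly.
import torch

from optimum.intel.pipelines import pipeline

pipe = pipeline(
    "text-classification",
    "distilbert-base-uncased-finetuned-sst-2-english",
    torch_dtype=torch.bfloat16,  # forwarded to the model's from_pretrained via model_kwargs
)
print(pipe("IPEX acceleration without the deprecated inference_mode wrapper."))
```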
22 changes: 2 additions & 20 deletions optimum/intel/ipex/modeling_base.py
@@ -151,35 +151,17 @@ def _from_transformers(
model_id: str,
config: PretrainedConfig,
use_cache: bool = True,
use_auth_token: Optional[Union[bool, str]] = None,
revision: Optional[str] = None,
force_download: bool = False,
cache_dir: str = HUGGINGFACE_HUB_CACHE,
subfolder: str = "",
local_files_only: bool = False,
torch_dtype: Optional[Union[str, "torch.dtype"]] = None,
trust_remote_code: bool = False,
**model_kwargs,
):
if is_torch_version("<", "2.1.0"):
raise ImportError("`torch>=2.1.0` is needed to trace your model")

task = cls.export_feature
model_kwargs = {
"revision": revision,
"use_auth_token": use_auth_token,
"cache_dir": cache_dir,
"subfolder": subfolder,
"local_files_only": local_files_only,
"force_download": force_download,
"torch_dtype": torch_dtype,
"trust_remote_code": trust_remote_code,
}

model = TasksManager.get_model_from_task(task, model_id, **model_kwargs)
traced_model = ipex_jit_trace(model, task, use_cache)

config.torchscript = True
config.torch_dtype = torch_dtype
config.torch_dtype = model_kwargs.get("torch_dtype", None)

return cls(traced_model, config=config, model_save_dir=model_id, use_cache=use_cache, warmup=False)

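With the trimmed signature, any loading option the caller passes to `from_pretrained` now travels through `**model_kwargs` down to `TasksManager.get_model_from_task`, and `config.torch_dtype` is recovered from the same dict. A hedged sketch of that entry point (checkpoint and dtype are illustrative; IPEX and a recent torch are assumed to be installed):

```python
# Sketch: with export=True, from_pretrained routes through _from_transformers,
# which traces the eager model with ipex_jit_trace; torch_dtype reaches it via
# **model_kwargs rather than an explicit parameter.
import torch

from optimum.intel import IPEXModelForCausalLM

model = IPEXModelForCausalLM.from_pretrained(
    "gpt2",
    export=True,
    torch_dtype=torch.bfloat16,
)
```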
15 changes: 15 additions & 0 deletions optimum/intel/pipelines/__init__.py
@@ -0,0 +1,15 @@
# Copyright 2024 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .pipeline_base import pipeline
303 changes: 303 additions & 0 deletions optimum/intel/pipelines/pipeline_base.py
@@ -0,0 +1,303 @@
# Copyright 2024 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, Optional, Union

from transformers import AutoConfig, AutoFeatureExtractor, AutoTokenizer
from transformers import pipeline as transformers_pipeline
from transformers.feature_extraction_utils import PreTrainedFeatureExtractor
from transformers.pipelines import (
AudioClassificationPipeline,
FillMaskPipeline,
ImageClassificationPipeline,
QuestionAnsweringPipeline,
TextClassificationPipeline,
TextGenerationPipeline,
TokenClassificationPipeline,
)
from transformers.pipelines.base import Pipeline
from transformers.tokenization_utils import PreTrainedTokenizer
from transformers.utils import (
is_ipex_available,
is_torch_available,
logging,
)


if is_ipex_available():
from ..ipex.modeling_base import (
IPEXModel,
IPEXModelForAudioClassification,
IPEXModelForCausalLM,
IPEXModelForImageClassification,
IPEXModelForMaskedLM,
IPEXModelForQuestionAnswering,
IPEXModelForSequenceClassification,
IPEXModelForTokenClassification,
)

IPEX_SUPPORTED_TASKS = {
"text-generation": {
"impl": TextGenerationPipeline,
"class": (IPEXModelForCausalLM,),
"default": "gpt2",
"type": "text",
},
"fill-mask": {
"impl": FillMaskPipeline,
"class": (IPEXModelForMaskedLM,),
"default": "bert-base-cased",
"type": "text",
},
"question-answering": {
"impl": QuestionAnsweringPipeline,
"class": (IPEXModelForQuestionAnswering,),
"default": "distilbert-base-cased-distilled-squad",
"type": "text",
},
"image-classification": {
"impl": ImageClassificationPipeline,
"class": (IPEXModelForImageClassification,),
"default": "google/vit-base-patch16-224",
"type": "image",
},
"text-classification": {
"impl": TextClassificationPipeline,
"class": (IPEXModelForSequenceClassification,),
"default": "distilbert-base-uncased-finetuned-sst-2-english",
"type": "text",
},
"token-classification": {
"impl": TokenClassificationPipeline,
"class": (IPEXModelForTokenClassification,),
"default": "dbmdz/bert-large-cased-finetuned-conll03-english",
"type": "text",
},
"audio-classification": {
"impl": AudioClassificationPipeline,
"class": (IPEXModelForAudioClassification,),
"default": "superb/hubert-base-superb-ks",
"type": "audio",
},
}


def load_ipex_model(
model,
targeted_task,
SUPPORTED_TASKS,
model_kwargs: Optional[Dict[str, Any]] = None,
hub_kwargs: Optional[Dict[str, Any]] = None,
**kwargs,
):
export = kwargs.pop("export", True)
if model_kwargs is None:
model_kwargs = {}

ipex_model_class = SUPPORTED_TASKS[targeted_task]["class"][0]

if model is None:
model_id = SUPPORTED_TASKS[targeted_task]["default"]
model = ipex_model_class.from_pretrained(model_id, export=True, **model_kwargs, **hub_kwargs)
elif isinstance(model, str):
model_id = model
try:
config = AutoConfig.from_pretrained(model)
torchscript = getattr(config, "torchscript", None)
export = False if torchscript else export
except RuntimeError:
logger.warning(
"config file not found, please pass `export` to decide whether we should export this model. `export` defaullt to True"
)

model = ipex_model_class.from_pretrained(model, export=export, **model_kwargs, **hub_kwargs)
elif isinstance(model, IPEXModel):
model_id = None
else:
raise ValueError(
f"""Model {model} is not supported. Please provide a valid model name or path or a IPEXModel.
You can also provide non model then a default one will be used"""
)

return model, model_id


MAPPING_LOADING_FUNC = {
"ipex": load_ipex_model,
}


if is_torch_available():
pass


if TYPE_CHECKING:
from transformers.modeling_utils import PreTrainedModel
from transformers.tokenization_utils_fast import PreTrainedTokenizerFast


logger = logging.get_logger(__name__)


def pipeline(
task: str = None,
model: Optional[Union[str, "PreTrainedModel"]] = None,
tokenizer: Optional[Union[str, PreTrainedTokenizer, "PreTrainedTokenizerFast"]] = None,
feature_extractor: Optional[Union[str, PreTrainedFeatureExtractor]] = None,
accelerator: Optional[str] = "ipex",
use_fast: bool = True,
torch_dtype=None,
model_kwargs: Dict[str, Any] = None,
**kwargs,
) -> Pipeline:
"""
Utility factory method to build a [`Pipeline`].

Pipelines are made of:

- A [tokenizer](tokenizer) in charge of mapping raw textual input to token.
- A [model](model) to make predictions from the inputs.
- Some (optional) post processing for enhancing model's output.

Args:
task (`str`):
The task defining which pipeline will be returned. Currently accepted tasks are:

- `"text-generation"`: will return a [`TextGenerationPipeline`]:.

model (`str` or [`PreTrainedModel`], *optional*):
The model that will be used by the pipeline to make predictions. This can be a model identifier or an
actual instance of a pretrained model inheriting from [`PreTrainedModel`] (for PyTorch).

If not provided, the default for the `task` will be loaded.
tokenizer (`str` or [`PreTrainedTokenizer`], *optional*):
The tokenizer that will be used by the pipeline to encode data for the model. This can be a model
identifier or an actual pretrained tokenizer inheriting from [`PreTrainedTokenizer`].

If not provided, the default tokenizer for the given `model` will be loaded (if it is a string). If `model`
is not specified or not a string, then the default tokenizer for `config` is loaded (if it is a string).
However, if `config` is also not given or not a string, then the default tokenizer for the given `task`
will be loaded.
accelerator (`str`, *optional*, defaults to `"ipex"`):
The optimization backend to use, chosen from ["ipex", "inc", "openvino"].
use_fast (`bool`, *optional*, defaults to `True`):
Whether or not to use a Fast tokenizer if possible (a [`PreTrainedTokenizerFast`]).
torch_dtype (`str` or `torch.dtype`, *optional*):
Sent directly as `model_kwargs` (just a simpler shortcut) to use the available precision for this model
(`torch.float16`, `torch.bfloat16`, ... or `"auto"`).
model_kwargs (`Dict[str, Any]`, *optional*):
Additional dictionary of keyword arguments passed along to the model's `from_pretrained(...,
**model_kwargs)` function.
kwargs (`Dict[str, Any]`, *optional*):
Additional keyword arguments passed along to the specific pipeline init (see the documentation for the
corresponding pipeline class for possible values).

Returns:
[`Pipeline`]: A suitable pipeline for the task.

Examples:

```python
>>> import torch
>>> from optimum.intel.pipelines import pipeline

>>> pipe = pipeline('text-generation', 'gpt2', torch_dtype=torch.bfloat16)
>>> pipe("Describe a real-world application of AI in sustainable energy.")
```"""
if model_kwargs is None:
model_kwargs = {}

if task is None and model is None:
raise RuntimeError(
"Impossible to instantiate a pipeline without either a task or a model "
"being specified. "
"Please provide a task class or a model"
)

if model is None and tokenizer is not None:
raise RuntimeError(
"Impossible to instantiate a pipeline with tokenizer specified but not the model as the provided tokenizer"
" may not be compatible with the default model. Please provide a PreTrainedModel class or a"
" path/identifier to a pretrained model when providing tokenizer."
)

if accelerator not in MAPPING_LOADING_FUNC:
raise ValueError(f'Accelerator {accelerator} is not supported. Supported accelerator is "ipex".')

if accelerator == "ipex":
if task not in list(IPEX_SUPPORTED_TASKS.keys()):
raise ValueError(
f"Task {task} is not supported for the ONNX Runtime pipeline. Supported tasks are { list(IPEX_SUPPORTED_TASKS.keys())}"
)

supported_tasks = IPEX_SUPPORTED_TASKS if accelerator == "ipex" else None

no_feature_extractor_tasks = set()
no_tokenizer_tasks = set()
for _task, values in supported_tasks.items():
if values["type"] == "text":
no_feature_extractor_tasks.add(_task)
elif values["type"] in {"image", "video"}:
no_tokenizer_tasks.add(_task)
elif values["type"] in {"audio"}:
no_tokenizer_tasks.add(_task)
elif values["type"] not in ["multimodal", "audio", "video"]:
raise ValueError(f"SUPPORTED_TASK {_task} contains invalid type {values['type']}")

load_tokenizer = False if task in no_tokenizer_tasks else True
load_feature_extractor = False if task in no_feature_extractor_tasks else True

commit_hash = kwargs.pop("_commit_hash", None)

hub_kwargs = {
"revision": kwargs.pop("revision", None),
"token": kwargs.pop("use_auth_token", None),
"trust_remote_code": kwargs.pop("trust_remote_code", None),
"_commit_hash": commit_hash,
}

if isinstance(model, Path):
model = str(model)

if torch_dtype is not None:
if "torch_dtype" in model_kwargs:
raise ValueError(
'You cannot use both `pipeline(... torch_dtype=..., model_kwargs={"torch_dtype":...})` as those'
" arguments might conflict, use only one.)"
)
model_kwargs["torch_dtype"] = torch_dtype

# Load the correct model if possible
# Infer the framework from the model if not already defined
model, model_id = MAPPING_LOADING_FUNC[accelerator](
model, task, supported_tasks, model_kwargs, hub_kwargs, **kwargs
)

if load_tokenizer and model_id and tokenizer is None:
tokenizer = AutoTokenizer.from_pretrained(model_id, **hub_kwargs, **model_kwargs)
if load_feature_extractor and model_id and feature_extractor is None:
feature_extractor = AutoFeatureExtractor.from_pretrained(model_id, **hub_kwargs, **model_kwargs)

if torch_dtype is not None:
kwargs["torch_dtype"] = torch_dtype

return transformers_pipeline(
task,
model=model,
tokenizer=tokenizer,
feature_extractor=feature_extractor,
use_fast=use_fast,
**kwargs,
)
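Besides the model-name path shown in the docstring example, `load_ipex_model` also accepts an already instantiated `IPEXModel`. A hedged sketch of that branch (checkpoint illustrative; since `model_id` stays `None` in this case, the tokenizer has to be supplied explicitly):

```python
# Sketch of the IPEXModel branch of load_ipex_model: the instance is used as-is,
# no auto-export happens, and because model_id is None the factory cannot load a
# tokenizer on its own, so one is passed in.
from transformers import AutoTokenizer

from optimum.intel import IPEXModelForSequenceClassification
from optimum.intel.pipelines import pipeline

model_id = "distilbert-base-uncased-finetuned-sst-2-english"
model = IPEXModelForSequenceClassification.from_pretrained(model_id, export=True)
tokenizer = AutoTokenizer.from_pretrained(model_id)

pipe = pipeline("text-classification", model=model, tokenizer=tokenizer)
print(pipe("A pre-loaded IPEXModel skips the export logic inside the factory."))
```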