fix merge conflict #1

Merged 1 commit on Sep 10, 2024

Changes from all commits
1 change: 1 addition & 0 deletions notebooks/ipex/text_generation.ipynb
@@ -22,6 +22,7 @@
"source": [
"import torch\n",
"from transformers import AutoTokenizer\n",
"\n",
"from optimum.intel.ipex import IPEXModelForCausalLM"
]
},
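The hunk above, and most of the notebook hunks that follow, apply the same mechanical change: imports are regrouped and re-sorted so that third-party packages and the repository's own optimum.intel modules sit in separate blocks. This matches an isort/ruff-style layout that treats optimum as a first-party package; the formatter configuration itself is not part of this diff, so the sketch below only illustrates the resulting grouping.

import os  # standard library first

import torch  # third-party packages next, sorted alphabetically
from transformers import AutoTokenizer

from optimum.intel.ipex import IPEXModelForCausalLM  # first-party package last, after a blank line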
49 changes: 37 additions & 12 deletions notebooks/openvino/optimum_openvino_inference.ipynb
@@ -78,6 +78,7 @@
"source": [
"from optimum.intel import OVModelForQuestionAnswering\n",
"\n",
"\n",
"# Load PyTorch model from the Hub and export to OpenVINO in the background\n",
"model = OVModelForQuestionAnswering.from_pretrained(\"distilbert-base-uncased-distilled-squad\", export=True)\n",
"\n",
@@ -122,6 +123,7 @@
"source": [
"from transformers import AutoTokenizer\n",
"\n",
"\n",
"tokenizer = AutoTokenizer.from_pretrained(\"distilbert-base-uncased-distilled-squad\")\n",
"tokenizer.save_pretrained(\"distilbert-base-uncased-distilled-squad-ov-fp32\")"
]
@@ -182,9 +184,11 @@
}
],
"source": [
"from optimum.intel import OVModelForQuestionAnswering\n",
"from transformers import AutoTokenizer, pipeline\n",
"\n",
"from optimum.intel import OVModelForQuestionAnswering\n",
"\n",
"\n",
"model = OVModelForQuestionAnswering.from_pretrained(\"distilbert-base-uncased-distilled-squad-ov-fp32\")\n",
"tokenizer = AutoTokenizer.from_pretrained(\"distilbert-base-uncased-distilled-squad\")\n",
"ov_pipe = pipeline(\"question-answering\", model=model, tokenizer=tokenizer)\n",
@@ -240,9 +244,11 @@
],
"source": [
"import torch\n",
"from optimum.intel import OVModelForQuestionAnswering\n",
"from transformers import AutoTokenizer, pipeline\n",
"\n",
"from optimum.intel import OVModelForQuestionAnswering\n",
"\n",
"\n",
"model = OVModelForQuestionAnswering.from_pretrained(\"distilbert-base-uncased-distilled-squad-ov-fp32\")\n",
"tokenizer = AutoTokenizer.from_pretrained(\"distilbert-base-uncased-distilled-squad-ov-fp32\")\n",
"\n",
@@ -324,9 +330,11 @@
}
],
"source": [
"from optimum.intel import OVModelForQuestionAnswering\n",
"from transformers import AutoTokenizer, pipeline\n",
"\n",
"from optimum.intel import OVModelForQuestionAnswering\n",
"\n",
"\n",
"model = OVModelForQuestionAnswering.from_pretrained(\n",
" \"helenai/distilbert-base-uncased-distilled-squad-ov-fp32\", compile=False\n",
")\n",
@@ -411,6 +419,7 @@
"source": [
"from openvino.runtime import Core\n",
"\n",
"\n",
"for device in Core().available_devices:\n",
" print(device, Core().get_property(device, \"FULL_DEVICE_NAME\"))"
]
@@ -528,10 +537,12 @@
}
],
"source": [
"from datasets import load_dataset\n",
"from IPython.display import Audio\n",
"from optimum.intel import OVModelForAudioClassification\n",
"from transformers import AutoFeatureExtractor, pipeline\n",
"from datasets import load_dataset\n",
"\n",
"from optimum.intel import OVModelForAudioClassification\n",
"\n",
"\n",
"model_id = \"helenai/MIT-ast-finetuned-speech-commands-v2-ov\"\n",
"model = OVModelForAudioClassification.from_pretrained(model_id)\n",
@@ -638,9 +649,11 @@
}
],
"source": [
"from optimum.intel import OVModelForCausalLM\n",
"from transformers import AutoTokenizer, pipeline\n",
"\n",
"from optimum.intel import OVModelForCausalLM\n",
"\n",
"\n",
"model_id = \"helenai/gpt2-ov\"\n",
"tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
"model = OVModelForCausalLM.from_pretrained(model_id)\n",
@@ -704,9 +717,11 @@
],
"source": [
"from IPython.display import Image\n",
"from optimum.intel import OVModelForImageClassification\n",
"from transformers import AutoImageProcessor, pipeline\n",
"\n",
"from optimum.intel import OVModelForImageClassification\n",
"\n",
"\n",
"model_id = \"helenai/microsoft-swin-tiny-patch4-window7-224-ov\"\n",
"model = OVModelForImageClassification.from_pretrained(model_id, compile=False)\n",
"image_processor = AutoImageProcessor.from_pretrained(model_id)\n",
@@ -766,9 +781,11 @@
}
],
"source": [
"from optimum.intel import OVModelForMaskedLM\n",
"from transformers import AutoTokenizer, pipeline\n",
"\n",
"from optimum.intel import OVModelForMaskedLM\n",
"\n",
"\n",
"model_id = \"helenai/bert-base-uncased-ov\"\n",
"model = OVModelForMaskedLM.from_pretrained(model_id)\n",
"tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
@@ -835,9 +852,11 @@
}
],
"source": [
"from optimum.intel import OVModelForQuestionAnswering\n",
"from transformers import AutoTokenizer, pipeline\n",
"\n",
"from optimum.intel import OVModelForQuestionAnswering\n",
"\n",
"\n",
"# Load the model and tokenizer saved in Part 1 of this notebook. Or use the line below to load them from the hub\n",
"# model_id = \"helenai/distilbert-base-uncased-distilled-squad-ov-fp32\"\n",
"model_id = \"distilbert-base-uncased-distilled-squad-ov-fp32\"\n",
@@ -890,9 +909,11 @@
}
],
"source": [
"from optimum.intel import OVModelForSeq2SeqLM\n",
"from transformers import AutoTokenizer, pipeline\n",
"\n",
"from optimum.intel import OVModelForSeq2SeqLM\n",
"\n",
"\n",
"model_id = \"helenai/t5-small-ov\"\n",
"model = OVModelForSeq2SeqLM.from_pretrained(model_id, compile=False, trust_remote_code=True)\n",
"tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)\n",
@@ -998,9 +1019,11 @@
}
],
"source": [
"from optimum.intel import OVModelForSequenceClassification\n",
"from transformers import AutoTokenizer, pipeline\n",
"\n",
"from optimum.intel import OVModelForSequenceClassification\n",
"\n",
"\n",
"model_id = \"helenai/papluca-xlm-roberta-base-language-detection-ov\"\n",
"model = OVModelForSequenceClassification.from_pretrained(model_id)\n",
"tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
@@ -1047,9 +1070,11 @@
}
],
"source": [
"from optimum.intel import OVModelForTokenClassification\n",
"from transformers import AutoTokenizer, pipeline\n",
"\n",
"from optimum.intel import OVModelForTokenClassification\n",
"\n",
"\n",
"model_id = \"helenai/dslim-bert-base-NER-ov-fp32\"\n",
"tokenizer = AutoTokenizer.from_pretrained(model_id)\n",
"model = OVModelForTokenClassification.from_pretrained(model_id)\n",
27 changes: 16 additions & 11 deletions notebooks/openvino/quantized_generation_demo.ipynb
@@ -45,6 +45,7 @@
"import os\n",
"\n",
"from transformers import AutoTokenizer\n",
"\n",
"from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig"
]
},
@@ -211,6 +212,7 @@
"source": [
"from transformers import TextStreamer\n",
"\n",
"\n",
"# Tokenize the sample\n",
"inputs = tokenizer([sample], return_tensors='pt')\n",
"\n",
@@ -294,15 +296,15 @@
"\n",
"\n",
"# Tokenize the sample\n",
"inputs = tokenizer([sample], return_tensors='pt') \n",
"inputs = tokenizer([sample], return_tensors='pt')\n",
"\n",
"out = stateless_model.generate(\n",
" **inputs,\n",
" max_new_tokens=128,\n",
" streamer=TextStreamer(tokenizer=tokenizer, skip_special_tokens=True),\n",
" pad_token_id=tokenizer.eos_token_id,\n",
" prompt_lookup_num_tokens=3,\n",
") "
")"
]
},
{
@@ -442,6 +444,7 @@
"outputs": [],
"source": [
"from functools import wraps\n",
"\n",
"import numpy as np\n",
"\n",
"\n",
@@ -458,15 +461,15 @@
" if len(self.seq_lens) > 0 or len(self.win_sizes) > 0:\n",
" raise RuntimeError(\"Always use a new instance, don't reuse!\")\n",
" self.model_forward = self.model.forward\n",
" \n",
"\n",
" @wraps(self.model_forward)\n",
" def forward_wrapper(**kwargs):\n",
" self.seq_lens[-1].append(kwargs.get(\"attention_mask\").shape[-1])\n",
" self.win_sizes[-1].append(kwargs.get(\"input_ids\").shape[-1] - 1)\n",
" return self.model_forward(**kwargs)\n",
" \n",
"\n",
" self.model.forward = forward_wrapper\n",
" \n",
"\n",
" # wrap generate method\n",
" self.model_generate = self.model.generate\n",
"\n",
@@ -494,7 +497,7 @@
" self.seq_lens = [sl[1:] for sl in self.seq_lens]\n",
" # Add window size for output to ease calculation later\n",
" for ws, sl in zip(self.win_sizes, self.seq_lens):\n",
" ws.append(0) \n",
" ws.append(0)\n",
"\n",
" def acceptance_rate(self, return_mean=True, normalize=False):\n",
" # ar_per_win = ((cur_seq_len - cur_win_size) - (prev_seq_len - prev_win_size) - 1) / prev_win_size\n",
@@ -533,8 +536,9 @@
"metadata": {},
"outputs": [],
"source": [
"from tqdm import tqdm\n",
"from datasets import load_dataset\n",
"from tqdm import tqdm\n",
"\n",
"\n",
"dataset_name = \"openai_humaneval\"\n",
"dataset_subset_name = None\n",
@@ -590,10 +594,10 @@
"from threading import Thread\n",
"\n",
"from transformers import (\n",
" TextIteratorStreamer,\n",
" GenerationConfig,\n",
" StoppingCriteria,\n",
" StoppingCriteriaList,\n",
" GenerationConfig,\n",
" TextIteratorStreamer,\n",
")\n",
"\n",
"\n",
@@ -690,7 +694,7 @@
" prompt_char = \"\"\n",
" history[-1][1] = prompt_char\n",
" yield history, \"Status: Generating...\", *([gr.update(interactive=False)] * 4)\n",
" \n",
"\n",
" streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)\n",
"\n",
" # Create a stopping criteria to prevent the model from playing the role of the user aswell.\n",
@@ -770,6 +774,7 @@
"source": [
"import gradio as gr\n",
"\n",
"\n",
"try:\n",
" demo.close()\n",
"except:\n",
@@ -808,7 +813,7 @@
" history: conversation history\n",
" Returns:\n",
" updated history\n",
" \"\"\" \n",
" \"\"\"\n",
" history[-1][1] = None\n",
" return history\n",
"\n",
4 changes: 3 additions & 1 deletion notebooks/openvino/question_answering_quantization.ipynb
@@ -51,9 +51,11 @@
"import transformers\n",
"from evaluate import evaluator\n",
"from openvino.runtime import Core\n",
"from optimum.intel import OVModelForQuestionAnswering, OVQuantizer, OVQuantizationConfig, OVConfig\n",
"from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline\n",
"\n",
"from optimum.intel import OVConfig, OVModelForQuestionAnswering, OVQuantizationConfig, OVQuantizer\n",
"\n",
"\n",
"transformers.logging.set_verbosity_error()\n",
"datasets.logging.set_verbosity_error()"
]
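The reordered imports above feed this notebook's post-training static quantization flow. A rough sketch of how those classes typically fit together; the dataset, sample count, preprocessing, and save directory below are assumptions for illustration, not taken from the diff:

from transformers import AutoModelForQuestionAnswering, AutoTokenizer

from optimum.intel import OVConfig, OVModelForQuestionAnswering, OVQuantizationConfig, OVQuantizer

model_id = "distilbert-base-uncased-distilled-squad"
model = AutoModelForQuestionAnswering.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

quantizer = OVQuantizer.from_pretrained(model)
calibration_dataset = quantizer.get_calibration_dataset(
    "squad",
    preprocess_function=lambda ex: tokenizer(ex["question"], ex["context"], truncation=True, max_length=384),
    num_samples=100,
    dataset_split="train",
)
quantizer.quantize(
    calibration_dataset=calibration_dataset,
    save_directory="distilbert-squad-int8",
    ov_config=OVConfig(quantization_config=OVQuantizationConfig()),
)
int8_model = OVModelForQuestionAnswering.from_pretrained("distilbert-squad-int8")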
Original file line number Diff line number Diff line change
@@ -46,15 +46,18 @@
"outputs": [],
"source": [
"import time\n",
"from pathlib import Path\n",
"\n",
"import datasets\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import transformers\n",
"from pathlib import Path\n",
"from openvino.runtime import Core\n",
"\n",
"from optimum.intel import OVConfig, OVQuantizer, OVStableDiffusionPipeline, OVWeightQuantizationConfig\n",
"from optimum.intel.openvino.configuration import OVQuantizationMethod\n",
"\n",
"\n",
"transformers.logging.set_verbosity_error()\n",
"datasets.logging.set_verbosity_error()"
]
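Judging by the OVStableDiffusionPipeline and OVWeightQuantizationConfig imports, the hunk above belongs to a Stable Diffusion quantization notebook (its file header is not shown here). A minimal sketch of loading and running the OpenVINO pipeline those classes wrap; the model id and prompt are assumptions:

from optimum.intel import OVStableDiffusionPipeline

pipe = OVStableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", export=True)
image = pipe("sailing ship in a storm, oil painting", num_inference_steps=20).images[0]
image.save("ship.png")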
4 changes: 2 additions & 2 deletions optimum/exporters/ipex/model_patcher.py
@@ -29,11 +29,11 @@
from .modeling_utils import (
_IPEX_MINIMUM_VERSION_FOR_PATCHING,
_gpt2_block_forward,
_ipex_rms_layer_norm_forward,
_IPEXFalconDecoderLayer,
_IPEXGPT2Attention,
_IPEXIntermediate,
_IPEXLlamaDecoderLayer,
_llama_layer_norm_forward,
_llama_model_forward,
)

@@ -79,7 +79,7 @@ def _patch_llama_model(model):
2. Linear fusion with (2 Linears + Silu + Mul) and (Linear + Add)
"""
convert_functions(model, LlamaModel, "forward", _llama_model_forward)
convert_functions(model, LlamaRMSNorm, "forward", _ipex_rms_layer_norm_forward)
convert_functions(model, LlamaRMSNorm, "forward", _llama_layer_norm_forward)
convert_class(model, LlamaDecoderLayer, _IPEXLlamaDecoderLayer, model.config)
return model
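The convert_functions and convert_class helpers used above are not shown in this diff. Conceptually, a forward-patching helper of this kind walks the module tree and rebinds the forward of every matching submodule; a sketch of that idea, not the repository's actual implementation:

def convert_functions(module, target_cls, attr_name, new_fn):
    # Rebind `attr_name` (e.g. "forward") of every submodule of the target class
    # to the replacement function, bound as a method of that submodule.
    for submodule in module.modules():
        if isinstance(submodule, target_cls):
            setattr(submodule, attr_name, new_fn.__get__(submodule, submodule.__class__))

The hunk itself only swaps which replacement is registered for LlamaRMSNorm: _llama_layer_norm_forward instead of _ipex_rms_layer_norm_forward.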

245 changes: 148 additions & 97 deletions optimum/exporters/ipex/modeling_utils.py

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions optimum/exporters/openvino/model_patcher.py
@@ -404,9 +404,9 @@ def _llama_gemma_update_causal_mask_legacy(self, attention_mask, input_tensor, c
offset = 0
mask_shape = attention_mask.shape
mask_slice = (attention_mask.eq(0.0)).to(dtype=dtype) * min_dtype
causal_mask[
: mask_shape[0], : mask_shape[1], offset : mask_shape[2] + offset, : mask_shape[3]
] = mask_slice
causal_mask[: mask_shape[0], : mask_shape[1], offset : mask_shape[2] + offset, : mask_shape[3]] = (
mask_slice
)

if (
self.config._attn_implementation == "sdpa"
@@ -1966,9 +1966,9 @@ def _dbrx_update_causal_mask_legacy(
offset = 0
mask_shape = attention_mask.shape
mask_slice = (attention_mask.eq(0.0)).to(dtype=dtype) * min_dtype
causal_mask[
: mask_shape[0], : mask_shape[1], offset : mask_shape[2] + offset, : mask_shape[3]
] = mask_slice
causal_mask[: mask_shape[0], : mask_shape[1], offset : mask_shape[2] + offset, : mask_shape[3]] = (
mask_slice
)

if (
self.config._attn_implementation == "sdpa"
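Both hunks in this file are formatting-only: the subscripted assignment target now stays on one line and the assigned value is wrapped instead. For reference, a small self-contained illustration of what the statement does, copying the attention-mask-derived slice into the matching region of a preallocated causal mask (shapes are made up for the example):

import torch

min_dtype = torch.finfo(torch.float32).min
causal_mask = torch.full((1, 1, 4, 8), min_dtype)                    # preallocated mask
attention_mask = torch.tensor([[[[1.0, 1, 1, 1, 1, 0, 0, 0]] * 4]])  # 1 = keep, 0 = padding
offset = 0
mask_shape = attention_mask.shape
mask_slice = (attention_mask.eq(0.0)).to(dtype=torch.float32) * min_dtype
causal_mask[: mask_shape[0], : mask_shape[1], offset : mask_shape[2] + offset, : mask_shape[3]] = (
    mask_slice
)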
52 changes: 11 additions & 41 deletions optimum/intel/ipex/modeling_base.py
@@ -154,7 +154,7 @@ def __init__(
self._device = torch.device("cpu")

# CPU only support jit model for now.
if export:
if export and self._device.type == "cpu":
if isinstance(model, torch.jit.RecursiveScriptModule):
logger.warning("The model has been exported already.")
else:
@@ -251,7 +251,6 @@ def _from_pretrained(
)
token = use_auth_token

task = cls.export_feature
commit_hash = kwargs.pop("_commit_hash", None)

model_kwargs = {
@@ -263,49 +262,11 @@ def _from_pretrained(
"force_download": force_download,
}

model = TasksManager.get_model_from_task(task, model_id, **model_kwargs)

if is_torch_xpu_available(check_device=True):
model.to("xpu:0")
if _is_patched_with_ipex(model, task):
model = _patch_model(model)
else:
model = ipex_jit_trace(model, task, use_cache)
config.torchscript = True
config.torch_dtype = torch_dtype
return cls(model, config=config, model_save_dir=model_id, use_cache=use_cache, warmup=False)

@classmethod
def _from_pretrained(
cls,
model_id: Union[str, Path],
config: PretrainedConfig,
use_auth_token: Optional[Union[bool, str]] = None,
token: Optional[Union[bool, str]] = None,
revision: Optional[str] = None,
force_download: bool = False,
cache_dir: str = HUGGINGFACE_HUB_CACHE,
file_name: Optional[str] = WEIGHTS_NAME,
local_files_only: bool = False,
subfolder: str = "",
**kwargs,
):
if use_auth_token is not None:
warnings.warn(
"The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.",
FutureWarning,
)
if token is not None:
raise ValueError(
"Both the arguments `use_auth_token` and `token` were specified, which is not supported. Please specify only `token`."
)
token = use_auth_token

if not getattr(config, "torchscript", False):
logger.warning("Detect torchscript is false. Convert to torchscript model!")

if is_torch_version("<", "2.1.0"):
raise ImportError("`torch>=2.0.0` is needed to trace your model")
raise ImportError("`torch>=2.1.0` is needed to trace your model")

task = cls.export_feature
config.torch_dtype = torch_dtype
@@ -318,6 +279,15 @@ def _from_pretrained(
_commit_hash=commit_hash,
**model_kwargs,
)
if is_torch_xpu_available(check_device=True):
model.to("xpu:0")
if _is_patched_with_ipex(model, task):
model = _patch_model(model)
else:
use_cache = kwargs.get("use_cache", True)
model = ipex_jit_trace(model, task, use_cache)
config.torchscript = True
config.torch_dtype = torch_dtype

return cls(model, config=config, export=True, **kwargs)
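The consolidated _from_pretrained above loads the checkpoint through TasksManager.get_model_from_task, moves it to XPU when one is available, applies the IPEX patches when the architecture supports them, and otherwise falls back to a TorchScript trace. From the user's side the entry point is unchanged; a minimal usage sketch with gpt2 as a stand-in checkpoint:

import torch
from transformers import AutoTokenizer

from optimum.intel.ipex import IPEXModelForCausalLM

model_id = "gpt2"  # stand-in checkpoint
model = IPEXModelForCausalLM.from_pretrained(model_id, export=True, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_id)

inputs = tokenizer("The weather today is", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=16, pad_token_id=tokenizer.eos_token_id)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))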

12 changes: 6 additions & 6 deletions optimum/intel/openvino/modeling_base.py
@@ -84,9 +84,9 @@ def __init__(
for idx, key in enumerate(model.inputs):
names = tuple(key.get_names())
input_names[next((name for name in names if "/" not in name), names[0])] = idx
input_dtypes[
next((name for name in names if "/" not in name), names[0])
] = key.get_element_type().get_type_name()
input_dtypes[next((name for name in names if "/" not in name), names[0])] = (
key.get_element_type().get_type_name()
)
self.input_names = input_names
self.input_dtypes = input_dtypes

@@ -95,9 +95,9 @@ def __init__(
for idx, key in enumerate(model.outputs):
names = tuple(key.get_names())
output_names[next((name for name in names if "/" not in name), names[0])] = idx
output_dtypes[
next((name for name in names if "/" not in name), names[0])
] = key.get_element_type().get_type_name()
output_dtypes[next((name for name in names if "/" not in name), names[0])] = (
key.get_element_type().get_type_name()
)

self.output_names = output_names
self.output_dtypes = output_dtypes
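Both hunks here are likewise formatting-only. The loops build name-to-index and name-to-dtype lookup tables for the model's input and output ports, preferring tensor names that do not contain "/". An equivalent, spelled-out version of the input-side loop (assuming model is an openvino.runtime.Model):

input_names, input_dtypes = {}, {}
for idx, key in enumerate(model.inputs):
    names = tuple(key.get_names())
    # Prefer a "clean" tensor name without "/"; fall back to the first name otherwise
    preferred = next((name for name in names if "/" not in name), names[0])
    input_names[preferred] = idx
    input_dtypes[preferred] = key.get_element_type().get_type_name()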