
Commit 005cceb

Merge branch 'main' into ea/janus
2 parents cbe7460 + cd44f82

11 files changed: +220 -26 lines

README.md (+1 -1)

@@ -1,5 +1,5 @@
 <p align="center">
-    <img src="readme_logo.png" />
+    <img src="https://huggingface.co/datasets/optimum/documentation-images/resolve/main/intel/logo/hf_intel_logo.png" />
 </p>

 # Optimum Intel

notebooks/ipex/README.md (+1 -1)

@@ -6,4 +6,4 @@ You can find here a list of the notebooks for the IPEX integration in 🤗 Optim
 | Notebook | Description | | |
 |:----------|:-------------|:-------------|------:|
 | [How to optimize your model with IPEX for text generation](https://github.com/huggingface/optimum-intel/blob/main/notebooks/ipex/text_generation.ipynb)| Show how to apply operators and graph-level optimizations using Intel [IPEX](https://github.com/intel/intel-extension-for-pytorch) | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/optimum-intel/blob/main/notebooks/ipex/text_generation.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/optimum-intel/blob/main/notebooks/ipex/text_generation.ipynb)|
-
+| [How to optimize your langchain pipeline with IPEX](https://github.com/huggingface/optimum-intel/blob/main/notebooks/ipex/langchain_hf_pipelines.ipynb)| Show how to optimize your langchain pipeline with IPEX [IPEX](https://github.com/intel/intel-extension-for-pytorch) | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/optimum-intel/blob/main/notebooks/ipex/langchain_hf_pipelines.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/optimum-intel/blob/main/notebooks/ipex/langchain_hf_pipelines.ipynb)|
notebooks/ipex/langchain_hf_pipelines.ipynb (new file, +168 lines)

@@ -0,0 +1,168 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Hugging Face Pipelines\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If you're opening this notebook on Colab, you will probably need to install LangChain and 🤗 Optimum. Uncomment the following cell and run it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#! pip install langchain-huggingface optimum[ipex]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Make sure your version of langchain-huggingface is at least v0.2 and 🤗 Optimum is at least v1.22.0, since the functionality was introduced in these versions:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from optimum.intel.version import __version__\n",
    "\n",
    "print(\"optimum-intel version is\", __version__)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from optimum.intel.utils.import_utils import _langchain_hf_version\n",
    "\n",
    "print(\"langchain-huggingface version is\", _langchain_hf_version)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Model Loading\n",
    "\n",
    "Models can be loaded by specifying the model parameters using the `from_model_id` method."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_huggingface.llms import HuggingFacePipeline\n",
    "\n",
    "hf = HuggingFacePipeline.from_model_id(\n",
    "    model_id=\"gpt2\",\n",
    "    task=\"text-generation\",\n",
    "    pipeline_kwargs={\"max_new_tokens\": 10},\n",
    "    backend=\"ipex\",\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create Chain\n",
    "\n",
    "With the model loaded into memory, you can compose it with a prompt to form a chain."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_core.prompts import PromptTemplate\n",
    "\n",
    "template = \"\"\"Question: {question}\n",
    "\n",
    "Answer: Let's think step by step.\"\"\"\n",
    "prompt = PromptTemplate.from_template(template)\n",
    "\n",
    "chain = prompt | hf\n",
    "\n",
    "question = \"What is electroencephalography?\"\n",
    "\n",
    "print(chain.invoke({\"question\": question}))\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To get the response without the prompt, you can bind `skip_prompt=True` to the LLM."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "chain = prompt | hf.bind(skip_prompt=True)\n",
    "\n",
    "question = \"What is electroencephalography?\"\n",
    "\n",
    "print(chain.invoke({\"question\": question}))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Streaming the response:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for chunk in chain.stream(question):\n",
    "    print(chunk, end=\"\", flush=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
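Read as a plain script rather than notebook JSON, the cells above amount to roughly the following (a condensed sketch, assuming langchain-huggingface >= 0.2 and optimum[ipex] >= 1.22.0 are installed; the dict-style input to `stream` is used here for clarity):

# Condensed script-form sketch of the new notebook's cells (same APIs as the cells above).
from langchain_core.prompts import PromptTemplate
from langchain_huggingface.llms import HuggingFacePipeline

# Load gpt2 through the IPEX backend provided by optimum-intel.
hf = HuggingFacePipeline.from_model_id(
    model_id="gpt2",
    task="text-generation",
    pipeline_kwargs={"max_new_tokens": 10},
    backend="ipex",
)

prompt = PromptTemplate.from_template(
    "Question: {question}\n\nAnswer: Let's think step by step."
)

# skip_prompt=True drops the prompt text from the returned generation.
chain = prompt | hf.bind(skip_prompt=True)
question = "What is electroencephalography?"
print(chain.invoke({"question": question}))

# Stream the same chain chunk by chunk.
for chunk in chain.stream({"question": question}):
    print(chunk, end="", flush=True)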

notebooks/ipex/text_generation.ipynb (+1 -1)

@@ -22,7 +22,7 @@
   "source": [
    "import torch\n",
    "from transformers import AutoTokenizer\n",
-   "from optimum.intel.ipex import IPEXModelForCausalLM"
+   "from optimum.intel import IPEXModelForCausalLM"
   ]
  },
  {
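With the import path updated, loading a causal LM through IPEX looks roughly like the sketch below (the model id and prompt are illustrative, not taken from the diff):

# Illustrative usage of the new import path; "gpt2" is a placeholder model id.
from transformers import AutoTokenizer
from optimum.intel import IPEXModelForCausalLM

model_id = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = IPEXModelForCausalLM.from_pretrained(model_id)

inputs = tokenizer("Hello, my name is", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=10)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))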

optimum/exporters/openvino/model_patcher.py (+18 -14)

@@ -718,14 +718,15 @@ def _mistral_update_causal_mask(
 class MistralModelPatcher(DecoderModelPatcher):
     def __enter__(self):
         super().__enter__()
-        if is_transformers_version(">=", "4.42.0"):
+        if is_transformers_version(">=", "4.42.0") and is_transformers_version("<", "4.48.0"):
             # apply fix https://github.com/huggingface/transformers/commit/57d7594a79a9f5d835abf2d4d384db0e4818e548
             self._model.model._orig_update_causal_mask = self._model.model._update_causal_mask
             self._model.model._update_causal_mask = types.MethodType(_mistral_update_causal_mask, self._model.model)

         else:
             for layer in self._model.model.layers:
-                _reinitialize_cos_sin_cached_fp32(layer.self_attn.rotary_emb)
+                if hasattr(layer.self_attn, "rotary_emb"):
+                    _reinitialize_cos_sin_cached_fp32(layer.self_attn.rotary_emb)

     def __exit__(self, exc_type, exc_value, traceback):
         super().__exit__(exc_type, exc_value, traceback)
@@ -734,7 +735,7 @@ def __exit__(self, exc_type, exc_value, traceback):
             self._model.model._update_causal_mask = self._model.model._orig_update_causal_mask

         for layer in self._model.model.layers:
-            if hasattr(layer.self_attn.rotary_emb, "_orig_forward"):
+            if hasattr(layer.self_attn, "rotary_emb") and hasattr(layer.self_attn.rotary_emb, "_orig_forward"):
                 layer.self_attn.rotary_emb.forward = layer.self_attn.rotary_emb._orig_forward


@@ -1580,19 +1581,19 @@ def __enter__(self):
         ):
             self._model.config.max_position_embeddings = self._model.config.original_max_position_embeddings

-        if is_transformers_version(">=", "4.42.0"):
+        if is_transformers_version(">=", "4.42.0") and is_transformers_version("<", "4.48.0"):
             self._model.model._orig_forward = self._model.model.forward
             self._model.model.forward = types.MethodType(phi3_442_forward, self._model.model)

         # https://github.com/huggingface/transformers/blob/30ee508c6c92a1c0aa0281d193c7c0fb815b8d2f/src/transformers/models/phi3/modeling_phi3.py#L113
         # init inv_freq for torchscript tracing
         for layer in self._model.model.layers:
-            if is_torch_version(">=", "2.1.0"):
+            if is_torch_version(">=", "2.1.0") and is_transformers_version("<", "4.48.0"):
                 orig_self_attn_fwd = layer.self_attn.forward
                 layer.self_attn.forward = types.MethodType(_phi3_self_attn_sdpa_forward, layer.self_attn)
                 layer.self_attn._orig_forward = orig_self_attn_fwd

-            if layer.self_attn.rotary_emb.inv_freq is None:
+            if hasattr(layer.self_attn, "rotary_emb") and layer.self_attn.rotary_emb.inv_freq is None:
                 rotary_emb = layer.self_attn.rotary_emb
                 layer.self_attn.rotary_emb.inv_freq = 1.0 / (
                     rotary_emb.base ** (torch.arange(0, rotary_emb.dim, 2, dtype=torch.int64).float() / rotary_emb.dim)
@@ -2493,7 +2494,9 @@ class UpdateCausalMaskModelPatcher(DecoderModelPatcher):
     def __enter__(self):
         super().__enter__()
         patch_update_causal_mask(self._model, "4.42.0")
-        if hasattr(self._model.model.layers[0].self_attn.rotary_emb, "_set_cos_sin_cache"):
+        if hasattr(self._model.model.layers[0].self_attn, "rotary_emb") and hasattr(
+            self._model.model.layers[0].self_attn.rotary_emb, "_set_cos_sin_cache"
+        ):
             for layer in self._model.model.layers:
                 _reinitialize_cos_sin_cached_fp32(layer.self_attn.rotary_emb)

@@ -3045,15 +3048,16 @@ def patched_forward(self, fn):
     def __enter__(self):
         if is_torch_version(">=", "2.1.0"):
             if self._model.config.model_type == "qwen2" and self._model.config._attn_implementation != "sdpa":
-                from transformers.models.qwen2.modeling_qwen2 import QWEN2_ATTENTION_CLASSES
+                if is_transformers_version("<", "4.48"):
+                    from transformers.models.qwen2.modeling_qwen2 import QWEN2_ATTENTION_CLASSES

-                sdpa_attn = QWEN2_ATTENTION_CLASSES["sdpa"]
-                self._model.config._orig_attn_implementation = self._model.config._attn_implementation
-                self._model.config._attn_implementation = "sdpa"
+                    sdpa_attn = QWEN2_ATTENTION_CLASSES["sdpa"]
+                    self._model.config._orig_attn_implementation = self._model.config._attn_implementation
+                    self._model.config._attn_implementation = "sdpa"

-                for layer in self._model.model.layers:
-                    layer.self_attn._orig_forward = layer.self_attn.forward
-                    layer.self_attn.forward = types.MethodType(sdpa_attn.forward, layer.self_attn)
+                    for layer in self._model.model.layers:
+                        layer.self_attn._orig_forward = layer.self_attn.forward
+                        layer.self_attn.forward = types.MethodType(sdpa_attn.forward, layer.self_attn)

         if self._model.config.model_type == "llama" and self._model.config._attn_implementation != "sdpa":
             self._model.config._orig_attn_implementation = self._model.config._attn_implementation
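The common thread in these hunks is defensive handling of `layer.self_attn.rotary_emb`, which recent transformers releases apparently no longer attach to every attention module (hence the added `< 4.48` upper bounds). A minimal sketch of that guard pattern, using a hypothetical helper name rather than the repository's code:

# Hypothetical standalone illustration of the hasattr/getattr guard used above:
# only touch per-layer rotary embeddings when the attention module still owns them.
def reinit_rotary_embeddings_if_present(model, reinit_fn):
    for layer in model.model.layers:
        rotary_emb = getattr(layer.self_attn, "rotary_emb", None)
        if rotary_emb is not None:
            reinit_fn(rotary_emb)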

optimum/intel/openvino/modeling_decoder.py (+5 -1)

@@ -56,7 +56,11 @@


 if TYPE_CHECKING:
-    from transformers.generation.streamers import BaseStreamer
+    try:
+        from transformers.generation.streamers import BaseStreamer
+    except Exception:
+        from typing import Generator as BaseStreamer
+
     from transformers.modeling_utils import PreTrainedModel

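`BaseStreamer` is only needed for type annotations here, which is why a typing alias is an acceptable fallback when the import location changes between transformers releases. At runtime a concrete streamer is passed to `generate` instead; a hedged usage sketch (model id and generation arguments are illustrative only):

# Illustrative only: streaming generation with an OpenVINO decoder model.
from transformers import AutoTokenizer, TextStreamer
from optimum.intel import OVModelForCausalLM

model_id = "gpt2"  # placeholder model id
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = OVModelForCausalLM.from_pretrained(model_id, export=True)

inputs = tokenizer("What is electroencephalography?", return_tensors="pt")
streamer = TextStreamer(tokenizer, skip_prompt=True)
model.generate(**inputs, max_new_tokens=20, streamer=streamer)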
optimum/intel/openvino/modeling_seq2seq.py (+1 -1)

@@ -668,7 +668,7 @@ def forward(
         logits = torch.from_numpy(self.request.get_tensor("logits").data).to(self.device)
         self._past_length += input_ids.shape[1]

-        out_past_key_values = ()
+        out_past_key_values = ((),)

         if not self.stateful:
             # Tuple of length equal to : number of layer * number of past_key_value per decoder layer (2 corresponds to the

optimum/intel/openvino/modeling_visual_language.py (+13)

@@ -1090,6 +1090,18 @@ def preprocess_inputs(
             prompt = "<image>\n" + text
         else:
             prompt = text
+
+        if getattr(processor, "patch_size", None) is None:
+            if (
+                getattr(config, "vision_config", None) is not None
+                and getattr(config.vision_config, "patch_size", None) is not None
+            ):
+                processor.patch_size = config.vision_config.patch_size
+            else:
+                raise ValueError(
+                    "Processor does not have `patch_size` attribute. Please fix the processor or provide `patch_size` in the config."
+                )
+
         inputs = processor(images=image, text=prompt, return_tensors="pt")
         return inputs

@@ -1985,6 +1997,7 @@ def preprocess_inputs(
         input_ids = tokenizer(text, return_tensors="pt").input_ids
         attention_mask = torch.ones_like(input_ids, dtype=torch.int64)
         result = {"input_ids": input_ids, "attention_mask": attention_mask}
+
         if image is not None:
             result["images"] = processor(images=[image], return_tensors="pt")["pixel_values"]
         return result
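The first hunk back-fills `processor.patch_size` from the model config when a saved processor lacks it. Factored out as a standalone helper (hypothetical name; the commit inlines the logic in `preprocess_inputs`), it amounts to:

# Hypothetical helper mirroring the back-fill added above.
def ensure_patch_size(processor, config):
    if getattr(processor, "patch_size", None) is not None:
        return
    vision_config = getattr(config, "vision_config", None)
    patch_size = getattr(vision_config, "patch_size", None) if vision_config is not None else None
    if patch_size is None:
        raise ValueError(
            "Processor does not have `patch_size` attribute. "
            "Please fix the processor or provide `patch_size` in the config."
        )
    processor.patch_size = patch_size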

readme_logo.png (deleted, -33.5 KB)

Binary file not shown.

setup.py (+1 -1)

@@ -29,7 +29,7 @@
 INSTALL_REQUIRE = [
     "torch>=1.11",
     "optimum @ git+https://github.com/eaidova/optimum.git@ea/avoid_lib_guessing_in_standartize_args",
-    "transformers>=4.36,<4.48",
+    "transformers>=4.36,<4.49",
     "datasets>=1.4.0",
     "sentencepiece",
     "setuptools",

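The relaxed pin admits transformers 4.48.x, which is why the patcher hunks above now add `< 4.48` upper bounds to their 4.42-era workarounds. The repository's `is_transformers_version` helper expresses such gates; an illustrative equivalent built on `packaging` would be:

# Illustrative version gate; the repository uses its own is_transformers_version helper.
from packaging import version
import transformers

def transformers_version_between(low, high):
    installed = version.parse(transformers.__version__)
    return version.parse(low) <= installed < version.parse(high)

if transformers_version_between("4.42.0", "4.48.0"):
    pass  # apply a workaround only on the affected releases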