Commit 8fe4f9d

Merge branch 'huggingface:main' into flash_xpu
2 parents 3616dd2 + 8361e45


47 files changed: +2628 −962 lines

.github/workflows/test_openvino.yml

+1 −1

@@ -54,7 +54,7 @@ jobs:
       - if: ${{ matrix.transformers-version == 'latest' && matrix.test-pattern == '*modeling*'}}
         name: Install auto-gptq, autoawq
         run: |
-          pip install auto-gptq autoawq --extra-index-url https://download.pytorch.org/whl/cpu
+          pip install auto-gptq "autoawq<0.2.8" --extra-index-url https://download.pytorch.org/whl/cpu

       - if: ${{ matrix.test-pattern == '*modeling*' }}
         name: Uninstall NNCF

.github/workflows/test_openvino_full.yml

+1 −1

@@ -81,7 +81,7 @@ jobs:
       - if: ${{ matrix.transformers-version == 'latest' && matrix.os != 'windows-2019' }}
         name: Install auto-gptq, autoawq
         run: |
-          pip install auto-gptq autoawq --extra-index-url https://download.pytorch.org/whl/cpu
+          pip install auto-gptq "autoawq<0.2.8" --extra-index-url https://download.pytorch.org/whl/cpu

       - name: Pip freeze
         run: pip freeze

.github/workflows/test_openvino_slow.yml

+6 −5

@@ -26,6 +26,11 @@ jobs:
       matrix:
         os: ["ubuntu-22.04", "windows-2019"]
         transformers-version: ["4.36.0", "latest"]
+        include:
+          - transformers-version: "4.40.0"
+            os: "ubuntu-22.04"
+          - transformers-version: "4.45.0"
+            os: "ubuntu-22.04"

     runs-on: ${{ matrix.os }}

@@ -52,7 +57,7 @@ jobs:
       - if: ${{ matrix.transformers-version == 'latest' && matrix.os != 'windows-2019' }}
         name: Install auto-gptq, autoawq
         run: |
-          pip install auto-gptq autoawq --extra-index-url https://download.pytorch.org/whl/cpu
+          pip install auto-gptq "autoawq<0.2.8" --extra-index-url https://download.pytorch.org/whl/cpu

       - name: Pip freeze
         run: pip freeze

@@ -65,10 +70,6 @@ jobs:
         run: |
           pip install .[nncf]

-      - if: ${{ matrix.transformers-version != 'latest' }}
-        name: Downgrade Transformers and Accelerate
-        run: pip install transformers==${{ matrix.transformers-version }} accelerate==0.*
-
       - name: Test with Pytest (slow)
         run: |
           pytest tests/openvino -m "run_slow" --durations=0

README.md

+1 −1

@@ -1,5 +1,5 @@
 <p align="center">
-    <img src="readme_logo.png" />
+    <img src="https://huggingface.co/datasets/optimum/documentation-images/resolve/main/intel/logo/hf_intel_logo.png" />
 </p>

 # Optimum Intel

docs/source/openvino/export.mdx

+3 −2

@@ -31,7 +31,8 @@ Check out the help for more options:

 ```text
 usage: optimum-cli export openvino [-h] -m MODEL [--task TASK] [--framework {pt,tf}] [--trust-remote-code]
-                                   [--weight-format {fp32,fp16,int8,int4,mxfp4,nf4}] [--quant-mode {int8,f8e4m3,f8e5m2}]
+                                   [--weight-format {fp32,fp16,int8,int4,mxfp4,nf4}]
+                                   [--quant-mode {int8,f8e4m3,f8e5m2,nf4_f8e4m3,nf4_f8e5m2,int4_f8e4m3,int4_f8e5m2}]
                                    [--library {transformers,diffusers,timm,sentence_transformers,open_clip}]
                                    [--cache_dir CACHE_DIR] [--pad-token-id PAD_TOKEN_ID] [--ratio RATIO] [--sym]
                                    [--group-size GROUP_SIZE] [--backup-precision {none,int8_sym,int8_asym}]

@@ -67,7 +68,7 @@ Optional arguments:
                         on your local machine arbitrary code present in the model repository.
   --weight-format {fp32,fp16,int8,int4,mxfp4,nf4}
                         The weight format of the exported model.
-  --quant-mode {int8,f8e4m3,f8e5m2}
+  --quant-mode {int8,f8e4m3,f8e5m2,nf4_f8e4m3,nf4_f8e5m2,int4_f8e4m3,int4_f8e5m2}
                         Quantization precision mode. This is used for applying full model quantization including
                         activations.
   --library {transformers,diffusers,timm,sentence_transformers,open_clip}
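
The names of the new modes indicate a combined scheme: a low-bit weight format (nf4 or int4) together with an FP8 format (f8e4m3 or f8e5m2) for activations. As a rough illustrative sketch only (the placeholders <model_id> and <output_dir> and the wikitext2 calibration dataset are assumptions, not taken from this commit), a full-quantization export with one of the new modes would be invoked along these lines:

    optimum-cli export openvino -m <model_id> --quant-mode nf4_f8e4m3 --dataset wikitext2 <output_dir>

Unlike weight-only compression selected via --weight-format, full quantization also quantizes activations and therefore typically needs a calibration dataset.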

docs/source/openvino/models.mdx

+1 −0

@@ -72,6 +72,7 @@ Here is the list of the supported architectures :
 - Llava
 - Llava-Next
 - M2-M100
+- MAIRA-2
 - MBart
 - MPNet
 - MPT
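
MAIRA-2 is a multimodal, Llava-style radiology report generation model, so in Python it would be handled by the visual-language model class rather than the plain text-generation one. A minimal loading sketch, with the caveat that the Hub id and the class choice below are illustrative assumptions and are not stated in this commit:

    from optimum.intel import OVModelForVisualCausalLM

    # export=True converts the original PyTorch checkpoint to OpenVINO IR on the fly.
    # trust_remote_code is likely required if the checkpoint ships custom modeling code.
    model = OVModelForVisualCausalLM.from_pretrained(
        "microsoft/maira-2",  # assumed Hub id
        export=True,
        trust_remote_code=True,
    )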

notebooks/ipex/README.md

+1 −1

@@ -6,4 +6,4 @@ You can find here a list of the notebooks for the IPEX integration in 🤗 Optim
 | Notebook | Description | | |
 |:----------|:-------------|:-------------|------:|
 | [How to optimize your model with IPEX for text generation](https://github.com/huggingface/optimum-intel/blob/main/notebooks/ipex/text_generation.ipynb)| Show how to apply operators and graph-level optimizations using Intel [IPEX](https://github.com/intel/intel-extension-for-pytorch) | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/optimum-intel/blob/main/notebooks/ipex/text_generation.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/optimum-intel/blob/main/notebooks/ipex/text_generation.ipynb)|
-
+| [How to optimize your langchain pipeline with IPEX](https://github.com/huggingface/optimum-intel/blob/main/notebooks/ipex/langchain_hf_pipelines.ipynb)| Show how to optimize your langchain pipeline with IPEX [IPEX](https://github.com/intel/intel-extension-for-pytorch) | [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/optimum-intel/blob/main/notebooks/ipex/langchain_hf_pipelines.ipynb)| [![Open in AWS Studio](https://studiolab.sagemaker.aws/studiolab.svg)](https://studiolab.sagemaker.aws/import/github/huggingface/optimum-intel/blob/main/notebooks/ipex/langchain_hf_pipelines.ipynb)|

notebooks/ipex/langchain_hf_pipelines.ipynb (new file)

+168 −0

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Hugging Face Pipelines\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If you're opening this Notebook on colab, you will probably need to install Langchain and 🤗 Optimum. Uncomment the following cell and run it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#! pip install langchain-huggingface optimum[ipex]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Make sure your version of langchain-huggingface is at least v0.2 and 🤗 Optimum is at least v1.22.0 since the functionality was introduced in these versions:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from optimum.intel.version import __version__\n",
    "\n",
    "print(\"optimum-intel version is\", __version__)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from optimum.intel.utils.import_utils import _langchain_hf_version\n",
    "\n",
    "print(\"langchain-huggingface version is\", _langchain_hf_version)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Model Loading\n",
    "\n",
    "Models can be loaded by specifying the model parameters using the `from_model_id` method."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_huggingface.llms import HuggingFacePipeline\n",
    "\n",
    "hf = HuggingFacePipeline.from_model_id(\n",
    "    model_id=\"gpt2\",\n",
    "    task=\"text-generation\",\n",
    "    pipeline_kwargs={\"max_new_tokens\": 10},\n",
    "    backend=\"ipex\",\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create Chain\n",
    "\n",
    "With the model loaded into memory, you can compose it with a prompt to form a chain."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_core.prompts import PromptTemplate\n",
    "\n",
    "template = \"\"\"Question: {question}\n",
    "\n",
    "Answer: Let's think step by step.\"\"\"\n",
    "prompt = PromptTemplate.from_template(template)\n",
    "\n",
    "chain = prompt | hf\n",
    "\n",
    "question = \"What is electroencephalography?\"\n",
    "\n",
    "print(chain.invoke({\"question\": question}))\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To get response without prompt, you can bind skip_prompt=True with LLM."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "chain = prompt | hf.bind(skip_prompt=True)\n",
    "\n",
    "question = \"What is electroencephalography?\"\n",
    "\n",
    "print(chain.invoke({\"question\": question}))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Streaming response :"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for chunk in chain.stream(question):\n",
    "    print(chunk, end=\"\", flush=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
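
Since HuggingFacePipeline is a standard LangChain runnable, the IPEX-backed object created in this notebook can also be called directly, without composing a prompt template. A tiny usage sketch reusing the hf object defined above (the prompt string is just an example):

    # Single direct call on the IPEX-backed pipeline; returns the generated text as a string.
    print(hf.invoke("What is electroencephalography?"))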

notebooks/ipex/text_generation.ipynb

+1 −1

@@ -22,7 +22,7 @@
    "source": [
     "import torch\n",
     "from transformers import AutoTokenizer\n",
-    "from optimum.intel.ipex import IPEXModelForCausalLM"
+    "from optimum.intel import IPEXModelForCausalLM"
    ]
   },
   {
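
The notebook now imports IPEXModelForCausalLM from the top-level optimum.intel namespace instead of optimum.intel.ipex. A minimal generation sketch with the updated import; the checkpoint id, dtype and prompt below are illustrative choices, not prescribed by the notebook:

    import torch
    from transformers import AutoTokenizer
    from optimum.intel import IPEXModelForCausalLM  # updated import path

    model_id = "gpt2"  # illustrative checkpoint
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # Loads the transformers checkpoint and applies IPEX optimizations to it.
    model = IPEXModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)

    inputs = tokenizer("What is electroencephalography?", return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=10)
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))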
