huggingface
diff --git a/.github/workflows/test_inc.yml
+1 b/.github/workflows/test_inc.yml
+1
diff --git a/.github/workflows/test_ipex.yml
+1 b/.github/workflows/test_ipex.yml
+1
diff --git a/.github/workflows/test_offline.yaml
+40 b/.github/workflows/test_offline.yaml
+40
diff --git a/.github/workflows/test_openvino_examples.yml
+10-10 b/.github/workflows/test_openvino_examples.yml
+10-10
diff --git a/.github/workflows/test_openvino_notebooks.yml
+2-2 b/.github/workflows/test_openvino_notebooks.yml
+2-2
diff --git a/examples/neural_compressor/language-modeling/run_clm.py
+4-7 b/examples/neural_compressor/language-modeling/run_clm.py
+4-7
diff --git a/examples/openvino/audio-classification/requirements.txt
+2-1 b/examples/openvino/audio-classification/requirements.txt
+2-1
diff --git a/examples/openvino/audio-classification/run_audio_classification.py
+1-1 b/examples/openvino/audio-classification/run_audio_classification.py
+1-1
diff --git a/examples/openvino/image-classification/requirements.txt
+1 b/examples/openvino/image-classification/requirements.txt
+1
diff --git a/examples/openvino/image-classification/run_image_classification.py
+1-1 b/examples/openvino/image-classification/run_image_classification.py
+1-1
diff --git a/examples/openvino/question-answering/requirements.txt
+1 b/examples/openvino/question-answering/requirements.txt
+1
diff --git a/examples/openvino/question-answering/run_qa.py
+1-1 b/examples/openvino/question-answering/run_qa.py
+1-1
diff --git a/examples/openvino/question-answering/trainer_qa.py
+1-1 b/examples/openvino/question-answering/trainer_qa.py
+1-1
diff --git a/examples/openvino/text-classification/requirements.txt
+2-1 b/examples/openvino/text-classification/requirements.txt
+2-1
diff --git a/examples/openvino/text-classification/run_glue.py
+1-1 b/examples/openvino/text-classification/run_glue.py
+1-1
diff --git a/notebooks/openvino/optimum_openvino_inference.ipynb
+12-12 b/notebooks/openvino/optimum_openvino_inference.ipynb
+12-12
diff --git a/notebooks/openvino/question_answering_quantization.ipynb
+2-2 b/notebooks/openvino/question_answering_quantization.ipynb
+2-2
diff --git a/notebooks/openvino/requirements.txt
+1-1 b/notebooks/openvino/requirements.txt
+1-1
diff --git a/notebooks/openvino/stable_diffusion_optimization.ipynb
+1-1 b/notebooks/openvino/stable_diffusion_optimization.ipynb
+1-1
diff --git a/optimum/commands/export/openvino.py
+5-1 b/optimum/commands/export/openvino.py
+5-1
diff --git a/optimum/exporters/openvino/__main__.py
+2-1 b/optimum/exporters/openvino/__main__.py
+2-1
@@ -32,6 +32,7 @@ jobs:
         python -m pip install --upgrade pip
         pip install cmake
         pip install py-cpuinfo
+        pip install torch==2.2 torchaudio torchvision --extra-index-url https://download.pytorch.org/whl/cpu
         pip install .[neural-compressor,diffusers,tests]
         pip install intel-extension-for-transformers
         pip install peft
 
@@ -30,6 +30,7 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
+        pip install torch==2.2 torchaudio torchvision --extra-index-url https://download.pytorch.org/whl/cpu
         pip install .[ipex,tests]
     - name: Test with Pytest
       run: |
 
@@ -0,0 +1,40 @@
+name: Offline usage / Python - Test
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  build:
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: [3.9]
+        os: [ubuntu-latest]
+
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v3
+      - name: Setup Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v3
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          pip install .[tests,openvino]
+      - name: Test
+        run: |
+          HF_HOME=/tmp/ huggingface-cli download hf-internal-testing/tiny-random-gpt2
+          HF_HOME=/tmp/ HF_HUB_OFFLINE=1 optimum-cli export openvino --model hf-internal-testing/tiny-random-gpt2 gpt2_openvino --task text-generation
+
+          huggingface-cli download hf-internal-testing/tiny-random-gpt2
+          HF_HUB_OFFLINE=1 optimum-cli export openvino --model hf-internal-testing/tiny-random-gpt2 gpt2_openvino --task text-generation
+
+          pytest tests/openvino/test_modeling.py -k "test_load_from_hub" -s -vvvvv
+          HF_HUB_OFFLINE=1 pytest tests/openvino/test_modeling.py -k "test_load_from_hub" -s -vvvvv
@@ -7,11 +7,11 @@ on:
   push:
     paths:
     - '.github/workflows/test_openvino_examples.yml'
-    - 'examples/openvino/*'
+    - 'examples/openvino/**'
   pull_request:
     paths:
     - '.github/workflows/test_openvino_examples.yml'
-    - 'examples/openvino/*'
+    - 'examples/openvino/**'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
@@ -22,9 +22,9 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.8", "3.10"]
+        python-version: ["3.8", "3.11"]
 
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
 
     steps:
     - uses: actions/checkout@v2
@@ -35,12 +35,12 @@ jobs:
 
     - name: Install dependencies
       run: |
-        pip install optimum[openvino] jstyleson nncf pytest
-        pip install -r examples/openvino/audio-classification/requirements.txt
-        pip install -r examples/openvino/image-classification/requirements.txt
-        pip install -r examples/openvino/question-answering/requirements.txt
-        pip install -r examples/openvino/text-classification/requirements.txt
+        pip install .[openvino] jstyleson pytest
+        pip install -r examples/openvino/audio-classification/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
+        pip install -r examples/openvino/image-classification/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
+        pip install -r examples/openvino/question-answering/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
+        pip install -r examples/openvino/text-classification/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
 
     - name: Test examples
       run: |
-        python -m pytest examples/openvino/test_examples.py
+        python -m pytest examples/openvino/test_examples.py
@@ -23,9 +23,9 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.8", "3.10"]
+        python-version: ["3.8", "3.11"]
 
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
 
     steps:
     - uses: actions/checkout@v2
 
@@ -57,13 +57,10 @@
 from transformers.utils.versions import require_version
 
 from optimum.intel.neural_compressor import INCModelForCausalLM, INCQuantizer, INCTrainer
-from optimum.intel.utils.import_utils import (
-    INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR,
-    is_intel_extension_for_transformers_available,
-)
+from optimum.intel.utils.import_utils import ITREX_IMPORT_ERROR, is_itrex_available
 
 
-if is_intel_extension_for_transformers_available():
+if is_itrex_available():
     from intel_extension_for_transformers.transformers.utils.config import GPTQConfig, RtnConfig
 
 os.environ["CUDA_VISIBLE_DEVICES"] = ""
@@ -658,8 +655,8 @@ def compute_metrics(eval_preds):
             else:
                 recipes = {}
             if optim_args.quantization_approach == "weight_only":
-                if not is_intel_extension_for_transformers_available():
-                    raise ImportError(INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR.format("WeightOnly quantization"))
+                if not is_itrex_available():
+                    raise ImportError(ITREX_IMPORT_ERROR.format("WeightOnly quantization"))
                 if optim_args.apply_pruning or optim_args.apply_distillation:
                     raise ValueError("Weight only quantization and pruning or distillation cannot be combined.")
 
 
@@ -1,4 +1,5 @@
 datasets>=1.14.0
 evaluate
 librosa
-torchaudio
+torchaudio
+accelerate
@@ -35,7 +35,7 @@
 from transformers.utils import check_min_version, send_example_telemetry
 from transformers.utils.versions import require_version
 
-from optimum.intel.openvino import OVConfig, OVTrainer, OVTrainingArguments
+from optimum.intel import OVConfig, OVTrainer, OVTrainingArguments
 
 
 logger = logging.getLogger(__name__)
 
@@ -2,3 +2,4 @@ datasets >= 1.8.0
 torch >= 1.9.0
 torchvision>=0.6.0
 evaluate
+accelerate
@@ -52,7 +52,7 @@
 from transformers.utils import check_min_version, send_example_telemetry
 from transformers.utils.versions import require_version
 
-from optimum.intel.openvino import OVConfig, OVTrainer, OVTrainingArguments
+from optimum.intel import OVConfig, OVTrainer, OVTrainingArguments
 
 
 logger = logging.getLogger(__name__)
 
@@ -1,3 +1,4 @@
 datasets >= 1.8.0
 torch >= 1.9.0
 evaluate
+accelerate
@@ -49,7 +49,7 @@
 from transformers.utils.versions import require_version
 from utils_qa import postprocess_qa_predictions
 
-from optimum.intel.openvino import OVConfig, OVTrainingArguments
+from optimum.intel import OVConfig, OVTrainingArguments
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
 
@@ -20,7 +20,7 @@
 import torch.nn.functional as F
 from transformers.trainer_utils import PredictionOutput
 
-from optimum.intel.openvino.trainer import OVTrainer
+from optimum.intel import OVTrainer
 
 
 class QuestionAnsweringOVTrainer(OVTrainer):
 
@@ -4,4 +4,5 @@ scipy
 scikit-learn
 protobuf
 torch >= 1.3
-evaluate
+evaluate
+accelerate
@@ -46,7 +46,7 @@
 from transformers.utils import check_min_version, send_example_telemetry
 from transformers.utils.versions import require_version
 
-from optimum.intel.openvino import OVConfig, OVTrainer, OVTrainingArguments
+from optimum.intel import OVConfig, OVTrainer, OVTrainingArguments
 
 
 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
 
@@ -76,7 +76,7 @@
     }
    ],
    "source": [
-    "from optimum.intel.openvino import OVModelForQuestionAnswering\n",
+    "from optimum.intel import OVModelForQuestionAnswering\n",
     "\n",
     "# Load PyTorch model from the Hub and export to OpenVINO in the background\n",
     "model = OVModelForQuestionAnswering.from_pretrained(\"distilbert-base-uncased-distilled-squad\", export=True)\n",
@@ -182,7 +182,7 @@
     }
    ],
    "source": [
-    "from optimum.intel.openvino import OVModelForQuestionAnswering\n",
+    "from optimum.intel import OVModelForQuestionAnswering\n",
     "from transformers import AutoTokenizer, pipeline\n",
     "\n",
     "model = OVModelForQuestionAnswering.from_pretrained(\"distilbert-base-uncased-distilled-squad-ov-fp32\")\n",
@@ -240,7 +240,7 @@
    ],
    "source": [
     "import torch\n",
-    "from optimum.intel.openvino import OVModelForQuestionAnswering\n",
+    "from optimum.intel import OVModelForQuestionAnswering\n",
     "from transformers import AutoTokenizer, pipeline\n",
     "\n",
     "model = OVModelForQuestionAnswering.from_pretrained(\"distilbert-base-uncased-distilled-squad-ov-fp32\")\n",
@@ -324,7 +324,7 @@
     }
    ],
    "source": [
-    "from optimum.intel.openvino import OVModelForQuestionAnswering\n",
+    "from optimum.intel import OVModelForQuestionAnswering\n",
     "from transformers import AutoTokenizer, pipeline\n",
     "\n",
     "model = OVModelForQuestionAnswering.from_pretrained(\n",
@@ -529,7 +529,7 @@
    ],
    "source": [
     "from IPython.display import Audio\n",
-    "from optimum.intel.openvino import OVModelForAudioClassification\n",
+    "from optimum.intel import OVModelForAudioClassification\n",
     "from transformers import AutoFeatureExtractor, pipeline\n",
     "from datasets import load_dataset\n",
     "\n",
@@ -638,7 +638,7 @@
     }
    ],
    "source": [
-    "from optimum.intel.openvino import OVModelForCausalLM\n",
+    "from optimum.intel import OVModelForCausalLM\n",
     "from transformers import AutoTokenizer, pipeline\n",
     "\n",
     "model_id = \"helenai/gpt2-ov\"\n",
@@ -704,7 +704,7 @@
    ],
    "source": [
     "from IPython.display import Image\n",
-    "from optimum.intel.openvino import OVModelForImageClassification\n",
+    "from optimum.intel import OVModelForImageClassification\n",
     "from transformers import AutoImageProcessor, pipeline\n",
     "\n",
     "model_id = \"helenai/microsoft-swin-tiny-patch4-window7-224-ov\"\n",
@@ -766,7 +766,7 @@
     }
    ],
    "source": [
-    "from optimum.intel.openvino import OVModelForMaskedLM\n",
+    "from optimum.intel import OVModelForMaskedLM\n",
     "from transformers import AutoTokenizer, pipeline\n",
     "\n",
     "model_id = \"helenai/bert-base-uncased-ov\"\n",
@@ -835,7 +835,7 @@
     }
    ],
    "source": [
-    "from optimum.intel.openvino import OVModelForQuestionAnswering\n",
+    "from optimum.intel import OVModelForQuestionAnswering\n",
     "from transformers import AutoTokenizer, pipeline\n",
     "\n",
     "# Load the model and tokenizer saved in Part 1 of this notebook. Or use the line below to load them from the hub\n",
@@ -890,7 +890,7 @@
     }
    ],
    "source": [
-    "from optimum.intel.openvino import OVModelForSeq2SeqLM\n",
+    "from optimum.intel import OVModelForSeq2SeqLM\n",
     "from transformers import AutoTokenizer, pipeline\n",
     "\n",
     "model_id = \"helenai/t5-small-ov\"\n",
@@ -998,7 +998,7 @@
     }
    ],
    "source": [
-    "from optimum.intel.openvino import OVModelForSequenceClassification\n",
+    "from optimum.intel import OVModelForSequenceClassification\n",
     "from transformers import AutoTokenizer, pipeline\n",
     "\n",
     "model_id = \"helenai/papluca-xlm-roberta-base-language-detection-ov\"\n",
@@ -1047,7 +1047,7 @@
     }
    ],
    "source": [
-    "from optimum.intel.openvino import OVModelForTokenClassification\n",
+    "from optimum.intel import OVModelForTokenClassification\n",
     "from transformers import AutoTokenizer, pipeline\n",
     "\n",
     "model_id = \"helenai/dslim-bert-base-NER-ov-fp32\"\n",
 
@@ -51,7 +51,7 @@
     "import transformers\n",
     "from evaluate import evaluator\n",
     "from openvino.runtime import Core\n",
-    "from optimum.intel.openvino import OVModelForQuestionAnswering, OVQuantizer, OVQuantizationConfig, OVConfig\n",
+    "from optimum.intel import OVModelForQuestionAnswering, OVQuantizer, OVQuantizationConfig, OVConfig\n",
     "from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline\n",
     "\n",
     "transformers.logging.set_verbosity_error()\n",
@@ -286,7 +286,7 @@
     "**NOTE:** if you notice very low accuracy after post-training quantization, it is likely caused by an overflow issue which affects processors that do not contain VNNI (Vector Neural Network Instruction). NNCF has an `overflow_fix` option to address this. It will effectively use 7-bits for quantizing instead of 8-bits to prevent the overflow. To use this option, modify the code in the next cell to add an explicit quantization configuration, and set `overflow_fix` to `\"enable\"`:\n",
     "\n",
     "```\n",
-    "from optimum.intel.openvino import OVConfig, OVQuantizationConfig\n",
+    "from optimum.intel import OVConfig, OVQuantizationConfig\n",
     "\n",
     "ov_config = OVConfig(quantization_config=OVQuantizationConfig(overflow_fix=\"enable\"))\n",
     "quantizer = OVQuantizer.from_pretrained(model)\n",
 
@@ -1,4 +1,4 @@
-optimum-intel[openvino, nncf]
+optimum-intel[openvino]
 datasets
 evaluate[evaluator]
 ipywidgets
 
@@ -14,7 +14,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from optimum.intel.openvino import OVStableDiffusionPipeline\n",
+    "from optimum.intel import OVStableDiffusionPipeline\n",
     "from diffusers.training_utils import set_seed\n",
     "from IPython.display import display"
    ]
 
@@ -18,6 +18,8 @@
 from pathlib import Path
 from typing import TYPE_CHECKING, Optional
 
+from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
+
 from ...exporters import TasksManager
 from ...intel.utils.import_utils import DIFFUSERS_IMPORT_ERROR, is_diffusers_available
 from ..base import BaseOptimumCLICommand, CommandInfo
@@ -47,7 +49,9 @@ def parse_args_openvino(parser: "ArgumentParser"):
             f" {str(TasksManager.get_all_tasks())}. For decoder models, use `xxx-with-past` to export the model using past key values in the decoder."
         ),
     )
-    optional_group.add_argument("--cache_dir", type=str, default=None, help="Path indicating where to store cache.")
+    optional_group.add_argument(
+        "--cache_dir", type=str, default=HUGGINGFACE_HUB_CACHE, help="Path indicating where to store cache."
+    )
     optional_group.add_argument(
         "--framework",
         type=str,
 
@@ -16,6 +16,7 @@
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Union
 
+from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
 from requests.exceptions import ConnectionError as RequestsConnectionError
 from transformers import AutoConfig, AutoTokenizer, PreTrainedTokenizerBase
 
@@ -48,7 +49,7 @@ def main_export(
     task: str = "auto",
     device: str = "cpu",
     framework: Optional[str] = None,
-    cache_dir: Optional[str] = None,
+    cache_dir: str = HUGGINGFACE_HUB_CACHE,
     trust_remote_code: bool = False,
     pad_token_id: Optional[int] = None,
     subfolder: str = "",
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-optimum-intel[openvino, nncf]`
	`1`	`+optimum-intel[openvino]`
`2`	`2`	`datasets`
`3`	`3`	`evaluate[evaluator]`
`4`	`4`	`ipywidgets`
Original file line number	Diff line number	Diff line change
`@@ -14,7 +14,7 @@`
`14`	`14`	`"metadata": {},`
`15`	`15`	`"outputs": [],`
`16`	`16`	`"source": [`
`17`		`- "from optimum.intel.openvino import OVStableDiffusionPipeline\n",`
	`17`	`+ "from optimum.intel import OVStableDiffusionPipeline\n",`
`18`	`18`	`"from diffusers.training_utils import set_seed\n",`
`19`	`19`	`"from IPython.display import display"`
`20`	`20`	`]`