From a2e1e7325e57213c9bfba2ae4fae18fe690c9018 Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Tue, 18 Feb 2025 12:51:32 +0100 Subject: [PATCH 1/2] Deprecate OVTrainer --- .github/workflows/test_openvino.yml | 1 - .github/workflows/test_openvino_examples.yml | 48 - docs/source/openvino/optimization.mdx | 378 ------ .../openvino/audio-classification/README.md | 96 -- .../configs/wav2vec2-base-jpqd.json | 68 -- .../configs/wav2vec2-base-qat.json | 30 - .../audio-classification/requirements.txt | 6 - .../run_audio_classification.py | 445 ------- .../openvino/image-classification/README.md | 78 -- .../configs/swin-base-jpqd.json | 50 - .../image-classification/requirements.txt | 6 - .../run_image_classification.py | 428 ------- .../openvino/question-answering/README.md | 77 -- .../configs/bert-base-jpqd.json | 41 - .../configs/bert-base-movement-sparsity.json | 15 - .../question-answering/requirements.txt | 5 - .../openvino/question-answering/run_qa.py | 713 ------------ .../openvino/question-answering/trainer_qa.py | 148 --- .../openvino/question-answering/utils_qa.py | 443 ------- examples/openvino/test_examples.py | 146 --- .../openvino/text-classification/README.md | 86 -- .../configs/bert-base-jpqd.json | 45 - .../text-classification/requirements.txt | 9 - .../openvino/text-classification/run_glue.py | 657 ----------- optimum/intel/__init__.py | 21 - optimum/intel/openvino/__init__.py | 5 - optimum/intel/openvino/configuration.py | 3 - optimum/intel/openvino/trainer.py | 1027 ----------------- optimum/intel/openvino/training_args.py | 37 - .../utils/dummy_openvino_and_nncf_objects.py | 22 - tests/openvino/test_quantization.py | 49 - tests/openvino/test_training.py | 889 -------------- tests/openvino/test_training_examples.py | 201 ---- 33 files changed, 6273 deletions(-) delete mode 100644 .github/workflows/test_openvino_examples.yml delete mode 100644 docs/source/openvino/optimization.mdx delete mode 100644 examples/openvino/audio-classification/README.md delete mode 100644 examples/openvino/audio-classification/configs/wav2vec2-base-jpqd.json delete mode 100644 examples/openvino/audio-classification/configs/wav2vec2-base-qat.json delete mode 100644 examples/openvino/audio-classification/requirements.txt delete mode 100644 examples/openvino/audio-classification/run_audio_classification.py delete mode 100644 examples/openvino/image-classification/README.md delete mode 100644 examples/openvino/image-classification/configs/swin-base-jpqd.json delete mode 100644 examples/openvino/image-classification/requirements.txt delete mode 100644 examples/openvino/image-classification/run_image_classification.py delete mode 100644 examples/openvino/question-answering/README.md delete mode 100644 examples/openvino/question-answering/configs/bert-base-jpqd.json delete mode 100644 examples/openvino/question-answering/configs/bert-base-movement-sparsity.json delete mode 100644 examples/openvino/question-answering/requirements.txt delete mode 100644 examples/openvino/question-answering/run_qa.py delete mode 100644 examples/openvino/question-answering/trainer_qa.py delete mode 100644 examples/openvino/question-answering/utils_qa.py delete mode 100644 examples/openvino/test_examples.py delete mode 100644 examples/openvino/text-classification/README.md delete mode 100644 examples/openvino/text-classification/configs/bert-base-jpqd.json delete mode 100644 examples/openvino/text-classification/requirements.txt delete mode 100644 examples/openvino/text-classification/run_glue.py delete mode 100644 
optimum/intel/openvino/trainer.py
 delete mode 100644 optimum/intel/openvino/training_args.py
 delete mode 100644 tests/openvino/test_training.py
 delete mode 100644 tests/openvino/test_training_examples.py

diff --git a/.github/workflows/test_openvino.yml b/.github/workflows/test_openvino.yml
index d9882773ba..f64d6772b9 100644
--- a/.github/workflows/test_openvino.yml
+++ b/.github/workflows/test_openvino.yml
@@ -24,7 +24,6 @@ jobs:
           "*modeling*",
           "*diffusion*",
           "*quantization*",
-          "*training*",
           "*export*",
         ]
         transformers-version: ["4.36.0", "latest"]
diff --git a/.github/workflows/test_openvino_examples.yml b/.github/workflows/test_openvino_examples.yml
deleted file mode 100644
index 5b1e8e9dff..0000000000
--- a/.github/workflows/test_openvino_examples.yml
+++ /dev/null
@@ -1,48 +0,0 @@
-name: OpenVINO - Examples Test
-
-on:
-  workflow_dispatch:
-  schedule:
-    - cron: 0 1 * * 1 # run weekly: every Monday at 1am
-  push:
-    paths:
-      - ".github/workflows/test_openvino_examples.yml"
-      - "examples/openvino/**"
-  pull_request:
-    paths:
-      - ".github/workflows/test_openvino_examples.yml"
-      - "examples/openvino/**"
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
-  cancel-in-progress: true
-
-jobs:
-  build:
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version: ["3.9", "3.12"]
-
-    runs-on: ubuntu-22.04
-
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Setup Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.python-version }}
-
-      - name: Install dependencies
-        run: |
-          pip install -r examples/openvino/audio-classification/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
-          pip install -r examples/openvino/image-classification/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
-          pip install -r examples/openvino/question-answering/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
-          pip install -r examples/openvino/text-classification/requirements.txt --extra-index-url https://download.pytorch.org/whl/cpu
-          pip install .[openvino] jstyleson pytest
-
-      - name: Test examples
-        run: |
-          pytest examples/openvino/test_examples.py
diff --git a/docs/source/openvino/optimization.mdx b/docs/source/openvino/optimization.mdx
deleted file mode 100644
index 147421dd4a..0000000000
--- a/docs/source/openvino/optimization.mdx
+++ /dev/null
@@ -1,378 +0,0 @@
-
-
-# Optimization
-
-🤗 Optimum Intel provides an `openvino` package that enables you to apply a variety of model compression methods, such as quantization and pruning, to many models hosted on the 🤗 hub, using the [NNCF](https://docs.openvino.ai/2024/openvino-workflow/model-optimization.html) framework.
-
-
-## Post-training
-
-Quantization is a technique to reduce the computational and memory costs of running inference by representing the weights and / or the activations with lower-precision data types like 8-bit or 4-bit.
-
-### Weight-only quantization
-
-Quantization can be applied on a model's Linear, Convolutional and Embedding layers, enabling the loading of large models on memory-limited devices. For example, when applying 8-bit quantization, the resulting model will be x4 smaller than its fp32 counterpart. For 4-bit quantization, the reduction in memory could theoretically reach x8, but is closer to x6 in practice.
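-
-As a rough sanity check on these numbers, here is a back-of-the-envelope estimate for a hypothetical 7-billion-parameter model (the parameter count is illustrative; real footprints also depend on group size, quantization scales and any layers kept in higher precision):
-
-```python
-# Approximate weight-memory footprint of a hypothetical 7B-parameter model
-params = 7_000_000_000
-fp32_gb = params * 4.0 / 1e9  # 4 bytes per weight -> ~28 GB
-int8_gb = params * 1.0 / 1e9  # 1 byte per weight -> ~7 GB, i.e. x4 smaller
-int4_gb = params * 0.5 / 1e9  # 0.5 bytes per weight -> ~3.5 GB in theory (x8);
-                              # quantization metadata brings this closer to x6
-print(f"fp32: {fp32_gb:.1f} GB, int8: {int8_gb:.1f} GB, int4: {int4_gb:.1f} GB")
-```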
-
-
-#### 8-bit
-
-For 8-bit weight quantization you can provide a `quantization_config` equal to `OVWeightQuantizationConfig(bits=8)` to load your model's weights in 8-bit:
-
-```python
-from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig
-
-model_id = "helenai/gpt2-ov"
-saving_directory = "int8_model"
-quantization_config = OVWeightQuantizationConfig(bits=8)
-model = OVModelForCausalLM.from_pretrained(model_id, quantization_config=quantization_config)
-
-# Saves the int8 model that will be x4 smaller than its fp32 counterpart
-model.save_pretrained(saving_directory)
-```
-
-Weights of language models inside vision-language pipelines can be quantized in a similar way:
-```python
-from optimum.intel import OVModelForVisualCausalLM
-
-model = OVModelForVisualCausalLM.from_pretrained(
-    "llava-hf/llava-v1.6-mistral-7b-hf",
-    quantization_config=quantization_config
-)
-```
-
-<Tip>
-
-If `quantization_config` is not provided, the model will be exported in 8 bits by default when it has more than 1 billion parameters. You can disable it with `load_in_8bit=False`.
-
-</Tip>
-
-
-#### 4-bit
-
-4-bit weight quantization can be achieved in a similar way:
-
-```python
-from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig
-
-quantization_config = OVWeightQuantizationConfig(bits=4)
-model = OVModelForCausalLM.from_pretrained(model_id, quantization_config=quantization_config)
-```
-
-Or for vision-language pipelines:
-```python
-model = OVModelForVisualCausalLM.from_pretrained(
-    "llava-hf/llava-v1.6-mistral-7b-hf",
-    quantization_config=quantization_config
-)
-```
-
-You can tune the quantization parameters to achieve a better performance-accuracy trade-off as follows:
-
-```python
-quantization_config = OVWeightQuantizationConfig(
-    bits=4,
-    sym=False,
-    ratio=0.8,
-    quant_method="awq",
-    dataset="wikitext2"
-)
-```
-
-By default the quantization scheme is [asymmetric](https://github.com/openvinotoolkit/nncf/blob/develop/docs/usage/training_time_compression/other_algorithms/LegacyQuantization.md#asymmetric-quantization); to make it [symmetric](https://github.com/openvinotoolkit/nncf/blob/develop/docs/usage/training_time_compression/other_algorithms/LegacyQuantization.md#symmetric-quantization) you can add `sym=True`.
-
-For 4-bit quantization you can also specify the following arguments in the quantization configuration:
-* The `group_size` parameter defines the group size to use for quantization; setting it to `-1` results in per-column quantization.
-* The `ratio` parameter controls the ratio between 4-bit and 8-bit quantization. If set to 0.9, it means that 90% of the layers will be quantized to `int4` while 10% will be quantized to `int8`.
-
-Smaller `group_size` and `ratio` values usually improve accuracy at the cost of model size and inference latency.
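-
-For example, a 4-bit configuration combining group-wise quantization with a mixed int4/int8 ratio could look as follows (the values below are illustrative, not tuned recommendations):
-
-```python
-quantization_config = OVWeightQuantizationConfig(
-    bits=4,
-    group_size=128,  # quantize weights in groups of 128 elements
-    ratio=0.9,       # 90% of the layers in int4, the remaining 10% in int8
-)
-```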
-
-The quality of a 4-bit weight-compressed model can be further improved by employing one of the following data-dependent methods:
-* **AWQ**, which stands for Activation-Aware Quantization, is an algorithm that tunes model weights for more accurate 4-bit compression. It slightly improves the generation quality of compressed LLMs, but requires significant additional time and memory for tuning weights on a calibration dataset. Please note that there may be no matching patterns in the model to apply AWQ to, in which case it will be skipped.
-* **Scale Estimation** is a method that tunes quantization scales to minimize the `L2` error between the original and compressed layers. Providing a dataset is required to run scale estimation. Using this method also incurs additional time and memory overhead.
-* **GPTQ** optimizes compressed weights in a layer-wise fashion to minimize the difference between the activations of a compressed layer and those of the original layer.
-* **LoRA Correction** mitigates quantization noise introduced during weight compression by leveraging low-rank adaptation.
-
-Data-aware algorithms can be applied together or separately. For that, provide the corresponding arguments to the 4-bit `OVWeightQuantizationConfig` together with a dataset. For example:
-```python
-quantization_config = OVWeightQuantizationConfig(
-    bits=4,
-    sym=False,
-    ratio=0.8,
-    quant_method="awq",
-    scale_estimation=True,
-    gptq=True,
-    dataset="wikitext2"
-)
-```
-
-Note: GPTQ and LoRA Correction algorithms can't be applied simultaneously.
-
-### Static quantization
-
-When applying post-training static quantization, both the weights and the activations are quantized.
-To apply quantization on the activations, an additional calibration step is needed, which consists of feeding a `calibration_dataset` to the network in order to estimate the activation quantization parameters.
-
-Here is how to apply static quantization on a fine-tuned DistilBERT given your own `calibration_dataset`:
-
-```python
-from transformers import AutoTokenizer
-from optimum.intel import OVQuantizer, OVModelForSequenceClassification, OVConfig, OVQuantizationConfig
-
-model_id = "distilbert-base-uncased-finetuned-sst-2-english"
-model = OVModelForSequenceClassification.from_pretrained(model_id, export=True)
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-# The directory where the quantized model will be saved
-save_dir = "ptq_model"
-
-quantizer = OVQuantizer.from_pretrained(model)
-
-# Apply static quantization and export the resulting quantized model to OpenVINO IR format
-ov_config = OVConfig(quantization_config=OVQuantizationConfig())
-quantizer.quantize(ov_config=ov_config, calibration_dataset=calibration_dataset, save_directory=save_dir)
-# Save the tokenizer
-tokenizer.save_pretrained(save_dir)
-```
-
-The calibration dataset can be created easily using your `OVQuantizer` (note that it needs to be created before calling `quantize()` above):
-
-```python
-from functools import partial
-
-def preprocess_function(examples, tokenizer):
-    return tokenizer(examples["sentence"], padding="max_length", max_length=128, truncation=True)
-
-# Create the calibration dataset used to perform static quantization
-calibration_dataset = quantizer.get_calibration_dataset(
-    "glue",
-    dataset_config_name="sst2",
-    preprocess_function=partial(preprocess_function, tokenizer=tokenizer),
-    num_samples=300,
-    dataset_split="train",
-)
-```
-
-The `quantize()` method applies post-training static quantization and exports the resulting quantized model to the OpenVINO Intermediate Representation (IR). The resulting graph is represented with two files: an XML file describing the network topology and a binary file describing the weights. The resulting model can be run on any target Intel device.
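-
-The quantized model saved this way can be reloaded later like any other OpenVINO model; a minimal sketch, reusing the `save_dir` from the snippet above:
-
-```python
-from optimum.intel import OVModelForSequenceClassification
-
-# Load the statically quantized model exported to OpenVINO IR above
-model = OVModelForSequenceClassification.from_pretrained("ptq_model")
-```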
-
-#### Speech-to-text Models Quantization
-
-The speech-to-text Whisper model can be quantized without the need for preparing a custom calibration dataset. Please see the example below.
-
-```python
-from optimum.intel import OVModelForSpeechSeq2Seq, OVQuantizationConfig
-
-model_id = "openai/whisper-tiny"
-ov_model = OVModelForSpeechSeq2Seq.from_pretrained(
-    model_id,
-    quantization_config=OVQuantizationConfig(
-        num_samples=10,
-        dataset="librispeech",
-        processor=model_id,
-        matmul_sq_alpha=0.95,
-    )
-)
-```
-
-With this, the encoder, decoder and decoder-with-past models of the Whisper pipeline will be fully quantized, including activations.
-
-### Hybrid quantization
-
-Traditional optimization methods like post-training 8-bit quantization do not work well for Stable Diffusion (SD) models and can lead to poor generation results. On the other hand, weight compression does not improve performance significantly when applied to Stable Diffusion models, as the size of the activations is comparable to that of the weights.
-The U-Net component takes up most of the overall execution time of the pipeline. Thus, optimizing just this one component can bring substantial benefits in terms of inference speed while keeping acceptable accuracy without fine-tuning. Quantizing the rest of the diffusion pipeline does not significantly improve inference performance but could potentially lead to substantial accuracy degradation.
-Therefore, the proposal is to apply quantization in *hybrid mode* for the U-Net model and weight-only quantization for the rest of the pipeline components:
-* U-Net: quantization applied on both the weights and activations
-* The text encoder, VAE encoder / decoder: quantization applied on the weights
-
-The hybrid mode quantizes the weights of MatMul and Embedding layers and the activations of other layers, preserving accuracy post-optimization while reducing the model size.
-
-The `quantization_config` is utilized to define the optimization parameters for the SD pipeline. To enable hybrid quantization, specify the quantization dataset in the `quantization_config`. If the dataset is not defined, weight-only quantization will be applied on all components.
-
-```python
-from optimum.intel import OVStableDiffusionPipeline, OVWeightQuantizationConfig
-
-model = OVStableDiffusionPipeline.from_pretrained(
-    model_id,
-    export=True,
-    quantization_config=OVWeightQuantizationConfig(bits=8, dataset="conceptual_captions"),
-)
-```
-
-For more details, please refer to the corresponding NNCF [documentation](https://github.com/openvinotoolkit/nncf/blob/develop/docs/usage/post_training_compression/weights_compression/Usage.md).
-
-
-## Training-time
-
-Apart from optimizing a model after training, like post-training quantization above, `optimum.openvino` also provides optimization methods during training, namely Quantization-Aware Training (QAT) and Joint Pruning, Quantization and Distillation (JPQD).
-
-<Tip>
-
-Training-time optimization methods are deprecated and will be removed in optimum-intel v1.22.0.
-
-</Tip>
-
-
-### Quantization-Aware Training (QAT)
-
-QAT simulates the effects of quantization during training, in order to alleviate its effects on the model's accuracy. It is recommended in cases where post-training quantization results in high accuracy degradation. Here is an example of how to fine-tune a DistilBERT model on the sst-2 task while applying quantization-aware training (QAT).
-
-```diff
- import evaluate
- import numpy as np
- from transformers import (
-     AutoModelForSequenceClassification,
-     AutoTokenizer,
-     TrainingArguments,
-     default_data_collator,
- )
- from datasets import load_dataset
-- from transformers import Trainer
-+ from optimum.intel import OVConfig, OVTrainer, OVModelForSequenceClassification
-
- model_id = "distilbert-base-uncased-finetuned-sst-2-english"
- model = AutoModelForSequenceClassification.from_pretrained(model_id)
- tokenizer = AutoTokenizer.from_pretrained(model_id)
- # The directory where the quantized model will be saved
- save_dir = "qat_model"
- dataset = load_dataset("glue", "sst2")
- dataset = dataset.map(
-     lambda examples: tokenizer(examples["sentence"], padding=True), batched=True
- )
- metric = evaluate.load("glue", "sst2")
-
- def compute_metrics(eval_preds):
-     preds = np.argmax(eval_preds.predictions, axis=1)
-     return metric.compute(predictions=preds, references=eval_preds.label_ids)
-
- # Load the default quantization configuration detailing the quantization we wish to apply
-+ ov_config = OVConfig()
-
-- trainer = Trainer(
-+ trainer = OVTrainer(
-     model=model,
-     args=TrainingArguments(save_dir, num_train_epochs=1.0, do_train=True, do_eval=True),
-     train_dataset=dataset["train"].select(range(300)),
-     eval_dataset=dataset["validation"],
-     compute_metrics=compute_metrics,
-     tokenizer=tokenizer,
-     data_collator=default_data_collator,
-+     ov_config=ov_config,
-+     task="text-classification",
- )
-
- # Train the model while applying quantization
- train_result = trainer.train()
- metrics = trainer.evaluate()
- # Export the quantized model to OpenVINO IR format and save it
- trainer.save_model()
-
- # Load the resulting quantized model
-- model = AutoModelForSequenceClassification.from_pretrained(save_dir)
-+ model = OVModelForSequenceClassification.from_pretrained(save_dir)
-```
-
-
-### Joint Pruning, Quantization and Distillation (JPQD)
-
-Other than quantization, compression methods like pruning and distillation are commonly used to further improve task performance and efficiency. Structured pruning slims a model for lower computational demands, while distillation leverages the knowledge of a teacher model, usually a larger one, to improve model predictions. Combining these methods with quantization can result in an optimized model with significant efficiency improvements while retaining good task accuracy. In `optimum.openvino`, `OVTrainer` provides the capability to jointly prune, quantize and distill a model during training. The following is an example of how to perform this optimization on BERT-base for the sst-2 task.
-
-First, we create a config dictionary to specify the target algorithms. As `optimum.openvino` relies on NNCF as the backend, the config format follows NNCF specifications (see [here](https://github.com/openvinotoolkit/nncf/blob/develop/docs/usage/training_time_compression/other_algorithms)). In the example config below, we specify pruning and quantization in a list of compression algorithms with their hyperparameters. The pruning method closely resembles the work of [Lagunas et al., 2021, Block Pruning For Faster Transformers](https://arxiv.org/pdf/2109.04838.pdf), whereas the quantization refers to QAT. With this configuration, the model under optimization will be initialized with pruning and quantization operators at the beginning of training.
-
-```python
-compression_config = [
-    {
-        "algorithm": "movement_sparsity",
-        "params": {
-            "warmup_start_epoch": 1,
-            "warmup_end_epoch": 4,
-            "importance_regularization_factor": 0.01,
-            "enable_structured_masking": True
-        },
-        "sparse_structure_by_scopes": [
-            {"mode": "block", "sparse_factors": [32, 32], "target_scopes": "{re}.*BertAttention.*"},
-            {"mode": "per_dim", "axis": 0, "target_scopes": "{re}.*BertIntermediate.*"},
-            {"mode": "per_dim", "axis": 1, "target_scopes": "{re}.*BertOutput.*"},
-        ],
-        "ignored_scopes": ["{re}.*NNCFEmbedding", "{re}.*pooler.*", "{re}.*LayerNorm.*"]
-    },
-    {
-        "algorithm": "quantization",
-        "weights": {"mode": "symmetric"},
-        "activations": {"mode": "symmetric"},
-    }
-]
-```
-
-> Known limitation: Current structured pruning with movement sparsity only supports the *BERT, Wav2vec2 and Swin* families of models. See [here](https://github.com/openvinotoolkit/nncf/blob/develop/nncf/experimental/torch/sparsity/movement/MovementSparsity.md) for more information.
-
-Once we have the config ready, we can start developing the training pipeline, as in the snippet below. Since we are customizing joint compression with the config above, notice that `OVConfig` is initialized with the config dictionary (JSON parsing to a Python dictionary is skipped for brevity). As for distillation, users are required to load the teacher model; this works just like normal model loading with the transformers API. `OVTrainingArguments` extends transformers' `TrainingArguments` with distillation hyperparameters, i.e. the distillation weight and temperature, for ease of use. The snippet below shows how we load a teacher model and create training arguments with `OVTrainingArguments`. Subsequently, the teacher model, together with the instantiated `OVConfig` and `OVTrainingArguments`, is fed to `OVTrainer`. Voila! That is all we need; the rest of the pipeline is identical to native transformers training.
-
-```diff
--- from transformers import Trainer, TrainingArguments
-+ from optimum.intel import OVConfig, OVTrainer, OVTrainingArguments
-
- # Load teacher model
-+ teacher_model = AutoModelForSequenceClassification.from_pretrained(teacher_model_or_path)
-
--- ov_config = OVConfig()
-+ ov_config = OVConfig(compression=compression_config)
-
- trainer = OVTrainer(
-     model=model,
-+     teacher_model=teacher_model,
--     args=TrainingArguments(save_dir, num_train_epochs=1.0, do_train=True, do_eval=True),
-+     args=OVTrainingArguments(save_dir, num_train_epochs=1.0, do_train=True, do_eval=True, distillation_temperature=3, distillation_weight=0.9),
-     train_dataset=dataset["train"].select(range(300)),
-     eval_dataset=dataset["validation"],
-     compute_metrics=compute_metrics,
-     tokenizer=tokenizer,
-     data_collator=default_data_collator,
-+     ov_config=ov_config,
-     task="text-classification",
- )
-
- # Train the model like usual; internally the training is applied with pruning, quantization and distillation
- train_result = trainer.train()
- metrics = trainer.evaluate()
- # Export the quantized model to OpenVINO IR format and save it
- trainer.save_model()
-```
-
-For more details on movement sparsity and how to configure it, see the NNCF documentation [here](https://github.com/openvinotoolkit/nncf/blob/develop/nncf/experimental/torch/sparsity/movement/MovementSparsity.md).
-
-For more on the algorithms available in NNCF, see the documentation [here](https://github.com/openvinotoolkit/nncf/tree/develop/docs/usage/training_time_compression/other_algorithms).
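-
-As a reference for the parsing step skipped above, here is a minimal sketch that loads such a config from a hypothetical `compression_config.json` file sitting next to the training script:
-
-```python
-import json
-
-# Hypothetical file holding the compression configuration list shown above
-with open("compression_config.json") as f:
-    compression_config = json.load(f)
-
-ov_config = OVConfig(compression=compression_config)
-```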
-
-For complete JPQD scripts, please refer to the examples provided [here](https://github.com/huggingface/optimum-intel/tree/main/examples/openvino).
-
-Quantization-Aware Training (QAT) and knowledge distillation can also be combined in order to optimize Stable Diffusion models while maintaining accuracy. For more details, take a look at this [blog post](https://huggingface.co/blog/train-optimize-sd-intel).
-
-## Inference with Transformers pipeline
-
-After applying quantization on our model, we can then easily load it with our `OVModelFor` classes and perform inference with OpenVINO Runtime using the Transformers [pipelines](https://huggingface.co/docs/transformers/main/en/main_classes/pipelines).
-
-```python
-from transformers import AutoTokenizer, pipeline
-from optimum.intel import OVModelForSequenceClassification
-
-model_id = "helenai/distilbert-base-uncased-finetuned-sst-2-english-ov-int8"
-ov_model = OVModelForSequenceClassification.from_pretrained(model_id)
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-cls_pipe = pipeline("text-classification", model=ov_model, tokenizer=tokenizer)
-text = "He's a dreadful magician."
-outputs = cls_pipe(text)
-
-# [{'label': 'NEGATIVE', 'score': 0.9840195178985596}]
-```
diff --git a/examples/openvino/audio-classification/README.md b/examples/openvino/audio-classification/README.md
deleted file mode 100644
index 8b3366c960..0000000000
--- a/examples/openvino/audio-classification/README.md
+++ /dev/null
@@ -1,96 +0,0 @@
-
-# Audio classification examples
-
-This folder contains [`run_audio_classification.py`](https://github.com/huggingface/optimum/blob/main/examples/openvino/audio-classification/run_audio_classification.py), a script to fine-tune a 🤗 Transformers model on the 🗣️ [Keyword Spotting subset](https://huggingface.co/datasets/superb#ks) of the SUPERB dataset while applying Quantization-Aware Training (QAT). QAT can be easily applied by replacing the Transformers [`Trainer`](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#trainer) with the Optimum [`OVTrainer`]. Any model from our [hub](https://huggingface.co/models) can be fine-tuned and quantized, as long as the model is supported by the [`AutoModelForAudioClassification`](https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoModelForAudioClassification) API.
-
-### Fine-tuning Wav2Vec2 on Keyword Spotting with QAT
-
-The following command shows how to fine-tune [Wav2Vec2-base](https://huggingface.co/facebook/wav2vec2-base) on the 🗣️ [Keyword Spotting subset](https://huggingface.co/datasets/superb#ks) of the SUPERB dataset with Quantization-Aware Training (QAT). The `OVTrainer` uses a default quantization configuration which should work in many cases, but we can also customize the algorithm details. Here, we quantize the Wav2Vec2-base model with a custom configuration file specified by `--nncf_compression_config`. For more details on the quantization configuration, see the NNCF documentation [here](https://github.com/openvinotoolkit/nncf/blob/develop/docs/compression_algorithms/Quantization.md).
-
-```bash
-python run_audio_classification.py \
-    --model_name_or_path facebook/wav2vec2-base \
-    --nncf_compression_config configs/wav2vec2-base-qat.json \
-    --dataset_name superb \
-    --dataset_config_name ks \
-    --output_dir /tmp/qat-wav2vec2-base-ft-keyword-spotting \
-    --overwrite_output_dir \
-    --remove_unused_columns False \
-    --do_train \
-    --do_eval \
-    --fp16 \
-    --learning_rate 3e-5 \
-    --max_length_seconds 1 \
-    --attention_mask False \
-    --warmup_ratio 0.1 \
-    --num_train_epochs 5 \
-    --per_device_train_batch_size 32 \
-    --gradient_accumulation_steps 4 \
-    --per_device_eval_batch_size 64 \
-    --dataloader_num_workers 4 \
-    --logging_strategy steps \
-    --logging_steps 10 \
-    --evaluation_strategy epoch \
-    --save_strategy epoch \
-    --load_best_model_at_end True \
-    --metric_for_best_model accuracy \
-    --save_total_limit 3 \
-    --seed 42
-```
-
-On a single V100 GPU, this script should run in ~45 minutes and yield a quantized model with an accuracy of **97.5%**.
-
-### Joint Pruning, Quantization and Distillation (JPQD) of Wav2Vec2 on Keyword Spotting
-
-`OVTrainer` also provides an advanced optimization workflow via NNCF to structurally prune, quantize and distill a model. The following is an example of joint pruning, quantization and distillation on the Wav2Vec2-base model for the keyword spotting task. To enable JPQD optimization, use an alternative configuration specified with `--nncf_compression_config`. For more details on how to configure the pruning algorithm, see the NNCF documentation [here](https://github.com/openvinotoolkit/nncf/blob/develop/nncf/experimental/torch/sparsity/movement/MovementSparsity.md).
-
-```bash
-torchrun --nproc-per-node=1 run_audio_classification.py \
-    --model_name_or_path anton-l/wav2vec2-base-ft-keyword-spotting \
-    --teacher_model_name_or_path anton-l/wav2vec2-base-ft-keyword-spotting \
-    --nncf_compression_config configs/wav2vec2-base-jpqd.json \
-    --freeze_feature_encoder False \
-    --distillation_weight 0.9 \
-    --dataset_name superb \
-    --dataset_config_name ks \
-    --output_dir /tmp/jpqd-wav2vec2-base-ft-keyword-spotting \
-    --overwrite_output_dir \
-    --remove_unused_columns False \
-    --do_eval \
-    --do_train \
-    --fp16 \
-    --optim adamw_torch \
-    --learning_rate 7e-5 \
-    --max_length_seconds 1 \
-    --attention_mask False \
-    --warmup_ratio 0.5 \
-    --num_train_epochs 12 \
-    --per_device_train_batch_size 32 \
-    --gradient_accumulation_steps 4 \
-    --per_device_eval_batch_size 64 \
-    --dataloader_num_workers 4 \
-    --logging_strategy steps \
-    --logging_steps 10 \
-    --evaluation_strategy epoch \
-    --save_strategy epoch \
-    --load_best_model_at_end False \
-    --save_total_limit 3 \
-    --seed 42
-```
-
-This script should take about 3 hours on a single V100 GPU and produce a quantized Wav2Vec2-base model with ~60% structured sparsity in its linear layers. The model accuracy should converge to about 97.5%. To launch the script on multiple GPUs, specify `--nproc-per-node=`. Note that a different batch size and other hyperparameters might be required to achieve the same results as on a single GPU.
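-
-Once training finishes, the exported OpenVINO model can be loaded back for inference; a minimal sketch, with the path matching the `--output_dir` used above:
-
-```python
-from optimum.intel import OVModelForAudioClassification
-
-model = OVModelForAudioClassification.from_pretrained("/tmp/jpqd-wav2vec2-base-ft-keyword-spotting")
-```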
diff --git a/examples/openvino/audio-classification/configs/wav2vec2-base-jpqd.json b/examples/openvino/audio-classification/configs/wav2vec2-base-jpqd.json deleted file mode 100644 index 41e53f5cbb..0000000000 --- a/examples/openvino/audio-classification/configs/wav2vec2-base-jpqd.json +++ /dev/null @@ -1,68 +0,0 @@ -[ - { - "algorithm": "movement_sparsity", - "params": { - "warmup_start_epoch": 1, - "warmup_end_epoch": 6, - "importance_regularization_factor": 0.045, - "enable_structured_masking": true - }, - "sparse_structure_by_scopes": [ - { - "mode": "block", - "sparse_factors": [32, 32], - "target_scopes": "{re}.*Wav2Vec2Attention.*" - }, - { - "mode": "per_dim", - "axis": 0, - "target_scopes": "{re}.*intermediate_dense.*" - }, - { - "mode": "per_dim", - "axis": 1, - "target_scopes": "{re}.*output_dense.*" - } - ], - "ignored_scopes": [ - "{re}projector", - "{re}classifier", - "{re}feature_extractor", - "{re}feature_projection", - "{re}pos_conv_embed" - ] - }, - { - "algorithm": "quantization", - "quantize_inputs": false, - "preset": "mixed", - "overflow_fix": "enable", - "initializer": { - "range": { - "num_init_samples": 512, - "type": "percentile", - "params": { - "min_percentile": 0.01, - "max_percentile": 99.99 - } - }, - "batchnorm_adaptation": { - "num_bn_adaptation_samples": 0 - } - }, - "scope_overrides": { - "activations": { - "{re}.*matmul_0": { - "mode": "symmetric" - }, - "{re}.*scaled_dot_product_attention_0": { - "mode": "symmetric" - } - } - }, - "ignored_scopes": [ - "{re}.*__add___[0-1]", - "{re}.*layer_norm_0" - ] - } -] \ No newline at end of file diff --git a/examples/openvino/audio-classification/configs/wav2vec2-base-qat.json b/examples/openvino/audio-classification/configs/wav2vec2-base-qat.json deleted file mode 100644 index 191f266a65..0000000000 --- a/examples/openvino/audio-classification/configs/wav2vec2-base-qat.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "algorithm": "quantization", - "quantize_inputs": false, - "preset": "mixed", - "overflow_fix": "enable", - "initializer": { - "range": { - "num_init_samples": 300, - "type": "mean_min_max" - }, - "batchnorm_adaptation": { - "num_bn_adaptation_samples": 0 - } - }, - "scope_overrides": { - "activations": { - "{re}.*matmul_0": { - "mode": "symmetric" - }, - "{re}.*scaled_dot_product_attention_0": { - "mode": "symmetric" - } - } - }, - "ignored_scopes": [ - "{re}.*feature_extractor.*", - "{re}.*__add___[0-1]", - "{re}.*layer_norm_0" - ] -} \ No newline at end of file diff --git a/examples/openvino/audio-classification/requirements.txt b/examples/openvino/audio-classification/requirements.txt deleted file mode 100644 index 89569575f5..0000000000 --- a/examples/openvino/audio-classification/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -transformers>=4.36.0,<4.46.0 -datasets>=1.14.0,<2.20.0 -evaluate -librosa -torchaudio -accelerate \ No newline at end of file diff --git a/examples/openvino/audio-classification/run_audio_classification.py b/examples/openvino/audio-classification/run_audio_classification.py deleted file mode 100644 index 30b95c1739..0000000000 --- a/examples/openvino/audio-classification/run_audio_classification.py +++ /dev/null @@ -1,445 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2021 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -import sys -import warnings -from dataclasses import dataclass, field -from pathlib import Path -from random import randint -from typing import Optional - -import datasets -import evaluate -import jstyleson as json -import numpy as np -import transformers -from datasets import DatasetDict, load_dataset -from nncf.common.utils.os import safe_open -from transformers import AutoConfig, AutoFeatureExtractor, AutoModelForAudioClassification, HfArgumentParser, set_seed -from transformers.trainer_utils import get_last_checkpoint -from transformers.utils import check_min_version, send_example_telemetry -from transformers.utils.versions import require_version - -from optimum.intel import OVConfig, OVTrainer, OVTrainingArguments - - -logger = logging.getLogger(__name__) - -# Will error if the minimal version of Transformers is not installed. Remove at your own risks. -check_min_version("4.22.0") - -require_version("datasets>=1.14.0", "To fix: pip install -r examples/pytorch/audio-classification/requirements.txt") - - -def random_subsample(wav: np.ndarray, max_length: float, sample_rate: int = 16000): - """Randomly sample chunks of `max_length` seconds from the input audio""" - sample_length = int(round(sample_rate * max_length)) - if len(wav) <= sample_length: - return wav - random_offset = randint(0, len(wav) - sample_length - 1) - return wav[random_offset : random_offset + sample_length] - - -@dataclass -class DataTrainingArguments: - """ - Arguments pertaining to what data we are going to input our model for training and eval. - Using `HfArgumentParser` we can turn this class - into argparse arguments to be able to specify them on - the command line. - """ - - dataset_name: Optional[str] = field(default=None, metadata={"help": "Name of a dataset from the datasets package"}) - dataset_config_name: Optional[str] = field( - default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} - ) - train_file: Optional[str] = field( - default=None, metadata={"help": "A file containing the training audio paths and labels."} - ) - eval_file: Optional[str] = field( - default=None, metadata={"help": "A file containing the validation audio paths and labels."} - ) - train_split_name: str = field( - default="train", - metadata={ - "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" - }, - ) - eval_split_name: str = field( - default="validation", - metadata={ - "help": ( - "The name of the training data set split to use (via the datasets library). Defaults to 'validation'" - ) - }, - ) - audio_column_name: str = field( - default="audio", - metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, - ) - label_column_name: str = field( - default="label", metadata={"help": "The name of the dataset column containing the labels. 
Defaults to 'label'"} - ) - max_train_samples: Optional[int] = field( - default=None, - metadata={ - "help": ( - "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." - ) - }, - ) - max_eval_samples: Optional[int] = field( - default=None, - metadata={ - "help": ( - "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." - ) - }, - ) - max_length_seconds: float = field( - default=20, - metadata={"help": "Audio clips will be randomly cut to this length during training if the value is set."}, - ) - - -@dataclass -class ModelArguments: - """ - Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. - """ - - model_name_or_path: str = field( - default="facebook/wav2vec2-base", - metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models."}, - ) - teacher_model_name_or_path: str = field( - default=None, - metadata={ - "help": "Path to pretrained model or model identifier from huggingface.co/models as teacher model in distillation." - }, - ) - config_name: Optional[str] = field( - default=None, metadata={"help": "Pretrained config name or path if not the same as model_name."} - ) - cache_dir: Optional[str] = field( - default=None, metadata={"help": "Where do you want to store the pretrained models downloaded from the Hub."} - ) - model_revision: str = field( - default="main", - metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, - ) - feature_extractor_name: Optional[str] = field( - default=None, metadata={"help": "Name or path of preprocessor config."} - ) - freeze_feature_encoder: bool = field( - default=True, metadata={"help": "Whether to freeze the feature encoder layers of the model."} - ) - attention_mask: bool = field( - default=True, metadata={"help": "Whether to generate an attention mask in the feature extractor."} - ) - use_auth_token: bool = field( - default=False, - metadata={ - "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." - ) - }, - ) - freeze_feature_extractor: Optional[bool] = field( - default=None, metadata={"help": "Whether to freeze the feature extractor layers of the model."} - ) - ignore_mismatched_sizes: bool = field( - default=False, - metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."}, - ) - nncf_compression_config: Optional[str] = field( - default=None, - metadata={ - "help": "Path to NNCF configuration .json file for adapting the model to compression-enabled training." - }, - ) - - def __post_init__(self): - if not self.freeze_feature_extractor and self.freeze_feature_encoder: - warnings.warn( - "The argument `--freeze_feature_extractor` is deprecated and " - "will be removed in a future version. Use `--freeze_feature_encoder`" - "instead. Setting `freeze_feature_encoder==True`.", - FutureWarning, - ) - if self.freeze_feature_extractor and not self.freeze_feature_encoder: - raise ValueError( - "The argument `--freeze_feature_extractor` is deprecated and " - "should not be used in combination with `--freeze_feature_encoder`." - "Only make use of `--freeze_feature_encoder`." - ) - - -def main(): - # See all possible arguments in src/transformers/training_args.py - # or by passing the --help flag to this script. - # We now keep distinct sets of args, for a cleaner separation of concerns. 
- - parser = HfArgumentParser((ModelArguments, DataTrainingArguments, OVTrainingArguments)) - if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): - # If we pass only one argument to the script and it's the path to a json file, - # let's parse it to get our arguments. - model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) - else: - model_args, data_args, training_args = parser.parse_args_into_dataclasses() - - # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The - # information sent is the one passed as arguments along with your Python/PyTorch versions. - send_example_telemetry("run_audio_classification", model_args, data_args) - - # Setup logging - logging.basicConfig( - format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", - datefmt="%m/%d/%Y %H:%M:%S", - handlers=[logging.StreamHandler(sys.stdout)], - ) - - log_level = training_args.get_process_log_level() - logger.setLevel(log_level) - transformers.utils.logging.set_verbosity(log_level) - transformers.utils.logging.enable_default_handler() - transformers.utils.logging.enable_explicit_format() - - # Log on each process the small summary: - logger.warning( - f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu} " - + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" - ) - logger.info(f"Training/evaluation parameters {training_args}") - - # Set seed before initializing model. - set_seed(training_args.seed) - - # Detecting last checkpoint. - last_checkpoint = None - if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir: - last_checkpoint = get_last_checkpoint(training_args.output_dir) - if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0: - raise ValueError( - f"Output directory ({training_args.output_dir}) already exists and is not empty. " - "Use --overwrite_output_dir to train from scratch." - ) - elif last_checkpoint is not None and training_args.resume_from_checkpoint is None: - logger.info( - f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change " - "the `--output_dir` or add `--overwrite_output_dir` to train from scratch." - ) - - # Initialize our dataset and prepare it for the audio classification task. - raw_datasets = DatasetDict() - raw_datasets["train"] = load_dataset( - data_args.dataset_name, - data_args.dataset_config_name, - split=data_args.train_split_name, - use_auth_token=True if model_args.use_auth_token else None, - ) - raw_datasets["eval"] = load_dataset( - data_args.dataset_name, - data_args.dataset_config_name, - split=data_args.eval_split_name, - use_auth_token=True if model_args.use_auth_token else None, - ) - - if data_args.audio_column_name not in raw_datasets["train"].column_names: - raise ValueError( - f"--audio_column_name {data_args.audio_column_name} not found in dataset '{data_args.dataset_name}'. " - "Make sure to set `--audio_column_name` to the correct audio column - one of " - f"{', '.join(raw_datasets['train'].column_names)}." - ) - - if data_args.label_column_name not in raw_datasets["train"].column_names: - raise ValueError( - f"--label_column_name {data_args.label_column_name} not found in dataset '{data_args.dataset_name}'. " - "Make sure to set `--label_column_name` to the correct text column - one of " - f"{', '.join(raw_datasets['train'].column_names)}." 
- ) - - # Setting `return_attention_mask=True` is the way to get a correctly masked mean-pooling over - # transformer outputs in the classifier, but it doesn't always lead to better accuracy - feature_extractor = AutoFeatureExtractor.from_pretrained( - model_args.feature_extractor_name or model_args.model_name_or_path, - return_attention_mask=model_args.attention_mask, - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) - logger.info(feature_extractor) - - # `datasets` takes care of automatically loading and resampling the audio, - # so we just need to set the correct target sampling rate. - raw_datasets = raw_datasets.cast_column( - data_args.audio_column_name, datasets.features.Audio(sampling_rate=feature_extractor.sampling_rate) - ) - - def train_transforms(batch): - """Apply train_transforms across a batch.""" - output_batch = {"input_values": []} - for audio in batch[data_args.audio_column_name]: - wav = random_subsample( - audio["array"], max_length=data_args.max_length_seconds, sample_rate=feature_extractor.sampling_rate - ) - output_batch["input_values"].append(wav) - output_batch["labels"] = list(batch[data_args.label_column_name]) - - return output_batch - - def val_transforms(batch): - """Apply val_transforms across a batch.""" - output_batch = {"input_values": []} - for audio in batch[data_args.audio_column_name]: - wav = audio["array"] - output_batch["input_values"].append(wav) - output_batch["labels"] = list(batch[data_args.label_column_name]) - - return output_batch - - # Prepare label mappings. - # We'll include these in the model's config to get human readable labels in the Inference API. - labels = raw_datasets["train"].features[data_args.label_column_name].names - label2id, id2label = {}, {} - for i, label in enumerate(labels): - label2id[label] = str(i) - id2label[str(i)] = label - - # Load the accuracy metric from the datasets package - metric = evaluate.load("accuracy") - - # Define our compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with - # `predictions` and `label_ids` fields) and has to return a dictionary string to float. 
- def compute_metrics(eval_pred): - """Computes accuracy on a batch of predictions""" - predictions = np.argmax(eval_pred.predictions, axis=1) - return metric.compute(predictions=predictions, references=eval_pred.label_ids) - - config = AutoConfig.from_pretrained( - model_args.config_name or model_args.model_name_or_path, - num_labels=len(labels), - label2id=label2id, - id2label=id2label, - finetuning_task="audio-classification", - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) - model = AutoModelForAudioClassification.from_pretrained( - model_args.model_name_or_path, - from_tf=bool(".ckpt" in model_args.model_name_or_path), - config=config, - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ignore_mismatched_sizes=model_args.ignore_mismatched_sizes, - ) - teacher_model = None - if model_args.teacher_model_name_or_path is not None: - teacher_model = AutoModelForAudioClassification.from_pretrained( - model_args.teacher_model_name_or_path, - from_tf=bool(".ckpt" in model_args.teacher_model_name_or_path), - cache_dir=model_args.cache_dir, - ) - - if training_args.do_train: - if training_args.gradient_checkpointing: - model.gradient_checkpointing_enable() - else: - model.gradient_checkpointing_disable() - - # freeze the convolutional waveform encoder - if model_args.freeze_feature_encoder: - model.freeze_feature_encoder() - - if training_args.do_train: - if data_args.max_train_samples is not None: - raw_datasets["train"] = ( - raw_datasets["train"].shuffle(seed=training_args.seed).select(range(data_args.max_train_samples)) - ) - # Set the training transforms - raw_datasets["train"].set_transform(train_transforms, output_all_columns=False) - - if training_args.do_eval: - if data_args.max_eval_samples is not None: - raw_datasets["eval"] = ( - raw_datasets["eval"].shuffle(seed=training_args.seed).select(range(data_args.max_eval_samples)) - ) - # Set the validation transforms - raw_datasets["eval"].set_transform(val_transforms, output_all_columns=False) - - if model_args.nncf_compression_config is not None: - file_path = Path(model_args.nncf_compression_config).resolve() - with safe_open(file_path) as f: - compression = json.load(f) - ov_config = OVConfig(compression=compression) - else: - ov_config = OVConfig() - ov_config.log_dir = training_args.output_dir - - # Initialize our trainer - trainer = OVTrainer( - model=model, - teacher_model=teacher_model, - ov_config=ov_config, - task="audio-classification", - args=training_args, - train_dataset=raw_datasets["train"] if training_args.do_train else None, - eval_dataset=raw_datasets["eval"] if training_args.do_eval else None, - compute_metrics=compute_metrics, - tokenizer=feature_extractor, - ) - - # Training - if training_args.do_train: - checkpoint = None - if training_args.resume_from_checkpoint is not None: - checkpoint = training_args.resume_from_checkpoint - elif last_checkpoint is not None: - checkpoint = last_checkpoint - train_result = trainer.train(resume_from_checkpoint=checkpoint) - trainer.save_model() - trainer.log_metrics("train", train_result.metrics) - trainer.save_metrics("train", train_result.metrics) - trainer.save_state() - - # Evaluation - if training_args.do_eval: - metrics = trainer.evaluate() - trainer.log_metrics("eval", metrics) - trainer.save_metrics("eval", metrics) - - # Write model card and (optionally) push to hub - kwargs = { - "finetuned_from": 
model_args.model_name_or_path,
-        "tasks": "audio-classification",
-        "dataset": data_args.dataset_name,
-        "tags": ["audio-classification"],
-    }
-    if training_args.push_to_hub:
-        trainer.push_to_hub(**kwargs)
-    else:
-        trainer.create_model_card(**kwargs)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/examples/openvino/image-classification/README.md b/examples/openvino/image-classification/README.md
deleted file mode 100644
index 25d7cbc541..0000000000
--- a/examples/openvino/image-classification/README.md
+++ /dev/null
@@ -1,78 +0,0 @@
-
-# Image classification
-
-This folder contains [`run_image_classification.py`](https://github.com/huggingface/optimum/blob/main/examples/openvino/image-classification/run_image_classification.py), a script to fine-tune a 🤗 Transformers model on an image classification dataset while applying Quantization-Aware Training (QAT). QAT can be easily applied by replacing the Transformers [`Trainer`](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#trainer) with the Optimum [`OVTrainer`]. Any model from our [hub](https://huggingface.co/models) can be fine-tuned and quantized, as long as the model is supported by the [`AutoModelForImageClassification`](https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoModelForImageClassification) API.
-
-### Fine-tuning ViT on the beans dataset
-
-Here we show how to apply Quantization-Aware Training (QAT) to a Vision Transformer (ViT) fine-tuned on the beans dataset (to classify the disease type of bean leaves).
-
-```bash
-python run_image_classification.py \
-    --model_name_or_path nateraw/vit-base-beans \
-    --dataset_name beans \
-    --remove_unused_columns False \
-    --do_train \
-    --do_eval \
-    --learning_rate 2e-5 \
-    --num_train_epochs 1 \
-    --per_device_train_batch_size 8 \
-    --per_device_eval_batch_size 8 \
-    --logging_strategy steps \
-    --logging_steps 10 \
-    --evaluation_strategy epoch \
-    --save_strategy epoch \
-    --save_total_limit 3 \
-    --seed 1337 \
-    --output_dir /tmp/beans_outputs/
-```
-
-On a single V100 GPU, this example takes about 1 minute and yields a quantized model with an accuracy of **98.5%**.
-
-### Joint Pruning, Quantization and Distillation (JPQD) of Swin on food101
-
-`OVTrainer` also provides an advanced optimization workflow via NNCF to structurally prune, quantize and distill a model. The following is an example of joint pruning, quantization and distillation on the Swin-base model for the food101 dataset. To enable JPQD optimization, use an alternative configuration specified with `--nncf_compression_config`. For more details on how to configure the pruning algorithm, see the NNCF documentation [here](https://github.com/openvinotoolkit/nncf/blob/develop/nncf/experimental/torch/sparsity/movement/MovementSparsity.md).
-
-```bash
-torchrun --nproc-per-node=1 run_image_classification.py \
-    --model_name_or_path microsoft/swin-base-patch4-window7-224 \
-    --teacher_model_name_or_path skylord/swin-finetuned-food101 \
-    --distillation_weight 0.9 \
-    --ignore_mismatched_sizes \
-    --dataset_name food101 \
-    --remove_unused_columns False \
-    --dataloader_num_workers 8 \
-    --do_train \
-    --per_device_train_batch_size 16 \
-    --gradient_accumulation_steps 4 \
-    --learning_rate 5e-5 \
-    --warmup_ratio 0.1 \
-    --num_train_epochs 10 \
-    --logging_steps 1 \
-    --do_eval \
-    --per_device_eval_batch_size 128 \
-    --evaluation_strategy steps \
-    --eval_steps 500 \
-    --save_steps 1000 \
-    --save_total_limit 5 \
-    --seed 42 \
-    --overwrite_output_dir \
-    --output_dir /tmp/food101_outputs/ \
-    --nncf_compression_config configs/swin-base-jpqd.json
-```
-
-This example results in a quantized Swin-base model with ~40% sparsity in the linear layers of its transformer blocks, giving 90.7% accuracy on food101 and taking about 12.5 hours on a single V100 GPU. To launch the script on multiple GPUs, specify `--nproc-per-node=`. Note that a different batch size and other hyperparameters might be required to achieve the same results as on a single GPU.
diff --git a/examples/openvino/image-classification/configs/swin-base-jpqd.json b/examples/openvino/image-classification/configs/swin-base-jpqd.json
deleted file mode 100644
index a6057f6d71..0000000000
--- a/examples/openvino/image-classification/configs/swin-base-jpqd.json
+++ /dev/null
@@ -1,50 +0,0 @@
-[
-    {
-        "algorithm": "movement_sparsity",
-        "params": {
-            "warmup_start_epoch": 2,
-            "warmup_end_epoch": 5,
-            "importance_regularization_factor": 1.0,
-            "enable_structured_masking": true
-        },
-        "sparse_structure_by_scopes": [
-            {"mode": "block", "sparse_factors": [16, 16], "target_scopes": "{re}.*SwinAttention.*"},
-            {"mode": "per_dim", "axis": 0, "target_scopes": "{re}.*SwinIntermediate.*"},
-            {"mode": "per_dim", "axis": 1, "target_scopes": "{re}.*SwinOutput.*"}
-        ],
-        "ignored_scopes": ["{re}.*PatchEmbed.*", "{re}.*PatchMerging.*", "{re}.*classifier.*", "{re}.*LayerNorm.*"]
-    },
-    {
-        "algorithm": "quantization",
-        "overflow_fix": "enable",
-        "preset": "mixed",
-        "initializer": {
-            "range": {
-                "num_init_samples": 32,
-                "type": "percentile",
-                "params": {
-                    "min_percentile": 0.01,
-                    "max_percentile": 99.99
-                }
-            },
-            "batchnorm_adaptation": {
-                "num_bn_adaptation_samples": 200
-            }
-        },
-        "scope_overrides": {
-            "activations": {
-                "{re}.*matmul_0": {
-                    "mode": "symmetric"
-                },
-                "{re}.*scaled_dot_product_attention_0": {
-                    "mode": "symmetric"
-                }
-            }
-        },
-        "ignored_scopes": [
-            "{re}.*__add___[0-1]",
-            "{re}.*layer_norm_0"
-        ]
-    }
-]
diff --git a/examples/openvino/image-classification/requirements.txt b/examples/openvino/image-classification/requirements.txt
deleted file mode 100644
index 4ef9212757..0000000000
--- a/examples/openvino/image-classification/requirements.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-transformers>=4.36.0,<4.46.0
-datasets>=1.14.0,<2.20.0
-torch>=1.9.0
-torchvision>=0.6.0
-evaluate
-accelerate
diff --git a/examples/openvino/image-classification/run_image_classification.py b/examples/openvino/image-classification/run_image_classification.py
deleted file mode 100644
index 04c2984d8b..0000000000
--- a/examples/openvino/image-classification/run_image_classification.py
+++ /dev/null
@@ -1,428 +0,0 @@
-#!/usr/bin/env python
-# coding=utf-8
-# Copyright 2021 The HuggingFace Inc. team. All rights reserved.
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -""" -Fine-tuning a 🤗 Transformers model for image classification while applying quantization aware training with NNCF. -""" - -import logging -import os -import sys -from dataclasses import dataclass, field -from pathlib import Path -from typing import Optional - -import evaluate -import jstyleson as json -import numpy as np -import torch -import transformers -from datasets import load_dataset -from nncf.common.utils.os import safe_open -from PIL import Image -from torchvision.transforms import ( - CenterCrop, - Compose, - Normalize, - RandomHorizontalFlip, - RandomResizedCrop, - Resize, - ToTensor, -) -from transformers import ( - MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING, - AutoConfig, - AutoFeatureExtractor, - AutoModelForImageClassification, - HfArgumentParser, - set_seed, -) -from transformers.trainer_utils import get_last_checkpoint -from transformers.utils import check_min_version, send_example_telemetry -from transformers.utils.versions import require_version - -from optimum.intel import OVConfig, OVTrainer, OVTrainingArguments - - -logger = logging.getLogger(__name__) - -# Will error if the minimal version of Transformers is not installed. Remove at your own risks. -check_min_version("4.22.0") - -require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-classification/requirements.txt") - -MODEL_CONFIG_CLASSES = list(MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING.keys()) -MODEL_TYPES = tuple(conf.model_type for conf in MODEL_CONFIG_CLASSES) - - -def pil_loader(path: str): - with open(path, "rb") as f: - im = Image.open(f) - return im.convert("RGB") - - -@dataclass -class DataTrainingArguments: - """ - Arguments pertaining to what data we are going to input our model for training and eval. - Using `HfArgumentParser` we can turn this class into argparse arguments to be able to specify - them on the command line. - """ - - dataset_name: Optional[str] = field( - default=None, - metadata={ - "help": "Name of a dataset from the hub (could be your own, possibly private dataset hosted on the hub)." - }, - ) - dataset_config_name: Optional[str] = field( - default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} - ) - train_dir: Optional[str] = field(default=None, metadata={"help": "A folder containing the training data."}) - validation_dir: Optional[str] = field(default=None, metadata={"help": "A folder containing the validation data."}) - train_val_split: Optional[float] = field( - default=0.15, metadata={"help": "Percent to split off of train for validation."} - ) - max_train_samples: Optional[int] = field( - default=None, - metadata={ - "help": ( - "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." - ) - }, - ) - max_eval_samples: Optional[int] = field( - default=None, - metadata={ - "help": ( - "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." 
- ) - }, - ) - - def __post_init__(self): - if self.dataset_name is None and (self.train_dir is None and self.validation_dir is None): - raise ValueError( - "You must specify either a dataset name from the hub or a train and/or validation directory." - ) - - -@dataclass -class ModelArguments: - """ - Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. - """ - - model_name_or_path: str = field( - default="google/vit-base-patch16-224-in21k", - metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"}, - ) - teacher_model_name_or_path: str = field( - default=None, - metadata={ - "help": "Path to pretrained model or model identifier from huggingface.co/models as teacher model in distillation." - }, - ) - model_type: Optional[str] = field( - default=None, - metadata={"help": "If training from scratch, pass a model type from the list: " + ", ".join(MODEL_TYPES)}, - ) - config_name: Optional[str] = field( - default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} - ) - cache_dir: Optional[str] = field( - default=None, metadata={"help": "Where do you want to store the pretrained models downloaded from s3"} - ) - model_revision: str = field( - default="main", - metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, - ) - feature_extractor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."}) - token: str = field( - default=None, - metadata={ - "help": ( - "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token " - "generated when running `huggingface-cli login` (stored in `~/.huggingface`)." - ) - }, - ) - ignore_mismatched_sizes: bool = field( - default=False, - metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."}, - ) - nncf_compression_config: Optional[str] = field( - default=None, - metadata={ - "help": "Path to NNCF configuration .json file for adapting the model to compression-enabled training." - }, - ) - - -def collate_fn(examples): - pixel_values = torch.stack([example["pixel_values"] for example in examples]) - labels = torch.tensor([example["labels"] for example in examples]) - return {"pixel_values": pixel_values, "labels": labels} - - -def main(): - # See all possible arguments in src/transformers/training_args.py - # or by passing the --help flag to this script. - # We now keep distinct sets of args, for a cleaner separation of concerns. - - parser = HfArgumentParser((ModelArguments, DataTrainingArguments, OVTrainingArguments)) - if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): - # If we pass only one argument to the script and it's the path to a json file, - # let's parse it to get our arguments. - model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) - else: - model_args, data_args, training_args = parser.parse_args_into_dataclasses() - - # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The - # information sent is the one passed as arguments along with your Python/PyTorch versions. 
- send_example_telemetry("run_image_classification", model_args, data_args) - - # Setup logging - logging.basicConfig( - format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", - datefmt="%m/%d/%Y %H:%M:%S", - handlers=[logging.StreamHandler(sys.stdout)], - ) - - log_level = training_args.get_process_log_level() - logger.setLevel(log_level) - transformers.utils.logging.set_verbosity(log_level) - transformers.utils.logging.enable_default_handler() - transformers.utils.logging.enable_explicit_format() - - # Log on each process the small summary: - logger.warning( - f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" - + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" - ) - logger.info(f"Training/evaluation parameters {training_args}") - - # Detecting last checkpoint. - last_checkpoint = None - if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir: - last_checkpoint = get_last_checkpoint(training_args.output_dir) - if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0: - raise ValueError( - f"Output directory ({training_args.output_dir}) already exists and is not empty. " - "Use --overwrite_output_dir to overcome." - ) - elif last_checkpoint is not None and training_args.resume_from_checkpoint is None: - logger.info( - f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change " - "the `--output_dir` or add `--overwrite_output_dir` to train from scratch." - ) - - # Set seed before initializing model. - set_seed(training_args.seed) - - # Initialize our dataset and prepare it for the 'image-classification' task. - if data_args.dataset_name is not None: - dataset = load_dataset( - data_args.dataset_name, - data_args.dataset_config_name, - cache_dir=model_args.cache_dir, - token=model_args.token, - ) - else: - data_files = {} - if data_args.train_dir is not None: - data_files["train"] = os.path.join(data_args.train_dir, "**") - if data_args.validation_dir is not None: - data_files["validation"] = os.path.join(data_args.validation_dir, "**") - dataset = load_dataset( - "imagefolder", - data_files=data_files, - cache_dir=model_args.cache_dir, - ) - - # If we don't have a validation split, split off a percentage of train as validation. - data_args.train_val_split = None if "validation" in dataset.keys() else data_args.train_val_split - if isinstance(data_args.train_val_split, float) and data_args.train_val_split > 0.0: - split = dataset["train"].train_test_split(data_args.train_val_split) - dataset["train"] = split["train"] - dataset["validation"] = split["test"] - - # Prepare label mappings. - # We'll include these in the model's config to get human readable labels in the Inference API. - labels = dataset["train"].features["labels"].names - label2id, id2label = {}, {} - for i, label in enumerate(labels): - label2id[label] = str(i) - id2label[str(i)] = label - - # Load the accuracy metric from the datasets package - metric = evaluate.load("accuracy") - - # Define our compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a - # predictions and label_ids field) and has to return a dictionary string to float. 
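-    # For illustration (made-up values): for eval logits of shape (num_samples, num_labels),
-    # np.argmax(p.predictions, axis=1) yields the predicted class ids, and
-    # metric.compute(predictions=[1, 0], references=[1, 1]) returns {"accuracy": 0.5}.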
- def compute_metrics(p): - """Computes accuracy on a batch of predictions""" - return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids) - - config = AutoConfig.from_pretrained( - model_args.config_name or model_args.model_name_or_path, - num_labels=len(labels), - label2id=label2id, - id2label=id2label, - finetuning_task="image-classification", - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - token=model_args.token, - ) - model = AutoModelForImageClassification.from_pretrained( - model_args.model_name_or_path, - from_tf=bool(".ckpt" in model_args.model_name_or_path), - config=config, - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - token=model_args.token, - ignore_mismatched_sizes=model_args.ignore_mismatched_sizes, - ) - - teacher_model = None - if model_args.teacher_model_name_or_path is not None: - teacher_model = AutoModelForImageClassification.from_pretrained( - model_args.teacher_model_name_or_path, - from_tf=bool(".ckpt" in model_args.teacher_model_name_or_path), - cache_dir=model_args.cache_dir, - ) - - feature_extractor = AutoFeatureExtractor.from_pretrained( - model_args.feature_extractor_name or model_args.model_name_or_path, - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - token=model_args.token, - ) - - # Define torchvision transforms to be applied to each image. - if isinstance(feature_extractor.size, dict): - if "shortest_edge" in feature_extractor.size: - size = feature_extractor.size["shortest_edge"] - else: - size = (feature_extractor.size["height"], feature_extractor.size["width"]) - else: - size = feature_extractor.size - normalize = Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std) - _train_transforms = Compose( - [ - RandomResizedCrop(size), - RandomHorizontalFlip(), - ToTensor(), - normalize, - ] - ) - _val_transforms = Compose( - [ - Resize(size), - CenterCrop(size), - ToTensor(), - normalize, - ] - ) - - def train_transforms(example_batch): - """Apply _train_transforms across a batch.""" - example_batch["pixel_values"] = [ - _train_transforms(pil_img.convert("RGB")) for pil_img in example_batch["image"] - ] - return example_batch - - def val_transforms(example_batch): - """Apply _val_transforms across a batch.""" - example_batch["pixel_values"] = [_val_transforms(pil_img.convert("RGB")) for pil_img in example_batch["image"]] - return example_batch - - if training_args.do_train: - if "train" not in dataset: - raise ValueError("--do_train requires a train dataset") - if data_args.max_train_samples is not None: - dataset["train"] = ( - dataset["train"].shuffle(seed=training_args.seed).select(range(data_args.max_train_samples)) - ) - # Set the training transforms - dataset["train"].set_transform(train_transforms) - - if training_args.do_eval: - if "validation" not in dataset: - raise ValueError("--do_eval requires a validation dataset") - if data_args.max_eval_samples is not None: - dataset["validation"] = ( - dataset["validation"].shuffle(seed=training_args.seed).select(range(data_args.max_eval_samples)) - ) - # Set the validation transforms - dataset["validation"].set_transform(val_transforms) - - if model_args.nncf_compression_config is not None: - file_path = Path(model_args.nncf_compression_config).resolve() - with safe_open(file_path) as f: - compression = json.load(f) - ov_config = OVConfig(compression=compression) - else: - ov_config = OVConfig() - - # Initalize our trainer - trainer = OVTrainer( - model=model, - 
teacher_model=teacher_model,
-        ov_config=ov_config,
-        task="image-classification",
-        args=training_args,
-        train_dataset=dataset["train"] if training_args.do_train else None,
-        eval_dataset=dataset["validation"] if training_args.do_eval else None,
-        compute_metrics=compute_metrics,
-        tokenizer=feature_extractor,
-        data_collator=collate_fn,
-    )
-
-    # Training
-    if training_args.do_train:
-        checkpoint = None
-        if training_args.resume_from_checkpoint is not None:
-            checkpoint = training_args.resume_from_checkpoint
-        elif last_checkpoint is not None:
-            checkpoint = last_checkpoint
-        train_result = trainer.train(resume_from_checkpoint=checkpoint)
-        trainer.save_model()
-        trainer.log_metrics("train", train_result.metrics)
-        trainer.save_metrics("train", train_result.metrics)
-        trainer.save_state()
-
-    # Evaluation
-    if training_args.do_eval:
-        metrics = trainer.evaluate()
-        trainer.log_metrics("eval", metrics)
-        trainer.save_metrics("eval", metrics)
-
-    # Write model card and (optionally) push to hub
-    kwargs = {
-        "finetuned_from": model_args.model_name_or_path,
-        "tasks": "image-classification",
-        "dataset": data_args.dataset_name,
-        "tags": ["image-classification", "vision"],
-    }
-    if training_args.push_to_hub:
-        trainer.push_to_hub(**kwargs)
-    else:
-        trainer.create_model_card(**kwargs)
-
-
-if __name__ == "__main__":
-    main()
diff --git a/examples/openvino/question-answering/README.md b/examples/openvino/question-answering/README.md
deleted file mode 100644
index c57d332e63..0000000000
--- a/examples/openvino/question-answering/README.md
+++ /dev/null
@@ -1,77 +0,0 @@
-
-# Question answering
-
-This folder contains [`run_qa.py`](https://github.com/huggingface/optimum/blob/main/examples/openvino/question-answering/run_qa.py), a script to fine-tune a 🤗 Transformers model on a question answering dataset while applying quantization aware training (QAT). QAT can be easily applied by replacing the Transformers [`Trainer`](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#trainer) with the Optimum [`OVTrainer`].
-A `QuestionAnsweringOVTrainer` is defined in [`trainer_qa.py`](https://github.com/huggingface/optimum/blob/main/examples/openvino/question-answering/trainer_qa.py), which inherits from `OVTrainer` and is adapted to perform evaluation for question answering tasks.
-
-Any model from our [hub](https://huggingface.co/models) (as long as the model is supported by the [`AutoModelForQuestionAnswering`](https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoModelForQuestionAnswering) API) can be fine-tuned on a question-answering dataset (such as SQuAD, or any other QA dataset available in the `datasets` library, or your own csv/jsonlines files) as long as it is structured the same way as SQuAD. You might need to tweak the data processing inside the script if your data is structured differently.
-
-**Note:** This script only works with models that have a fast tokenizer (backed by the 🤗 Tokenizers library) as it
-uses special features of those tokenizers. You can check if your favorite model has a fast tokenizer in
-[this table](https://huggingface.co/transformers/index.html#supported-frameworks).
-
-Note that if your dataset contains samples with no possible answers (like SQuAD version 2), you need to pass along the flag `--version_2_with_negative`.
-
-### Fine-tuning DistilBERT on SQuAD1.0
-
-Here we show how to apply quantization aware training (QAT) to a DistilBERT model already fine-tuned on the SQuAD1.0 dataset; a minimal sketch of the `Trainer`-to-`OVTrainer` swap follows, then the full command.
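-The snippet below is only a sketch of that swap (dataset preprocessing is omitted:
-`train_dataset` and `eval_dataset` are placeholders for already tokenized SQuAD splits, as
-produced by `run_qa.py`). As in the example scripts, an `OVConfig()` created without an
-explicit compression config falls back to the default 8-bit quantization-aware training setup:
-
-```python
-from transformers import AutoModelForQuestionAnswering, AutoTokenizer
-
-from optimum.intel import OVConfig, OVTrainer, OVTrainingArguments
-
-model_id = "distilbert-base-uncased-distilled-squad"
-model = AutoModelForQuestionAnswering.from_pretrained(model_id)
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-
-trainer = OVTrainer(
-    model=model,
-    ov_config=OVConfig(),  # no compression config given -> default 8-bit QAT
-    task="question-answering",
-    args=OVTrainingArguments(output_dir="/tmp/outputs_squad/", num_train_epochs=1),
-    train_dataset=train_dataset,  # placeholder: tokenized SQuAD train split
-    eval_dataset=eval_dataset,  # placeholder: tokenized SQuAD validation split
-    tokenizer=tokenizer,
-)
-trainer.train()
-trainer.save_model()
-```
-
-The full example script can be launched as follows: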
-
-```bash
-python run_qa.py \
-    --model_name_or_path distilbert-base-uncased-distilled-squad \
-    --dataset_name squad \
-    --do_train \
-    --do_eval \
-    --per_device_train_batch_size 8 \
-    --per_device_eval_batch_size 8 \
-    --max_train_samples 1024 \
-    --learning_rate 3e-5 \
-    --num_train_epochs 1 \
-    --max_seq_length 384 \
-    --doc_stride 128 \
-    --output_dir /tmp/outputs_squad/
-```
-
-### Joint Pruning, Quantization and Distillation (JPQD) for BERT on SQuAD1.0
-`OVTrainer` also provides an advanced optimization workflow through NNCF, in which the Transformer model can be structurally pruned along with 8-bit quantization and distillation. Below is an example that demonstrates how to jointly prune and quantize BERT-base for SQuAD 1.0, using the NNCF config passed via `--nncf_compression_config`, while distilling from a BERT-large teacher. This example closely resembles the movement sparsification work of [Lagunas et al., 2021, Block Pruning For Faster Transformers](https://arxiv.org/pdf/2109.04838.pdf). It takes about 12 hours on a single V100 GPU and prunes ~40% of the weights of the Transformer blocks. To launch the script on multiple GPUs, specify `--nproc-per-node=`. Note that a different batch size and other hyperparameters might be required to achieve the same results as on a single GPU.
-
-For more on how to configure movement sparsity, see the NNCF documentation [here](https://github.com/openvinotoolkit/nncf/blob/develop/nncf/experimental/torch/sparsity/movement/MovementSparsity.md).
-
-```bash
-torchrun --nproc-per-node=1 run_qa.py \
-    --model_name_or_path bert-base-uncased \
-    --dataset_name squad \
-    --teacher_model_name_or_path bert-large-uncased-whole-word-masking-finetuned-squad \
-    --distillation_weight 0.9 \
-    --do_eval \
-    --fp16 \
-    --do_train \
-    --learning_rate 3e-5 \
-    --num_train_epochs 8 \
-    --per_device_eval_batch_size 128 \
-    --per_device_train_batch_size 16 \
-    --max_seq_length 384 \
-    --doc_stride 128 \
-    --logging_steps 1 \
-    --evaluation_strategy steps \
-    --eval_steps 250 \
-    --save_steps 500 \
-    --save_total_limit 5 \
-    --output_dir /tmp/jpqd_bert_squad/ \
-    --overwrite_output_dir \
-    --nncf_compression_config configs/bert-base-jpqd.json
-```
diff --git a/examples/openvino/question-answering/configs/bert-base-jpqd.json b/examples/openvino/question-answering/configs/bert-base-jpqd.json
deleted file mode 100644
index 342d327a34..0000000000
--- a/examples/openvino/question-answering/configs/bert-base-jpqd.json
+++ /dev/null
@@ -1,41 +0,0 @@
-[
-    {
-        "algorithm": "movement_sparsity",
-        "params": {
-            "warmup_start_epoch": 1,
-            "warmup_end_epoch": 4,
-            "importance_regularization_factor": 0.02,
-            "enable_structured_masking": true
-        },
-        "sparse_structure_by_scopes": [
-            {"mode": "block", "sparse_factors": [32, 32], "target_scopes": "{re}.*BertAttention.*"},
-            {"mode": "per_dim", "axis": 0, "target_scopes": "{re}.*BertIntermediate.*"},
-            {"mode": "per_dim", "axis": 1, "target_scopes": "{re}.*BertOutput.*"}
-        ],
-        "ignored_scopes": ["{re}.*NNCFEmbedding.*", "{re}.*qa_outputs.*", "{re}.*LayerNorm.*"]
-    },
-    {
-        "algorithm": "quantization",
-        "preset": "mixed",
-        "overflow_fix": "enable",
-        "initializer": {
-            "range": {
-                "num_init_samples": 32,
-                "type": "percentile",
-                "params":
-                {
-                    "min_percentile": 0.01,
-                    "max_percentile": 99.99
-                }
-            },
-            "batchnorm_adaptation": {
-                "num_bn_adaptation_samples": 200
-            }
-        },
-        "scope_overrides": {"activations": {"{re}.*matmul_0": {"mode": "symmetric"}}},
-        "ignored_scopes": [
-            "{re}.*__add___[0-1]",
-            "{re}.*layer_norm_0"
-        ]
-    }
-]
diff --git
a/examples/openvino/question-answering/configs/bert-base-movement-sparsity.json b/examples/openvino/question-answering/configs/bert-base-movement-sparsity.json deleted file mode 100644 index 44e4c5c805..0000000000 --- a/examples/openvino/question-answering/configs/bert-base-movement-sparsity.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "algorithm": "movement_sparsity", - "params": { - "warmup_start_epoch": 1, - "warmup_end_epoch": 4, - "importance_regularization_factor": 0.01, - "enable_structured_masking": true - }, - "sparse_structure_by_scopes": [ - {"mode": "block", "sparse_factors": [32, 32], "target_scopes": "{re}.*BertAttention.*"}, - {"mode": "per_dim", "axis": 0, "target_scopes": "{re}.*BertIntermediate.*"}, - {"mode": "per_dim", "axis": 1, "target_scopes": "{re}.*BertOutput.*"} - ], - "ignored_scopes": ["{re}.*NNCFEmbedding.*", "{re}.*qa_outputs.*", "{re}.*LayerNorm.*"] -} diff --git a/examples/openvino/question-answering/requirements.txt b/examples/openvino/question-answering/requirements.txt deleted file mode 100644 index b4e37df13b..0000000000 --- a/examples/openvino/question-answering/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -transformers>=4.36.0,<4.46.0 -datasets>=1.14.0,<2.20.0 -torch >= 1.9.0 -evaluate -accelerate diff --git a/examples/openvino/question-answering/run_qa.py b/examples/openvino/question-answering/run_qa.py deleted file mode 100644 index 261fa839c9..0000000000 --- a/examples/openvino/question-answering/run_qa.py +++ /dev/null @@ -1,713 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2020 The HuggingFace Team All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Fine-tuning a 🤗 Transformers model for question answering while applying quantization aware training with NNCF. -""" - -# You can also adapt this script on your own question answering task. Pointers for this are left as comments. - -import logging -import os -import sys -from dataclasses import dataclass, field -from pathlib import Path -from typing import Optional - -import datasets -import evaluate -import jstyleson as json -import transformers -from datasets import load_dataset -from nncf.common.utils.os import safe_open -from trainer_qa import QuestionAnsweringOVTrainer -from transformers import ( - AutoConfig, - AutoModelForQuestionAnswering, - AutoTokenizer, - DataCollatorWithPadding, - EvalPrediction, - HfArgumentParser, - PreTrainedTokenizerFast, - default_data_collator, - set_seed, -) -from transformers.trainer_utils import get_last_checkpoint -from transformers.utils import check_min_version, send_example_telemetry -from transformers.utils.versions import require_version -from utils_qa import postprocess_qa_predictions - -from optimum.intel import OVConfig, OVTrainingArguments - - -# Will error if the minimal version of Transformers is not installed. Remove at your own risks. 
-check_min_version("4.22.0") - -require_version("datasets>=1.8.0", "To fix: pip install -r examples/openvino/question-answering/requirements.txt") - -logger = logging.getLogger(__name__) - - -@dataclass -class ModelArguments: - """ - Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. - """ - - model_name_or_path: str = field( - metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} - ) - teacher_model_name_or_path: str = field( - default=None, metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} - ) - config_name: Optional[str] = field( - default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} - ) - tokenizer_name: Optional[str] = field( - default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} - ) - cache_dir: Optional[str] = field( - default=None, - metadata={"help": "Path to directory to store the pretrained models downloaded from huggingface.co"}, - ) - model_revision: str = field( - default="main", - metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, - ) - use_auth_token: bool = field( - default=False, - metadata={ - "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." - ) - }, - ) - nncf_compression_config: Optional[str] = field( - default=None, - metadata={ - "help": "Path to NNCF configuration .json file for adapting the model to compression-enabled training." - }, - ) - - -@dataclass -class DataTrainingArguments: - """ - Arguments pertaining to what data we are going to input our model for training and eval. - """ - - dataset_name: Optional[str] = field( - default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} - ) - dataset_config_name: Optional[str] = field( - default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} - ) - train_file: Optional[str] = field(default=None, metadata={"help": "The input training data file (a text file)."}) - validation_file: Optional[str] = field( - default=None, - metadata={"help": "An optional input evaluation data file to evaluate the perplexity on (a text file)."}, - ) - test_file: Optional[str] = field( - default=None, - metadata={"help": "An optional input test data file to evaluate the perplexity on (a text file)."}, - ) - overwrite_cache: bool = field( - default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} - ) - preprocessing_num_workers: Optional[int] = field( - default=None, - metadata={"help": "The number of processes to use for the preprocessing."}, - ) - max_seq_length: int = field( - default=384, - metadata={ - "help": ( - "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." - ) - }, - ) - pad_to_max_length: bool = field( - default=True, - metadata={ - "help": ( - "Whether to pad all samples to `max_seq_length`. If False, will pad the samples dynamically when" - " batching to the maximum length in the batch (which can be faster on GPU but will be slower on TPU)." - ) - }, - ) - max_train_samples: Optional[int] = field( - default=None, - metadata={ - "help": ( - "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." 
- ) - }, - ) - max_eval_samples: Optional[int] = field( - default=None, - metadata={ - "help": ( - "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." - ) - }, - ) - max_predict_samples: Optional[int] = field( - default=None, - metadata={ - "help": ( - "For debugging purposes or quicker training, truncate the number of prediction examples to this " - "value if set." - ) - }, - ) - version_2_with_negative: bool = field( - default=False, metadata={"help": "If true, some of the examples do not have an answer."} - ) - null_score_diff_threshold: float = field( - default=0.0, - metadata={ - "help": ( - "The threshold used to select the null answer: if the best answer has a score that is less than " - "the score of the null answer minus this threshold, the null answer is selected for this example. " - "Only useful when `version_2_with_negative=True`." - ) - }, - ) - doc_stride: int = field( - default=128, - metadata={"help": "When splitting up a long document into chunks, how much stride to take between chunks."}, - ) - n_best_size: int = field( - default=20, - metadata={"help": "The total number of n-best predictions to generate when looking for an answer."}, - ) - max_answer_length: int = field( - default=30, - metadata={ - "help": ( - "The maximum length of an answer that can be generated. This is needed because the start " - "and end predictions are not conditioned on one another." - ) - }, - ) - - def __post_init__(self): - if ( - self.dataset_name is None - and self.train_file is None - and self.validation_file is None - and self.test_file is None - ): - raise ValueError("Need either a dataset name or a training/validation file/test_file.") - else: - if self.train_file is not None: - extension = self.train_file.split(".")[-1] - assert extension in ["csv", "json"], "`train_file` should be a csv or a json file." - if self.validation_file is not None: - extension = self.validation_file.split(".")[-1] - assert extension in ["csv", "json"], "`validation_file` should be a csv or a json file." - if self.test_file is not None: - extension = self.test_file.split(".")[-1] - assert extension in ["csv", "json"], "`test_file` should be a csv or a json file." - - -def main(): - # See all possible arguments in src/transformers/training_args.py - # or by passing the --help flag to this script. - # We now keep distinct sets of args, for a cleaner separation of concerns. - - parser = HfArgumentParser((ModelArguments, DataTrainingArguments, OVTrainingArguments)) - if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): - # If we pass only one argument to the script and it's the path to a json file, - # let's parse it to get our arguments. - model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) - else: - model_args, data_args, training_args = parser.parse_args_into_dataclasses() - - # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The - # information sent is the one passed as arguments along with your Python/PyTorch versions. 
- send_example_telemetry("run_qa", model_args, data_args) - - # Setup logging - logging.basicConfig( - format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", - datefmt="%m/%d/%Y %H:%M:%S", - handlers=[logging.StreamHandler(sys.stdout)], - ) - - log_level = training_args.get_process_log_level() - logger.setLevel(log_level) - datasets.utils.logging.set_verbosity(log_level) - transformers.utils.logging.set_verbosity(log_level) - transformers.utils.logging.enable_default_handler() - transformers.utils.logging.enable_explicit_format() - - # Log on each process the small summary: - logger.warning( - f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" - + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" - ) - logger.info(f"Training/evaluation parameters {training_args}") - - # Detecting last checkpoint. - last_checkpoint = None - if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir: - last_checkpoint = get_last_checkpoint(training_args.output_dir) - if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0: - raise ValueError( - f"Output directory ({training_args.output_dir}) already exists and is not empty. " - "Use --overwrite_output_dir to overcome." - ) - elif last_checkpoint is not None and training_args.resume_from_checkpoint is None: - logger.info( - f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change " - "the `--output_dir` or add `--overwrite_output_dir` to train from scratch." - ) - - # Set seed before initializing model. - set_seed(training_args.seed) - - # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) - # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ - # (the dataset will be downloaded automatically from the datasets Hub). - # - # For CSV/JSON files, this script will use the column called 'text' or the first column if no column called - # 'text' is found. You can easily tweak this behavior (see below). - # - # In distributed training, the load_dataset function guarantee that only one local process can concurrently - # download the dataset. - if data_args.dataset_name is not None: - # Downloading and loading a dataset from the hub. - raw_datasets = load_dataset( - data_args.dataset_name, - data_args.dataset_config_name, - cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, - ) - else: - data_files = {} - if data_args.train_file is not None: - data_files["train"] = data_args.train_file - extension = data_args.train_file.split(".")[-1] - - if data_args.validation_file is not None: - data_files["validation"] = data_args.validation_file - extension = data_args.validation_file.split(".")[-1] - if data_args.test_file is not None: - data_files["test"] = data_args.test_file - extension = data_args.test_file.split(".")[-1] - raw_datasets = load_dataset( - extension, - data_files=data_files, - field="data", - cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, - ) - # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at - # https://huggingface.co/docs/datasets/loading_datasets.html. 
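-    # For illustration (hypothetical content): a local SQuAD-style JSON passed via
-    # --train_file is expected to nest its examples under a top-level "data" key,
-    # matching the field="data" argument above, e.g.
-    #   {"data": [{"id": "0", "question": "...", "context": "...",
-    #              "answers": {"text": ["..."], "answer_start": [0]}}]}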
- - # Load pretrained model and tokenizer - # - # Distributed training: - # The .from_pretrained methods guarantee that only one local process can concurrently - # download model & vocab. - config = AutoConfig.from_pretrained( - model_args.config_name if model_args.config_name else model_args.model_name_or_path, - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) - tokenizer = AutoTokenizer.from_pretrained( - model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, - cache_dir=model_args.cache_dir, - use_fast=True, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) - model = AutoModelForQuestionAnswering.from_pretrained( - model_args.model_name_or_path, - from_tf=bool(".ckpt" in model_args.model_name_or_path), - config=config, - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) - - teacher_model = None - if model_args.teacher_model_name_or_path is not None: - teacher_model = AutoModelForQuestionAnswering.from_pretrained( - model_args.teacher_model_name_or_path, - from_tf=bool(".ckpt" in model_args.teacher_model_name_or_path), - cache_dir=model_args.cache_dir, - ) - - # Tokenizer check: this script requires a fast tokenizer. - if not isinstance(tokenizer, PreTrainedTokenizerFast): - raise ValueError( - "This example script only works for models that have a fast tokenizer. Checkout the big table of models at" - " https://huggingface.co/transformers/index.html#supported-frameworks to find the model types that meet" - " this requirement" - ) - - # Preprocessing the datasets. - # Preprocessing is slighlty different for training and evaluation. - if training_args.do_train: - column_names = raw_datasets["train"].column_names - elif training_args.do_eval: - column_names = raw_datasets["validation"].column_names - else: - column_names = raw_datasets["test"].column_names - question_column_name = "question" if "question" in column_names else column_names[0] - context_column_name = "context" if "context" in column_names else column_names[1] - answer_column_name = "answers" if "answers" in column_names else column_names[2] - - # Padding side determines if we do (question|context) or (context|question). - pad_on_right = tokenizer.padding_side == "right" - - if data_args.max_seq_length > tokenizer.model_max_length: - logger.warning( - f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the" - f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}." - ) - max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length) - - # Training preprocessing - def prepare_train_features(examples): - # Some of the questions have lots of whitespace on the left, which is not useful and will make the - # truncation of the context fail (the tokenized question will take a lots of space). So we remove that - # left whitespace - examples[question_column_name] = [q.lstrip() for q in examples[question_column_name]] - - # Tokenize our examples with truncation and maybe padding, but keep the overflows using a stride. This results - # in one example possible giving several features when a context is long, each of those features having a - # context that overlaps a bit the context of the previous feature. 
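-        # For instance (indicative numbers only): with max_seq_length=384 and
-        # doc_stride=128, a context of ~600 tokens yields two overlapping features,
-        # the second one re-using the last 128 context tokens of the first (the
-        # question and special tokens also consume part of each 384-token window).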
- tokenized_examples = tokenizer( - examples[question_column_name if pad_on_right else context_column_name], - examples[context_column_name if pad_on_right else question_column_name], - truncation="only_second" if pad_on_right else "only_first", - max_length=max_seq_length, - stride=data_args.doc_stride, - return_overflowing_tokens=True, - return_offsets_mapping=True, - padding="max_length" if data_args.pad_to_max_length else False, - ) - - # Since one example might give us several features if it has a long context, we need a map from a feature to - # its corresponding example. This key gives us just that. - sample_mapping = tokenized_examples.pop("overflow_to_sample_mapping") - # The offset mappings will give us a map from token to character position in the original context. This will - # help us compute the start_positions and end_positions. - offset_mapping = tokenized_examples.pop("offset_mapping") - - # Let's label those examples! - tokenized_examples["start_positions"] = [] - tokenized_examples["end_positions"] = [] - - for i, offsets in enumerate(offset_mapping): - # We will label impossible answers with the index of the CLS token. - input_ids = tokenized_examples["input_ids"][i] - cls_index = input_ids.index(tokenizer.cls_token_id) - - # Grab the sequence corresponding to that example (to know what is the context and what is the question). - sequence_ids = tokenized_examples.sequence_ids(i) - - # One example can give several spans, this is the index of the example containing this span of text. - sample_index = sample_mapping[i] - answers = examples[answer_column_name][sample_index] - # If no answers are given, set the cls_index as answer. - if len(answers["answer_start"]) == 0: - tokenized_examples["start_positions"].append(cls_index) - tokenized_examples["end_positions"].append(cls_index) - else: - # Start/end character index of the answer in the text. - start_char = answers["answer_start"][0] - end_char = start_char + len(answers["text"][0]) - - # Start token index of the current span in the text. - token_start_index = 0 - while sequence_ids[token_start_index] != (1 if pad_on_right else 0): - token_start_index += 1 - - # End token index of the current span in the text. - token_end_index = len(input_ids) - 1 - while sequence_ids[token_end_index] != (1 if pad_on_right else 0): - token_end_index -= 1 - - # Detect if the answer is out of the span (in which case this feature is labeled with the CLS index). - if not (offsets[token_start_index][0] <= start_char and offsets[token_end_index][1] >= end_char): - tokenized_examples["start_positions"].append(cls_index) - tokenized_examples["end_positions"].append(cls_index) - else: - # Otherwise move the token_start_index and token_end_index to the two ends of the answer. - # Note: we could go after the last offset if the answer is the last word (edge case). 
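-                    # Worked example (hypothetical offsets): with context offsets
-                    # [(0, 3), (4, 8), (9, 14)] and an answer covering characters 4..8,
-                    # the two loops below yield start_position == end_position == 1,
-                    # i.e. the token whose character span is (4, 8).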
- while token_start_index < len(offsets) and offsets[token_start_index][0] <= start_char: - token_start_index += 1 - tokenized_examples["start_positions"].append(token_start_index - 1) - while offsets[token_end_index][1] >= end_char: - token_end_index -= 1 - tokenized_examples["end_positions"].append(token_end_index + 1) - - return tokenized_examples - - if training_args.do_train: - if "train" not in raw_datasets: - raise ValueError("--do_train requires a train dataset") - train_dataset = raw_datasets["train"] - if data_args.max_train_samples is not None: - # We will select sample from whole data if argument is specified - max_train_samples = min(len(train_dataset), data_args.max_train_samples) - train_dataset = train_dataset.select(range(max_train_samples)) - # Create train feature from dataset - with training_args.main_process_first(desc="train dataset map pre-processing"): - train_dataset = train_dataset.map( - prepare_train_features, - batched=True, - num_proc=data_args.preprocessing_num_workers, - remove_columns=column_names, - load_from_cache_file=not data_args.overwrite_cache, - desc="Running tokenizer on train dataset", - ) - if data_args.max_train_samples is not None: - # Number of samples might increase during Feature Creation, We select only specified max samples - max_train_samples = min(len(train_dataset), data_args.max_train_samples) - train_dataset = train_dataset.select(range(max_train_samples)) - - # Validation preprocessing - def prepare_validation_features(examples): - # Some of the questions have lots of whitespace on the left, which is not useful and will make the - # truncation of the context fail (the tokenized question will take a lots of space). So we remove that - # left whitespace - examples[question_column_name] = [q.lstrip() for q in examples[question_column_name]] - - # Tokenize our examples with truncation and maybe padding, but keep the overflows using a stride. This results - # in one example possible giving several features when a context is long, each of those features having a - # context that overlaps a bit the context of the previous feature. - tokenized_examples = tokenizer( - examples[question_column_name if pad_on_right else context_column_name], - examples[context_column_name if pad_on_right else question_column_name], - truncation="only_second" if pad_on_right else "only_first", - max_length=max_seq_length, - stride=data_args.doc_stride, - return_overflowing_tokens=True, - return_offsets_mapping=True, - padding="max_length" if data_args.pad_to_max_length else False, - ) - - # Since one example might give us several features if it has a long context, we need a map from a feature to - # its corresponding example. This key gives us just that. - sample_mapping = tokenized_examples.pop("overflow_to_sample_mapping") - - # For evaluation, we will need to convert our predictions to substrings of the context, so we keep the - # corresponding example_id and we will store the offset mappings. - tokenized_examples["example_id"] = [] - - for i in range(len(tokenized_examples["input_ids"])): - # Grab the sequence corresponding to that example (to know what is the context and what is the question). - sequence_ids = tokenized_examples.sequence_ids(i) - context_index = 1 if pad_on_right else 0 - - # One example can give several spans, this is the index of the example containing this span of text. 
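-            # For example, sample_mapping == [0, 0, 1] would mean the first example was
-            # split into two overlapping features and the second example into one.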
- sample_index = sample_mapping[i] - tokenized_examples["example_id"].append(examples["id"][sample_index]) - - # Set to None the offset_mapping that are not part of the context so it's easy to determine if a token - # position is part of the context or not. - tokenized_examples["offset_mapping"][i] = [ - (o if sequence_ids[k] == context_index else None) - for k, o in enumerate(tokenized_examples["offset_mapping"][i]) - ] - - return tokenized_examples - - if training_args.do_eval: - if "validation" not in raw_datasets: - raise ValueError("--do_eval requires a validation dataset") - eval_examples = raw_datasets["validation"] - if data_args.max_eval_samples is not None: - # We will select sample from whole data - max_eval_samples = min(len(eval_examples), data_args.max_eval_samples) - eval_examples = eval_examples.select(range(max_eval_samples)) - # Validation Feature Creation - with training_args.main_process_first(desc="validation dataset map pre-processing"): - eval_dataset = eval_examples.map( - prepare_validation_features, - batched=True, - num_proc=data_args.preprocessing_num_workers, - remove_columns=column_names, - load_from_cache_file=not data_args.overwrite_cache, - desc="Running tokenizer on validation dataset", - ) - if data_args.max_eval_samples is not None: - # During Feature creation dataset samples might increase, we will select required samples again - max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples) - eval_dataset = eval_dataset.select(range(max_eval_samples)) - - if training_args.do_predict: - if "test" not in raw_datasets: - raise ValueError("--do_predict requires a test dataset") - predict_examples = raw_datasets["test"] - if data_args.max_predict_samples is not None: - # We will select sample from whole data - predict_examples = predict_examples.select(range(data_args.max_predict_samples)) - # Predict Feature Creation - with training_args.main_process_first(desc="prediction dataset map pre-processing"): - predict_dataset = predict_examples.map( - prepare_validation_features, - batched=True, - num_proc=data_args.preprocessing_num_workers, - remove_columns=column_names, - load_from_cache_file=not data_args.overwrite_cache, - desc="Running tokenizer on prediction dataset", - ) - if data_args.max_predict_samples is not None: - # During Feature creation dataset samples might increase, we will select required samples again - max_predict_samples = min(len(predict_dataset), data_args.max_predict_samples) - predict_dataset = predict_dataset.select(range(max_predict_samples)) - - # Data collator - # We have already padded to max length if the corresponding flag is True, otherwise we need to pad in the data - # collator. - data_collator = ( - default_data_collator - if data_args.pad_to_max_length - else DataCollatorWithPadding(tokenizer, pad_to_multiple_of=8 if training_args.fp16 else None) - ) - - # Post-processing: - def post_processing_function(examples, features, predictions, stage="eval"): - # Post-processing: we match the start logits and end logits to answers in the original context. - predictions = postprocess_qa_predictions( - examples=examples, - features=features, - predictions=predictions, - version_2_with_negative=data_args.version_2_with_negative, - n_best_size=data_args.n_best_size, - max_answer_length=data_args.max_answer_length, - null_score_diff_threshold=data_args.null_score_diff_threshold, - output_dir=training_args.output_dir, - log_level=log_level, - prefix=stage, - ) - # Format the result to the format the metric expects. 
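-        # For instance (illustrative values): the squad metric consumes predictions like
-        # [{"id": "abc1", "prediction_text": "Denver Broncos"}] together with references
-        # like [{"id": "abc1", "answers": {"text": ["Denver Broncos"], "answer_start": [177]}}].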
- if data_args.version_2_with_negative: - formatted_predictions = [ - {"id": k, "prediction_text": v, "no_answer_probability": 0.0} for k, v in predictions.items() - ] - else: - formatted_predictions = [{"id": k, "prediction_text": v} for k, v in predictions.items()] - - references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples] - return EvalPrediction(predictions=formatted_predictions, label_ids=references) - - metric = evaluate.load("squad_v2" if data_args.version_2_with_negative else "squad") - - def compute_metrics(p: EvalPrediction): - return metric.compute(predictions=p.predictions, references=p.label_ids) - - if model_args.nncf_compression_config is not None: - file_path = Path(model_args.nncf_compression_config).resolve() - with safe_open(file_path) as f: - compression = json.load(f) - ov_config = OVConfig(compression=compression) - else: - ov_config = OVConfig() - - # Initialize our Trainer - trainer = QuestionAnsweringOVTrainer( - model=model, - teacher_model=teacher_model, - ov_config=ov_config, - task="question-answering", - args=training_args, - train_dataset=train_dataset if training_args.do_train else None, - eval_dataset=eval_dataset if training_args.do_eval else None, - eval_examples=eval_examples if training_args.do_eval else None, - tokenizer=tokenizer, - data_collator=data_collator, - post_process_function=post_processing_function, - compute_metrics=compute_metrics, - ) - - # Training - if training_args.do_train: - checkpoint = None - if training_args.resume_from_checkpoint is not None: - checkpoint = training_args.resume_from_checkpoint - elif last_checkpoint is not None: - checkpoint = last_checkpoint - train_result = trainer.train(resume_from_checkpoint=checkpoint) - trainer.save_model() # Saves the tokenizer too for easy upload - - metrics = train_result.metrics - max_train_samples = ( - data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset) - ) - metrics["train_samples"] = min(max_train_samples, len(train_dataset)) - - trainer.log_metrics("train", metrics) - trainer.save_metrics("train", metrics) - trainer.save_state() - - # Evaluation - if training_args.do_eval: - logger.info("*** Evaluate ***") - metrics = trainer.evaluate() - - max_eval_samples = data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset) - metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset)) - - trainer.log_metrics("eval", metrics) - trainer.save_metrics("eval", metrics) - - # Prediction - if training_args.do_predict: - logger.info("*** Predict ***") - results = trainer.predict(predict_dataset, predict_examples) - metrics = results.metrics - - max_predict_samples = ( - data_args.max_predict_samples if data_args.max_predict_samples is not None else len(predict_dataset) - ) - metrics["predict_samples"] = min(max_predict_samples, len(predict_dataset)) - - trainer.log_metrics("predict", metrics) - trainer.save_metrics("predict", metrics) - - kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "question-answering"} - if data_args.dataset_name is not None: - kwargs["dataset_tags"] = data_args.dataset_name - if data_args.dataset_config_name is not None: - kwargs["dataset_args"] = data_args.dataset_config_name - kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}" - else: - kwargs["dataset"] = data_args.dataset_name - - if training_args.push_to_hub: - trainer.push_to_hub(**kwargs) - else: - trainer.create_model_card(**kwargs) - - -def _mp_fn(index): 
- # For xla_spawn (TPUs) - main() - - -if __name__ == "__main__": - main() diff --git a/examples/openvino/question-answering/trainer_qa.py b/examples/openvino/question-answering/trainer_qa.py deleted file mode 100644 index c10466060b..0000000000 --- a/examples/openvino/question-answering/trainer_qa.py +++ /dev/null @@ -1,148 +0,0 @@ -# coding=utf-8 -# Copyright 2022 The HuggingFace Team All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -A subclass of `OVTrainer` specific to Question-Answering tasks -""" -import torch -import torch.nn as nn -import torch.nn.functional as F -from transformers.trainer_utils import PredictionOutput - -from optimum.intel import OVTrainer - - -class QuestionAnsweringOVTrainer(OVTrainer): - def __init__(self, *args, eval_examples=None, post_process_function=None, **kwargs): - super().__init__(*args, **kwargs) - self.eval_examples = eval_examples - self.post_process_function = post_process_function - self.criterion = nn.CrossEntropyLoss() - - def evaluate(self, eval_dataset=None, eval_examples=None, ignore_keys=None, metric_key_prefix: str = "eval"): - eval_dataset = self.eval_dataset if eval_dataset is None else eval_dataset - eval_dataloader = self.get_eval_dataloader(eval_dataset) - eval_examples = self.eval_examples if eval_examples is None else eval_examples - - # Temporarily disable metric computation, we will do it in the loop here. - compute_metrics = self.compute_metrics - self.compute_metrics = None - eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop - try: - output = eval_loop( - eval_dataloader, - description="Evaluation", - # No point gathering the predictions if there are no metrics, otherwise we defer to - # self.args.prediction_loss_only - prediction_loss_only=True if compute_metrics is None else None, - ignore_keys=ignore_keys, - ) - finally: - self.compute_metrics = compute_metrics - - if self.post_process_function is not None and self.compute_metrics is not None: - eval_preds = self.post_process_function(eval_examples, eval_dataset, output.predictions) - metrics = self.compute_metrics(eval_preds) - - # Prefix all keys with metric_key_prefix + '_' - for key in list(metrics.keys()): - if not key.startswith(f"{metric_key_prefix}_"): - metrics[f"{metric_key_prefix}_{key}"] = metrics.pop(key) - - self.log(metrics) - else: - metrics = {} - - self.control = self.callback_handler.on_evaluate(self.args, self.state, self.control, metrics) - return metrics - - def predict(self, predict_dataset, predict_examples, ignore_keys=None, metric_key_prefix: str = "test"): - predict_dataloader = self.get_test_dataloader(predict_dataset) - - # Temporarily disable metric computation, we will do it in the loop here. 
- compute_metrics = self.compute_metrics - self.compute_metrics = None - eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop - try: - output = eval_loop( - predict_dataloader, - description="Prediction", - # No point gathering the predictions if there are no metrics, otherwise we defer to - # self.args.prediction_loss_only - prediction_loss_only=True if compute_metrics is None else None, - ignore_keys=ignore_keys, - ) - finally: - self.compute_metrics = compute_metrics - - if self.post_process_function is None or self.compute_metrics is None: - return output - - predictions = self.post_process_function(predict_examples, predict_dataset, output.predictions, "predict") - metrics = self.compute_metrics(predictions) - - # Prefix all keys with metric_key_prefix + '_' - for key in list(metrics.keys()): - if not key.startswith(f"{metric_key_prefix}_"): - metrics[f"{metric_key_prefix}_{key}"] = metrics.pop(key) - - return PredictionOutput(predictions=predictions.predictions, label_ids=predictions.label_ids, metrics=metrics) - - def compute_distillation_loss(self, inputs, student_outputs): - with torch.no_grad(): - teacher_outputs = self.teacher(**inputs) - - temperature = self.args.distillation_temperature - distilliation_loss_start = F.kl_div( - input=F.log_softmax(student_outputs.start_logits / temperature, dim=-1), - target=F.softmax(teacher_outputs.start_logits / temperature, dim=-1), - reduction="batchmean", - ) * (temperature**2) - distilliation_loss_end = F.kl_div( - input=F.log_softmax(student_outputs.end_logits / temperature, dim=-1), - target=F.softmax(teacher_outputs.end_logits / temperature, dim=-1), - reduction="batchmean", - ) * (temperature**2) - return (distilliation_loss_start + distilliation_loss_end) / 2.0 - - def compute_loss(self, model, inputs, return_outputs=False): - if self.teacher is None: - retval = super().compute_loss(model, inputs, return_outputs) - - if return_outputs is True: - loss, outputs = retval - else: - loss = retval - else: - # compute_loss is not used as QA distillation requires custom handling for outputs - # Using compute_loss incurs excessive computational footprint - outputs = self.model(**inputs) - - task_loss_start = self.criterion(outputs.start_logits, inputs["start_positions"]) - task_loss_end = self.criterion(outputs.end_logits, inputs["end_positions"]) - task_loss = (task_loss_start + task_loss_end) / 2.0 - - distillation_loss = self.compute_distillation_loss(inputs, outputs) - loss = (1 - self.args.distillation_weight) * task_loss + self.args.distillation_weight * distillation_loss - if model.training: - self.compression_metrics["task_loss"] = task_loss.item() - self.compression_metrics["distillation_loss"] = distillation_loss.item() - - if self.compression_controller is not None: - compression_loss = self.compression_controller.loss() - loss += compression_loss - if model.training: - self.compression_metrics["compression_loss"] = compression_loss.item() - - return (loss, outputs) if return_outputs else loss diff --git a/examples/openvino/question-answering/utils_qa.py b/examples/openvino/question-answering/utils_qa.py deleted file mode 100644 index ca6326e76f..0000000000 --- a/examples/openvino/question-answering/utils_qa.py +++ /dev/null @@ -1,443 +0,0 @@ -# coding=utf-8 -# Copyright 2022 The HuggingFace Team All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Post-processing utilities for question answering. -""" -import collections -import json -import logging -import os -from typing import Optional, Tuple - -import numpy as np -from tqdm.auto import tqdm - - -logger = logging.getLogger(__name__) - - -def postprocess_qa_predictions( - examples, - features, - predictions: Tuple[np.ndarray, np.ndarray], - version_2_with_negative: bool = False, - n_best_size: int = 20, - max_answer_length: int = 30, - null_score_diff_threshold: float = 0.0, - output_dir: Optional[str] = None, - prefix: Optional[str] = None, - log_level: Optional[int] = logging.WARNING, -): - """ - Post-processes the predictions of a question-answering model to convert them to answers that are substrings of the - original contexts. This is the base postprocessing functions for models that only return start and end logits. - - Args: - examples: The non-preprocessed dataset (see the main script for more information). - features: The processed dataset (see the main script for more information). - predictions (:obj:`Tuple[np.ndarray, np.ndarray]`): - The predictions of the model: two arrays containing the start logits and the end logits respectively. Its - first dimension must match the number of elements of :obj:`features`. - version_2_with_negative (:obj:`bool`, `optional`, defaults to :obj:`False`): - Whether or not the underlying dataset contains examples with no answers. - n_best_size (:obj:`int`, `optional`, defaults to 20): - The total number of n-best predictions to generate when looking for an answer. - max_answer_length (:obj:`int`, `optional`, defaults to 30): - The maximum length of an answer that can be generated. This is needed because the start and end predictions - are not conditioned on one another. - null_score_diff_threshold (:obj:`float`, `optional`, defaults to 0): - The threshold used to select the null answer: if the best answer has a score that is less than the score of - the null answer minus this threshold, the null answer is selected for this example (note that the score of - the null answer for an example giving several features is the minimum of the scores for the null answer on - each feature: all features must be aligned on the fact they `want` to predict a null answer). - - Only useful when :obj:`version_2_with_negative` is :obj:`True`. - output_dir (:obj:`str`, `optional`): - If provided, the dictionaries of predictions, n_best predictions (with their scores and logits) and, if - :obj:`version_2_with_negative=True`, the dictionary of the scores differences between best and null - answers, are saved in `output_dir`. - prefix (:obj:`str`, `optional`): - If provided, the dictionaries mentioned above are saved with `prefix` added to their names. 
- log_level (:obj:`int`, `optional`, defaults to ``logging.WARNING``): - ``logging`` log level (e.g., ``logging.WARNING``) - """ - if len(predictions) != 2: - raise ValueError("`predictions` should be a tuple with two elements (start_logits, end_logits).") - all_start_logits, all_end_logits = predictions - - if len(predictions[0]) != len(features): - raise ValueError(f"Got {len(predictions[0])} predictions and {len(features)} features.") - - # Build a map example to its corresponding features. - example_id_to_index = {k: i for i, k in enumerate(examples["id"])} - features_per_example = collections.defaultdict(list) - for i, feature in enumerate(features): - features_per_example[example_id_to_index[feature["example_id"]]].append(i) - - # The dictionaries we have to fill. - all_predictions = collections.OrderedDict() - all_nbest_json = collections.OrderedDict() - if version_2_with_negative: - scores_diff_json = collections.OrderedDict() - - # Logging. - logger.setLevel(log_level) - logger.info(f"Post-processing {len(examples)} example predictions split into {len(features)} features.") - - # Let's loop over all the examples! - for example_index, example in enumerate(tqdm(examples)): - # Those are the indices of the features associated to the current example. - feature_indices = features_per_example[example_index] - - min_null_prediction = None - prelim_predictions = [] - - # Looping through all the features associated to the current example. - for feature_index in feature_indices: - # We grab the predictions of the model for this feature. - start_logits = all_start_logits[feature_index] - end_logits = all_end_logits[feature_index] - # This is what will allow us to map some the positions in our logits to span of texts in the original - # context. - offset_mapping = features[feature_index]["offset_mapping"] - # Optional `token_is_max_context`, if provided we will remove answers that do not have the maximum context - # available in the current feature. - token_is_max_context = features[feature_index].get("token_is_max_context", None) - - # Update minimum null prediction. - feature_null_score = start_logits[0] + end_logits[0] - if min_null_prediction is None or min_null_prediction["score"] > feature_null_score: - min_null_prediction = { - "offsets": (0, 0), - "score": feature_null_score, - "start_logit": start_logits[0], - "end_logit": end_logits[0], - } - - # Go through all possibilities for the `n_best_size` greater start and end logits. - start_indexes = np.argsort(start_logits)[-1 : -n_best_size - 1 : -1].tolist() - end_indexes = np.argsort(end_logits)[-1 : -n_best_size - 1 : -1].tolist() - for start_index in start_indexes: - for end_index in end_indexes: - # Don't consider out-of-scope answers, either because the indices are out of bounds or correspond - # to part of the input_ids that are not in the context. - if ( - start_index >= len(offset_mapping) - or end_index >= len(offset_mapping) - or offset_mapping[start_index] is None - or len(offset_mapping[start_index]) < 2 - or offset_mapping[end_index] is None - or len(offset_mapping[end_index]) < 2 - ): - continue - # Don't consider answers with a length that is either < 0 or > max_answer_length. - if end_index < start_index or end_index - start_index + 1 > max_answer_length: - continue - # Don't consider answer that don't have the maximum context available (if such information is - # provided). 
- if token_is_max_context is not None and not token_is_max_context.get(str(start_index), False): - continue - - prelim_predictions.append( - { - "offsets": (offset_mapping[start_index][0], offset_mapping[end_index][1]), - "score": start_logits[start_index] + end_logits[end_index], - "start_logit": start_logits[start_index], - "end_logit": end_logits[end_index], - } - ) - if version_2_with_negative and min_null_prediction is not None: - # Add the minimum null prediction - prelim_predictions.append(min_null_prediction) - null_score = min_null_prediction["score"] - - # Only keep the best `n_best_size` predictions. - predictions = sorted(prelim_predictions, key=lambda x: x["score"], reverse=True)[:n_best_size] - - # Add back the minimum null prediction if it was removed because of its low score. - if ( - version_2_with_negative - and min_null_prediction is not None - and not any(p["offsets"] == (0, 0) for p in predictions) - ): - predictions.append(min_null_prediction) - - # Use the offsets to gather the answer text in the original context. - context = example["context"] - for pred in predictions: - offsets = pred.pop("offsets") - pred["text"] = context[offsets[0] : offsets[1]] - - # In the very rare edge case we have not a single non-null prediction, we create a fake prediction to avoid - # failure. - if len(predictions) == 0 or (len(predictions) == 1 and predictions[0]["text"] == ""): - predictions.insert(0, {"text": "empty", "start_logit": 0.0, "end_logit": 0.0, "score": 0.0}) - - # Compute the softmax of all scores (we do it with numpy to stay independent from torch/tf in this file, using - # the LogSumExp trick). - scores = np.array([pred.pop("score") for pred in predictions]) - exp_scores = np.exp(scores - np.max(scores)) - probs = exp_scores / exp_scores.sum() - - # Include the probabilities in our predictions. - for prob, pred in zip(probs, predictions): - pred["probability"] = prob - - # Pick the best prediction. If the null answer is not possible, this is easy. - if not version_2_with_negative: - all_predictions[example["id"]] = predictions[0]["text"] - else: - # Otherwise we first need to find the best non-empty prediction. - i = 0 - while predictions[i]["text"] == "": - i += 1 - best_non_null_pred = predictions[i] - - # Then we compare to the null prediction using the threshold. - score_diff = null_score - best_non_null_pred["start_logit"] - best_non_null_pred["end_logit"] - scores_diff_json[example["id"]] = float(score_diff) # To be JSON-serializable. - if score_diff > null_score_diff_threshold: - all_predictions[example["id"]] = "" - else: - all_predictions[example["id"]] = best_non_null_pred["text"] - - # Make `predictions` JSON-serializable by casting np.float back to float. - all_nbest_json[example["id"]] = [ - {k: (float(v) if isinstance(v, (np.float16, np.float32, np.float64)) else v) for k, v in pred.items()} - for pred in predictions - ] - - # If we have an output_dir, let's save all those dicts. 
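Aside on the normalization step a few lines above: subtracting the maximum score before exponentiating (the LogSumExp trick) leaves the softmax unchanged, since softmax(x) == softmax(x - c) for any constant c, while preventing overflow. A minimal numpy sketch with invented values:

```python
import numpy as np

scores = np.array([1000.0, 999.0, 998.0])  # naive np.exp(scores) would overflow to inf
exp_scores = np.exp(scores - np.max(scores))
probs = exp_scores / exp_scores.sum()
print(probs)  # ~[0.665, 0.245, 0.090], finite and well-defined
```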
-    if output_dir is not None:
-        if not os.path.isdir(output_dir):
-            raise EnvironmentError(f"{output_dir} is not a directory.")
-
-        prediction_file = os.path.join(
-            output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json"
-        )
-        nbest_file = os.path.join(
-            output_dir, "nbest_predictions.json" if prefix is None else f"{prefix}_nbest_predictions.json"
-        )
-        if version_2_with_negative:
-            null_odds_file = os.path.join(
-                output_dir, "null_odds.json" if prefix is None else f"{prefix}_null_odds.json"
-            )
-
-        logger.info(f"Saving predictions to {prediction_file}.")
-        with open(prediction_file, "w") as writer:
-            writer.write(json.dumps(all_predictions, indent=4) + "\n")
-        logger.info(f"Saving nbest_preds to {nbest_file}.")
-        with open(nbest_file, "w") as writer:
-            writer.write(json.dumps(all_nbest_json, indent=4) + "\n")
-        if version_2_with_negative:
-            logger.info(f"Saving null_odds to {null_odds_file}.")
-            with open(null_odds_file, "w") as writer:
-                writer.write(json.dumps(scores_diff_json, indent=4) + "\n")
-
-    return all_predictions
-
-
-def postprocess_qa_predictions_with_beam_search(
-    examples,
-    features,
-    predictions: Tuple[np.ndarray, np.ndarray],
-    version_2_with_negative: bool = False,
-    n_best_size: int = 20,
-    max_answer_length: int = 30,
-    start_n_top: int = 5,
-    end_n_top: int = 5,
-    output_dir: Optional[str] = None,
-    prefix: Optional[str] = None,
-    log_level: Optional[int] = logging.WARNING,
-):
-    """
-    Post-processes the predictions of a question-answering model with beam search to convert them to answers that are
-    substrings of the original contexts. This is the postprocessing function for models that return start and end
-    logits, indices, as well as cls token predictions.
-
-    Args:
-        examples: The non-preprocessed dataset (see the main script for more information).
-        features: The processed dataset (see the main script for more information).
-        predictions (:obj:`Tuple[np.ndarray, np.ndarray]`):
-            The predictions of the model: two arrays containing the start logits and the end logits respectively. Its
-            first dimension must match the number of elements of :obj:`features`.
-        version_2_with_negative (:obj:`bool`, `optional`, defaults to :obj:`False`):
-            Whether or not the underlying dataset contains examples with no answers.
-        n_best_size (:obj:`int`, `optional`, defaults to 20):
-            The total number of n-best predictions to generate when looking for an answer.
-        max_answer_length (:obj:`int`, `optional`, defaults to 30):
-            The maximum length of an answer that can be generated. This is needed because the start and end predictions
-            are not conditioned on one another.
-        start_n_top (:obj:`int`, `optional`, defaults to 5):
-            The number of top start logits to keep when searching for the :obj:`n_best_size` predictions.
-        end_n_top (:obj:`int`, `optional`, defaults to 5):
-            The number of top end logits to keep when searching for the :obj:`n_best_size` predictions.
-        output_dir (:obj:`str`, `optional`):
-            If provided, the dictionaries of predictions, n_best predictions (with their scores and logits) and, if
-            :obj:`version_2_with_negative=True`, the dictionary of the scores differences between best and null
-            answers, are saved in `output_dir`.
-        prefix (:obj:`str`, `optional`):
-            If provided, the dictionaries mentioned above are saved with `prefix` added to their names.
- log_level (:obj:`int`, `optional`, defaults to ``logging.WARNING``): - ``logging`` log level (e.g., ``logging.WARNING``) - """ - if len(predictions) != 5: - raise ValueError("`predictions` should be a tuple with five elements.") - start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits = predictions - - if len(predictions[0]) != len(features): - raise ValueError(f"Got {len(predictions[0])} predictions and {len(features)} features.") - - # Build a map example to its corresponding features. - example_id_to_index = {k: i for i, k in enumerate(examples["id"])} - features_per_example = collections.defaultdict(list) - for i, feature in enumerate(features): - features_per_example[example_id_to_index[feature["example_id"]]].append(i) - - # The dictionaries we have to fill. - all_predictions = collections.OrderedDict() - all_nbest_json = collections.OrderedDict() - scores_diff_json = collections.OrderedDict() if version_2_with_negative else None - - # Logging. - logger.setLevel(log_level) - logger.info(f"Post-processing {len(examples)} example predictions split into {len(features)} features.") - - # Let's loop over all the examples! - for example_index, example in enumerate(tqdm(examples)): - # Those are the indices of the features associated to the current example. - feature_indices = features_per_example[example_index] - - min_null_score = None - prelim_predictions = [] - - # Looping through all the features associated to the current example. - for feature_index in feature_indices: - # We grab the predictions of the model for this feature. - start_log_prob = start_top_log_probs[feature_index] - start_indexes = start_top_index[feature_index] - end_log_prob = end_top_log_probs[feature_index] - end_indexes = end_top_index[feature_index] - feature_null_score = cls_logits[feature_index] - # This is what will allow us to map some the positions in our logits to span of texts in the original - # context. - offset_mapping = features[feature_index]["offset_mapping"] - # Optional `token_is_max_context`, if provided we will remove answers that do not have the maximum context - # available in the current feature. - token_is_max_context = features[feature_index].get("token_is_max_context", None) - - # Update minimum null prediction - if min_null_score is None or feature_null_score < min_null_score: - min_null_score = feature_null_score - - # Go through all possibilities for the `n_start_top`/`n_end_top` greater start and end logits. - for i in range(start_n_top): - for j in range(end_n_top): - start_index = int(start_indexes[i]) - j_index = i * end_n_top + j - end_index = int(end_indexes[j_index]) - # Don't consider out-of-scope answers (last part of the test should be unnecessary because of the - # p_mask but let's not take any risk) - if ( - start_index >= len(offset_mapping) - or end_index >= len(offset_mapping) - or offset_mapping[start_index] is None - or len(offset_mapping[start_index]) < 2 - or offset_mapping[end_index] is None - or len(offset_mapping[end_index]) < 2 - ): - continue - - # Don't consider answers with a length negative or > max_answer_length. - if end_index < start_index or end_index - start_index + 1 > max_answer_length: - continue - # Don't consider answer that don't have the maximum context available (if such information is - # provided). 
- if token_is_max_context is not None and not token_is_max_context.get(str(start_index), False): - continue - prelim_predictions.append( - { - "offsets": (offset_mapping[start_index][0], offset_mapping[end_index][1]), - "score": start_log_prob[i] + end_log_prob[j_index], - "start_log_prob": start_log_prob[i], - "end_log_prob": end_log_prob[j_index], - } - ) - - # Only keep the best `n_best_size` predictions. - predictions = sorted(prelim_predictions, key=lambda x: x["score"], reverse=True)[:n_best_size] - - # Use the offsets to gather the answer text in the original context. - context = example["context"] - for pred in predictions: - offsets = pred.pop("offsets") - pred["text"] = context[offsets[0] : offsets[1]] - - # In the very rare edge case we have not a single non-null prediction, we create a fake prediction to avoid - # failure. - if len(predictions) == 0: - # Without predictions min_null_score is going to be None and None will cause an exception later - min_null_score = -2e-6 - predictions.insert(0, {"text": "", "start_logit": -1e-6, "end_logit": -1e-6, "score": min_null_score}) - - # Compute the softmax of all scores (we do it with numpy to stay independent from torch/tf in this file, using - # the LogSumExp trick). - scores = np.array([pred.pop("score") for pred in predictions]) - exp_scores = np.exp(scores - np.max(scores)) - probs = exp_scores / exp_scores.sum() - - # Include the probabilities in our predictions. - for prob, pred in zip(probs, predictions): - pred["probability"] = prob - - # Pick the best prediction and set the probability for the null answer. - all_predictions[example["id"]] = predictions[0]["text"] - if version_2_with_negative: - scores_diff_json[example["id"]] = float(min_null_score) - - # Make `predictions` JSON-serializable by casting np.float back to float. - all_nbest_json[example["id"]] = [ - {k: (float(v) if isinstance(v, (np.float16, np.float32, np.float64)) else v) for k, v in pred.items()} - for pred in predictions - ] - - # If we have an output_dir, let's save all those dicts. - if output_dir is not None: - if not os.path.isdir(output_dir): - raise EnvironmentError(f"{output_dir} is not a directory.") - - prediction_file = os.path.join( - output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json" - ) - nbest_file = os.path.join( - output_dir, "nbest_predictions.json" if prefix is None else f"{prefix}_nbest_predictions.json" - ) - if version_2_with_negative: - null_odds_file = os.path.join( - output_dir, "null_odds.json" if prefix is None else f"{prefix}_null_odds.json" - ) - - logger.info(f"Saving predictions to {prediction_file}.") - with open(prediction_file, "w") as writer: - writer.write(json.dumps(all_predictions, indent=4) + "\n") - logger.info(f"Saving nbest_preds to {nbest_file}.") - with open(nbest_file, "w") as writer: - writer.write(json.dumps(all_nbest_json, indent=4) + "\n") - if version_2_with_negative: - logger.info(f"Saving null_odds to {null_odds_file}.") - with open(null_odds_file, "w") as writer: - writer.write(json.dumps(scores_diff_json, indent=4) + "\n") - - return all_predictions, scores_diff_json diff --git a/examples/openvino/test_examples.py b/examples/openvino/test_examples.py deleted file mode 100644 index d3993d5b78..0000000000 --- a/examples/openvino/test_examples.py +++ /dev/null @@ -1,146 +0,0 @@ -# Copyright 2021 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import os -import sys -import tempfile -import unittest -from unittest.mock import patch - - -SRC_DIRS = [ - os.path.join(os.path.dirname(__file__), dirname) - for dirname in [ - "text-classification", - "question-answering", - "audio-classification", - "image-classification", - ] -] -sys.path.extend(SRC_DIRS) - -if SRC_DIRS is not None: - import run_audio_classification - import run_glue - import run_image_classification - import run_qa - - -class TestExamples(unittest.TestCase): - def test_audio_classification(self): - with tempfile.TemporaryDirectory() as tmp_dir: - test_args = f""" - run_audio_classification.py - --model_name_or_path hf-internal-testing/tiny-random-Wav2Vec2Model - --nncf_compression_config examples/openvino/audio-classification/configs/wav2vec2-base-qat.json - --dataset_name superb - --dataset_config_name ks - --max_train_samples 10 - --max_eval_samples 2 - --remove_unused_columns False - --do_train - --learning_rate 3e-5 - --max_length_seconds 1 - --attention_mask False - --warmup_ratio 0.1 - --num_train_epochs 1 - --gradient_accumulation_steps 1 - --dataloader_num_workers 1 - --logging_strategy steps - --logging_steps 1 - --evaluation_strategy epoch - --save_strategy epoch - --load_best_model_at_end False - --seed 42 - --output_dir {tmp_dir} - --overwrite_output_dir - """.split() - - with patch.object(sys, "argv", test_args): - run_audio_classification.main() - - def test_image_classification(self): - with tempfile.TemporaryDirectory() as tmp_dir: - test_args = f""" - run_image_classification.py - --model_name_or_path hf-internal-testing/tiny-random-ViTModel - --dataset_name beans - --max_train_samples 10 - --max_eval_samples 2 - --remove_unused_columns False - --do_train - --do_eval - --learning_rate 2e-5 - --num_train_epochs 1 - --logging_strategy steps - --logging_steps 1 - --evaluation_strategy epoch - --save_strategy epoch - --save_total_limit 1 - --seed 1337 - --output_dir {tmp_dir} - """.split() - - with patch.object(sys, "argv", test_args): - run_image_classification.main() - - def test_text_classification(self): - with tempfile.TemporaryDirectory() as tmp_dir: - test_args = f""" - run_glue.py - --model_name_or_path hf-internal-testing/tiny-random-DistilBertForSequenceClassification - --task_name sst2 - --max_train_samples 10 - --max_eval_samples 2 - --overwrite_output_dir - --do_train - --do_eval - --max_seq_length 128 - --learning_rate 1e-5 - --optim adamw_torch - --num_train_epochs 1 - --logging_steps 1 - --evaluation_strategy steps - --eval_steps 1 - --save_strategy epoch - --seed 42 - --output_dir {tmp_dir} - """.split() - - with patch.object(sys, "argv", test_args): - run_glue.main() - - def test_question_answering(self): - with tempfile.TemporaryDirectory() as tmp_dir: - test_args = f""" - run_qa.py - --model_name_or_path hf-internal-testing/tiny-random-DistilBertForQuestionAnswering - --dataset_name squad - --do_train - --do_eval - --max_train_samples 10 - --max_eval_samples 2 - --learning_rate 3e-5 - --num_train_epochs 1 - --max_seq_length 384 - --doc_stride 128 - --overwrite_output_dir - --output_dir {tmp_dir} - """.split() - - with 
patch.object(sys, "argv", test_args):
-                run_qa.main()
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/examples/openvino/text-classification/README.md b/examples/openvino/text-classification/README.md
deleted file mode 100644
index 0128220c89..0000000000
--- a/examples/openvino/text-classification/README.md
+++ /dev/null
@@ -1,86 +0,0 @@
-
-# Text classification
-
-This folder contains [`run_glue.py`](https://github.com/huggingface/optimum/blob/main/examples/openvino/text-classification/run_glue.py), a script to fine-tune a 🤗 Transformers model on the [General Language Understanding Evaluation](https://gluebenchmark.com/) (GLUE) benchmark while applying quantization aware training (QAT). QAT can be applied simply by replacing the Transformers [`Trainer`](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#trainer) with the Optimum [`OVTrainer`]. Any model from our [hub](https://huggingface.co/models) can be fine-tuned and quantized, as long as the model is supported by the [`AutoModelForSequenceClassification`](https://huggingface.co/docs/transformers/main/en/model_doc/auto#transformers.AutoModelForSequenceClassification) API.
-
-### Fine-tuning BERT on GLUE with QAT
-
-Here is an example of applying Quantization Aware Training (QAT) to the BERT-base model on the Stanford Sentiment Treebank-2 (SST-2) task of the GLUE benchmark.
-
-```bash
-TASK_NAME=sst2
-python run_glue.py \
-    --model_name_or_path bert-base-uncased \
-    --task_name $TASK_NAME \
-    --output_dir /tmp/qat-bert-base-ft-$TASK_NAME \
-    --overwrite_output_dir \
-    --do_train \
-    --do_eval \
-    --max_seq_length 128 \
-    --per_device_train_batch_size 32 \
-    --learning_rate 1e-5 \
-    --optim adamw_torch \
-    --num_train_epochs 3 \
-    --logging_steps 10 \
-    --evaluation_strategy steps \
-    --eval_steps 250 \
-    --save_strategy epoch \
-    --fp16 \
-    --seed 42
-```
-
-On a single V100 GPU, this script should run in ~40 minutes and yield an accuracy of **92.9%**.
-
-### Joint Pruning, Quantization and Distillation (JPQD) of BERT on GLUE
-
-`OVTrainer` also provides an advanced optimization workflow via NNCF to structurally prune, quantize and distill a model. The following example optimizes a sparse-quantized BERT-base model for SST-2, distilling from a BERT-large teacher. Note the additional NNCF configuration file passed via `--nncf_compression_config`.
-For more details on how to configure movement sparsity, see the NNCF documentation [here](https://github.com/openvinotoolkit/nncf/blob/develop/nncf/experimental/torch/sparsity/movement/MovementSparsity.md). At the API level, both workflows amount to constructing an `OVTrainer` in place of a `Trainer`, as sketched below.
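The following is a condensed, hypothetical sketch of the `Trainer`-to-`OVTrainer` swap that `run_glue.py` performs; dataset preparation is reduced to the bare minimum and the hyperparameters are illustrative, so treat it as a reading aid rather than a complete recipe.

```python
# Condensed sketch of OVTrainer-based QAT, mirroring the arguments used in run_glue.py.
from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from optimum.intel import OVConfig, OVTrainer, OVTrainingArguments

model_id = "bert-base-uncased"
model = AutoModelForSequenceClassification.from_pretrained(model_id, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_id)

raw = load_dataset("glue", "sst2")
def tokenize(batch):
    return tokenizer(batch["sentence"], padding="max_length", max_length=128, truncation=True)
train_dataset = raw["train"].map(tokenize, batched=True)
eval_dataset = raw["validation"].map(tokenize, batched=True)

# With no explicit compression section, OVTrainer falls back to its default 8-bit QAT config.
ov_config = OVConfig()
args = OVTrainingArguments(output_dir="/tmp/qat-bert-base-ft-sst2", do_train=True, num_train_epochs=3)

trainer = OVTrainer(
    model=model,
    ov_config=ov_config,
    task="text-classification",
    args=args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
)
trainer.train()
trainer.save_model()  # saves the quantized model (and tokenizer) to args.output_dir
```

Passing `teacher_model=` together with an NNCF JSON supplied via `OVConfig(compression=...)` turns the same call into the JPQD workflow shown in the command below.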
To run the JPQD example, please install optimum-intel from source. This command will install or upgrade optimum-intel and all necessary dependencies:
-
-```bash
-python -m pip install --upgrade "git+https://github.com/huggingface/optimum-intel.git#egg=optimum-intel[openvino, nncf]"
-```
-
-```bash
-TASK_NAME=sst2
-torchrun --nproc-per-node=1 run_glue.py \
-    --model_name_or_path bert-base-uncased \
-    --task_name $TASK_NAME \
-    --teacher_model_name_or_path yoshitomo-matsubara/bert-large-uncased-sst2 \
-    --nncf_compression_config ./configs/bert-base-jpqd.json \
-    --distillation_weight 0.9 \
-    --output_dir /tmp/jpqd-bert-base-ft-$TASK_NAME \
-    --overwrite_output_dir \
-    --do_train \
-    --do_eval \
-    --max_seq_length 128 \
-    --per_device_train_batch_size 32 \
-    --learning_rate 2e-5 \
-    --optim adamw_torch \
-    --num_train_epochs 5 \
-    --logging_steps 10 \
-    --evaluation_strategy steps \
-    --eval_steps 250 \
-    --save_strategy epoch \
-    --save_total_limit 3 \
-    --fp16 \
-    --seed 42
-```
-
-On a single V100 GPU, this script should run in ~1.8 hours, and yield an accuracy of **92.2%**, with ~40% of the weights of the Transformer blocks pruned.
-To launch the script on multiple GPUs, specify `--nproc-per-node=` accordingly. Note that a different batch size and other hyperparameters might be required to achieve the same results as on a single GPU.
diff --git a/examples/openvino/text-classification/configs/bert-base-jpqd.json b/examples/openvino/text-classification/configs/bert-base-jpqd.json
deleted file mode 100644
index d177e4efd7..0000000000
--- a/examples/openvino/text-classification/configs/bert-base-jpqd.json
+++ /dev/null
@@ -1,45 +0,0 @@
-[
-    {
-        "algorithm": "movement_sparsity",
-        "params": {
-            "warmup_start_epoch": 1,
-            "warmup_end_epoch": 2,
-            "importance_regularization_factor": 0.05,
-            "enable_structured_masking": true
-        },
-        "sparse_structure_by_scopes": [
-            {"mode": "block", "sparse_factors": [32, 32], "target_scopes": "{re}.*BertAttention.*"},
-            {"mode": "per_dim", "axis": 0, "target_scopes": "{re}.*BertIntermediate.*"},
-            {"mode": "per_dim", "axis": 1, "target_scopes": "{re}.*BertOutput.*"}
-        ],
-        "ignored_scopes": [
-            "{re}.*NNCFEmbedding.*",
-            "{re}.*LayerNorm.*",
-            "{re}.*pooler.*",
-            "{re}.*classifier.*"]
-    },
-    {
-        "algorithm": "quantization",
-        "preset": "mixed",
-        "overflow_fix": "enable",
-        "initializer": {
-            "range": {
-                "num_init_samples": 32,
-                "type": "percentile",
-                "params":
-                {
-                    "min_percentile": 0.01,
-                    "max_percentile": 99.99
-                }
-            },
-            "batchnorm_adaptation": {
-                "num_bn_adaptation_samples": 200
-            }
-        },
-        "scope_overrides": {"activations": {"{re}.*matmul_0": {"mode": "symmetric"}}},
-        "ignored_scopes": [
-            "{re}.*__add___[0-1]",
-            "{re}.*layer_norm_0"
-        ]
-    }
-]
diff --git a/examples/openvino/text-classification/requirements.txt b/examples/openvino/text-classification/requirements.txt
deleted file mode 100644
index f8b37a9e56..0000000000
--- a/examples/openvino/text-classification/requirements.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-transformers>=4.36.0,<4.46.0
-datasets>=1.14.0,<2.20.0
-sentencepiece != 0.1.92
-scipy
-scikit-learn
-protobuf
-torch >= 1.3
-evaluate
-accelerate
diff --git a/examples/openvino/text-classification/run_glue.py b/examples/openvino/text-classification/run_glue.py
deleted file mode 100644
index 66670de77e..0000000000
--- a/examples/openvino/text-classification/run_glue.py
+++ /dev/null
@@ -1,657 +0,0 @@
-#!/usr/bin/env python
-# coding=utf-8
-# Copyright 2020 The HuggingFace Inc. team. All rights reserved.
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" Finetuning the library models for sequence classification on GLUE.""" -# You can also adapt this script on your own text classification task. Pointers for this are left as comments. - -import logging -import os -import random -import sys -from dataclasses import dataclass, field -from pathlib import Path -from typing import Optional - -import datasets -import evaluate -import jstyleson as json -import numpy as np -import transformers -from datasets import load_dataset -from nncf.common.utils.os import safe_open -from transformers import ( - AutoConfig, - AutoModelForSequenceClassification, - AutoTokenizer, - DataCollatorWithPadding, - EvalPrediction, - HfArgumentParser, - PretrainedConfig, - default_data_collator, - set_seed, -) -from transformers.trainer_utils import get_last_checkpoint -from transformers.utils import check_min_version, send_example_telemetry -from transformers.utils.versions import require_version - -from optimum.intel import OVConfig, OVTrainer, OVTrainingArguments - - -# Will error if the minimal version of Transformers is not installed. Remove at your own risks. -check_min_version("4.22.0") - -require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt") - -task_to_keys = { - "cola": ("sentence", None), - "mnli": ("premise", "hypothesis"), - "mrpc": ("sentence1", "sentence2"), - "qnli": ("question", "sentence"), - "qqp": ("question1", "question2"), - "rte": ("sentence1", "sentence2"), - "sst2": ("sentence", None), - "stsb": ("sentence1", "sentence2"), - "wnli": ("sentence1", "sentence2"), -} - -logger = logging.getLogger(__name__) - - -@dataclass -class DataTrainingArguments: - """ - Arguments pertaining to what data we are going to input our model for training and eval. - - Using `HfArgumentParser` we can turn this class - into argparse arguments to be able to specify them on - the command line. - """ - - task_name: Optional[str] = field( - default=None, - metadata={"help": "The name of the task to train on: " + ", ".join(task_to_keys.keys())}, - ) - dataset_name: Optional[str] = field( - default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} - ) - dataset_config_name: Optional[str] = field( - default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} - ) - max_seq_length: int = field( - default=128, - metadata={ - "help": ( - "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." - ) - }, - ) - overwrite_cache: bool = field( - default=False, metadata={"help": "Overwrite the cached preprocessed datasets or not."} - ) - pad_to_max_length: bool = field( - default=True, - metadata={ - "help": ( - "Whether to pad all samples to `max_seq_length`. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch." 
- ) - }, - ) - max_train_samples: Optional[int] = field( - default=None, - metadata={ - "help": ( - "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." - ) - }, - ) - max_eval_samples: Optional[int] = field( - default=None, - metadata={ - "help": ( - "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." - ) - }, - ) - max_predict_samples: Optional[int] = field( - default=None, - metadata={ - "help": ( - "For debugging purposes or quicker training, truncate the number of prediction examples to this " - "value if set." - ) - }, - ) - train_file: Optional[str] = field( - default=None, metadata={"help": "A csv or a json file containing the training data."} - ) - validation_file: Optional[str] = field( - default=None, metadata={"help": "A csv or a json file containing the validation data."} - ) - test_file: Optional[str] = field(default=None, metadata={"help": "A csv or a json file containing the test data."}) - - def __post_init__(self): - if self.task_name is not None: - self.task_name = self.task_name.lower() - if self.task_name not in task_to_keys.keys(): - raise ValueError("Unknown task, you should pick one in " + ",".join(task_to_keys.keys())) - elif self.dataset_name is not None: - pass - elif self.train_file is None or self.validation_file is None: - raise ValueError("Need either a GLUE task, a training/validation file or a dataset name.") - else: - train_extension = self.train_file.split(".")[-1] - assert train_extension in ["csv", "json"], "`train_file` should be a csv or a json file." - validation_extension = self.validation_file.split(".")[-1] - assert ( - validation_extension == train_extension - ), "`validation_file` should have the same extension (csv or json) as `train_file`." - - -@dataclass -class ModelArguments: - """ - Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. - """ - - model_name_or_path: str = field( - metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} - ) - teacher_model_name_or_path: str = field( - default=None, metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} - ) - config_name: Optional[str] = field( - default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} - ) - tokenizer_name: Optional[str] = field( - default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} - ) - cache_dir: Optional[str] = field( - default=None, - metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"}, - ) - use_fast_tokenizer: bool = field( - default=True, - metadata={"help": "Whether to use one of the fast tokenizer (backed by the tokenizers library) or not."}, - ) - model_revision: str = field( - default="main", - metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, - ) - use_auth_token: bool = field( - default=False, - metadata={ - "help": ( - "Will use the token generated when running `huggingface-cli login` (necessary to use this script " - "with private models)." 
- ) - }, - ) - ignore_mismatched_sizes: bool = field( - default=False, - metadata={"help": "Will enable to load a pretrained model whose head dimensions are different."}, - ) - nncf_compression_config: Optional[str] = field( - default=None, - metadata={ - "help": "Path to NNCF configuration .json file for adapting the model to compression-enabled training." - }, - ) - - -def main(): - # See all possible arguments in src/transformers/training_args.py - # or by passing the --help flag to this script. - # We now keep distinct sets of args, for a cleaner separation of concerns. - - parser = HfArgumentParser((ModelArguments, DataTrainingArguments, OVTrainingArguments)) - if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): - # If we pass only one argument to the script and it's the path to a json file, - # let's parse it to get our arguments. - model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) - else: - model_args, data_args, training_args = parser.parse_args_into_dataclasses() - - # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The - # information sent is the one passed as arguments along with your Python/PyTorch versions. - send_example_telemetry("run_glue", model_args, data_args) - - # Setup logging - logging.basicConfig( - format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", - datefmt="%m/%d/%Y %H:%M:%S", - handlers=[logging.StreamHandler(sys.stdout)], - ) - - log_level = training_args.get_process_log_level() - logger.setLevel(log_level) - datasets.utils.logging.set_verbosity(log_level) - transformers.utils.logging.set_verbosity(log_level) - transformers.utils.logging.enable_default_handler() - transformers.utils.logging.enable_explicit_format() - - # Log on each process the small summary: - logger.warning( - f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" - + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" - ) - logger.info(f"Training/evaluation parameters {training_args}") - - # Detecting last checkpoint. - last_checkpoint = None - if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir: - last_checkpoint = get_last_checkpoint(training_args.output_dir) - if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0: - raise ValueError( - f"Output directory ({training_args.output_dir}) already exists and is not empty. " - "Use --overwrite_output_dir to overcome." - ) - elif last_checkpoint is not None and training_args.resume_from_checkpoint is None: - logger.info( - f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change " - "the `--output_dir` or add `--overwrite_output_dir` to train from scratch." - ) - - # Set seed before initializing model. - set_seed(training_args.seed) - - # Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below) - # or specify a GLUE benchmark task (the dataset will be downloaded automatically from the datasets Hub). - # - # For CSV/JSON files, this script will use as labels the column called 'label' and as pair of sentences the - # sentences in columns called 'sentence1' and 'sentence2' if such column exists or the first two columns not named - # label if at least two columns are provided. 
- # - # If the CSVs/JSONs contain only one non-label column, the script does single sentence classification on this - # single column. You can easily tweak this behavior (see below) - # - # In distributed training, the load_dataset function guarantee that only one local process can concurrently - # download the dataset. - if data_args.task_name is not None: - # Downloading and loading a dataset from the hub. - raw_datasets = load_dataset( - "glue", - data_args.task_name, - cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, - ) - elif data_args.dataset_name is not None: - # Downloading and loading a dataset from the hub. - raw_datasets = load_dataset( - data_args.dataset_name, - data_args.dataset_config_name, - cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, - ) - else: - # Loading a dataset from your local files. - # CSV/JSON training and evaluation files are needed. - data_files = {"train": data_args.train_file, "validation": data_args.validation_file} - - # Get the test dataset: you can provide your own CSV/JSON test file (see below) - # when you use `do_predict` without specifying a GLUE benchmark task. - if training_args.do_predict: - if data_args.test_file is not None: - train_extension = data_args.train_file.split(".")[-1] - test_extension = data_args.test_file.split(".")[-1] - assert ( - test_extension == train_extension - ), "`test_file` should have the same extension (csv or json) as `train_file`." - data_files["test"] = data_args.test_file - else: - raise ValueError("Need either a GLUE task or a test file for `do_predict`.") - - for key in data_files.keys(): - logger.info(f"load a local file for {key}: {data_files[key]}") - - if data_args.train_file.endswith(".csv"): - # Loading a dataset from local csv files - raw_datasets = load_dataset( - "csv", - data_files=data_files, - cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, - ) - else: - # Loading a dataset from local json files - raw_datasets = load_dataset( - "json", - data_files=data_files, - cache_dir=model_args.cache_dir, - use_auth_token=True if model_args.use_auth_token else None, - ) - # See more about loading any type of standard or custom dataset at - # https://huggingface.co/docs/datasets/loading_datasets.html. - - # Labels - if data_args.task_name is not None: - is_regression = data_args.task_name == "stsb" - if not is_regression: - label_list = raw_datasets["train"].features["label"].names - num_labels = len(label_list) - else: - num_labels = 1 - else: - # Trying to have good defaults here, don't hesitate to tweak to your needs. - is_regression = raw_datasets["train"].features["label"].dtype in ["float32", "float64"] - if is_regression: - num_labels = 1 - else: - # A useful fast method: - # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.unique - label_list = raw_datasets["train"].unique("label") - label_list.sort() # Let's sort it for determinism - num_labels = len(label_list) - - if is_regression and model_args.teacher_model_name_or_path is not None: - raise NotImplementedError( - "Built-in knowledge distillation of `OVTrainer` only supports single label classification task now." - ) - - # Load pretrained model and tokenizer - # - # In distributed training, the .from_pretrained methods guarantee that only one local process can concurrently - # download model & vocab. 
- config = AutoConfig.from_pretrained( - model_args.config_name if model_args.config_name else model_args.model_name_or_path, - num_labels=num_labels, - finetuning_task=data_args.task_name, - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) - tokenizer = AutoTokenizer.from_pretrained( - model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, - cache_dir=model_args.cache_dir, - use_fast=model_args.use_fast_tokenizer, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) - model = AutoModelForSequenceClassification.from_pretrained( - model_args.model_name_or_path, - from_tf=bool(".ckpt" in model_args.model_name_or_path), - config=config, - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ignore_mismatched_sizes=model_args.ignore_mismatched_sizes, - ) - teacher_model = None - if model_args.teacher_model_name_or_path is not None: - teacher_model = AutoModelForSequenceClassification.from_pretrained( - model_args.teacher_model_name_or_path, - from_tf=bool(".ckpt" in model_args.teacher_model_name_or_path), - cache_dir=model_args.cache_dir, - ) - - # Preprocessing the raw_datasets - if data_args.task_name is not None: - sentence1_key, sentence2_key = task_to_keys[data_args.task_name] - else: - # Again, we try to have some nice defaults but don't hesitate to tweak to your use case. - non_label_column_names = [name for name in raw_datasets["train"].column_names if name != "label"] - if "sentence1" in non_label_column_names and "sentence2" in non_label_column_names: - sentence1_key, sentence2_key = "sentence1", "sentence2" - else: - if len(non_label_column_names) >= 2: - sentence1_key, sentence2_key = non_label_column_names[:2] - else: - sentence1_key, sentence2_key = non_label_column_names[0], None - - # Padding strategy - if data_args.pad_to_max_length: - padding = "max_length" - else: - # We will pad later, dynamically at batch creation, to the max sequence length in each batch - padding = False - - # Some models have set the order of the labels to use, so let's make sure we do use it. - label_to_id = None - if ( - model.config.label2id != PretrainedConfig(num_labels=num_labels).label2id - and data_args.task_name is not None - and not is_regression - ): - # Some have all caps in their config, some don't. - label_name_to_id = {k.lower(): v for k, v in model.config.label2id.items()} - if sorted(label_name_to_id.keys()) == sorted(label_list): - label_to_id = {i: int(label_name_to_id[label_list[i]]) for i in range(num_labels)} - else: - logger.warning( - "Your model seems to have been trained with labels, but they don't match the dataset: ", - f"model labels: {sorted(label_name_to_id.keys())}, dataset labels: {sorted(label_list)}." 
- "\nIgnoring the model labels as a result.", - ) - elif data_args.task_name is None and not is_regression: - label_to_id = {v: i for i, v in enumerate(label_list)} - - if label_to_id is not None: - model.config.label2id = label_to_id - model.config.id2label = {id: label for label, id in config.label2id.items()} - elif data_args.task_name is not None and not is_regression: - model.config.label2id = {l: i for i, l in enumerate(label_list)} - model.config.id2label = {id: label for label, id in config.label2id.items()} - - if data_args.max_seq_length > tokenizer.model_max_length: - logger.warning( - f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the" - f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}." - ) - max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length) - - def preprocess_function(examples): - # Tokenize the texts - args = ( - (examples[sentence1_key],) if sentence2_key is None else (examples[sentence1_key], examples[sentence2_key]) - ) - result = tokenizer(*args, padding=padding, max_length=max_seq_length, truncation=True) - - # Map labels to IDs (not necessary for GLUE tasks) - if label_to_id is not None and "label" in examples: - result["label"] = [(label_to_id[l] if l != -1 else -1) for l in examples["label"]] - return result - - with training_args.main_process_first(desc="dataset map pre-processing"): - raw_datasets = raw_datasets.map( - preprocess_function, - batched=True, - load_from_cache_file=not data_args.overwrite_cache, - desc="Running tokenizer on dataset", - ) - if training_args.do_train: - if "train" not in raw_datasets: - raise ValueError("--do_train requires a train dataset") - train_dataset = raw_datasets["train"] - if data_args.max_train_samples is not None: - max_train_samples = min(len(train_dataset), data_args.max_train_samples) - train_dataset = train_dataset.select(range(max_train_samples)) - - if training_args.do_eval: - if "validation" not in raw_datasets and "validation_matched" not in raw_datasets: - raise ValueError("--do_eval requires a validation dataset") - eval_dataset = raw_datasets["validation_matched" if data_args.task_name == "mnli" else "validation"] - if data_args.max_eval_samples is not None: - max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples) - eval_dataset = eval_dataset.select(range(max_eval_samples)) - - if training_args.do_predict or data_args.task_name is not None or data_args.test_file is not None: - if "test" not in raw_datasets and "test_matched" not in raw_datasets: - raise ValueError("--do_predict requires a test dataset") - predict_dataset = raw_datasets["test_matched" if data_args.task_name == "mnli" else "test"] - if data_args.max_predict_samples is not None: - max_predict_samples = min(len(predict_dataset), data_args.max_predict_samples) - predict_dataset = predict_dataset.select(range(max_predict_samples)) - - # Log a few random samples from the training set: - if training_args.do_train: - for index in random.sample(range(len(train_dataset)), 3): - logger.info(f"Sample {index} of the training set: {train_dataset[index]}.") - - # Get the metric function - if data_args.task_name is not None: - metric = evaluate.load("glue", data_args.task_name) - else: - metric = evaluate.load("accuracy") - - # You can define your custom compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a - # predictions and label_ids field) and has to return a dictionary string to float. 
- def compute_metrics(p: EvalPrediction): - preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions - preds = np.squeeze(preds) if is_regression else np.argmax(preds, axis=1) - if data_args.task_name is not None: - result = metric.compute(predictions=preds, references=p.label_ids) - if len(result) > 1: - result["combined_score"] = np.mean(list(result.values())).item() - return result - elif is_regression: - return {"mse": ((preds - p.label_ids) ** 2).mean().item()} - else: - return {"accuracy": (preds == p.label_ids).astype(np.float32).mean().item()} - - # Data collator will default to DataCollatorWithPadding when the tokenizer is passed to Trainer, so we change it if - # we already did the padding. - if data_args.pad_to_max_length: - data_collator = default_data_collator - elif training_args.fp16: - data_collator = DataCollatorWithPadding(tokenizer, pad_to_multiple_of=8) - else: - data_collator = None - - if model_args.nncf_compression_config is not None: - file_path = Path(model_args.nncf_compression_config).resolve() - with safe_open(file_path) as f: - compression = json.load(f) - ov_config = OVConfig(compression=compression) - else: - ov_config = OVConfig() - ov_config.log_dir = training_args.output_dir - - # Initialize our Trainer - trainer = OVTrainer( - model=model, - teacher_model=teacher_model, - ov_config=ov_config, - task="text-classification", - args=training_args, - train_dataset=train_dataset if training_args.do_train else None, - eval_dataset=eval_dataset if training_args.do_eval else None, - compute_metrics=compute_metrics, - tokenizer=tokenizer, - data_collator=data_collator, - ) - - # Training - if training_args.do_train: - checkpoint = None - if training_args.resume_from_checkpoint is not None: - checkpoint = training_args.resume_from_checkpoint - elif last_checkpoint is not None: - checkpoint = last_checkpoint - train_result = trainer.train(resume_from_checkpoint=checkpoint) - metrics = train_result.metrics - max_train_samples = ( - data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset) - ) - metrics["train_samples"] = min(max_train_samples, len(train_dataset)) - - trainer.save_model() # Saves the tokenizer too for easy upload - - trainer.log_metrics("train", metrics) - trainer.save_metrics("train", metrics) - trainer.save_state() - - # Evaluation - if training_args.do_eval: - logger.info("*** Evaluate ***") - - # Loop to handle MNLI double evaluation (matched, mis-matched) - tasks = [data_args.task_name] - eval_datasets = [eval_dataset] - if data_args.task_name == "mnli": - tasks.append("mnli-mm") - valid_mm_dataset = raw_datasets["validation_mismatched"] - if data_args.max_eval_samples is not None: - max_eval_samples = min(len(valid_mm_dataset), data_args.max_eval_samples) - valid_mm_dataset = valid_mm_dataset.select(range(max_eval_samples)) - eval_datasets.append(valid_mm_dataset) - combined = {} - - for eval_dataset, task in zip(eval_datasets, tasks): - metrics = trainer.evaluate(eval_dataset=eval_dataset) - - max_eval_samples = ( - data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset) - ) - metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset)) - - if task == "mnli-mm": - metrics = {k + "_mm": v for k, v in metrics.items()} - if task is not None and "mnli" in task: - combined.update(metrics) - - trainer.log_metrics("eval", metrics) - trainer.save_metrics("eval", combined if task is not None and "mnli" in task else metrics) - - if 
training_args.do_predict: - logger.info("*** Predict ***") - - # Loop to handle MNLI double evaluation (matched, mis-matched) - tasks = [data_args.task_name] - predict_datasets = [predict_dataset] - if data_args.task_name == "mnli": - tasks.append("mnli-mm") - predict_datasets.append(raw_datasets["test_mismatched"]) - - for predict_dataset, task in zip(predict_datasets, tasks): - # Removing the `label` columns because it contains -1 and Trainer won't like that. - predict_dataset = predict_dataset.remove_columns("label") - predictions = trainer.predict(predict_dataset, metric_key_prefix="predict").predictions - predictions = np.squeeze(predictions) if is_regression else np.argmax(predictions, axis=1) - - output_predict_file = os.path.join(training_args.output_dir, f"predict_results_{task}.txt") - if trainer.is_world_process_zero(): - with open(output_predict_file, "w") as writer: - logger.info(f"***** Predict results {task} *****") - writer.write("index\tprediction\n") - for index, item in enumerate(predictions): - if is_regression: - writer.write(f"{index}\t{item:3.3f}\n") - else: - item = label_list[item] - writer.write(f"{index}\t{item}\n") - - kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "text-classification"} - if data_args.task_name is not None: - kwargs["language"] = "en" - kwargs["dataset_tags"] = "glue" - kwargs["dataset_args"] = data_args.task_name - kwargs["dataset"] = f"GLUE {data_args.task_name.upper()}" - - if training_args.push_to_hub: - trainer.push_to_hub(**kwargs) - else: - trainer.create_model_card(**kwargs) - - -def _mp_fn(index): - # For xla_spawn (TPUs) - main() - - -if __name__ == "__main__": - main() diff --git a/optimum/intel/__init__.py b/optimum/intel/__init__.py index 8e0f12b747..eab3b49c4c 100644 --- a/optimum/intel/__init__.py +++ b/optimum/intel/__init__.py @@ -80,7 +80,6 @@ _import_structure["utils.dummy_openvino_and_nncf_objects"].extend( [ "OVQuantizer", - "OVTrainingArguments", "OVQuantizationConfig", "OVWeightQuantizationConfig", "OVDynamicQuantizationConfig", @@ -91,7 +90,6 @@ _import_structure["openvino"].extend( [ "OVQuantizer", - "OVTrainingArguments", "OVQuantizationConfig", "OVWeightQuantizationConfig", "OVDynamicQuantizationConfig", @@ -100,15 +98,6 @@ ) -try: - if not (is_openvino_available() and is_nncf_available() and is_accelerate_available()): - raise OptionalDependencyNotAvailable() -except OptionalDependencyNotAvailable: - _import_structure["utils.dummy_openvino_and_nncf_objects"].extend(["OVTrainer"]) -else: - _import_structure["openvino"].extend(["OVTrainer"]) - - try: if not (is_openvino_available() and is_diffusers_available()): raise OptionalDependencyNotAvailable() @@ -277,7 +266,6 @@ OVMixedQuantizationConfig, OVQuantizationConfig, OVQuantizer, - OVTrainingArguments, OVWeightQuantizationConfig, ) else: @@ -286,18 +274,9 @@ OVMixedQuantizationConfig, OVQuantizationConfig, OVQuantizer, - OVTrainingArguments, OVWeightQuantizationConfig, ) - try: - if not (is_openvino_available() and is_nncf_available() and is_accelerate_available()): - raise OptionalDependencyNotAvailable() - except OptionalDependencyNotAvailable: - from .utils.dummy_openvino_and_nncf_objects import OVTrainer - else: - from .openvino import OVTrainer - try: if not (is_openvino_available() and is_diffusers_available()): raise OptionalDependencyNotAvailable() diff --git a/optimum/intel/openvino/__init__.py b/optimum/intel/openvino/__init__.py index 71eeb11f56..a88238aab8 100644 --- a/optimum/intel/openvino/__init__.py +++ 
b/optimum/intel/openvino/__init__.py @@ -16,7 +16,6 @@ import warnings from ..utils.import_utils import ( - is_accelerate_available, is_diffusers_available, is_nncf_available, is_sentence_transformers_available, @@ -49,10 +48,6 @@ patch_torch_operators() from .quantization import OVQuantizer - from .training_args import OVTrainingArguments - - if is_accelerate_available(): - from .trainer import OVTrainer from .configuration import ( diff --git a/optimum/intel/openvino/configuration.py b/optimum/intel/openvino/configuration.py index 9c443d37c5..1a645bc5bc 100644 --- a/optimum/intel/openvino/configuration.py +++ b/optimum/intel/openvino/configuration.py @@ -799,9 +799,6 @@ def __init__( if isinstance(quantization_config, dict): quantization_config = self.quantization_config_from_dict(quantization_config) self.quantization_config = quantization_config - self.compression = kwargs.get( - "compression", None - ) # A field for backward-compatability of training-time compression parameters if self.quantization_config is not None: if isinstance(self.quantization_config, (OVWeightQuantizationConfig, OVQuantizationConfig)): self.dtype = self.quantization_config.dtype diff --git a/optimum/intel/openvino/trainer.py b/optimum/intel/openvino/trainer.py deleted file mode 100644 index 0edb3a7307..0000000000 --- a/optimum/intel/openvino/trainer.py +++ /dev/null @@ -1,1027 +0,0 @@ -# Copyright 2022 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
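Before the body of the deleted trainer, a note on the import guards removed from `optimum/intel/__init__.py` above: they follow the standard Optimum optional-dependency pattern, where a missing backend swaps the real class for a dummy that errors only on use. A minimal sketch of that pattern, with illustrative names (the real code additionally routes through `_import_structure` for lazy loading):

```python
# Minimal sketch of the optional-dependency guard pattern; names are illustrative.
class OptionalDependencyNotAvailable(Exception):
    pass

def is_nncf_available() -> bool:
    try:
        import nncf  # noqa: F401
        return True
    except ImportError:
        return False

try:
    if not is_nncf_available():
        raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
    # Fall back to a dummy object that raises a helpful error only when actually used.
    class OVTrainer:
        def __init__(self, *args, **kwargs):
            raise ImportError("OVTrainer requires the openvino and nncf packages to be installed.")
else:
    pass  # in the real package: from .openvino import OVTrainer
```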
- -import inspect -import io -import math -import os -import shutil -import sys -import time -from collections import defaultdict -from itertools import chain -from pathlib import Path -from typing import Callable, Dict, List, Optional, Tuple, Type, Union - - -# Integrations must be imported before ML frameworks: -# isort: off -from transformers.integrations import hp_params -from transformers.integrations.deepspeed import deepspeed_init, deepspeed_load_checkpoint, is_deepspeed_available - -# isort: on - -import openvino -import torch -import torch.distributed as dist -import torch.nn.functional as F -from nncf import NNCFConfig -from nncf.common.logging.logger import nncf_logger, set_log_level -from nncf.common.utils.tensorboard import prepare_for_tensorboard -from nncf.config.structures import BNAdaptationInitArgs, QuantizationRangeInitArgs -from nncf.experimental.torch.sparsity.movement.algo import MovementSparsityController -from nncf.experimental.torch.sparsity.movement.scheduler import MovementSchedulerStage -from nncf.torch import create_compressed_model -from nncf.torch.composite_compression import PTCompositeCompressionAlgorithmController -from nncf.torch.compression_method_api import PTCompressionAlgorithmController -from nncf.torch.nncf_network import NNCFNetwork -from nncf.torch.quantization.algo import QuantizationController -from openvino._offline_transformations import ( - apply_fused_names_cleanup, - apply_moc_transformations, - apply_pruning_transformation, - compress_quantize_weights_transformation, -) -from openvino.runtime import Core, PartialShape, save_model -from packaging import version -from torch import nn -from torch.onnx import export as onnx_export -from torch.utils._pytree import tree_map -from torch.utils.data import Dataset, RandomSampler -from transformers import Trainer -from transformers.data.data_collator import DataCollator -from transformers.debug_utils import DebugOption, DebugUnderflowOverflow -from transformers.modeling_utils import PreTrainedModel, unwrap_model -from transformers.tokenization_utils_base import PreTrainedTokenizerBase -from transformers.trainer import TRAINER_STATE_NAME, TRAINING_ARGS_NAME -from transformers.trainer_callback import TrainerCallback, TrainerState -from transformers.trainer_pt_utils import get_dataloader_sampler, get_model_param_count -from transformers.trainer_utils import ( - EvalPrediction, - HPSearchBackend, - TrainOutput, - has_length, - speed_metrics, -) -from transformers.training_args import ParallelMode -from transformers.utils import ( - WEIGHTS_NAME, - is_accelerate_available, - is_apex_available, - is_sagemaker_mp_enabled, - logging, -) - -from optimum.exporters import TasksManager -from optimum.exporters.onnx import OnnxConfig - -from ..utils.constant import _TASK_ALIASES -from ..utils.import_utils import _transformers_version, is_transformers_version -from .configuration import OVConfig -from .quantization import OVDataLoader -from .training_args import OVTrainingArguments -from .utils import ( - MAX_ONNX_OPSET, - MIN_ONNX_QDQ_OPSET, - ONNX_WEIGHTS_NAME, - OV_XML_FILE_NAME, - use_external_data_format, -) - - -if is_transformers_version(">=", "4.39.0"): - from transformers.utils import is_torch_xla_available -else: - from transformers.utils import is_torch_tpu_available as is_torch_xla_available - -if is_accelerate_available(): - from accelerate import __version__ as accelerate_version - from accelerate import skip_first_batches - - if version.parse(accelerate_version) > version.parse("0.20.3"): - pass - 
DATA_SAMPLERS = [RandomSampler] - if version.parse(accelerate_version) > version.parse("0.23.0"): - from accelerate.data_loader import SeedableRandomSampler - - DATA_SAMPLERS += [SeedableRandomSampler] - - if is_deepspeed_available(): - pass - - -if is_apex_available(): - from apex import amp - -if is_sagemaker_mp_enabled(): - import smdistributed.modelparallel.torch as smp - -if is_torch_xla_available(): - import torch_xla.core.xla_model as xm - -core = Core() - -logger = logging.get_logger(__name__) -logger.setLevel(logging.INFO) - -# NNCF Error to be shown on stdout -# set_log_level(logging.ERROR) -NNCF_LOG_FILE_NAME = "nncf_output.log" - - -DEFAULT_QUANTIZATION_CONFIG = { - "algorithm": "quantization", - "preset": "mixed", - "overflow_fix": "disable", - "initializer": { - "range": {"num_init_samples": 300, "type": "mean_min_max"}, - "batchnorm_adaptation": {"num_bn_adaptation_samples": 0}, - }, - "scope_overrides": { - "activations": { - "{re}.*matmul_0": {"mode": "symmetric"}, - "{re}.*scaled_dot_product_attention_0": {"mode": "symmetric"}, - } - }, - "ignored_scopes": [ - "{re}.*Embedding.*", - "{re}.*add___.*", - "{re}.*layer_norm_.*", - ], -} - - -def _onnx_export_nncf_model(model: NNCFNetwork, config: OnnxConfig, output: Union[str, io.BytesIO], opset: int = None): - # TODO: remove it when fix controller.strip(copy=True) behavior - signature = inspect.signature(model.forward) - signature = list(signature.parameters.keys()) - opset = opset or config.DEFAULT_ONNX_OPSET - model_inputs = config.generate_dummy_inputs(framework="pt") - # Create ordered inputs for the ONNX export of NNCFNetwork as keyword arguments are currently not supported - model_inputs = tuple(model_inputs.pop(key, None) for key in signature if len(model_inputs) != 0) - device = model.device - - def remap(value): - if isinstance(value, torch.Tensor): - value = value.to(device) - return value - - with config.patch_model_for_export(model): - model_inputs = tree_map(remap, model_inputs) - with torch.no_grad(): - model.eval() - # Disable node additions to be exported in the graph - model.nncf.disable_dynamic_graph_building() - onnx_export( - model, - model_inputs, - f=output, - input_names=list(config.inputs.keys()), - output_names=list(config.outputs.keys()), - dynamic_axes=dict(chain(config.inputs.items(), config.outputs.items())), - do_constant_folding=True, - opset_version=opset, - ) - model.nncf.enable_dynamic_graph_building() - - -class OVTrainer(Trainer): - """ - OVTrainer enables NNCF quantization aware training. 
- """ - - def __init__( - self, - model: Union[PreTrainedModel, torch.nn.Module] = None, - teacher_model: Union[PreTrainedModel, torch.nn.Module] = None, - args: OVTrainingArguments = None, - data_collator: Optional[DataCollator] = None, - train_dataset: Optional[Dataset] = None, - eval_dataset: Optional[Dataset] = None, - tokenizer: Optional[PreTrainedTokenizerBase] = None, - model_init: Callable[[], PreTrainedModel] = None, - compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None, - callbacks: Optional[List[TrainerCallback]] = None, - optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None), - preprocess_logits_for_metrics: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] = None, - ov_config: Optional[OVConfig] = None, - task: Optional[str] = None, - ): - logger.warning("OVTrainer is deprecated and will be removed in optimum-intel v1.22.0.") - - if is_transformers_version(">=", "4.45.0"): - if is_transformers_version(">=", "4.46.0"): - raise ImportError( - f"The installed transformers version is {_transformers_version}, which is not supported by the OVTrainer. Please downgrade to v4.44" - ) - - logger.warning( - f"The installed transformers version is {_transformers_version}, which is not officially supported by the OVTrainer; use at your own risk" - ) - - self.neftune_noise_alpha = None - - super().__init__( - model, - args, - data_collator, - train_dataset, - eval_dataset, - tokenizer, - model_init, - compute_metrics, - callbacks, - optimizers, - preprocess_logits_for_metrics, - ) - - self.ov_config = ov_config - self.task = task - self.teacher = None - if teacher_model is not None: - self.teacher = teacher_model.to(args.device) - if self.args.n_gpu > 1: - self.teacher = torch.nn.DataParallel(self.teacher) - self.teacher.eval() - self.compression_controller = None - - if self.ov_config is not None: - if self.ov_config.compression is None: - self.ov_config.compression = DEFAULT_QUANTIZATION_CONFIG - if ( - isinstance(self.ov_config.compression, dict) - and "algorithm" in self.ov_config.compression - and self.ov_config.compression["algorithm"] == "quantization" - ): - self.ov_config.compression["export_to_onnx_standard_ops"] = self.ov_config.save_onnx_model - elif isinstance(self.ov_config.compression, list): - for i, algo_config in enumerate(self.ov_config.compression): - if algo_config["algorithm"] == "quantization": - self.ov_config.compression[i]["export_to_onnx_standard_ops"] = self.ov_config.save_onnx_model - - if self.args.do_train: - self._set_task() - train_dataloader = self.get_train_dataloader() - model_inputs = next(iter(train_dataloader)) - for label_name in self.label_names: - model_inputs.pop(label_name) - force_batch_one = self._is_pruning_enabled() - self.ov_config.add_input_info(model_inputs, force_batch_one) - nncf_config = NNCFConfig.from_dict(self.ov_config.__dict__) - nncf_config.register_extra_structs( - [ - QuantizationRangeInitArgs(OVDataLoader(train_dataloader)), - BNAdaptationInitArgs(OVDataLoader(train_dataloader)), - ] - ) - - # Configure NNCF logging: - # disable NNCF logging to stdout except errors, - # redirecting it to the file nncf_output.log instead - nncf_config["log_dir"] = args.output_dir - nncf_log_file_handler = logging.logging.FileHandler(os.path.join(args.output_dir, NNCF_LOG_FILE_NAME)) - nncf_log_file_handler.setFormatter(logging.logging.Formatter("%(levelname)s:%(name)s:%(message)s")) - nncf_logger.addHandler(nncf_log_file_handler) - set_log_level(logging.ERROR) - nncf_logger.setLevel(logging.INFO) -
nncf_log_file_handler.setLevel(logging.INFO) - - self.compression_controller, self.model = create_compressed_model(self.model, nncf_config) - self.model_wrapped = self.model - # TODO : To deprecate once support transformers > 4.30.0 - self.deepspeed = None - - def _set_signature_columns_if_needed(self): - if self._signature_columns is None: - # Inspect model forward signature to keep only the arguments it accepts. - signature = inspect.signature(self.model.forward) - self._signature_columns = list(signature.parameters.keys()) - # Labels may be named label or label_ids, the default data collator handles that. - self._signature_columns += list(set(["label", "label_ids"] + self.label_names)) - - def _inner_training_loop( - self, batch_size=None, args=None, resume_from_checkpoint=None, trial=None, ignore_keys_for_eval=None - ): - self.accelerator.free_memory() - self._train_batch_size = batch_size - - if self.args.auto_find_batch_size: - self.state.train_batch_size = self._train_batch_size - logger.debug(f"Currently training with a batch size of: {self._train_batch_size}") - # Data loader and number of training steps - train_dataloader = self.get_train_dataloader() - - # Setting up training control variables: - # number of training epochs: num_train_epochs - # number of training steps per epoch: num_update_steps_per_epoch - # total number of training steps to execute: max_steps - total_train_batch_size = self._train_batch_size * args.gradient_accumulation_steps * args.world_size - - len_dataloader = None - num_train_tokens = None - if has_length(train_dataloader): - len_dataloader = len(train_dataloader) - num_update_steps_per_epoch = len_dataloader // args.gradient_accumulation_steps - num_update_steps_per_epoch = max(num_update_steps_per_epoch, 1) - num_examples = self.num_examples(train_dataloader) - if args.max_steps > 0: - max_steps = args.max_steps - num_train_epochs = args.max_steps // num_update_steps_per_epoch + int( - args.max_steps % num_update_steps_per_epoch > 0 - ) - # May be slightly incorrect if the last batch in the training dataloader has a smaller size but it's - # the best we can do. - num_train_samples = args.max_steps * total_train_batch_size - if args.include_tokens_per_second: - num_train_tokens = ( - self.num_tokens(train_dataloader, args.max_steps) * args.gradient_accumulation_steps - ) - else: - max_steps = math.ceil(args.num_train_epochs * num_update_steps_per_epoch) - num_train_epochs = math.ceil(args.num_train_epochs) - num_train_samples = self.num_examples(train_dataloader) * args.num_train_epochs - if args.include_tokens_per_second: - num_train_tokens = self.num_tokens(train_dataloader) * args.num_train_epochs - elif args.max_steps > 0: # Rely on max_steps when dataloader does not have a working size - max_steps = args.max_steps - # Setting a very large number of epochs so we go as many times as necessary over the iterator. 
- num_train_epochs = sys.maxsize - num_update_steps_per_epoch = max_steps - num_examples = total_train_batch_size * args.max_steps - num_train_samples = args.max_steps * total_train_batch_size - if args.include_tokens_per_second: - num_train_tokens = self.num_tokens(train_dataloader, args.max_steps) * args.gradient_accumulation_steps - else: - raise ValueError( - "args.max_steps must be set to a positive value if dataloader does not have a length, was" - f" {args.max_steps}" - ) - - if DebugOption.UNDERFLOW_OVERFLOW in self.args.debug: - if self.args.n_gpu > 1: - # nn.DataParallel(model) replicates the model, creating new variables and module - # references registered here no longer work on other gpus, breaking the module - raise ValueError( - "Currently --debug underflow_overflow is not supported under DP. Please use DDP" - " (torch.distributed.launch)." - ) - else: - debug_overflow = DebugUnderflowOverflow(self.model) # noqa - - is_fsdp_xla_enabled = ( - self.is_fsdp_xla_enabled if is_transformers_version(">=", "4.36.0") else self.fsdp is not None - ) - delay_optimizer_creation = is_sagemaker_mp_enabled() or is_fsdp_xla_enabled or self.is_fsdp_enabled - - # We need to reset the scheduler, as its parameters may be different on subsequent calls - if self._created_lr_scheduler: - self.lr_scheduler = None - self._created_lr_scheduler = False - - if self.is_deepspeed_enabled: - self.optimizer, self.lr_scheduler = deepspeed_init(self, num_training_steps=max_steps) - - if not delay_optimizer_creation: - self.create_optimizer_and_scheduler(num_training_steps=max_steps) - - if is_transformers_version(">=", "4.44.99"): - from transformers.trainer_callback import ExportableState - - self.state = TrainerState( - stateful_callbacks=[ - cb for cb in self.callback_handler.callbacks + [self.control] if isinstance(cb, ExportableState) - ] - ) - - else: - self.state = TrainerState() - - self.state.is_hyper_param_search = trial is not None - self.state.train_batch_size = self._train_batch_size - - # Compute absolute values for logging, eval, and save if given as ratio - if args.logging_steps is not None: - if args.logging_steps < 1: - self.state.logging_steps = math.ceil(max_steps * args.logging_steps) - else: - self.state.logging_steps = args.logging_steps - if args.eval_steps is not None: - if args.eval_steps < 1: - self.state.eval_steps = math.ceil(max_steps * args.eval_steps) - else: - self.state.eval_steps = args.eval_steps - if args.save_steps is not None: - if args.save_steps < 1: - self.state.save_steps = math.ceil(max_steps * args.save_steps) - else: - self.state.save_steps = args.save_steps - - # Activate gradient checkpointing if needed - if args.gradient_checkpointing: - if args.gradient_checkpointing_kwargs is None: - gradient_checkpointing_kwargs = {} - else: - gradient_checkpointing_kwargs = args.gradient_checkpointing_kwargs - - self.model.gradient_checkpointing_enable(gradient_checkpointing_kwargs=gradient_checkpointing_kwargs) - - if is_transformers_version("<", "4.29.0"): - is_distributed = self.args.local_rank != -1 - else: - from accelerate.utils import DistributedType - - is_distributed = self.args.distributed_state.distributed_type != DistributedType.NO - - if self.compression_controller is not None and is_distributed: - self.compression_controller.distributed() - - model = self._wrap_model(self.model_wrapped) - - # as the model is wrapped, don't use `accelerator.prepare` - # this is for unhandled cases such as - # FSDP-XLA, SageMaker MP/DP, DataParallel, IPEX - 
use_accelerator_prepare = True if model is self.model else False - - if delay_optimizer_creation: - if is_transformers_version("<", "4.36.0") and use_accelerator_prepare: - self.model = self.accelerator.prepare(self.model) - self.create_optimizer_and_scheduler(num_training_steps=max_steps) - - # prepare using `accelerator` prepare - if use_accelerator_prepare: - self.model.train() - if hasattr(self.lr_scheduler, "step"): - if self.use_apex: - model = self.accelerator.prepare(self.model) - else: - model, self.optimizer = self.accelerator.prepare(self.model, self.optimizer) - else: - # to handle cases wherein we pass "DummyScheduler" such as when it is specified in DeepSpeed config. - model, self.optimizer, self.lr_scheduler = self.accelerator.prepare( - self.model, self.optimizer, self.lr_scheduler - ) - - if self.is_fsdp_enabled: - self.model = self.model_wrapped = model - - # for the rest of this function `model` is the outside model, whether it was wrapped or not - if model is not self.model: - self.model_wrapped = model - - # backward compatibility - if self.is_deepspeed_enabled: - self.deepspeed = self.model_wrapped - - # ckpt loading - if resume_from_checkpoint is not None: - if self.is_deepspeed_enabled: - deepspeed_load_checkpoint(self.model_wrapped, resume_from_checkpoint) - elif is_sagemaker_mp_enabled() or self.is_fsdp_enabled: - self._load_from_checkpoint(resume_from_checkpoint, self.model_wrapped) - - # Check if saved optimizer or scheduler states exist - self._load_optimizer_and_scheduler(resume_from_checkpoint) - - # important: at this point: - # self.model is the Transformers Model - # self.model_wrapped is DDP(Transformers Model), Deepspeed(Transformers Model), - # FSDP(Transformers Model), Dynamo Optimized Module(Transformers Model) etc. - - # Train! - logger.info("***** Running training *****") - logger.info(f" Num examples = {num_examples:,}") - logger.info(f" Num Epochs = {num_train_epochs:,}") - logger.info(f" Instantaneous batch size per device = {self.args.per_device_train_batch_size:,}") - if self.args.per_device_train_batch_size != self._train_batch_size: - logger.info(f" Training with DataParallel so batch size has been adjusted to: {self._train_batch_size:,}") - logger.info(f" Total train batch size (w. 
parallel, distributed & accumulation) = {total_train_batch_size:,}") - logger.info(f" Gradient Accumulation steps = {args.gradient_accumulation_steps}") - logger.info(f" Total optimization steps = {max_steps:,}") - logger.info(f" Number of trainable parameters = {get_model_param_count(model, trainable_only=True):,}") - - self.state.epoch = 0 - start_time = time.time() - epochs_trained = 0 - steps_trained_in_current_epoch = 0 - steps_trained_progress_bar = None - - # Check if continuing training from a checkpoint - if resume_from_checkpoint is not None and os.path.isfile( - os.path.join(resume_from_checkpoint, TRAINER_STATE_NAME) - ): - self.state = TrainerState.load_from_json(os.path.join(resume_from_checkpoint, TRAINER_STATE_NAME)) - epochs_trained = self.state.global_step // num_update_steps_per_epoch - if not args.ignore_data_skip: - steps_trained_in_current_epoch = self.state.global_step % (num_update_steps_per_epoch) - steps_trained_in_current_epoch *= args.gradient_accumulation_steps - else: - steps_trained_in_current_epoch = 0 - - logger.info(" Continuing training from checkpoint, will skip to saved global_step") - logger.info(f" Continuing training from epoch {epochs_trained}") - logger.info(f" Continuing training from global step {self.state.global_step}") - if not args.ignore_data_skip: - logger.info( - f" Will skip the first {epochs_trained} epochs then the first" - f" {steps_trained_in_current_epoch} batches in the first epoch." - ) - - # Update the references - self.callback_handler.model = self.model - self.callback_handler.optimizer = self.optimizer - self.callback_handler.lr_scheduler = self.lr_scheduler - self.callback_handler.train_dataloader = train_dataloader - if self.hp_name is not None and self._trial is not None: - # use self._trial because the SigOpt/Optuna hpo only call `_hp_search_setup(trial)` instead of passing trial - # parameter to Train when using DDP. - self.state.trial_name = self.hp_name(self._trial) - if trial is not None: - assignments = trial.assignments if self.hp_search_backend == HPSearchBackend.SIGOPT else trial - self.state.trial_params = hp_params(assignments) - else: - self.state.trial_params = None - # This should be the same if the state has been saved but in case the training arguments changed, it's safer - # to set this after the load. - self.state.max_steps = max_steps - self.state.num_train_epochs = num_train_epochs - self.state.is_local_process_zero = self.is_local_process_zero() - self.state.is_world_process_zero = self.is_world_process_zero() - - # tr_loss is a tensor to avoid synchronization of TPUs through .item() - tr_loss = torch.tensor(0.0).to(args.device) - self.compression_metrics = defaultdict(lambda: torch.tensor(0.0).to(args.device)) - # _total_loss_scalar is updated everytime .item() has to be called on tr_loss and stores the sum of all losses - self._total_loss_scalar = 0.0 - self._globalstep_last_logged = self.state.global_step - model.zero_grad() - - self.control = self.callback_handler.on_train_begin(args, self.state, self.control) - - # Skip the first epochs_trained epochs to get the random state of the dataloader at the right point. 
- if not args.ignore_data_skip: - for epoch in range(epochs_trained): - sampler = get_dataloader_sampler(train_dataloader) - sampler_kinds = [RandomSampler] - if version.parse(accelerate_version) > version.parse("0.23.0"): - sampler_kinds.append(SeedableRandomSampler) - is_random_sampler = isinstance(sampler, tuple(sampler_kinds)) - if not is_random_sampler: - # We just need to begin an iteration to create the randomization of the sampler. - for _ in train_dataloader: - break - else: - # Otherwise we need to call the whooooole sampler cause there is some random operation added - # AT THE VERY END! - sampler = sampler if sampler is not None else [] - _ = list(sampler) - - total_batched_samples = 0 - for epoch in range(epochs_trained, num_train_epochs): - epoch_iterator = train_dataloader - if hasattr(epoch_iterator, "set_epoch"): - epoch_iterator.set_epoch(epoch) - - # Reset the past mems state at the beginning of each epoch if necessary. - if args.past_index >= 0: - self._past = None - - steps_in_epoch = ( - len(epoch_iterator) - if len_dataloader is not None - else args.max_steps * args.gradient_accumulation_steps - ) - self.control = self.callback_handler.on_epoch_begin(args, self.state, self.control) - - if self.compression_controller is not None: - # Must be called at the beginning of each training epoch to prepare the compression method - self.compression_controller.scheduler.epoch_step() - nncf_logger.info( - "\nEpoch {} |".format(epoch).join(self.compression_controller.statistics().to_str().split("\n")) - ) - - if epoch == epochs_trained and resume_from_checkpoint is not None and steps_trained_in_current_epoch == 0: - self._load_rng_state(resume_from_checkpoint) - - rng_to_sync = False - steps_skipped = 0 - if steps_trained_in_current_epoch > 0: - epoch_iterator = skip_first_batches(epoch_iterator, steps_trained_in_current_epoch) - steps_skipped = steps_trained_in_current_epoch - steps_trained_in_current_epoch = 0 - rng_to_sync = True - - step = -1 - for step, inputs in enumerate(epoch_iterator): - total_batched_samples += 1 - - if is_transformers_version(">=", "4.36.0") and self.args.include_num_input_tokens_seen: - main_input_name = getattr(self.model, "main_input_name", "input_ids") - if main_input_name not in inputs: - logger.warning( - "Tried to track the number of tokens seen, however the current model is " - "not configured properly to know what item is the input. To fix this, add " - "a `main_input_name` attribute to the model class you are using." 
- ) - else: - self.state.num_input_tokens_seen += self.accelerator.gather(inputs[main_input_name]).numel() - - if rng_to_sync: - self._load_rng_state(resume_from_checkpoint) - rng_to_sync = False - - # Skip past any already trained steps if resuming training - if steps_trained_in_current_epoch > 0: - steps_trained_in_current_epoch -= 1 - if steps_trained_progress_bar is not None: - steps_trained_progress_bar.update(1) - if steps_trained_in_current_epoch == 0: - self._load_rng_state(resume_from_checkpoint) - continue - elif steps_trained_progress_bar is not None: - steps_trained_progress_bar.close() - steps_trained_progress_bar = None - - if step % args.gradient_accumulation_steps == 0: - self.control = self.callback_handler.on_step_begin(args, self.state, self.control) - if self.compression_controller is not None: - # Must be called at the beginning of each training step to prepare the compression method - self.compression_controller.scheduler.step() - - with self.accelerator.accumulate(model): - tr_loss_step = self.training_step(model, inputs) - - if ( - args.logging_nan_inf_filter - and not is_torch_xla_available() - and (torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step)) - ): - # if loss is nan or inf simply add the average of previous logged losses - tr_loss += tr_loss / (1 + self.state.global_step - self._globalstep_last_logged) - else: - tr_loss += tr_loss_step - - self.current_flos += float(self.floating_point_ops(inputs)) - - is_last_step_and_steps_less_than_grad_acc = ( - steps_in_epoch <= args.gradient_accumulation_steps and (step + 1) == steps_in_epoch - ) - - if ( - total_batched_samples % args.gradient_accumulation_steps == 0 - or - # last step in epoch but step is always smaller than gradient_accumulation_steps - is_last_step_and_steps_less_than_grad_acc - ): - # the `or` condition of `is_last_step_and_steps_less_than_grad_acc` is not covered - # in accelerate. So, explicitly enable sync gradients to True in that case. 
- if is_last_step_and_steps_less_than_grad_acc: - self.accelerator.gradient_state._set_sync_gradients(True) - - # Gradient clipping - if args.max_grad_norm is not None and args.max_grad_norm > 0: - # deepspeed does its own clipping - - if getattr(self, "do_grad_scaling", False): - # AMP: gradients need unscaling - self.scaler.unscale_(self.optimizer) - - if is_sagemaker_mp_enabled() and args.fp16: - self.optimizer.clip_master_grads(args.max_grad_norm) - elif self.use_apex: - # Revert to normal clipping otherwise, handling Apex or full precision - nn.utils.clip_grad_norm_( - amp.master_params(self.optimizer), - args.max_grad_norm, - ) - else: - self.accelerator.clip_grad_norm_( - model.parameters(), - args.max_grad_norm, - ) - - # Optimizer step - optimizer_was_run = True - if self.deepspeed: - pass # called outside the loop - elif getattr(self, "do_grad_scaling", False): - scale_before = self.scaler.get_scale() - self.scaler.step(self.optimizer) - self.scaler.update() - scale_after = self.scaler.get_scale() - optimizer_was_run = scale_before <= scale_after - else: - self.optimizer.step() - - if optimizer_was_run and not self.deepspeed: - self.lr_scheduler.step() - - model.zero_grad() - self.state.global_step += 1 - self.state.epoch = epoch + (step + 1 + steps_skipped) / steps_in_epoch - self.control = self.callback_handler.on_step_end(args, self.state, self.control) - - self._maybe_log_save_evaluate(tr_loss, model, trial, epoch, ignore_keys_for_eval) - else: - self.control = self.callback_handler.on_substep_end(args, self.state, self.control) - - if self.control.should_epoch_stop or self.control.should_training_stop: - break - if step < 0: - logger.warning( - "There seems to be not a single sample in your epoch_iterator, stopping training at step" - f" {self.state.global_step}! This is expected if you're using an IterableDataset and set" - f" num_steps ({max_steps}) higher than the number of available samples." - ) - self.control.should_training_stop = True - - self.control = self.callback_handler.on_epoch_end(args, self.state, self.control) - self._maybe_log_save_evaluate(tr_loss, model, trial, epoch, ignore_keys_for_eval) - - if self.control.should_training_stop: - break - - if args.past_index and hasattr(self, "_past"): - # Clean the state at the end of training - delattr(self, "_past") - - logger.info("\n\nTraining completed. Do not forget to share your model on huggingface.co/models =)\n\n") - if args.load_best_model_at_end and self.state.best_model_checkpoint is not None: - # Wait for everyone to get here so we are sure the model has been saved by process 0. 
- if is_torch_xla_available(): - xm.rendezvous("load_best_model_at_end") - elif args.parallel_mode == ParallelMode.DISTRIBUTED: - dist.barrier() - elif is_sagemaker_mp_enabled(): - smp.barrier() - - self._load_best_model() - - # add remaining tr_loss - self._total_loss_scalar += tr_loss.item() - train_loss = self._total_loss_scalar / self.state.global_step - - metrics = speed_metrics( - "train", - start_time, - num_samples=num_train_samples, - num_steps=self.state.max_steps, - num_tokens=num_train_tokens, - ) - self.store_flos() - metrics["total_flos"] = self.state.total_flos - metrics["train_loss"] = train_loss - - self.is_in_train = False - - self._memory_tracker.stop_and_update_metrics(metrics) - - self.log(metrics) - - run_dir = self._get_output_dir(trial) - checkpoints_sorted = self._sorted_checkpoints(use_mtime=False, output_dir=run_dir) - - # Delete the last checkpoint when save_total_limit=1 if it's different from the best checkpoint and process allowed to save. - if self.args.should_save and self.state.best_model_checkpoint is not None and self.args.save_total_limit == 1: - for checkpoint in checkpoints_sorted: - if not os.path.samefile(checkpoint, self.state.best_model_checkpoint): - logger.info(f"Deleting older checkpoint [{checkpoint}] due to args.save_total_limit") - shutil.rmtree(checkpoint) - - self.control = self.callback_handler.on_train_end(args, self.state, self.control) - - # Wait for the checkpoint to be uploaded. - self._finish_current_push() - - # After training we make sure to retrieve back the original forward pass method - # for the embedding layer by removing the forward post hook. - if self.neftune_noise_alpha is not None: - self._deactivate_neftune(self.model) - - return TrainOutput(self.state.global_step, train_loss, metrics) - - def compute_distillation_loss(self, inputs, student_outputs): - with torch.no_grad(): - teacher_outputs = self.teacher(**inputs) - teacher_logits = teacher_outputs.logits - student_logits = student_outputs.logits - temperature = self.args.distillation_temperature - return F.kl_div( - input=F.log_softmax(student_logits / temperature, dim=-1), - target=F.softmax(teacher_logits / temperature, dim=-1), - reduction="batchmean", - ) * (temperature**2) - - def compute_loss(self, model, inputs, return_outputs=False): - if self.teacher is None: - retval = super().compute_loss(model, inputs, return_outputs) - - if return_outputs is True: - loss, outputs = retval - else: - loss = retval - else: - task_loss, outputs = super().compute_loss(model, inputs, return_outputs=True) - if self.args.n_gpu > 1: - task_loss = task_loss.mean() - distillation_loss = self.compute_distillation_loss(inputs, outputs) - loss = (1 - self.args.distillation_weight) * task_loss + self.args.distillation_weight * distillation_loss - - if model.training: - self.compression_metrics["task_loss"] = task_loss.item() - self.compression_metrics["distillation_loss"] = distillation_loss.item() - - if self.compression_controller is not None: - compression_loss = self.compression_controller.loss() - loss += compression_loss - if model.training: - self.compression_metrics["compression_loss"] = compression_loss.item() - - return (loss, outputs) if return_outputs else loss - - def _maybe_log_save_evaluate(self, tr_loss, model, trial, epoch, ignore_keys_for_eval): - if self.control.should_log: - if is_torch_xla_available(): - xm.mark_step() - - logs: Dict[str, float] = {} - - # all_gather + mean() to get average loss over all processes - tr_loss_scalar = 
self._nested_gather(tr_loss).mean().item() - - # reset tr_loss to zero - tr_loss -= tr_loss - - logs["loss"] = round(tr_loss_scalar / (self.state.global_step - self._globalstep_last_logged), 4) - logs["learning_rate"] = self._get_learning_rate() - - if model.training: - for key, value in self.compression_metrics.items(): - logs[key] = value - - if self.compression_controller is not None: - compression_stats = self.compression_controller.statistics() - for key, value in prepare_for_tensorboard(compression_stats).items(): - logs["compression/{0}".format(key)] = value - - self._total_loss_scalar += tr_loss_scalar - self._globalstep_last_logged = self.state.global_step - self.store_flos() - - self.log(logs) - - metrics = None - if self.control.should_evaluate: - if isinstance(self.eval_dataset, dict): - for eval_dataset_name, eval_dataset in self.eval_dataset.items(): - metrics = self.evaluate( - eval_dataset=eval_dataset, - ignore_keys=ignore_keys_for_eval, - metric_key_prefix=f"eval_{eval_dataset_name}", - ) - else: - metrics = self.evaluate(ignore_keys=ignore_keys_for_eval) - self._report_to_hp_search(trial, self.state.global_step, metrics) - - if self.control.should_save: - self._save_checkpoint(model, trial, metrics=metrics) - self.control = self.callback_handler.on_save(self.args, self.state, self.control) - - def _save(self, output_dir: Optional[str] = None, state_dict=None): - # If we are executing this function, we are the process zero, so we don't check for that. - output_dir = output_dir if output_dir is not None else self.args.output_dir - os.makedirs(output_dir, exist_ok=True) - logger.info(f"Saving model checkpoint to {output_dir}") - # Save a trained model and configuration using `save_pretrained()`. - # They can then be reloaded using `from_pretrained()` - - if not isinstance(self.model, PreTrainedModel): - unwrapped_model = unwrap_model(self.model) - is_pretrained_model = isinstance(unwrapped_model, PreTrainedModel) - if state_dict is None: - state_dict = self.model.state_dict() - if is_pretrained_model: - unwrapped_model.save_pretrained(output_dir, state_dict=state_dict, safe_serialization=False) - else: - logger.info("Trainer.model is not a `PreTrainedModel`, only saving its state dict.") - torch.save(state_dict, os.path.join(output_dir, WEIGHTS_NAME)) - else: - self.model.save_pretrained(output_dir, state_dict=state_dict, safe_serialization=False) - - if self.tokenizer is not None: - self.tokenizer.save_pretrained(output_dir) - - # Good practice: save your training arguments together with the trained model - torch.save(self.args, os.path.join(output_dir, TRAINING_ARGS_NAME)) - - if self.compression_controller is not None: - # Save the configuration containing all the parameters related to quantization - self.ov_config.save_pretrained(output_dir) - - # Export the compressed model to the ONNX format - output_path = os.path.join(output_dir, OV_XML_FILE_NAME) - self.compression_controller.prepare_for_export() - model_type = self.model.config.model_type.replace("_", "-") - exporter_config_class = TasksManager.get_exporter_config_constructor( - exporter="onnx", - model=self.model, - task=self.task, - model_type=model_type, - ) - - if self.task == "text-generation": - onnx_config = exporter_config_class(self.model.config, use_past=self.model.config.use_cache) - else: - onnx_config = exporter_config_class(self.model.config) - - num_parameters = self.model.num_parameters() - save_as_external_data = use_external_data_format(num_parameters) or self.ov_config.save_onnx_model - f = 
io.BytesIO() if not save_as_external_data else os.path.join(output_dir, ONNX_WEIGHTS_NAME) - - opset = min(onnx_config.DEFAULT_ONNX_OPSET, MAX_ONNX_OPSET) - opset = opset if not self.ov_config.save_onnx_model else max(opset, MIN_ONNX_QDQ_OPSET) - _onnx_export_nncf_model(self.model, onnx_config, f, opset) - ov_model = core.read_model(f) if save_as_external_data else core.read_model(f.getvalue(), b"") - - # Prune IR if structured pruning is conducted on the model - if self._should_apply_pruning_transform(): - try: - # OpenVINO IR pruning requires static-shaped input - ov_model = self._reshape_ir(ov_model, static_shape=True) - apply_moc_transformations(ov_model, cf=False) - if self._get_compression_controller_by_cls(QuantizationController) is not None: - compress_quantize_weights_transformation(ov_model) - apply_pruning_transformation(ov_model) - apply_fused_names_cleanup(ov_model) - # Reshape back to dynamic shape IR - ov_model = self._reshape_ir(ov_model, static_shape=False) - except Exception as err: - onnx_path = Path(output_dir, ONNX_WEIGHTS_NAME).resolve() - if not save_as_external_data: - onnx_path.write_bytes(f.getvalue()) - logger.error( - f"Error encountered during OpenVINO IR pruning: {err}. {onnx_path} is dumped for debugging." - ) - raise - else: - if self._get_compression_controller_by_cls(QuantizationController) is not None: - compress_quantize_weights_transformation(ov_model) - - # Serialize IR xml and bin - save_model(ov_model, output_path, compress_to_fp16=False) - - def _get_compression_controller_by_cls( - self, controller_cls: Type[PTCompressionAlgorithmController] - ) -> Optional[PTCompressionAlgorithmController]: - if isinstance(self.compression_controller, controller_cls): - return self.compression_controller - if isinstance(self.compression_controller, PTCompositeCompressionAlgorithmController): - for child_controller in self.compression_controller.child_ctrls: - if isinstance(child_controller, controller_cls): - return child_controller - return None - - def _should_apply_pruning_transform(self) -> bool: - movement_controller = self._get_compression_controller_by_cls(MovementSparsityController) - return ( - movement_controller is not None - and movement_controller.scheduler.enable_structured_masking - and movement_controller.scheduler.current_stage == MovementSchedulerStage.POST_WARMUP - ) - - def _reshape_ir(self, ov_model: openvino.runtime.Model, static_shape: bool) -> openvino.runtime.Model: - new_input_cfg = {} - input_name_vs_shape = {item["keyword"]: item["sample_size"] for item in self.ov_config.input_info} - for input_ in ov_model.inputs: - if static_shape is True: - new_input_cfg[input_.any_name] = PartialShape( - [1] + input_name_vs_shape[input_.any_name][1:] - ) # use batch size of 1 for static shape IR - else: - new_input_cfg[input_.any_name] = PartialShape([-1] * len(input_.partial_shape)) - ov_model.reshape(new_input_cfg) - return ov_model - - def _set_task(self): - if self.task is None: - raise ValueError("The model task defining the model topology needs to be specified for the ONNX export.") - self.task = _TASK_ALIASES.get(self.task, self.task) - - def _is_pruning_enabled(compression: Union[Dict, List, None]): - if isinstance(compression, dict) and compression["algorithm"] == "movement_pruning": - return True - if isinstance(compression, list): - for algo_config in compression: - if algo_config["algorithm"] == "movement_pruning": - return True - return False diff --git a/optimum/intel/openvino/training_args.py b/optimum/intel/openvino/training_args.py 
deleted file mode 100644 index 4928d67717..0000000000 --- a/optimum/intel/openvino/training_args.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright 2024 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from dataclasses import dataclass, field - -from transformers import TrainingArguments - - -@dataclass -class OVTrainingArguments(TrainingArguments): - """ - Arguments pertaining to OpenVINO/NNCF-enabled training flow - """ - - distillation_weight: float = field( - default=0.5, metadata={"help": "weight of distillation loss, value between 0.0 and 1.0"} - ) - distillation_temperature: float = field(default=2.0, metadata={"help": "temperature of distillation."}) - - def __post_init__(self): - super().__post_init__() - if self.distillation_weight < 0.0 or self.distillation_weight > 1.0: - raise ValueError("distillation_weight must be between 0.0 and 1.0") - - if self.distillation_temperature < 1: - raise ValueError("distillation_temperature must be >= 1.0") diff --git a/optimum/intel/utils/dummy_openvino_and_nncf_objects.py b/optimum/intel/utils/dummy_openvino_and_nncf_objects.py index 4b96d28589..c7687ce7ff 100644 --- a/optimum/intel/utils/dummy_openvino_and_nncf_objects.py +++ b/optimum/intel/utils/dummy_openvino_and_nncf_objects.py @@ -15,28 +15,6 @@ from .import_utils import DummyObject, requires_backends -class OVTrainingArguments(metaclass=DummyObject): - _backends = ["openvino", "nncf"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["openvino", "nncf"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["openvino", "nncf"]) - - -class OVTrainer(metaclass=DummyObject): - _backends = ["openvino", "nncf", "accelerate"] - - def __init__(self, *args, **kwargs): - requires_backends(self, ["openvino", "nncf", "accelerate"]) - - @classmethod - def from_pretrained(cls, *args, **kwargs): - requires_backends(cls, ["openvino", "nncf", "accelerate"]) - - class OVQuantizer(metaclass=DummyObject): _backends = ["openvino", "nncf"] diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py index 0e3e0212f2..6cf926d3c6 100644 --- a/tests/openvino/test_quantization.py +++ b/tests/openvino/test_quantization.py @@ -61,7 +61,6 @@ OVStableDiffusion3Pipeline, OVQuantizer, OVSanaPipeline, - OVTrainer, OVQuantizationConfig, OVMixedQuantizationConfig, OVWeightQuantizationConfig, @@ -1206,54 +1205,6 @@ def preprocess_function(examples, tokenizer): self.assertEqual(ov_config.quantization_config.to_dict(), loaded_config.quantization_config.to_dict()) -class OVTrainerTest(unittest.TestCase): - SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS = (("albert", 61, 39),) - - @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS) - @unittest.skip(reason="Not supported on hosts running pre-commit jobs since OpenVINO 2025.0 release.") - def test_aware_training_quantization(self, model_name, expected_fake_nodes, expected_int8_nodes): - model_id = MODEL_NAMES[model_name] -
model = AutoModelForSequenceClassification.from_pretrained(model_id, attn_implementation="eager") - tokenizer = AutoTokenizer.from_pretrained(model_id) - ov_config = OVConfig() - dataset = load_dataset("glue", "sst2") - dataset = dataset.map( - lambda examples: tokenizer(examples["sentence"], padding="max_length", max_length=128), batched=True - ) - train_dataset = dataset["train"].select(range(16)) - eval_dataset = dataset["validation"].select(range(16)) - metric = evaluate.load("glue", "sst2") - - def compute_metrics(p): - return metric.compute(predictions=np.argmax(p.predictions, axis=1), references=p.label_ids) - - with TemporaryDirectory() as tmp_dir: - trainer = OVTrainer( - model=model, - ov_config=ov_config, - task="sequence-classification", - args=TrainingArguments(tmp_dir, num_train_epochs=1.0, do_train=True, do_eval=True), - train_dataset=train_dataset, - eval_dataset=eval_dataset, - compute_metrics=compute_metrics, - tokenizer=tokenizer, - data_collator=default_data_collator, - ) - self.assertEqual(trainer.task, "text-classification") - trainer.train() - trainer.evaluate() - trainer.save_model() - - model = OVModelForSequenceClassification.from_pretrained(tmp_dir) - num_fake_nodes, num_weight_nodes = get_num_quantized_nodes(model) - self.assertEqual(expected_fake_nodes, num_fake_nodes) - self.assertEqual(expected_int8_nodes, num_weight_nodes["int8"]) - - tokens = tokenizer("This is a sample input", return_tensors="pt") - outputs = model(**tokens) - self.assertTrue("logits" in outputs) - - class OVQuantizationConfigTest(unittest.TestCase): QUANTIZATION_CONFIGS = ( (None,), diff --git a/tests/openvino/test_training.py b/tests/openvino/test_training.py deleted file mode 100644 index 76f7ec3197..0000000000 --- a/tests/openvino/test_training.py +++ /dev/null @@ -1,889 +0,0 @@ -# Copyright 2023 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import random -import re -import shutil -import tempfile -import unittest -from abc import ABC, abstractmethod -from copy import deepcopy -from dataclasses import dataclass, field -from functools import partial -from math import ceil -from pathlib import Path -from typing import Dict, List, Optional, Union - -import cpuinfo -import evaluate -import numpy as np -import pytest -import torch -from datasets import load_dataset -from nncf.experimental.torch.sparsity.movement.algo import MovementSparsityController -from parameterized import parameterized -from transformers import ( - AutoFeatureExtractor, - AutoImageProcessor, - AutoModelForAudioClassification, - AutoModelForImageClassification, - AutoModelForSequenceClassification, - AutoTokenizer, - default_data_collator, -) -from transformers.testing_utils import slow -from transformers.trainer_utils import EvalPrediction, TrainOutput -from transformers.utils import WEIGHTS_NAME -from utils_tests import MODEL_NAMES - -from optimum.intel.openvino import OVTrainingArguments -from optimum.intel.openvino.configuration import OVConfig -from optimum.intel.openvino.modeling import ( - OVModel, - OVModelForAudioClassification, - OVModelForImageClassification, - OVModelForSequenceClassification, -) -from optimum.intel.openvino.trainer import DEFAULT_QUANTIZATION_CONFIG, OVTrainer -from optimum.intel.openvino.utils import OV_XML_FILE_NAME -from optimum.intel.utils.import_utils import is_transformers_version - - -F32_CONFIG = {"INFERENCE_PRECISION_HINT": "f32"} - - -def initialize_movement_sparsifier_parameters_by_sparsity( - movement_controller: MovementSparsityController, - sparsity: float = 0.95, - seed: int = 42, - negative_value: float = -10.0, - positive_value: float = 10.0, -): - for minfo in movement_controller.sparsified_module_info: - operand = minfo.operand - device = operand.weight_importance.device - generator = torch.Generator(device=device) - generator.manual_seed(seed) - with torch.no_grad(): - weight_rand_idx = torch.randperm(operand.weight_importance.numel(), generator=generator, device=device) - num_negatives = int(operand.weight_importance.numel() * sparsity) - num_positives = operand.weight_importance.numel() - num_negatives - data = [negative_value] * num_negatives + [positive_value] * num_positives - weight_init_tensor = torch.FloatTensor(data, device=device)[weight_rand_idx].reshape_as( - operand.weight_importance - ) - operand.weight_importance.copy_(weight_init_tensor) - if operand.prune_bias: - bias_init_tensor = torch.ones_like(operand.bias_importance) * negative_value - operand.bias_importance.copy_(bias_init_tensor) - - -def is_windows(): - return os.name == "nt" - - -def is_avx_vnni_supported() -> bool: - return any(re.search("avx.*vnni", flag.lower()) is not None for flag in cpuinfo.get_cpu_info()["flags"]) - - -@dataclass -class OVTrainerTestDescriptor: - model_id: str - teacher_model_id: Optional[str] = None - nncf_compression_config: Union[List[Dict], Dict, None] = None - expected_fake_quantize: int = 0 - expected_int8: int = 0 - expected_binary_masks: int = 0 - compression_metrics: List[str] = field(default_factory=list) - - -class OVTrainerBaseTrainingTest(unittest.TestCase, ABC): - ovmodel_cls = OVModel - task = "unknown" - - def setUp(self): - torch.manual_seed(42) - random.seed(42) - np.random.seed(42) - self.output_dir = tempfile.mkdtemp() - - def run_ovtrainer_training_checks(self, desc: OVTrainerTestDescriptor): - self.prepare_model_and_dataset(desc) - self.args = self.get_training_args() - 
self.ov_config = self.get_ov_config(desc.nncf_compression_config) - self.trainer = self.get_ov_trainer() - - trainer = self.trainer - self.override_movement_sparsifier_initialization(trainer) - - # check evaluation can work even before training - metrics = trainer.evaluate() - self.check_eval_metrics(metrics) - - # check training & saving - train_output = trainer.train() - self.check_train_output(train_output) - self.check_compression_metrics(desc.compression_metrics) - - # check model can be saved - trainer.save_model() - self.check_model_saving() - - # check saved ovmodel IR and output - ovmodel = self.get_ov_model() - # dynamic batch size for tiny-swin does not work in OpenVINO 2023.0 - is_swin = "swin" in desc.model_id.lower() - self.check_if_ovmodel_is_dynamic(ovmodel, expected_result=not is_swin) - self.check_ovmodel_output_equals_torch_output(ovmodel, trainer.model) - self.check_ovmodel_reshaping(ovmodel) - - # check ovmodel quantization ops - self.check_quantization_op_number(ovmodel, desc.expected_fake_quantize, desc.expected_int8) - - # check binary mask in sparsity/pruning algorithms - self.check_binary_mask_number(desc.expected_binary_masks) - - @abstractmethod - def prepare_model_and_dataset(self, desc: OVTrainerTestDescriptor): - pass - - @abstractmethod - def check_ovmodel_output_equals_torch_output(self, ovmodel, torch_model): - pass - - @abstractmethod - def check_ovmodel_reshaping(self, ovmodel: OVModel): - pass - - def compute_metric(self, predictions: EvalPrediction): - metric = evaluate.load("accuracy") - return metric.compute(predictions=np.argmax(predictions.predictions, axis=1), references=predictions.label_ids) - - def check_eval_metrics(self, metrics: Dict[str, float]): - for eval_metric in ["loss", "accuracy"]: - self.assertIn(f"eval_{eval_metric}", metrics) - - def check_train_output(self, train_output: TrainOutput): - self.assertIsInstance(train_output, TrainOutput) - total_steps = ( - ceil(len(self.train_dataset) / self.args.per_device_train_batch_size) * self.args.num_train_epochs - ) - self.assertEqual(train_output.global_step, total_steps) - - def check_model_saving(self): - for file_name in [WEIGHTS_NAME, OV_XML_FILE_NAME, OV_XML_FILE_NAME.replace(".xml", ".bin")]: - self.assertTrue(Path(self.output_dir, file_name).is_file()) - - def check_compression_metrics(self, expected_compression_metrics: List[str]): - self.assertEqual(sorted(expected_compression_metrics), sorted(self.trainer.compression_metrics.keys())) - - def check_quantization_op_number(self, ovmodel: OVModel, expected_fake_quantize: int, expected_int8: int): - num_fake_quantize = 0 - num_int8 = 0 - for elem in ovmodel.model.get_ops(): - if "FakeQuantize" in elem.name: - num_fake_quantize += 1 - for i in range(elem.get_output_size()): - if "8" in elem.get_output_element_type(i).get_type_name(): - num_int8 += 1 - self.assertEqual(expected_fake_quantize, num_fake_quantize) - self.assertEqual(expected_int8, num_int8) - - def check_binary_mask_number(self, expected_binary_masks: int): - state_dict = torch.load(Path(self.output_dir, WEIGHTS_NAME), map_location="cpu") - num_binary_masks = sum(key.endswith("_binary_mask") for key in state_dict) - self.assertEqual(expected_binary_masks, num_binary_masks) - - def check_if_ovmodel_is_dynamic(self, ovmodel: OVModel, expected_result: bool = True): - if expected_result is True: - self.assertTrue(ovmodel.model.is_dynamic()) - else: - self.assertFalse(ovmodel.model.is_dynamic()) - - def override_movement_sparsifier_initialization(self, trainer: OVTrainer,
sparsity=0.95): - movement_controller = trainer._get_compression_controller_by_cls( - MovementSparsityController - ) # pylint: disable=protected-access - if movement_controller is not None: - # make sure the binary masks will have many zeros - initialize_movement_sparsifier_parameters_by_sparsity(movement_controller, sparsity=sparsity) - - def get_training_args(self, train_batch_size=4, eval_batch_size=1, num_train_epochs=3) -> OVTrainingArguments: - args = OVTrainingArguments( - output_dir=self.output_dir, - num_train_epochs=num_train_epochs, - learning_rate=1e-7, - do_train=True, - do_eval=True, - logging_steps=1, - per_device_train_batch_size=train_batch_size, - per_device_eval_batch_size=eval_batch_size, - no_cuda=True, - full_determinism=True, - remove_unused_columns=False, - ) - return args - - def get_ov_trainer(self) -> OVTrainer: - return OVTrainer( - model=self.model, - teacher_model=self.teacher_model, - args=self.args, - ov_config=self.ov_config, - task=self.task, - train_dataset=self.train_dataset, - eval_dataset=self.eval_dataset, - tokenizer=self.tokenizer, - compute_metrics=self.compute_metric, - data_collator=self.data_collator, - ) - - def get_ov_config(self, nncf_compression_config: Union[List[Dict], Dict, None]) -> OVConfig: - ov_config = OVConfig() - if not is_avx_vnni_supported(): - # "overflow_fix" should be enabled in quantization, otherwise accuracy degradation may be seen - nncf_compression_config = self.get_nncf_config_with_overflow_fix_override( - nncf_compression_config, "enable" - ) - ov_config.compression = nncf_compression_config - return ov_config - - def get_ov_model(self, model_id=None) -> OVModel: - model_id = model_id or self.output_dir - return self.ovmodel_cls.from_pretrained(model_id, ov_config=F32_CONFIG) - - def get_nncf_config_with_overflow_fix_override( - self, nncf_compression_config: Union[List[Dict], Dict, None], value: str = "enable" - ): - overrided_config = deepcopy(nncf_compression_config) - quantization_config = None - if isinstance(overrided_config, list): - for config in overrided_config: - if config["algorithm"] == "quantization": - quantization_config = config - break - elif isinstance(overrided_config, dict): - if overrided_config["algorithm"] == "quantization": - quantization_config = overrided_config - if quantization_config is not None: - quantization_config["overflow_fix"] = value - return overrided_config - - def tearDown(self): - shutil.rmtree(self.output_dir) - - -QUANTIZATION_CONFIG_FOR_BERT = deepcopy(DEFAULT_QUANTIZATION_CONFIG) -QUANTIZATION_CONFIG_FOR_BERT["ignored_scopes"].append("{re}.*scaled_dot_product_attention_0") - -CUSTOMIZED_QUANTIZATION_CONFIG_FOR_BERT = deepcopy(QUANTIZATION_CONFIG_FOR_BERT) -CUSTOMIZED_QUANTIZATION_CONFIG_FOR_BERT.update( - { - "overflow_fix": "disable", - "initializer": { - "range": { - "num_init_samples": 16, - "type": "percentile", - "params": {"min_percentile": 0.01, "max_percentile": 99.99}, - }, - "batchnorm_adaptation": {"num_bn_adaptation_samples": 4}, - }, - "scope_overrides": {"activations": {"{re}.*matmul_0": {"mode": "asymmetric"}}}, - } -) - -STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT = { - "algorithm": "movement_sparsity", - "params": { - "warmup_start_epoch": 1, - "warmup_end_epoch": 2, - "importance_regularization_factor": 1.0, - "enable_structured_masking": True, - }, - "sparse_structure_by_scopes": [ - {"mode": "block", "sparse_factors": [8, 8], "target_scopes": "{re}.*BertAttention.*"}, - {"mode": "per_dim", "axis": 0, "target_scopes": "{re}.*BertIntermediate.*"}, - {"mode": "per_dim", "axis": 1, "target_scopes": "{re}.*BertOutput.*"}, - ], - "ignored_scopes": ["{re}.*NNCFEmbedding", "{re}.*LayerNorm.*", "{re}.*pooler.*", "{re}.*classifier.*"], -} - -UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT = deepcopy(STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT) -UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT["params"]["enable_structured_masking"] = False - -# TODO: Uncomment failed tests after NNCF 2.8.1 patch release -OVTRAINER_TEXT_CLASSIFICATION_TEST_DESCRIPTORS = { - "distillation": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["bert"], - teacher_model_id=MODEL_NAMES["bert"], - nncf_compression_config=[], - compression_metrics=["compression_loss", "distillation_loss", "task_loss"], - ), - "default_quantization": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["bert"], - nncf_compression_config=QUANTIZATION_CONFIG_FOR_BERT, - expected_fake_quantize=22, - expected_int8=32, - compression_metrics=["compression_loss"], - ), - "distillation,default_quantization": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["bert"], - teacher_model_id=MODEL_NAMES["bert"], - nncf_compression_config=QUANTIZATION_CONFIG_FOR_BERT, - expected_fake_quantize=22, - expected_int8=32, - compression_metrics=["compression_loss", "distillation_loss", "task_loss"], - ), - "customized_quantization": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["bert"], - nncf_compression_config=QUANTIZATION_CONFIG_FOR_BERT, - expected_fake_quantize=22, - expected_int8=32, - compression_metrics=["compression_loss"], - ), - "distillation,customized_quantization": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["bert"], - teacher_model_id=MODEL_NAMES["bert"], - nncf_compression_config=CUSTOMIZED_QUANTIZATION_CONFIG_FOR_BERT, - expected_fake_quantize=22, - expected_int8=32, - compression_metrics=["compression_loss", "distillation_loss", "task_loss"], - ), - "structured_movement_sparsity": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["bert"], - nncf_compression_config=STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, - expected_binary_masks=60, - compression_metrics=["compression_loss"], - ), - "distillation,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["bert"], - teacher_model_id=MODEL_NAMES["bert"], - nncf_compression_config=STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, - expected_binary_masks=60, - compression_metrics=["compression_loss", "distillation_loss", "task_loss"], - ), - "default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["bert"], - nncf_compression_config=[QUANTIZATION_CONFIG_FOR_BERT, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], - expected_fake_quantize=22, - expected_int8=32, - expected_binary_masks=60, - compression_metrics=["compression_loss"], - ), - "customized_quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["bert"], - nncf_compression_config=[ - CUSTOMIZED_QUANTIZATION_CONFIG_FOR_BERT, - STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, - ], - expected_fake_quantize=22, - expected_int8=32, - expected_binary_masks=60, - compression_metrics=["compression_loss"], - ), - "distillation,default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["bert"], - teacher_model_id=MODEL_NAMES["bert"], - nncf_compression_config=[QUANTIZATION_CONFIG_FOR_BERT, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], - expected_fake_quantize=22, - expected_int8=32, - expected_binary_masks=60, - compression_metrics=["compression_loss", "distillation_loss",
"task_loss"], - ), - "distillation,customized_quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["bert"], - teacher_model_id=MODEL_NAMES["bert"], - nncf_compression_config=[ - CUSTOMIZED_QUANTIZATION_CONFIG_FOR_BERT, - STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, - ], - expected_fake_quantize=22, - expected_int8=32, - expected_binary_masks=60, - compression_metrics=["compression_loss", "distillation_loss", "task_loss"], - ), - "unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["bert"], - nncf_compression_config=UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, - expected_binary_masks=60, - compression_metrics=["compression_loss"], - ), - "distillation,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["bert"], - teacher_model_id=MODEL_NAMES["bert"], - nncf_compression_config=UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, - expected_binary_masks=60, - compression_metrics=["compression_loss", "distillation_loss", "task_loss"], - ), - "default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["bert"], - nncf_compression_config=[QUANTIZATION_CONFIG_FOR_BERT, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], - expected_fake_quantize=22, - expected_int8=32, - expected_binary_masks=60, - compression_metrics=["compression_loss"], - ), - "customized_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["bert"], - nncf_compression_config=[ - CUSTOMIZED_QUANTIZATION_CONFIG_FOR_BERT, - UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, - ], - expected_fake_quantize=22, - expected_int8=32, - expected_binary_masks=60, - compression_metrics=["compression_loss"], - ), - "distillation,default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["bert"], - teacher_model_id=MODEL_NAMES["bert"], - nncf_compression_config=[QUANTIZATION_CONFIG_FOR_BERT, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT], - expected_fake_quantize=22, - expected_int8=32, - expected_binary_masks=60, - compression_metrics=["compression_loss", "distillation_loss", "task_loss"], - ), - "distillation,customized_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["bert"], - teacher_model_id=MODEL_NAMES["bert"], - nncf_compression_config=[ - CUSTOMIZED_QUANTIZATION_CONFIG_FOR_BERT, - UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_BERT, - ], - expected_fake_quantize=22, - expected_int8=32, - expected_binary_masks=60, - compression_metrics=["compression_loss", "distillation_loss", "task_loss"], - ), -} - - -class OVTrainerTextClassificationTrainingTest(OVTrainerBaseTrainingTest): - ovmodel_cls = OVModelForSequenceClassification - task = "sequence-classification" - - @parameterized.expand(OVTRAINER_TEXT_CLASSIFICATION_TEST_DESCRIPTORS.items()) - @unittest.skipIf( - is_transformers_version("<", "4.41") or is_transformers_version(">=", "4.46"), - reason="Mismatch in expected fake quantized op and incompatible with transformers v4.46", - ) - def test_training(self, _, desc: OVTrainerTestDescriptor): - self.run_ovtrainer_training_checks(desc) - - def prepare_model_and_dataset(self, desc: OVTrainerTestDescriptor): - self.dataset = load_dataset("glue", "sst2") - self.num_labels = len(self.dataset["train"].features["label"].names) - - self.tokenizer = AutoTokenizer.from_pretrained(desc.model_id) - self.model = AutoModelForSequenceClassification.from_pretrained(desc.model_id, 
num_labels=self.num_labels) - self.teacher_model = None - if desc.teacher_model_id: - self.teacher_model = AutoModelForSequenceClassification.from_pretrained( - desc.teacher_model_id, num_labels=self.num_labels - ) - - def data_transform(examples, max_length: int = 128): - result = self.tokenizer( - examples["sentence"], - padding="max_length", - max_length=max_length, - truncation=True, - return_tensors="pt", - ) - result["labels"] = examples["label"] - return result - - self.data_transform = data_transform - self.train_dataset = self.dataset["train"].select(range(8)) - self.eval_dataset = self.dataset["validation"].select(range(4)) - self.train_dataset.set_transform(data_transform) - self.eval_dataset.set_transform(data_transform) - self.data_collator = None - - def check_ovmodel_output_equals_torch_output(self, ovmodel, torch_model): - torch_model = torch_model.eval() - for batch_size in [1, 4]: - self.trainer.args = self.get_training_args(eval_batch_size=batch_size) - self.trainer.create_accelerator_and_postprocess() - for seq_length in [16, 89, 128]: - dataset = deepcopy(self.eval_dataset) - dataset.set_transform(partial(self.data_transform, max_length=seq_length)) - for inputs in self.trainer.get_eval_dataloader(dataset): - self.assertSequenceEqual(inputs["input_ids"].shape, [batch_size, seq_length]) - ovmodel_outputs = ovmodel(**inputs) - self.assertIn("logits", ovmodel_outputs) - ovmodel_logits = ovmodel_outputs.logits - with torch.no_grad(): - torch_logits = torch_model(**inputs).logits - torch.testing.assert_close( - ovmodel_logits, - torch_logits, - atol=1e-3, - rtol=1e-4, - ) - - def check_ovmodel_reshaping(self, ovmodel: OVModel): - self.check_if_ovmodel_is_dynamic(ovmodel, True) - for batch_size in [1, 4]: - for seq_length in [16, 89, 128]: - static_shape = [batch_size, seq_length] - ovmodel.reshape(*static_shape) - self.check_if_ovmodel_is_dynamic(ovmodel, False) - for input_ in ovmodel.model.inputs: - self.assertSequenceEqual(list(input_.get_shape()), static_shape) - ovmodel.reshape(-1, -1) - self.check_if_ovmodel_is_dynamic(ovmodel, True) - - -STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN = { - "algorithm": "movement_sparsity", - "params": { - "warmup_start_epoch": 1, - "warmup_end_epoch": 2, - "importance_regularization_factor": 1.0, - "enable_structured_masking": True, - }, - "sparse_structure_by_scopes": [ - {"mode": "block", "sparse_factors": [4, 4], "target_scopes": "{re}.*SwinAttention.*"}, - {"mode": "per_dim", "axis": 0, "target_scopes": "{re}.*SwinIntermediate.*"}, - {"mode": "per_dim", "axis": 1, "target_scopes": "{re}.*SwinOutput.*"}, - ], - "ignored_scopes": ["{re}.*PatchEmbed.*", "{re}.*PatchMerging.*", "{re}.*classifier.*", "{re}.*LayerNorm.*"], -} -UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN = deepcopy(STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN) -UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN["params"]["enable_structured_masking"] = False -OVTRAINER_IMAGE_CLASSIFICATION_TEST_DESCRIPTORS = { - "default_quantization": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["swin"], - nncf_compression_config=DEFAULT_QUANTIZATION_CONFIG, - expected_fake_quantize=35, - expected_int8=27, - compression_metrics=["compression_loss"], - ), - "structured_movement_sparsity": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["swin"], - nncf_compression_config=STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, - expected_binary_masks=48, - compression_metrics=["compression_loss"], - ), - "unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["swin"], - 
nncf_compression_config=UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, - expected_binary_masks=48, - compression_metrics=["compression_loss"], - ), - "default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["swin"], - nncf_compression_config=[STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG], - expected_fake_quantize=35, - expected_int8=27, - expected_binary_masks=48, - compression_metrics=["compression_loss"], - ), - "default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["swin"], - nncf_compression_config=[UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG], - expected_fake_quantize=35, - expected_int8=27, - expected_binary_masks=48, - compression_metrics=["compression_loss"], - ), - "distillation,default_quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["swin"], - teacher_model_id=MODEL_NAMES["swin"], - nncf_compression_config=[STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG], - expected_fake_quantize=35, - expected_int8=27, - expected_binary_masks=48, - compression_metrics=["compression_loss", "distillation_loss", "task_loss"], - ), - "distillation,default_quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["swin"], - teacher_model_id=MODEL_NAMES["swin"], - nncf_compression_config=[UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_SWIN, DEFAULT_QUANTIZATION_CONFIG], - expected_fake_quantize=35, - expected_int8=27, - expected_binary_masks=48, - compression_metrics=["compression_loss", "distillation_loss", "task_loss"], - ), -} -# TODO : can be moved to MODEL_NAMES["swin-window"] after transformers v4.42.3 - - -@unittest.skipIf(is_windows(), reason="Fails on windows") -class OVTrainerImageClassificationTrainingTest(OVTrainerBaseTrainingTest): - ovmodel_cls = OVModelForImageClassification - task = "image-classification" - - @parameterized.expand(OVTRAINER_IMAGE_CLASSIFICATION_TEST_DESCRIPTORS.items()) - @pytest.mark.run_slow - @slow - @unittest.skipIf( - is_transformers_version("<", "4.41") or is_transformers_version(">=", "4.46"), - reason="Mismatch in expected fake quantized op and incompatible with transformers v4.46", - ) - def test_training(self, _, desc: OVTrainerTestDescriptor): - self.run_ovtrainer_training_checks(desc) - - def prepare_model_and_dataset(self, desc: OVTrainerTestDescriptor): - self.dataset = load_dataset("hf-internal-testing/cats_vs_dogs_sample", trust_remote_code=True) - self.num_labels = len(self.dataset["train"].features["labels"].names) - - self.feature_extractor = AutoImageProcessor.from_pretrained(desc.model_id) - self.tokenizer = self.feature_extractor - self.model = AutoModelForImageClassification.from_pretrained(desc.model_id, num_labels=self.num_labels) - self.teacher_model = None - if desc.teacher_model_id: - self.teacher_model = AutoModelForImageClassification.from_pretrained( - desc.teacher_model_id, num_labels=self.num_labels - ) - - def data_transform(examples, size=None): - result = self.feature_extractor(examples["image"], size=size, return_tensors="pt") - result["labels"] = examples["labels"] - return result - - self.data_transform = data_transform - self.dataset.set_transform(data_transform) - raw_dataset = self.dataset["train"].shuffle(seed=42) - self.train_dataset = raw_dataset.select(range(8)) - self.eval_dataset = raw_dataset.select(range(8, 12)) - self.data_collator = default_data_collator - 
self.is_swin = "swin" in desc.model_id.lower() - - def get_ov_model(self, model_id=None) -> OVModel: - # image models, e.g. swin, may require a determined image size - model_id = model_id or self.output_dir - size = (self.feature_extractor.size["height"], self.feature_extractor.size["width"]) - ovmodel = self.ovmodel_cls.from_pretrained(model_id, compile=False, ov_config=F32_CONFIG) - # dynamic batch size for tiny-swin does not work in OpenVINO 2023.0 - batch_size = 1 if self.is_swin else -1 - ovmodel.reshape(batch_size, 3, *size) - ovmodel.compile() - return ovmodel - - def check_ovmodel_output_equals_torch_output(self, ovmodel, torch_model): - torch_model = torch_model.eval() - batch_sizes = [1] if self.is_swin else [1, 4] - for batch_size in batch_sizes: - self.trainer.args = self.get_training_args(eval_batch_size=batch_size) - self.trainer.create_accelerator_and_postprocess() - for inputs in self.trainer.get_eval_dataloader(): - self.assertEqual(inputs["pixel_values"].shape[0], batch_size) - ovmodel_outputs = ovmodel(**inputs) - self.assertIn("logits", ovmodel_outputs) - ovmodel_logits = ovmodel_outputs.logits - with torch.no_grad(): - torch_logits = torch_model(**inputs).logits - torch.testing.assert_close( - ovmodel_logits, - torch_logits, - atol=1e-3, - rtol=1e-4, - ) - - def check_ovmodel_reshaping(self, ovmodel: OVModel): - # dynamic batch size for tiny-swin does not work in OpenVINO 2023.0 - self.check_if_ovmodel_is_dynamic(ovmodel, not self.is_swin) - size = (self.feature_extractor.size["height"], self.feature_extractor.size["width"]) - dynamic_shape = [-1, 3, *size] - for batch_size in [1, 4]: - static_shape = [batch_size] + dynamic_shape[1:] - ovmodel.reshape(*static_shape) - self.check_if_ovmodel_is_dynamic(ovmodel, False) - for input_ in ovmodel.model.inputs: - self.assertSequenceEqual(list(input_.get_shape()), static_shape) - if not self.is_swin: - ovmodel.reshape(*dynamic_shape) - self.check_if_ovmodel_is_dynamic(ovmodel, True) - - -QUANTIZATION_CONFIG_FOR_WAV2VEC2 = { - "algorithm": "quantization", - "quantize_inputs": False, - "preset": "mixed", - "overflow_fix": "enable", - "initializer": { - "range": {"num_init_samples": 10, "type": "mean_min_max"}, - "batchnorm_adaptation": {"num_bn_adaptation_samples": 0}, - }, - "scope_overrides": {"activations": {"{re}.*matmul_0": {"mode": "symmetric"}}}, - "ignored_scopes": ["{re}.*__add___[0-1]", "{re}.*layer_norm_0"], -} - -STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2 = { - "algorithm": "movement_sparsity", - "params": { - "warmup_start_epoch": 1, - "warmup_end_epoch": 2, - "importance_regularization_factor": 0.1, - "enable_structured_masking": True, - }, - "sparse_structure_by_scopes": [ - {"mode": "block", "sparse_factors": [8, 8], "target_scopes": "{re}.*Wav2Vec2Attention.*"}, - {"mode": "per_dim", "axis": 0, "target_scopes": "{re}.*intermediate_dense.*"}, - {"mode": "per_dim", "axis": 1, "target_scopes": "{re}.*output_dense.*"}, - ], - "ignored_scopes": [ - "{re}projector", - "{re}classifier", - "{re}feature_extractor", - "{re}feature_projection", - "{re}pos_conv_embed", - ], -} - -UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2 = deepcopy(STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2) -UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2["params"]["enable_structured_masking"] = False - - -OVTRAINER_AUDIO_CLASSIFICATION_TEST_DESCRIPTORS = { - "quantization": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["wav2vec2-hf"], - nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2], - expected_fake_quantize=40, - 
expected_int8=30, - compression_metrics=["compression_loss"], - ), - "structured_movement_sparsity": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["wav2vec2-hf"], - nncf_compression_config=[STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], - expected_binary_masks=48, - compression_metrics=["compression_loss"], - ), - "unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["wav2vec2-hf"], - nncf_compression_config=[UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], - expected_binary_masks=48, - compression_metrics=["compression_loss"], - ), - "quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["wav2vec2-hf"], - nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], - expected_fake_quantize=40, - expected_int8=30, - expected_binary_masks=48, - compression_metrics=["compression_loss"], - ), - "quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["wav2vec2-hf"], - nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], - expected_fake_quantize=40, - expected_int8=30, - expected_binary_masks=48, - compression_metrics=["compression_loss"], - ), - "distillation,quantization,structured_movement_sparsity": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["wav2vec2-hf"], - teacher_model_id=MODEL_NAMES["wav2vec2-hf"], - nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, STRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], - expected_fake_quantize=40, - expected_int8=30, - expected_binary_masks=48, - compression_metrics=["compression_loss", "distillation_loss", "task_loss"], - ), - "distillation,quantization,unstructured_movement_sparsity": OVTrainerTestDescriptor( - model_id=MODEL_NAMES["wav2vec2-hf"], - teacher_model_id=MODEL_NAMES["wav2vec2-hf"], - nncf_compression_config=[QUANTIZATION_CONFIG_FOR_WAV2VEC2, UNSTRUCTURED_MOVEMENT_SPARSITY_CONFIG_FOR_WAV2VEC2], - expected_fake_quantize=40, - expected_int8=30, - expected_binary_masks=48, - compression_metrics=["compression_loss", "distillation_loss", "task_loss"], - ), -} - - -@unittest.skipIf(is_windows(), reason="Fails on windows") -class OVTrainerAudioClassificationTrainingTest(OVTrainerBaseTrainingTest): - ovmodel_cls = OVModelForAudioClassification - task = "audio-classification" - - @parameterized.expand(OVTRAINER_AUDIO_CLASSIFICATION_TEST_DESCRIPTORS.items()) - @pytest.mark.run_slow - @slow - @unittest.skipIf( - is_transformers_version(">=", "4.46"), reason="OVTrainer is not compatible with transformers>=v4.46" - ) - def test_training(self, _, desc: OVTrainerTestDescriptor): - self.run_ovtrainer_training_checks(desc) - - def prepare_model_and_dataset(self, desc: OVTrainerTestDescriptor): - self.dataset = load_dataset("anton-l/superb_dummy", "ks", trust_remote_code=True) - self.num_labels = len(self.dataset["test"].features["label"].names) - - self.feature_extractor = AutoFeatureExtractor.from_pretrained(desc.model_id) - self.tokenizer = self.feature_extractor - self.model = AutoModelForAudioClassification.from_pretrained( - desc.model_id, num_labels=self.num_labels, attn_implementation="eager" - ) - self.teacher_model = None - if desc.teacher_model_id: - self.teacher_model = AutoModelForAudioClassification.from_pretrained( - desc.teacher_model_id, num_labels=self.num_labels - ) - - def data_transform(examples, max_length: int = 16000): - sampling_rate = self.feature_extractor.sampling_rate - batch = 
self.feature_extractor( - examples["speech"], - padding="max_length", - max_length=max_length, - truncation=True, - sampling_rate=sampling_rate, - return_tensors="pt", - ) - batch["labels"] = examples["label"] - return batch - - self.data_transform = data_transform - self.dataset.set_transform(data_transform) - self.train_dataset = self.dataset["test"].select(range(8)) - self.eval_dataset = self.dataset["test"].select(range(8, 12)) - self.data_collator = None - - def check_ovmodel_reshaping(self, ovmodel: OVModel): - self.check_if_ovmodel_is_dynamic(ovmodel, True) - for batch_size in [1, 4]: - for seq_len in [12345, 16000]: - static_shape = [batch_size, seq_len] - ovmodel.reshape(*static_shape) - self.check_if_ovmodel_is_dynamic(ovmodel, False) - for input_ in ovmodel.model.inputs: - self.assertSequenceEqual(list(input_.get_shape()), static_shape) - ovmodel.reshape(-1, -1) - self.check_if_ovmodel_is_dynamic(ovmodel, True) - - def check_ovmodel_output_equals_torch_output(self, ovmodel, torch_model): - torch_model = torch_model.eval() - for batch_size in [1, 4]: - self.trainer.args = self.get_training_args(eval_batch_size=batch_size) - self.trainer.create_accelerator_and_postprocess() - for seq_length in [12345, 16000]: - dataset = deepcopy(self.eval_dataset) - dataset.set_transform(partial(self.data_transform, max_length=seq_length)) - for inputs in self.trainer.get_eval_dataloader(dataset): - self.assertSequenceEqual(inputs["input_values"].shape, [batch_size, seq_length]) - ovmodel_outputs = ovmodel(**inputs) - self.assertIn("logits", ovmodel_outputs) - ovmodel_logits = ovmodel_outputs.logits - with torch.no_grad(): - torch_logits = torch_model(**inputs).logits - torch.testing.assert_close( - ovmodel_logits, - torch_logits, - atol=1e-3, - rtol=1e-4, - ) diff --git a/tests/openvino/test_training_examples.py b/tests/openvino/test_training_examples.py deleted file mode 100644 index 023f9df7b8..0000000000 --- a/tests/openvino/test_training_examples.py +++ /dev/null @@ -1,201 +0,0 @@ -# Copyright 2023 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import subprocess -import sys -import unittest -from dataclasses import dataclass -from pathlib import Path -from typing import List, Union - -import torch -import torch.cuda -from parameterized import parameterized - -from optimum.intel.openvino.utils import OV_XML_FILE_NAME, TemporaryDirectory - - -PROJECT_ROOT = Path(__file__).parents[2] -OPENVINO_EXAMPLES_PATH = PROJECT_ROOT / "examples" / "openvino" -CUDA_VISIBLE_DEVICES = "CUDA_VISIBLE_DEVICES" - - -@dataclass -class TrainingExampleDescriptor: - cwd: Union[Path, str] - filename: str - args: List[str] - timeout: int - - def get_args_with_output_dir(self, output_dir: Union[Path, str]): - flag = "--output_dir" - args = self.args.copy() - if flag in args: - idx = args.index(flag) - del args[idx : idx + 2] - return [*args, flag, str(output_dir)] - - -TRAINING_EXAMPLE_DESCRIPTORS = { - "text-classification-QAT": TrainingExampleDescriptor( - cwd=OPENVINO_EXAMPLES_PATH / "text-classification", - filename="run_glue.py", - args=[ - "--model_name_or_path", - "hf-internal-testing/tiny-bert", - "--task_name", - "sst2", - "--do_train", - "--do_eval", - "--per_device_train_batch_size", - "2", - "--per_device_eval_batch_size", - "8", - "--logging_steps", - "1", - "--evaluation_strategy", - "steps", - "--eval_steps", - "2", - "--save_strategy", - "steps", - "--save_steps", - "2", - "--save_total_limit", - "1", - "--max_steps", - "5", - "--fp16", - "--report_to", - "none", - ], - timeout=300, - ), - "text-classification-JPQD": TrainingExampleDescriptor( - cwd=OPENVINO_EXAMPLES_PATH / "text-classification", - filename="run_glue.py", - args=[ - "--model_name_or_path", - "hf-internal-testing/tiny-bert", - "--teacher_model_name_or_path", - "hf-internal-testing/tiny-bert", - "--nncf_compression_config", - "./configs/bert-base-jpqd.json", - "--task_name", - "sst2", - "--do_train", - "--do_eval", - "--per_device_train_batch_size", - "2", - "--per_device_eval_batch_size", - "8", - "--logging_steps", - "1", - "--evaluation_strategy", - "steps", - "--eval_steps", - "2", - "--save_strategy", - "steps", - "--save_steps", - "2", - "--save_total_limit", - "1", - "--max_steps", - "5", - "--fp16", - "--report_to", - "none", - ], - timeout=300, - ), -} - - -def get_available_cuda_device_ids() -> List[int]: - torch_device_count = torch.cuda.device_count() - visible_devices_str = str(os.environ.get("CUDA_VISIBLE_DEVICES", "")) - if not visible_devices_str: - return list(range(torch_device_count)) - device_ids = list(map(int, visible_devices_str.strip().split(","))) - if len(device_ids) != torch_device_count: - # Cannot decide device ids since some devices in env are unavailable. 
- return [] - return device_ids - - -class OVTrainingExampleTest(unittest.TestCase): - def setUp(self) -> None: - self.available_cuda_device_ids = get_available_cuda_device_ids() - self.env = os.environ.copy() - - @parameterized.expand(TRAINING_EXAMPLE_DESCRIPTORS.items()) - def test_single_card_training(self, _, desc: TrainingExampleDescriptor): - if len(self.available_cuda_device_ids) < 1: - self.skipTest("No enough cuda devices.") - - self.env[CUDA_VISIBLE_DEVICES] = str(self.available_cuda_device_ids[0]) - with TemporaryDirectory() as output_dir: - args = ["torchrun", "--nproc_per_node=1", desc.filename, *desc.get_args_with_output_dir(output_dir)] - proc = subprocess.Popen( - args=args, - cwd=desc.cwd, - env=self.env.copy(), - ) - return_code = proc.wait(desc.timeout) - self.assertEqual(return_code, 0) - self.assertTrue(Path(output_dir, OV_XML_FILE_NAME).is_file()) - - @parameterized.expand(TRAINING_EXAMPLE_DESCRIPTORS.items()) - def test_data_parallel_training(self, _, desc: TrainingExampleDescriptor): - if len(self.available_cuda_device_ids) < 2: - self.skipTest("No enough cuda devices.") - - self.env[CUDA_VISIBLE_DEVICES] = ",".join(map(str, self.available_cuda_device_ids[:2])) - with TemporaryDirectory() as output_dir: - args = [sys.executable, desc.filename, *desc.get_args_with_output_dir(output_dir)] - proc = subprocess.Popen( - args=args, - cwd=desc.cwd, - env=self.env.copy(), - ) - return_code = proc.wait(desc.timeout) - self.assertEqual(return_code, 0) - self.assertTrue(Path(output_dir, OV_XML_FILE_NAME).is_file()) - - @parameterized.expand(TRAINING_EXAMPLE_DESCRIPTORS.items()) - def test_distributed_data_parallel_training(self, _, desc: TrainingExampleDescriptor): - if len(self.available_cuda_device_ids) < 2: - self.skipTest("No enough cuda devices.") - - self.env[CUDA_VISIBLE_DEVICES] = ",".join(map(str, self.available_cuda_device_ids[:2])) - with TemporaryDirectory() as output_dir: - args = [ - "torchrun", - "--rdzv_backend=c10d", - "--rdzv_endpoint=localhost:0", - "--nnodes=1", - "--nproc_per_node=2", - desc.filename, - *desc.get_args_with_output_dir(output_dir), - ] - proc = subprocess.Popen( - args=args, - cwd=desc.cwd, - env=self.env.copy(), - ) - return_code = proc.wait(desc.timeout) - self.assertEqual(return_code, 0) - self.assertTrue(Path(output_dir, OV_XML_FILE_NAME).is_file()) From 534250516b0a2fe65fa166337878ecd2d5a8f84e Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Tue, 18 Feb 2025 13:33:48 +0100 Subject: [PATCH 2/2] Update docs --- docs/source/openvino/optimization.mdx | 209 ++++++++++++++++++++++++++ 1 file changed, 209 insertions(+) create mode 100644 docs/source/openvino/optimization.mdx diff --git a/docs/source/openvino/optimization.mdx b/docs/source/openvino/optimization.mdx new file mode 100644 index 0000000000..cf16b85133 --- /dev/null +++ b/docs/source/openvino/optimization.mdx @@ -0,0 +1,209 @@ + + +# Optimization + +🤗 Optimum Intel provides an `openvino` package that enables you to apply a variety of model quantization methods on many models hosted on the 🤗 hub using the [NNCF](https://docs.openvino.ai/2024/openvino-workflow/model-optimization.html) framework. + + +Quantization is a technique to reduce the computational and memory costs of running inference by representing the weights and / or the activations with lower precision data types like 8-bit or 4-bit. + +## Weight-only quantization + +Quantization can be applied on the model's Linear, Convolutional and Embedding layers, enabling the loading of large models on memory-limited devices. 
For example, when applying 8-bit quantization, the resulting model will be x4 smaller than its fp32 counterpart. For 4-bit quantization, the reduction in memory could theoretically reach x8, but is closer to x6 in practice.
+
+
+### 8-bit
+
+For 8-bit weight quantization, you can provide a `quantization_config` equal to `OVWeightQuantizationConfig(bits=8)` to load your model's weights in 8-bit:
+
+```python
+from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig
+
+model_id = "helenai/gpt2-ov"
+saving_directory = "gpt2_int8"
+quantization_config = OVWeightQuantizationConfig(bits=8)
+model = OVModelForCausalLM.from_pretrained(model_id, quantization_config=quantization_config)
+
+# Saves the int8 model that will be x4 smaller than its fp32 counterpart
+model.save_pretrained(saving_directory)
+```
+
+Weights of language models inside vision-language pipelines can be quantized in a similar way:
+```python
+from optimum.intel import OVModelForVisualCausalLM
+
+model = OVModelForVisualCausalLM.from_pretrained(
+    "llava-hf/llava-v1.6-mistral-7b-hf",
+    quantization_config=quantization_config
+)
+```
+
+
+If `quantization_config` is not provided, the model will be exported in 8 bits by default when it has more than 1 billion parameters. You can disable this with `load_in_8bit=False`.
+
+
+### 4-bit
+
+4-bit weight quantization can be achieved in a similar way:
+
+```python
+from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig
+
+quantization_config = OVWeightQuantizationConfig(bits=4)
+model = OVModelForCausalLM.from_pretrained(model_id, quantization_config=quantization_config)
+```
+
+Or for vision-language pipelines:
+```python
+model = OVModelForVisualCausalLM.from_pretrained(
+    "llava-hf/llava-v1.6-mistral-7b-hf",
+    quantization_config=quantization_config
+)
+```
+
+You can tune the quantization parameters to achieve a better performance-accuracy trade-off as follows:
+
+```python
+quantization_config = OVWeightQuantizationConfig(
+    bits=4,
+    sym=False,
+    ratio=0.8,
+    quant_method="awq",
+    dataset="wikitext2"
+)
+```
+
+By default, the quantization scheme is [asymmetric](https://github.com/openvinotoolkit/nncf/blob/develop/docs/usage/training_time_compression/other_algorithms/LegacyQuantization.md#asymmetric-quantization); to make it [symmetric](https://github.com/openvinotoolkit/nncf/blob/develop/docs/usage/training_time_compression/other_algorithms/LegacyQuantization.md#symmetric-quantization), add `sym=True`.
+
+For 4-bit quantization you can also specify the following arguments in the quantization configuration:
+* The `group_size` parameter defines the group size to use for quantization; setting it to `-1` results in per-column quantization.
+* The `ratio` parameter controls the ratio between 4-bit and 8-bit quantization. If set to 0.9, it means that 90% of the layers will be quantized to `int4` while 10% will be quantized to `int8`.
+
+Smaller `group_size` and `ratio` values usually improve accuracy at the cost of model size and inference latency; a configuration sketch combining both parameters is shown below.
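+
+For illustration, here is a minimal sketch of a 4-bit configuration setting both parameters (the concrete values are arbitrary and should be tuned per model):
+
+```python
+from optimum.intel import OVWeightQuantizationConfig
+
+# Group-wise int4 quantization with groups of 128 columns,
+# keeping 10% of the layers in int8 for better accuracy
+quantization_config = OVWeightQuantizationConfig(
+    bits=4,
+    group_size=128,
+    ratio=0.9,
+)
+```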
+
+The quality of a 4-bit weight-compressed model can be further improved by employing one or more of the following data-dependent methods:
+* **AWQ**, which stands for Activation-aware Weight Quantization, is an algorithm that tunes model weights for more accurate 4-bit compression. It slightly improves generation quality of compressed LLMs, but requires significant additional time and memory for tuning weights on a calibration dataset. Note that there may be no matching patterns in the model to apply AWQ, in which case it will be skipped.
+* **Scale Estimation** is a method that tunes quantization scales to minimize the `L2` error between the original and compressed layers. Providing a dataset is required to run scale estimation. Using this method also incurs additional time and memory overhead.
+* **GPTQ** optimizes compressed weights in a layer-wise fashion to minimize the difference between the activations of a compressed layer and the original one.
+* **LoRA Correction** mitigates quantization noise introduced during weight compression by leveraging low-rank adaptation.
+
+Data-aware algorithms can be applied together or separately. To do so, provide the corresponding arguments to the 4-bit `OVWeightQuantizationConfig` together with a dataset. For example:
+```python
+quantization_config = OVWeightQuantizationConfig(
+    bits=4,
+    sym=False,
+    ratio=0.8,
+    quant_method="awq",
+    scale_estimation=True,
+    gptq=True,
+    dataset="wikitext2"
+)
+```
+
+Note: the GPTQ and LoRA Correction algorithms can't be applied simultaneously.
+
+## Static quantization
+
+When applying post-training static quantization, both the weights and the activations are quantized.
+To quantize the activations, an additional calibration step is needed, which consists of feeding a `calibration_dataset` to the network in order to estimate the activation quantization parameters.
+
+Here is how to apply static quantization on a fine-tuned DistilBERT given your own `calibration_dataset`:
+
+```python
+from transformers import AutoTokenizer
+from optimum.intel import OVQuantizer, OVModelForSequenceClassification, OVConfig, OVQuantizationConfig
+
+model_id = "distilbert-base-uncased-finetuned-sst-2-english"
+model = OVModelForSequenceClassification.from_pretrained(model_id, export=True)
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+# The directory where the quantized model will be saved
+save_dir = "ptq_model"
+
+quantizer = OVQuantizer.from_pretrained(model)
+
+# Apply static quantization and export the resulting quantized model to OpenVINO IR format
+ov_config = OVConfig(quantization_config=OVQuantizationConfig())
+quantizer.quantize(ov_config=ov_config, calibration_dataset=calibration_dataset, save_directory=save_dir)
+# Save the tokenizer
+tokenizer.save_pretrained(save_dir)
+```
+
+The calibration dataset can also be created easily using your `OVQuantizer`:
+
+```python
+from functools import partial
+
+def preprocess_function(examples, tokenizer):
+    return tokenizer(examples["sentence"], padding="max_length", max_length=128, truncation=True)
+
+# Create the calibration dataset used to perform static quantization
+calibration_dataset = quantizer.get_calibration_dataset(
+    "glue",
+    dataset_config_name="sst2",
+    preprocess_function=partial(preprocess_function, tokenizer=tokenizer),
+    num_samples=300,
+    dataset_split="train",
+)
+```
+
+
+The `quantize()` method applies post-training static quantization and exports the resulting quantized model to the OpenVINO Intermediate Representation (IR). The resulting graph is represented with two files: an XML file describing the network topology and a binary file describing the weights. The resulting model can be run on any target Intel device.
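+
+Once saved, the quantized model can be loaded back for inference like any other OpenVINO model. A minimal sketch, reusing the `save_dir` and `tokenizer` from the snippet above:
+
+```python
+from optimum.intel import OVModelForSequenceClassification
+
+# Load the statically quantized model from the saved OpenVINO IR files
+quantized_model = OVModelForSequenceClassification.from_pretrained(save_dir)
+
+inputs = tokenizer("This movie was absolutely wonderful!", return_tensors="pt")
+logits = quantized_model(**inputs).logits
+```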
+
+### Speech-to-text Models Quantization
+
+The speech-to-text Whisper model can be quantized without the need for preparing a custom calibration dataset. See the example below.
+
+```python
+from optimum.intel import OVModelForSpeechSeq2Seq, OVQuantizationConfig
+
+model_id = "openai/whisper-tiny"
+ov_model = OVModelForSpeechSeq2Seq.from_pretrained(
+    model_id,
+    quantization_config=OVQuantizationConfig(
+        num_samples=10,
+        dataset="librispeech",
+        processor=model_id,
+        matmul_sq_alpha=0.95,
+    )
+)
+```
+
+With this, the encoder, decoder and decoder-with-past models of the Whisper pipeline will be fully quantized, including activations.
+
+## Hybrid quantization
+
+Traditional optimization methods like post-training 8-bit quantization do not work well for Stable Diffusion (SD) models and can lead to poor generation results. On the other hand, weight compression does not improve performance significantly when applied to Stable Diffusion models, as the size of the activations is comparable to that of the weights.
+The U-Net component takes up most of the overall execution time of the pipeline, so optimizing just this one component can bring substantial benefits in terms of inference speed while keeping acceptable accuracy without fine-tuning. Quantizing the rest of the diffusion pipeline does not significantly improve inference performance but could potentially lead to substantial accuracy degradation.
+Therefore, the proposal is to apply quantization in *hybrid mode* for the U-Net model and weight-only quantization for the rest of the pipeline components:
+* U-Net: quantization applied to both the weights and activations
+* Text encoder, VAE encoder / decoder: quantization applied to the weights only
+
+The hybrid mode quantizes the weights of MatMul and Embedding layers and the activations of other layers, preserving accuracy after optimization while reducing the model size.
+
+The `quantization_config` defines the optimization parameters for the SD pipeline. To enable hybrid quantization, specify the quantization dataset in the `quantization_config`. If the dataset is not defined, weight-only quantization is applied to all components.
+
+```python
+from optimum.intel import OVStableDiffusionPipeline, OVWeightQuantizationConfig
+
+model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"  # any Stable Diffusion checkpoint
+model = OVStableDiffusionPipeline.from_pretrained(
+    model_id,
+    export=True,
+    quantization_config=OVWeightQuantizationConfig(bits=8, dataset="conceptual_captions"),
+)
+```
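+
+As a quick sanity check, here is a sketch of running the hybrid-quantized pipeline (the prompt and output filename are illustrative; the call mirrors the usual diffusers-style API):
+
+```python
+prompt = "sailing ship in storm by Rembrandt"
+# Generate an image with the hybrid-quantized pipeline
+image = model(prompt, num_inference_steps=20).images[0]
+image.save("result.png")
+```
+
+
+For more details, please refer to the corresponding NNCF [documentation](https://github.com/openvinotoolkit/nncf/blob/develop/docs/usage/post_training_compression/weights_compression/Usage.md).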