From d4d0f9dd572beec0bc84f2571be4a8670e4b7a70 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Fri, 31 Jan 2025 09:00:14 +0100
Subject: [PATCH 01/12] test 4.48

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index cf4bfb3191..a8b898d230 100644
--- a/setup.py
+++ b/setup.py
@@ -29,7 +29,7 @@
 INSTALL_REQUIRE = [
     "torch>=1.11",
     "optimum~=1.24",
-    "transformers>=4.36,<4.48",
+    "transformers>=4.36,<4.49",
     "datasets>=1.4.0",
     "sentencepiece",
     "setuptools",

From 3ad88da2d370502dd17caedc1eb6ea5a46fb04c7 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Fri, 31 Jan 2025 09:47:32 +0100
Subject: [PATCH 02/12] fix llava offline test

---
 tests/openvino/test_modeling.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index 66f06354ee..5d4f3798ec 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -271,6 +271,8 @@ def test_load_from_hub_and_save_visual_language_model(self):
             else:
                 self.assertEqual(component.request.get_property("PERFORMANCE_HINT"), "LATENCY")
 
+        processor.patch_size = 16
+        # should be fixed in https://huggingface.co/katuni4ka/tiny-random-llava-ov/blob/main/processor_config.json#L3
         inputs = processor(images=image, text=prompt, return_tensors="pt")
         set_seed(SEED)
         loaded_model_outputs = loaded_model(**inputs)

From 4bf5ffc3ff8093c264b11afe59243e827bf99378 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Fri, 31 Jan 2025 10:16:00 +0100
Subject: [PATCH 03/12] couple initial fixes

---
 optimum/exporters/openvino/model_patcher.py | 26 ++++++++++++---------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py
index e147945878..1c5eafcf00 100644
--- a/optimum/exporters/openvino/model_patcher.py
+++ b/optimum/exporters/openvino/model_patcher.py
@@ -718,14 +718,15 @@ def _mistral_update_causal_mask(
 class MistralModelPatcher(DecoderModelPatcher):
     def __enter__(self):
         super().__enter__()
-        if is_transformers_version(">=", "4.42.0"):
+        if is_transformers_version(">=", "4.42.0") and is_transformers_version("<", "4.48.0"):
             # apply fix https://github.com/huggingface/transformers/commit/57d7594a79a9f5d835abf2d4d384db0e4818e548
             self._model.model._orig_update_causal_mask = self._model.model._update_causal_mask
             self._model.model._update_causal_mask = types.MethodType(_mistral_update_causal_mask, self._model.model)

         else:
             for layer in self._model.model.layers:
-                _reinitialize_cos_sin_cached_fp32(layer.self_attn.rotary_emb)
+                if hasattr(layer.self_attn, "rotary_emb"):
+                    _reinitialize_cos_sin_cached_fp32(layer.self_attn.rotary_emb)

     def __exit__(self, exc_type, exc_value, traceback):
         super().__exit__(exc_type, exc_value, traceback)
@@ -734,7 +735,7 @@ def __exit__(self, exc_type, exc_value, traceback):
             self._model.model._update_causal_mask = self._model.model._orig_update_causal_mask

         for layer in self._model.model.layers:
-            if hasattr(layer.self_attn.rotary_emb, "_orig_forward"):
+            if hasattr(layer.self_attn, "rotary_emb") and hasattr(layer.self_attn.rotary_emb, "_orig_forward"):
                 layer.self_attn.rotary_emb.forward = layer.self_attn.rotary_emb._orig_forward


@@ -2493,7 +2494,9 @@ class UpdateCausalMaskModelPatcher(DecoderModelPatcher):
     def __enter__(self):
         super().__enter__()
         patch_update_causal_mask(self._model, "4.42.0")
-        if hasattr(self._model.model.layers[0].self_attn.rotary_emb, "_set_cos_sin_cache"):
+        if hasattr(self._model.model.layers[0].self_attn, "rotary_emb") and hasattr(
+            self._model.model.layers[0].self_attn.rotary_emb, "_set_cos_sin_cache"
+        ):
             for layer in self._model.model.layers:
                 _reinitialize_cos_sin_cached_fp32(layer.self_attn.rotary_emb)

@@ -3045,15 +3048,16 @@ def patched_forward(self, fn):
     def __enter__(self):
         if is_torch_version(">=", "2.1.0"):
             if self._model.config.model_type == "qwen2" and self._model.config._attn_implementation != "sdpa":
-                from transformers.models.qwen2.modeling_qwen2 import QWEN2_ATTENTION_CLASSES
+                if is_transformers_version("<", "4.48"):
+                    from transformers.models.qwen2.modeling_qwen2 import QWEN2_ATTENTION_CLASSES

-                sdpa_attn = QWEN2_ATTENTION_CLASSES["sdpa"]
-                self._model.config._orig_attn_implementation = self._model.config._attn_implementation
-                self._model.config._attn_implementation = "sdpa"
+                    sdpa_attn = QWEN2_ATTENTION_CLASSES["sdpa"]
+                    self._model.config._orig_attn_implementation = self._model.config._attn_implementation
+                    self._model.config._attn_implementation = "sdpa"

-                for layer in self._model.model.layers:
-                    layer.self_attn._orig_forward = layer.self_attn.forward
-                    layer.self_attn.forward = types.MethodType(sdpa_attn.forward, layer.self_attn)
+                    for layer in self._model.model.layers:
+                        layer.self_attn._orig_forward = layer.self_attn.forward
+                        layer.self_attn.forward = types.MethodType(sdpa_attn.forward, layer.self_attn)
         if self._model.config.model_type == "llama" and self._model.config._attn_implementation != "sdpa":
             self._model.config._orig_attn_implementation = self._model.config._attn_implementation
             self._model.config._attn_implementation = "sdpa"

From f6d48b1ff826a96414903afeb032a8afaa67c657 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Fri, 31 Jan 2025 11:26:56 +0100
Subject: [PATCH 04/12] fix type hint error, vision language models tests and phi3

---
 optimum/exporters/openvino/model_patcher.py | 6 +++---
 optimum/intel/openvino/modeling_decoder.py  | 3 ---
 tests/openvino/test_modeling.py             | 5 +++++
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py
index 1c5eafcf00..9320303ff4 100644
--- a/optimum/exporters/openvino/model_patcher.py
+++ b/optimum/exporters/openvino/model_patcher.py
@@ -1581,19 +1581,19 @@ def __enter__(self):
         ):
             self._model.config.max_position_embeddings = self._model.config.original_max_position_embeddings

-        if is_transformers_version(">=", "4.42.0"):
+        if is_transformers_version(">=", "4.42.0") and is_transformers_version("<", "4.48.0"):
             self._model.model._orig_forward = self._model.model.forward
             self._model.model.forward = types.MethodType(phi3_442_forward, self._model.model)

         # https://github.com/huggingface/transformers/blob/30ee508c6c92a1c0aa0281d193c7c0fb815b8d2f/src/transformers/models/phi3/modeling_phi3.py#L113
         # init inv_freq for torchscript tracing
         for layer in self._model.model.layers:
-            if is_torch_version(">=", "2.1.0"):
+            if is_torch_version(">=", "2.1.0") and is_transformers_version("<", "4.48.0"):
                 orig_self_attn_fwd = layer.self_attn.forward
                 layer.self_attn.forward = types.MethodType(_phi3_self_attn_sdpa_forward, layer.self_attn)
                 layer.self_attn._orig_forward = orig_self_attn_fwd

-            if layer.self_attn.rotary_emb.inv_freq is None:
+            if hasattr(layer.self_attn, "rotary_emb") and layer.self_attn.rotary_emb.inv_freq is None:
                 rotary_emb = layer.self_attn.rotary_emb
                 layer.self_attn.rotary_emb.inv_freq = 1.0 / (
                     rotary_emb.base ** (torch.arange(0, rotary_emb.dim, 2, dtype=torch.int64).float() / rotary_emb.dim)
diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py
index b2519563f5..b13ebc80c6 100644
--- a/optimum/intel/openvino/modeling_decoder.py
+++ b/optimum/intel/openvino/modeling_decoder.py
@@ -56,7 +56,6 @@


 if TYPE_CHECKING:
-    from transformers.generation.streamers import BaseStreamer
     from transformers.modeling_utils import PreTrainedModel


@@ -706,7 +705,6 @@ def generate(
         prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], List[int]]] = None,
         synced_gpus: Optional[bool] = None,
         assistant_model: Optional["PreTrainedModel"] = None,
-        streamer: Optional["BaseStreamer"] = None,
         negative_prompt_ids: Optional[torch.Tensor] = None,
         negative_prompt_attention_mask: Optional[torch.Tensor] = None,
         **kwargs,
@@ -734,7 +732,6 @@ def generate(
             prefix_allowed_tokens_fn,
             synced_gpus,
             assistant_model,
-            streamer,
             negative_prompt_ids,
             negative_prompt_attention_mask,
             **kwargs,
diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index 5d4f3798ec..5c6fd2e3ae 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -2172,6 +2172,11 @@ def test_compare_to_transformers(self, model_arch):
         for component_name, component in ov_model.components.items():
             self.assertIsInstance(component, MODEL_PARTS_CLS_MAPPING[component_name])
         self.assertIsInstance(ov_model.config, PretrainedConfig)
+
+        # TODO: fix in models
+        if preprocessors.get("processor") is not None:
+            preprocessors["processor"].patch_size = ov_model.config.vision_config.patch_size
+
         inputs = ov_model.preprocess_inputs(**preprocessors, text=prompt, image=self.IMAGE.resize((600, 600)))
         transformers_inputs = copy.deepcopy(inputs)
         test_device = "AUTO"

From d2366a69d164692b74800dcff211e2e36e9cafd9 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Fri, 31 Jan 2025 11:35:29 +0100
Subject: [PATCH 05/12] fix preprocess_inputs

---
 optimum/intel/openvino/modeling_visual_language.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/optimum/intel/openvino/modeling_visual_language.py b/optimum/intel/openvino/modeling_visual_language.py
index e895a76e73..9bb3b1f679 100644
--- a/optimum/intel/openvino/modeling_visual_language.py
+++ b/optimum/intel/openvino/modeling_visual_language.py
@@ -1020,6 +1020,10 @@ def preprocess_inputs(
             prompt = "<image>\n" + text
         else:
             prompt = text
+
+        if hasattr(processor, "patch_size") and processor.patch_size is None:
+            processor.patch_size = config.vision_config.patch_size
+
         inputs = processor(images=image, text=prompt, return_tensors="pt")
         return inputs

From bceb37128069481dd977d6092499466d1f79f6aa Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Fri, 31 Jan 2025 12:10:46 +0100
Subject: [PATCH 06/12] fix

---
 optimum/intel/openvino/modeling_visual_language.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/optimum/intel/openvino/modeling_visual_language.py b/optimum/intel/openvino/modeling_visual_language.py
index 9bb3b1f679..77f838da8f 100644
--- a/optimum/intel/openvino/modeling_visual_language.py
+++ b/optimum/intel/openvino/modeling_visual_language.py
@@ -1021,7 +1021,11 @@ def preprocess_inputs(
         else:
             prompt = text

-        if hasattr(processor, "patch_size") and processor.patch_size is None:
+        if (
+            getattr(processor, "patch_size", None) is None
+            and hasattr(config, "vision_config")
+            and hasattr(config.vision_config, "patch_size")
+        ):
             processor.patch_size = config.vision_config.patch_size

         inputs = processor(images=image, text=prompt, return_tensors="pt")

From c5202f00515fa1ad69fefe812066c53704990b6d Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Fri, 31 Jan 2025 12:11:23 +0100
Subject: [PATCH 07/12] fix

---
 optimum/intel/openvino/modeling_visual_language.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/optimum/intel/openvino/modeling_visual_language.py b/optimum/intel/openvino/modeling_visual_language.py
index 77f838da8f..2e08652727 100644
--- a/optimum/intel/openvino/modeling_visual_language.py
+++ b/optimum/intel/openvino/modeling_visual_language.py
@@ -1023,8 +1023,8 @@ def preprocess_inputs(

         if (
             getattr(processor, "patch_size", None) is None
-            and hasattr(config, "vision_config")
-            and hasattr(config.vision_config, "patch_size")
+            and getattr(config, "vision_config", None) is not None
+            and getattr(config.vision_config, "patch_size", None) is not None
         ):
             processor.patch_size = config.vision_config.patch_size

From fe1e213b74d9fc7566cd755a8f1022cc9e7fdb2c Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Fri, 31 Jan 2025 12:49:46 +0100
Subject: [PATCH 08/12] fix streamer

---
 optimum/intel/openvino/modeling_decoder.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py
index b13ebc80c6..41e4c6e3e9 100644
--- a/optimum/intel/openvino/modeling_decoder.py
+++ b/optimum/intel/openvino/modeling_decoder.py
@@ -56,6 +56,11 @@


 if TYPE_CHECKING:
+    try:
+        from transformers.generation.streamers import BaseStreamer
+    except Exception:
+        from typing import Generator as BaseStreamer
+
     from transformers.modeling_utils import PreTrainedModel


@@ -705,6 +710,7 @@ def generate(
         prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], List[int]]] = None,
         synced_gpus: Optional[bool] = None,
         assistant_model: Optional["PreTrainedModel"] = None,
+        streamer: Optional["BaseStreamer"] = None,
         negative_prompt_ids: Optional[torch.Tensor] = None,
         negative_prompt_attention_mask: Optional[torch.Tensor] = None,
         **kwargs,
@@ -732,6 +738,7 @@ def generate(
             prefix_allowed_tokens_fn,
             synced_gpus,
             assistant_model,
+            streamer,
             negative_prompt_ids,
             negative_prompt_attention_mask,
             **kwargs,

From c5858ff1794a8c1cefe9ff860247ebf3eff91cbf Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Fri, 31 Jan 2025 13:02:29 +0100
Subject: [PATCH 09/12] fix

---
 tests/openvino/test_modeling.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index 5c6fd2e3ae..918d0e6629 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -2173,10 +2173,6 @@ def test_compare_to_transformers(self, model_arch):
             self.assertIsInstance(component, MODEL_PARTS_CLS_MAPPING[component_name])
         self.assertIsInstance(ov_model.config, PretrainedConfig)

-        # TODO: fix in models
-        if preprocessors.get("processor") is not None:
-            preprocessors["processor"].patch_size = ov_model.config.vision_config.patch_size
-
         inputs = ov_model.preprocess_inputs(**preprocessors, text=prompt, image=self.IMAGE.resize((600, 600)))
         transformers_inputs = copy.deepcopy(inputs)
         test_device = "AUTO"

From bb5ee65fd8e1e5940b9637ff2cc871662a6a1088 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Fri, 31 Jan 2025 13:22:44 +0100
Subject: [PATCH 10/12] fix tests

---
 .../intel/openvino/modeling_visual_language.py | 17 +++++++++++------
 tests/openvino/test_modeling.py                |  9 +++++----
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/optimum/intel/openvino/modeling_visual_language.py b/optimum/intel/openvino/modeling_visual_language.py
index 2e08652727..435f656c60 100644
--- a/optimum/intel/openvino/modeling_visual_language.py
+++ b/optimum/intel/openvino/modeling_visual_language.py
@@ -1021,12 +1021,16 @@ def preprocess_inputs(
         else:
             prompt = text

-        if (
-            getattr(processor, "patch_size", None) is None
-            and getattr(config, "vision_config", None) is not None
-            and getattr(config.vision_config, "patch_size", None) is not None
-        ):
-            processor.patch_size = config.vision_config.patch_size
+        if getattr(processor, "patch_size", None) is None:
+            if (
+                getattr(config, "vision_config", None) is not None
+                and getattr(config.vision_config, "patch_size", None) is not None
+            ):
+                processor.patch_size = config.vision_config.patch_size
+            else:
+                raise ValueError(
+                    "Processor does not have `patch_size` attribute. Please fix the processor or provide `patch_size` in the config."
+                )

         inputs = processor(images=image, text=prompt, return_tensors="pt")
         return inputs
@@ -1923,6 +1927,7 @@ def preprocess_inputs(
         input_ids = tokenizer(text, return_tensors="pt").input_ids
         attention_mask = torch.ones_like(input_ids, dtype=torch.int64)
         result = {"input_ids": input_ids, "attention_mask": attention_mask}
+
         if image is not None:
             result["images"] = processor(images=[image], return_tensors="pt")["pixel_values"]
         return result
diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index 918d0e6629..44884b7c26 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -2311,17 +2311,17 @@ def test_generate_utils(self, model_arch):

     def get_preprocessors(self, model_arch):
         model_id = MODEL_NAMES[model_arch]
+        config = AutoConfig.from_pretrained(model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS)
+
         if model_arch == "nanollava":
-            config = AutoConfig.from_pretrained(model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS)
             processor = AutoProcessor.from_pretrained(
                 config.mm_vision_tower, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS
             )
             tokenizer = AutoTokenizer.from_pretrained(
                 model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS
             )
-            preprocessors = {"processor": processor, "tokenizer": tokenizer}
+            preprocessors = {"processor": processor, "tokenizer": tokenizer, "config": config}
         elif model_arch == "internvl2":
-            config = AutoConfig.from_pretrained(model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS)
             tokenizer = AutoTokenizer.from_pretrained(
                 model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS
             )
@@ -2330,7 +2330,8 @@ def get_preprocessors(self, model_arch):
             processor = AutoProcessor.from_pretrained(
                 model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS
             )
-            preprocessors = {"processor": processor, "tokenizer": None}
+            preprocessors = {"processor": processor, "tokenizer": None, "config": config}
+
         return preprocessors

     @parameterized.expand(SUPPORTED_ARCHITECTURES)

From 000b1ac77185e34dab03fb8781de0642cd2e3015 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Fri, 31 Jan 2025 17:49:41 +0100
Subject: [PATCH 11/12] remove unnecessary

---
 tests/openvino/test_modeling.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index 44884b7c26..ad51b10e5c 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -271,8 +271,7 @@ def test_load_from_hub_and_save_visual_language_model(self):
             else:
                 self.assertEqual(component.request.get_property("PERFORMANCE_HINT"), "LATENCY")

-        processor.patch_size = 16
-        # should be fixed in https://huggingface.co/katuni4ka/tiny-random-llava-ov/blob/main/processor_config.json#L3
+        processor.patch_size = loaded_model.config.vision_config.patch_size
         inputs = processor(images=image, text=prompt, return_tensors="pt")
         set_seed(SEED)
         loaded_model_outputs = loaded_model(**inputs)
@@ -2247,8 +2246,9 @@ def test_llava_with_new_preprocessing(self, model_arch):
         self.assertTrue(processor.patch_size is not None)
         self.assertTrue(processor.vision_feature_select_strategy is not None)
         inputs = processor(images=self.IMAGE, text=prompt, return_tensors="pt")
-        self.assertTrue(
-            (inputs.input_ids == ov_model.config.image_token_index).sum(1).max() >= ov_model.config.image_seq_length
+        self.assertGreaterEqual(
+            (inputs.input_ids == ov_model.config.image_token_index).sum().max().item(),
+            ov_model.config.image_seq_length,
         )
         set_seed(SEED)
         with torch.no_grad():

From f5b30f28234dac3fffa83e0c63a8d4be0c12e8e4 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Tue, 4 Feb 2025 09:03:39 +0100
Subject: [PATCH 12/12] fix llava new processing test

---
 tests/openvino/test_modeling.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index ad51b10e5c..8ca72d06c2 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -2237,6 +2237,7 @@ def test_llava_with_new_preprocessing(self, model_arch):
             patch_size=config.vision_config.patch_size,
             vision_feature_select_strategy=config.vision_feature_select_strategy,
             trust_remote_code=model_arch in self.REMOTE_CODE_MODELS,
+            num_additional_image_tokens=1,
         )
         transformers_model = self.get_transformer_model_class(model_arch).from_pretrained(model_id)
         ov_model = OVModelForVisualCausalLM.from_pretrained(