From d4d0f9dd572beec0bc84f2571be4a8670e4b7a70 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Fri, 31 Jan 2025 09:00:14 +0100
Subject: [PATCH 01/12] test 4.48

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index cf4bfb3191..a8b898d230 100644
--- a/setup.py
+++ b/setup.py
@@ -29,7 +29,7 @@
 INSTALL_REQUIRE = [
     "torch>=1.11",
     "optimum~=1.24",
-    "transformers>=4.36,<4.48",
+    "transformers>=4.36,<4.49",
     "datasets>=1.4.0",
     "sentencepiece",
     "setuptools",

From 3ad88da2d370502dd17caedc1eb6ea5a46fb04c7 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Fri, 31 Jan 2025 09:47:32 +0100
Subject: [PATCH 02/12] fix llava offline test

---
 tests/openvino/test_modeling.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index 66f06354ee..5d4f3798ec 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -271,6 +271,8 @@ def test_load_from_hub_and_save_visual_language_model(self):
             else:
                 self.assertEqual(component.request.get_property("PERFORMANCE_HINT"), "LATENCY")
 
+        processor.patch_size = 16
+        # should be fixed in https://huggingface.co/katuni4ka/tiny-random-llava-ov/blob/main/processor_config.json#L3
         inputs = processor(images=image, text=prompt, return_tensors="pt")
         set_seed(SEED)
         loaded_model_outputs = loaded_model(**inputs)

From 4bf5ffc3ff8093c264b11afe59243e827bf99378 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Fri, 31 Jan 2025 10:16:00 +0100
Subject: [PATCH 03/12] couple initial fixes

---
 optimum/exporters/openvino/model_patcher.py | 26 ++++++++++++---------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py
index e147945878..1c5eafcf00 100644
--- a/optimum/exporters/openvino/model_patcher.py
+++ b/optimum/exporters/openvino/model_patcher.py
@@ -718,14 +718,15 @@ def _mistral_update_causal_mask(
 class MistralModelPatcher(DecoderModelPatcher):
     def __enter__(self):
         super().__enter__()
-        if is_transformers_version(">=", "4.42.0"):
+        if is_transformers_version(">=", "4.42.0") and is_transformers_version("<", "4.48.0"):
             # apply fix https://github.com/huggingface/transformers/commit/57d7594a79a9f5d835abf2d4d384db0e4818e548
             self._model.model._orig_update_causal_mask = self._model.model._update_causal_mask
             self._model.model._update_causal_mask = types.MethodType(_mistral_update_causal_mask, self._model.model)

         else:
             for layer in self._model.model.layers:
-                _reinitialize_cos_sin_cached_fp32(layer.self_attn.rotary_emb)
+                if hasattr(layer.self_attn, "rotary_emb"):
+                    _reinitialize_cos_sin_cached_fp32(layer.self_attn.rotary_emb)

     def __exit__(self, exc_type, exc_value, traceback):
         super().__exit__(exc_type, exc_value, traceback)
@@ -734,7 +735,7 @@ def __exit__(self, exc_type, exc_value, traceback):
             self._model.model._update_causal_mask = self._model.model._orig_update_causal_mask

         for layer in self._model.model.layers:
-            if hasattr(layer.self_attn.rotary_emb, "_orig_forward"):
+            if hasattr(layer.self_attn, "rotary_emb") and hasattr(layer.self_attn.rotary_emb, "_orig_forward"):
                 layer.self_attn.rotary_emb.forward = layer.self_attn.rotary_emb._orig_forward


@@ -2493,7 +2494,9 @@ class UpdateCausalMaskModelPatcher(DecoderModelPatcher):
     def __enter__(self):
         super().__enter__()
         patch_update_causal_mask(self._model, "4.42.0")
-        if hasattr(self._model.model.layers[0].self_attn.rotary_emb, "_set_cos_sin_cache"):
+        if hasattr(self._model.model.layers[0].self_attn, "rotary_emb") and hasattr(
+            self._model.model.layers[0].self_attn.rotary_emb, "_set_cos_sin_cache"
+        ):
             for layer in self._model.model.layers:
                 _reinitialize_cos_sin_cached_fp32(layer.self_attn.rotary_emb)

@@ -3045,15 +3048,16 @@ def patched_forward(self, fn):
     def __enter__(self):
         if is_torch_version(">=", "2.1.0"):
             if self._model.config.model_type == "qwen2" and self._model.config._attn_implementation != "sdpa":
-                from transformers.models.qwen2.modeling_qwen2 import QWEN2_ATTENTION_CLASSES
+                if is_transformers_version("<", "4.48"):
+                    from transformers.models.qwen2.modeling_qwen2 import QWEN2_ATTENTION_CLASSES

-                sdpa_attn = QWEN2_ATTENTION_CLASSES["sdpa"]
-                self._model.config._orig_attn_implementation = self._model.config._attn_implementation
-                self._model.config._attn_implementation = "sdpa"
+                    sdpa_attn = QWEN2_ATTENTION_CLASSES["sdpa"]
+                    self._model.config._orig_attn_implementation = self._model.config._attn_implementation
+                    self._model.config._attn_implementation = "sdpa"

-                for layer in self._model.model.layers:
-                    layer.self_attn._orig_forward = layer.self_attn.forward
-                    layer.self_attn.forward = types.MethodType(sdpa_attn.forward, layer.self_attn)
+                    for layer in self._model.model.layers:
+                        layer.self_attn._orig_forward = layer.self_attn.forward
+                        layer.self_attn.forward = types.MethodType(sdpa_attn.forward, layer.self_attn)
         if self._model.config.model_type == "llama" and self._model.config._attn_implementation != "sdpa":
             self._model.config._orig_attn_implementation = self._model.config._attn_implementation
             self._model.config._attn_implementation = "sdpa"

From f6d48b1ff826a96414903afeb032a8afaa67c657 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Fri, 31 Jan 2025 11:26:56 +0100
Subject: [PATCH 04/12] fix type hint error, vision language models tests and phi3

---
 optimum/exporters/openvino/model_patcher.py | 6 +++---
 optimum/intel/openvino/modeling_decoder.py  | 3 ---
 tests/openvino/test_modeling.py             | 5 +++++
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py
index 1c5eafcf00..9320303ff4 100644
--- a/optimum/exporters/openvino/model_patcher.py
+++ b/optimum/exporters/openvino/model_patcher.py
@@ -1581,19 +1581,19 @@ def __enter__(self):
         ):
             self._model.config.max_position_embeddings = self._model.config.original_max_position_embeddings

-        if is_transformers_version(">=", "4.42.0"):
+        if is_transformers_version(">=", "4.42.0") and is_transformers_version("<", "4.48.0"):
             self._model.model._orig_forward = self._model.model.forward
             self._model.model.forward = types.MethodType(phi3_442_forward, self._model.model)

         # https://github.com/huggingface/transformers/blob/30ee508c6c92a1c0aa0281d193c7c0fb815b8d2f/src/transformers/models/phi3/modeling_phi3.py#L113
         # init inv_freq for torchscript tracing
         for layer in self._model.model.layers:
-            if is_torch_version(">=", "2.1.0"):
+            if is_torch_version(">=", "2.1.0") and is_transformers_version("<", "4.48.0"):
                 orig_self_attn_fwd = layer.self_attn.forward
                 layer.self_attn.forward = types.MethodType(_phi3_self_attn_sdpa_forward, layer.self_attn)
                 layer.self_attn._orig_forward = orig_self_attn_fwd

-            if layer.self_attn.rotary_emb.inv_freq is None:
+            if hasattr(layer.self_attn, "rotary_emb") and layer.self_attn.rotary_emb.inv_freq is None:
                 rotary_emb = layer.self_attn.rotary_emb
                 layer.self_attn.rotary_emb.inv_freq = 1.0 / (
                     rotary_emb.base ** (torch.arange(0, rotary_emb.dim, 2, dtype=torch.int64).float() / rotary_emb.dim)
diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py
index b2519563f5..b13ebc80c6 100644
--- a/optimum/intel/openvino/modeling_decoder.py
+++ b/optimum/intel/openvino/modeling_decoder.py
@@ -56,7 +56,6 @@


 if TYPE_CHECKING:
-    from transformers.generation.streamers import BaseStreamer
     from transformers.modeling_utils import PreTrainedModel


@@ -706,7 +705,6 @@ def generate(
         prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], List[int]]] = None,
         synced_gpus: Optional[bool] = None,
         assistant_model: Optional["PreTrainedModel"] = None,
-        streamer: Optional["BaseStreamer"] = None,
         negative_prompt_ids: Optional[torch.Tensor] = None,
         negative_prompt_attention_mask: Optional[torch.Tensor] = None,
         **kwargs,
@@ -734,7 +732,6 @@ def generate(
             prefix_allowed_tokens_fn,
             synced_gpus,
             assistant_model,
-            streamer,
             negative_prompt_ids,
             negative_prompt_attention_mask,
             **kwargs,
diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index 5d4f3798ec..5c6fd2e3ae 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -2172,6 +2172,11 @@ def test_compare_to_transformers(self, model_arch):
         for component_name, component in ov_model.components.items():
             self.assertIsInstance(component, MODEL_PARTS_CLS_MAPPING[component_name])
         self.assertIsInstance(ov_model.config, PretrainedConfig)
+
+        # TODO: fix in models
+        if preprocessors.get("processor") is not None:
+            preprocessors["processor"].patch_size = ov_model.config.vision_config.patch_size
+
         inputs = ov_model.preprocess_inputs(**preprocessors, text=prompt, image=self.IMAGE.resize((600, 600)))
         transformers_inputs = copy.deepcopy(inputs)
         test_device = "AUTO"

From d2366a69d164692b74800dcff211e2e36e9cafd9 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Fri, 31 Jan 2025 11:35:29 +0100
Subject: [PATCH 05/12] fix preprocess_inputs

---
 optimum/intel/openvino/modeling_visual_language.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/optimum/intel/openvino/modeling_visual_language.py b/optimum/intel/openvino/modeling_visual_language.py
index e895a76e73..9bb3b1f679 100644
--- a/optimum/intel/openvino/modeling_visual_language.py
+++ b/optimum/intel/openvino/modeling_visual_language.py
@@ -1020,6 +1020,10 @@ def preprocess_inputs(
             prompt = "<image>\n" + text
         else:
             prompt = text
+
+        if hasattr(processor, "patch_size") and processor.patch_size is None:
+            processor.patch_size = config.vision_config.patch_size
+
         inputs = processor(images=image, text=prompt, return_tensors="pt")
         return inputs

From bceb37128069481dd977d6092499466d1f79f6aa Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Fri, 31 Jan 2025 12:10:46 +0100
Subject: [PATCH 06/12] fix

---
 optimum/intel/openvino/modeling_visual_language.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/optimum/intel/openvino/modeling_visual_language.py b/optimum/intel/openvino/modeling_visual_language.py
index 9bb3b1f679..77f838da8f 100644
--- a/optimum/intel/openvino/modeling_visual_language.py
+++ b/optimum/intel/openvino/modeling_visual_language.py
@@ -1021,7 +1021,11 @@ def preprocess_inputs(
         else:
             prompt = text

-        if hasattr(processor, "patch_size") and processor.patch_size is None:
+        if (
+            getattr(processor, "patch_size", None) is None
+            and hasattr(config, "vision_config")
+            and hasattr(config.vision_config, "patch_size")
+        ):
             processor.patch_size = config.vision_config.patch_size

         inputs = processor(images=image, text=prompt, return_tensors="pt")

From c5202f00515fa1ad69fefe812066c53704990b6d Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Fri, 31 Jan 2025 12:11:23 +0100
Subject: [PATCH 07/12] fix

---
 optimum/intel/openvino/modeling_visual_language.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/optimum/intel/openvino/modeling_visual_language.py b/optimum/intel/openvino/modeling_visual_language.py
index 77f838da8f..2e08652727 100644
--- a/optimum/intel/openvino/modeling_visual_language.py
+++ b/optimum/intel/openvino/modeling_visual_language.py
@@ -1023,8 +1023,8 @@ def preprocess_inputs(

         if (
             getattr(processor, "patch_size", None) is None
-            and hasattr(config, "vision_config")
-            and hasattr(config.vision_config, "patch_size")
+            and getattr(config, "vision_config", None) is not None
+            and getattr(config.vision_config, "patch_size", None) is not None
         ):
             processor.patch_size = config.vision_config.patch_size

From fe1e213b74d9fc7566cd755a8f1022cc9e7fdb2c Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Fri, 31 Jan 2025 12:49:46 +0100
Subject: [PATCH 08/12] fix streamer

---
 optimum/intel/openvino/modeling_decoder.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py
index b13ebc80c6..41e4c6e3e9 100644
--- a/optimum/intel/openvino/modeling_decoder.py
+++ b/optimum/intel/openvino/modeling_decoder.py
@@ -56,6 +56,11 @@


 if TYPE_CHECKING:
+    try:
+        from transformers.generation.streamers import BaseStreamer
+    except Exception:
+        from typing import Generator as BaseStreamer
+
     from transformers.modeling_utils import PreTrainedModel


@@ -705,6 +710,7 @@ def generate(
         prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], List[int]]] = None,
         synced_gpus: Optional[bool] = None,
         assistant_model: Optional["PreTrainedModel"] = None,
+        streamer: Optional["BaseStreamer"] = None,
         negative_prompt_ids: Optional[torch.Tensor] = None,
         negative_prompt_attention_mask: Optional[torch.Tensor] = None,
         **kwargs,
@@ -732,6 +738,7 @@ def generate(
             prefix_allowed_tokens_fn,
             synced_gpus,
             assistant_model,
+            streamer,
             negative_prompt_ids,
             negative_prompt_attention_mask,
             **kwargs,

From c5858ff1794a8c1cefe9ff860247ebf3eff91cbf Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Fri, 31 Jan 2025 13:02:29 +0100
Subject: [PATCH 09/12] fix

---
 tests/openvino/test_modeling.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index 5c6fd2e3ae..918d0e6629 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -2173,10 +2173,6 @@ def test_compare_to_transformers(self, model_arch):
             self.assertIsInstance(component, MODEL_PARTS_CLS_MAPPING[component_name])
         self.assertIsInstance(ov_model.config, PretrainedConfig)

-        # TODO: fix in models
-        if preprocessors.get("processor") is not None:
-            preprocessors["processor"].patch_size = ov_model.config.vision_config.patch_size
-
         inputs = ov_model.preprocess_inputs(**preprocessors, text=prompt, image=self.IMAGE.resize((600, 600)))
         transformers_inputs = copy.deepcopy(inputs)
         test_device = "AUTO"

From bb5ee65fd8e1e5940b9637ff2cc871662a6a1088 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Fri, 31 Jan 2025 13:22:44 +0100
Subject: [PATCH 10/12] fix tests

---
 .../intel/openvino/modeling_visual_language.py | 17 +++++++++++------
 tests/openvino/test_modeling.py                |  9 +++++----
 2 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/optimum/intel/openvino/modeling_visual_language.py b/optimum/intel/openvino/modeling_visual_language.py
index 2e08652727..435f656c60 100644
--- a/optimum/intel/openvino/modeling_visual_language.py
+++ b/optimum/intel/openvino/modeling_visual_language.py
@@ -1021,12 +1021,16 @@ def preprocess_inputs(
         else:
             prompt = text

-        if (
-            getattr(processor, "patch_size", None) is None
-            and getattr(config, "vision_config", None) is not None
-            and getattr(config.vision_config, "patch_size", None) is not None
-        ):
-            processor.patch_size = config.vision_config.patch_size
+        if getattr(processor, "patch_size", None) is None:
+            if (
+                getattr(config, "vision_config", None) is not None
+                and getattr(config.vision_config, "patch_size", None) is not None
+            ):
+                processor.patch_size = config.vision_config.patch_size
+            else:
+                raise ValueError(
+                    "Processor does not have `patch_size` attribute. Please fix the processor or provide `patch_size` in the config."
+                )

         inputs = processor(images=image, text=prompt, return_tensors="pt")
         return inputs
@@ -1923,6 +1927,7 @@ def preprocess_inputs(
         input_ids = tokenizer(text, return_tensors="pt").input_ids
         attention_mask = torch.ones_like(input_ids, dtype=torch.int64)
         result = {"input_ids": input_ids, "attention_mask": attention_mask}
+
         if image is not None:
             result["images"] = processor(images=[image], return_tensors="pt")["pixel_values"]
         return result
diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index 918d0e6629..44884b7c26 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -2311,17 +2311,17 @@ def test_generate_utils(self, model_arch):

     def get_preprocessors(self, model_arch):
         model_id = MODEL_NAMES[model_arch]
+        config = AutoConfig.from_pretrained(model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS)
+
         if model_arch == "nanollava":
-            config = AutoConfig.from_pretrained(model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS)
             processor = AutoProcessor.from_pretrained(
                 config.mm_vision_tower, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS
             )
             tokenizer = AutoTokenizer.from_pretrained(
                 model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS
             )
-            preprocessors = {"processor": processor, "tokenizer": tokenizer}
+            preprocessors = {"processor": processor, "tokenizer": tokenizer, "config": config}
         elif model_arch == "internvl2":
-            config = AutoConfig.from_pretrained(model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS)
             tokenizer = AutoTokenizer.from_pretrained(
                 model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS
             )
@@ -2330,7 +2330,8 @@ def get_preprocessors(self, model_arch):
             processor = AutoProcessor.from_pretrained(
                 model_id, trust_remote_code=model_arch in self.REMOTE_CODE_MODELS
             )
-            preprocessors = {"processor": processor, "tokenizer": None}
+            preprocessors = {"processor": processor, "tokenizer": None, "config": config}
+
         return preprocessors

     @parameterized.expand(SUPPORTED_ARCHITECTURES)

From 000b1ac77185e34dab03fb8781de0642cd2e3015 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Fri, 31 Jan 2025 17:49:41 +0100
Subject: [PATCH 11/12] remove unnecessary

---
 tests/openvino/test_modeling.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index 44884b7c26..ad51b10e5c 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -271,8 +271,7 @@ def test_load_from_hub_and_save_visual_language_model(self):
             else:
                 self.assertEqual(component.request.get_property("PERFORMANCE_HINT"), "LATENCY")

-        processor.patch_size = 16
-        # should be fixed in https://huggingface.co/katuni4ka/tiny-random-llava-ov/blob/main/processor_config.json#L3
+        processor.patch_size = loaded_model.config.vision_config.patch_size
         inputs = processor(images=image, text=prompt, return_tensors="pt")
         set_seed(SEED)
         loaded_model_outputs = loaded_model(**inputs)
@@ -2247,8 +2246,9 @@ def test_llava_with_new_preprocessing(self, model_arch):
         self.assertTrue(processor.patch_size is not None)
         self.assertTrue(processor.vision_feature_select_strategy is not None)
         inputs = processor(images=self.IMAGE, text=prompt, return_tensors="pt")
-        self.assertTrue(
-            (inputs.input_ids == ov_model.config.image_token_index).sum(1).max() >= ov_model.config.image_seq_length
+        self.assertGreaterEqual(
+            (inputs.input_ids == ov_model.config.image_token_index).sum().max().item(),
+            ov_model.config.image_seq_length,
         )
         set_seed(SEED)
         with torch.no_grad():

From f5b30f28234dac3fffa83e0c63a8d4be0c12e8e4 Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Tue, 4 Feb 2025 09:03:39 +0100
Subject: [PATCH 12/12] fix llava new processing test

---
 tests/openvino/test_modeling.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index ad51b10e5c..8ca72d06c2 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -2237,6 +2237,7 @@ def test_llava_with_new_preprocessing(self, model_arch):
             patch_size=config.vision_config.patch_size,
             vision_feature_select_strategy=config.vision_feature_select_strategy,
             trust_remote_code=model_arch in self.REMOTE_CODE_MODELS,
+            num_additional_image_tokens=1,
         )
         transformers_model = self.get_transformer_model_class(model_arch).from_pretrained(model_id)
         ov_model = OVModelForVisualCausalLM.from_pretrained(