From bbec36a7d1ba70b53c3d5f7d59cc3eeca5a71074 Mon Sep 17 00:00:00 2001
From: eaidova
Date: Wed, 29 Jan 2025 16:54:07 +0400
Subject: [PATCH 1/4] align nanollava input with original model

---
 optimum/intel/openvino/modeling_visual_language.py | 7 ++++++-
 tests/openvino/test_modeling.py                    | 6 +-----
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/optimum/intel/openvino/modeling_visual_language.py b/optimum/intel/openvino/modeling_visual_language.py
index 1c0e35cca2..813b9ace2e 100644
--- a/optimum/intel/openvino/modeling_visual_language.py
+++ b/optimum/intel/openvino/modeling_visual_language.py
@@ -695,8 +695,10 @@ def forward(
         image_grid_thw=None,
         video_grid_thw=None,
         rope_deltas=None,
+        images=None,
         **kwargs,
     ):
+        pixel_values = pixel_values if pixel_values is not None else images
         inputs_embeds, attention_mask, position_ids = self.get_multimodal_embeddings(
             input_ids,
             pixel_values,
@@ -794,6 +796,9 @@ def prepare_inputs_for_generation(
         else:
             model_inputs = {"input_ids": input_ids}
 
+        if pixel_values is None:
+            pixel_values = kwargs.get("images")
+
         model_inputs.update(
             {
                 "position_ids": position_ids,
@@ -1907,7 +1912,7 @@ def preprocess_inputs(
             attention_mask = torch.ones_like(input_ids, dtype=torch.int64)
         result = {"input_ids": input_ids, "attention_mask": attention_mask}
         if image is not None:
-            result["pixel_values"] = processor(images=[image], return_tensors="pt")["pixel_values"]
+            result["images"] = processor(images=[image], return_tensors="pt")["pixel_values"]
         return result
 
 
diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index 57d4b64764..097f20991a 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -2182,11 +2182,7 @@ def test_compare_to_transformers(self, model_arch):
         ov_model.clear_requests()
         self._check_device_and_request(ov_model, test_device, False)
 
-        # nanollava pixel_values input named as images
-        if model_arch == "nanollava":
-            pixel_values = transformers_inputs.pop("pixel_values", None)
-            transformers_inputs["images"] = pixel_values
-        # pytorch minicpmv is not designed to be used via forward
+        # pytorch minicpmv and internvl2 is not designed to be used via forward
         if model_arch not in ["minicpmv", "internvl2"]:
             set_seed(SEED)
             ov_outputs = ov_model(**inputs)

From 5fda99f93c644fb5188543bf6f73f670d2ea2c82 Mon Sep 17 00:00:00 2001
From: eaidova
Date: Wed, 29 Jan 2025 18:35:50 +0400
Subject: [PATCH 2/4] update test refs

---
 tests/openvino/test_exporters_cli.py | 2 +-
 tests/openvino/test_quantization.py  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py
index 6d1bf5eb5e..04478a4445 100644
--- a/tests/openvino/test_exporters_cli.py
+++ b/tests/openvino/test_exporters_cli.py
@@ -197,7 +197,7 @@ class OVCLIExportTestCase(unittest.TestCase):
             "image-text-to-text",
             "nanollava",
             "int4 --group-size 8 --ratio 0.8 --trust-remote-code",
-            [{"int8": 16, "int4": 14}, {"int8": 15}, {"int8": 1}],
+            [{"int8": 14, "int4": 16}, {"int8": 15}, {"int8": 1}],
         ),
         (
             "image-text-to-text",
diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py
index c4c0ff247d..baf28ba72c 100644
--- a/tests/openvino/test_quantization.py
+++ b/tests/openvino/test_quantization.py
@@ -466,7 +466,7 @@ class OVWeightCompressionTest(unittest.TestCase):
                 tokenizer=MODEL_NAMES["nanollava"],
                 trust_remote_code=True,
             ),
-            [{"int8": 16, "int4": 14}, {"int8": 15}, {"int8": 1}],
+            [{"int8": 14, "int4": 16}, {"int8": 15}, {"int8": 1}],
         ),
     ]
 )

From 4d0a522632ce695829af05bea1d749cdb2b9ddf6 Mon Sep 17 00:00:00 2001
From: eaidova
Date: Wed, 29 Jan 2025 19:03:01 +0400
Subject: [PATCH 3/4] properly fix quantization

---
 optimum/intel/openvino/modeling_visual_language.py | 2 ++
 tests/openvino/test_exporters_cli.py               | 2 +-
 tests/openvino/test_quantization.py                | 2 +-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/optimum/intel/openvino/modeling_visual_language.py b/optimum/intel/openvino/modeling_visual_language.py
index 813b9ace2e..eb94afadc5 100644
--- a/optimum/intel/openvino/modeling_visual_language.py
+++ b/optimum/intel/openvino/modeling_visual_language.py
@@ -1738,6 +1738,8 @@ def get_multimodal_embeddings(
         vision_embeds = None
         IGNORE_INDEX = -100
         IMAGE_TOKEN_INDEX = -200
+        if pixel_values is None and "images" in kwargs:
+            pixel_values = kwargs["images"]
         if pixel_values is not None:
             vision_embeds = self.get_vision_embeddings(pixel_values, input_ids=input_ids, **kwargs)
         if vision_embeds is None:
diff --git a/tests/openvino/test_exporters_cli.py b/tests/openvino/test_exporters_cli.py
index 04478a4445..6d1bf5eb5e 100644
--- a/tests/openvino/test_exporters_cli.py
+++ b/tests/openvino/test_exporters_cli.py
@@ -197,7 +197,7 @@ class OVCLIExportTestCase(unittest.TestCase):
             "image-text-to-text",
             "nanollava",
             "int4 --group-size 8 --ratio 0.8 --trust-remote-code",
-            [{"int8": 14, "int4": 16}, {"int8": 15}, {"int8": 1}],
+            [{"int8": 16, "int4": 14}, {"int8": 15}, {"int8": 1}],
         ),
         (
             "image-text-to-text",
diff --git a/tests/openvino/test_quantization.py b/tests/openvino/test_quantization.py
index baf28ba72c..c4c0ff247d 100644
--- a/tests/openvino/test_quantization.py
+++ b/tests/openvino/test_quantization.py
@@ -466,7 +466,7 @@ class OVWeightCompressionTest(unittest.TestCase):
                 tokenizer=MODEL_NAMES["nanollava"],
                 trust_remote_code=True,
             ),
-            [{"int8": 14, "int4": 16}, {"int8": 15}, {"int8": 1}],
+            [{"int8": 16, "int4": 14}, {"int8": 15}, {"int8": 1}],
         ),
     ]
 )

From 2927556de40eaac7862349b2f792d39c5adfab70 Mon Sep 17 00:00:00 2001
From: Ekaterina Aidova
Date: Wed, 29 Jan 2025 19:06:07 +0400
Subject: [PATCH 4/4] Update tests/openvino/test_modeling.py

---
 tests/openvino/test_modeling.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py
index 097f20991a..abe3d7d363 100644
--- a/tests/openvino/test_modeling.py
+++ b/tests/openvino/test_modeling.py
@@ -2182,7 +2182,7 @@ def test_compare_to_transformers(self, model_arch):
         ov_model.clear_requests()
         self._check_device_and_request(ov_model, test_device, False)
 
-        # pytorch minicpmv and internvl2 is not designed to be used via forward
+        # pytorch minicpmv and internvl2 are not designed to be used via forward
        if model_arch not in ["minicpmv", "internvl2"]:
             set_seed(SEED)
             ov_outputs = ov_model(**inputs)
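
Usage note (not part of the patches above; a minimal sketch of the behaviour they introduce): after PATCH 1/4 and 3/4, the OpenVINO nanollava wrapper accepts the image tensor under the original model's "images" name and routes it to pixel_values internally, so the output of preprocess_inputs can be fed straight into generation. The checkpoint id, processor loading, and any keyword names not visible in the diffs above are illustrative assumptions, not verified API.

    from PIL import Image
    from transformers import AutoProcessor, AutoTokenizer
    from optimum.intel import OVModelForVisualCausalLM

    model_id = "qnguyen3/nanoLLaVA"  # assumed example checkpoint
    model = OVModelForVisualCausalLM.from_pretrained(model_id, trust_remote_code=True)
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)  # assumed processor source

    image = Image.open("example.png")
    # preprocess_inputs now stores the image tensor under "images" (PATCH 1/4) ...
    inputs = model.preprocess_inputs(
        text="What is on the picture?", image=image, processor=processor, tokenizer=tokenizer
    )
    # ... and forward/get_multimodal_embeddings pick "images" up as pixel_values (PATCH 1/4 and 3/4).
    output_ids = model.generate(**inputs, max_new_tokens=20)
    print(tokenizer.batch_decode(output_ids, skip_special_tokens=True))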