
Commit cbe7460 (1 parent: ac5171e): add tests
5 files changed: +24 -12 lines

optimum/exporters/openvino/model_patcher.py (+10 -1)
@@ -3979,7 +3979,16 @@ def __init__(
 
         @functools.wraps(model.__orig_forward)
         def patched_forward(*args, **kwargs):
-            return model.model.forward(*args, **kwargs)
+            fwd_args = inspect.signature(model.__orig_forward).parameters
+            internal_fwd_args = inspect.signature(model.model.forward).parameters
+            inputs = {}
+            for arg, fwd_arg_name in zip(args, fwd_args):
+                if fwd_arg_name in internal_fwd_args:
+                    inputs[fwd_arg_name] = arg
+            for key, value in kwargs.items():
+                if key in internal_fwd_args:
+                    inputs[key] = value
+            return model.model.forward(**inputs)
 
         model.forward = patched_forward
         self._internal_patcher = internal_patcher
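The patched forward maps positional arguments onto the original forward's parameter names and then forwards only those the inner model's forward actually accepts, so unsupported arguments are dropped instead of raising a TypeError. A minimal, self-contained sketch of that pattern (make_filtering_forward and the toy outer/inner functions are illustrative, not part of the patch):

import functools
import inspect

def make_filtering_forward(outer_fn, inner_fn):
    # Keep outer_fn's interface, but forward only the arguments inner_fn accepts.
    outer_params = inspect.signature(outer_fn).parameters
    inner_params = inspect.signature(inner_fn).parameters

    @functools.wraps(outer_fn)
    def patched(*args, **kwargs):
        inputs = {}
        # Map positional args onto the outer signature's parameter names.
        for value, name in zip(args, outer_params):
            if name in inner_params:
                inputs[name] = value
        # Keep only keyword args the inner callable understands.
        for name, value in kwargs.items():
            if name in inner_params:
                inputs[name] = value
        return inner_fn(**inputs)

    return patched

# Example: the inner function does not accept `labels`, so it is silently dropped.
def outer(input_ids, attention_mask=None, labels=None):
    ...

def inner(input_ids, attention_mask=None):
    return (input_ids, attention_mask)

forward = make_filtering_forward(outer, inner)
print(forward([1, 2, 3], labels=[0]))  # -> ([1, 2, 3], None)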

optimum/exporters/openvino/stateful.py (-1)
@@ -290,7 +290,6 @@ def patch_stateful_decoder(config: PretrainedConfig, ov_model: ov.Model):
         openvino model
     """
 
-    log.warn(ov_model)
     key_value_input_names = [
         key_name for key in ov_model.inputs for key_name in key.get_names() if "key_values" in key_name
     ]

optimum/intel/openvino/modeling_visual_language.py (+9 -6)
@@ -349,13 +349,13 @@ def __init__(
         language_model: ov.Model,
         text_embeddings: ov.Model,
         vision_embeddings: ov.Model,
-        lm_head: Optional[ov.Model] = None,
         config: PretrainedConfig = None,
         device: str = "CPU",
         dynamic_shapes: bool = True,
         ov_config: Optional[Dict[str, str]] = None,
         model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
         quantization_config: Union[OVWeightQuantizationConfig, Dict] = None,
+        lm_head: Optional[ov.Model] = None,
         **kwargs,
     ):
         self.config = config
@@ -717,6 +717,9 @@ def components(self):
     def _submodel_names(self):
         model_names = ["lm_model", "text_embeddings_model", "vision_embeddings_model"]
         for part in self.additional_parts:
+            if part == "lm_head" and getattr(self, part + "_model", None) is not None:
+                model_names.append(part + "_model")
+                continue
             if getattr(self, part, None) is not None:
                 model_names.append(part + "_model")
         return model_names
@@ -2438,6 +2441,7 @@ def generate_image(
         image_token_num_per_image: int = 576,
         img_size: int = 384,
         patch_size: int = 16,
+        generator=None
     ):
         from PIL import Image
 
@@ -2486,7 +2490,7 @@ def generate_image(
             logits = logit_uncond + cfg_weight * (logit_cond - logit_uncond)
             probs = torch.softmax(logits / temperature, dim=-1)
 
-            next_token = torch.multinomial(probs, num_samples=1)
+            next_token = torch.multinomial(probs, num_samples=1) if generator is None else torch.multinomial(probs, num_samples=1, generator=generator)
             generated_tokens[:, i] = next_token.squeeze(dim=-1)
 
             next_token = torch.cat([next_token.unsqueeze(dim=1), next_token.unsqueeze(dim=1)], dim=1).view(-1)
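Threading the new generator argument down to torch.multinomial makes the sampling step reproducible. A short illustration of the behavior being relied on (the probability values are made up):

import torch

probs = torch.tensor([[0.1, 0.2, 0.7]])

# Seeded generator: repeating the seed repeats the draw.
gen = torch.Generator()
gen.manual_seed(42)
first = torch.multinomial(probs, num_samples=1, generator=gen)
gen.manual_seed(42)
second = torch.multinomial(probs, num_samples=1, generator=gen)
assert torch.equal(first, second)

# Without a generator the draw uses the global RNG state, as before the change.
unseeded = torch.multinomial(probs, num_samples=1)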
@@ -2529,11 +2533,10 @@ def preprocess_inputs(
                 },
                 {"role": "<|Assistant|>", "content": ""},
             ]
-            prompt = None
+            prepare_inputs = processor(conversations=conversation, images=[image], force_batchify=True)
         else:
-            conversation = None
-            prompt = text
-        prepare_inputs = processor(prompt=prompt, conversations=conversation, images=[image], force_batchify=True)
+            tokenizer = tokenizer if tokenizer is not None else processor.tokenizer
+            prepare_inputs = tokenizer(text, return_tensors="pt")
         required_keys = ["input_ids", "pixel_values", "images_seq_mask", "images_emb_mask"]
         inputs = {}
         for key in required_keys:
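With this change the processor is only used when an image is supplied, and plain text prompts fall back to the tokenizer. A rough sketch of the resulting dispatch, assuming a processor object that exposes a tokenizer attribute (the helper name and the exact conversation payload are illustrative, not taken from the source):

def prepare_janus_inputs(text, image=None, processor=None, tokenizer=None):
    if image is not None:
        # Image + text: build a chat-style conversation and let the processor batchify it.
        conversation = [
            {"role": "<|User|>", "content": text, "images": [image]},
            {"role": "<|Assistant|>", "content": ""},
        ]
        return processor(conversations=conversation, images=[image], force_batchify=True)
    # Text only: tokenize directly, preferring an explicitly passed tokenizer.
    tokenizer = tokenizer if tokenizer is not None else processor.tokenizer
    return tokenizer(text, return_tensors="pt")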

tests/openvino/test_modeling.py (+4 -4)
@@ -2109,9 +2109,9 @@ class OVModelForVisualCausalLMIntegrationTest(unittest.TestCase):
     if is_transformers_version(">=", "4.40.0"):
         SUPPORTED_ARCHITECTURES += ["llava_next", "nanollava"]
     if is_transformers_version(">=", "4.45.0"):
-        SUPPORTED_ARCHITECTURES += ["minicpmv", "internvl2", "phi3_v", "qwen2_vl"]
+        SUPPORTED_ARCHITECTURES += ["janus", "minicpmv", "internvl2", "phi3_v", "qwen2_vl"]
     TASK = "image-text-to-text"
-    REMOTE_CODE_MODELS = ["internvl2", "minicpmv", "nanollava", "phi3_v"]
+    REMOTE_CODE_MODELS = ["internvl2", "minicpmv", "nanollava", "phi3_v", "janus"]
 
     IMAGE = Image.open(
         requests.get(
@@ -2210,8 +2210,8 @@ def test_compare_to_transformers(self, model_arch):
             with torch.no_grad():
                 transformers_outputs = transformers_model.generate(**transformers_inputs, generation_config=gen_config)
 
-            # original minicpmv, internvl always skip input tokens in generation results, while transformers based approach provide them
-            if model_arch in ["minicpmv", "internvl2"]:
+            # original minicpmv, internvl, janus always skip input tokens in generation results, while transformers based approach provide them
+            if model_arch in ["minicpmv", "internvl2", "janus"]:
                 ov_outputs = ov_outputs[:, inputs["input_ids"].shape[1] :]
             self.assertTrue(
                 torch.equal(ov_outputs, transformers_outputs),
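The slicing in that branch only drops the echoed prompt so both outputs contain just the newly generated tokens. A tiny standalone example of the same trimming (the token ids are made up):

import torch

input_ids = torch.tensor([[101, 7592, 102]])            # 3 prompt tokens
full_output = torch.tensor([[101, 7592, 102, 11, 12]])  # prompt echoed + 2 new tokens
new_tokens = full_output[:, input_ids.shape[1]:]        # keep only the generated part
assert torch.equal(new_tokens, torch.tensor([[11, 12]]))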

tests/openvino/utils_tests.py (+1)
@@ -169,6 +169,7 @@
     "st-bert": "sentence-transformers/all-MiniLM-L6-v2",
     "st-mpnet": "sentence-transformers/all-mpnet-base-v2",
     "sana": "katuni4ka/tiny-random-sana",
+    "janus": "katuni4ka/tiny-random-janus"
 }