Skip to content

Commit 8416414

Browse files
committed
fix test for video
1 parent c9c8beb commit 8416414

File tree

2 files changed

+4
-4
lines changed

2 files changed

+4
-4
lines changed

optimum/intel/openvino/modeling_visual_language.py

+3 -3
Original file line number | Diff line number | Diff line change
@@ -1402,7 +1402,7 @@ def preprocess_inputs(
14021402
"Processor does not have `patch_size` attribute. Please fix the processor or provide `patch_size` in the config."
14031403
)
14041404

1405-
inputs = processor(images=image, text=prompt, videos=video, return_tensors="pt")
1405+
inputs = processor(images=image, text=prompt, videos=[video], return_tensors="pt")
14061406
return inputs
14071407

14081408
def get_multimodal_embeddings(
@@ -2556,7 +2556,7 @@ def preprocess_inputs(
25562556

25572557
text_prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
25582558

2559-
inputs = processor(images=image, text=text_prompt, videos=video, return_tensors="pt")
2559+
inputs = processor(images=image, text=text_prompt, videos=[video], return_tensors="pt")
25602560
return inputs
25612561

25622562

@@ -2992,7 +2992,7 @@ def preprocess_inputs(
29922992

29932993
text_prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)
29942994

2995-
inputs = processor(images=image, text=text_prompt, videos=video, return_tensors="pt")
2995+
inputs = processor(images=image, text=text_prompt, videos=[video], return_tensors="pt")
29962996
return inputs
29972997

29982998
# Copied from https://github.com/huggingface/transformers/blob/v4.45.2/src/transformers/models/qwen2_vl/modeling_qwen2_vl.py#L1602

tests/openvino/test_modeling.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -2362,7 +2362,7 @@ def test_generate_utils(self, model_arch):
23622362
video_path = hf_hub_download(
23632363
repo_id="raushan-testing-hf/videos-test", filename="sample_demo_1.mp4", repo_type="dataset"
23642364
)
2365-
input_video = load_video(video_path, num_frames=4)
2365+
input_video, _ = load_video(video_path, num_frames=2)
23662366
question = "Why is this video funny?"
23672367
inputs = model.preprocess_inputs(**preprocessors, text=question, video=input_video)
23682368
outputs = model.generate(**inputs, max_new_tokens=10)

0 commit comments

Comments
 (0)