@@ -1402,7 +1402,7 @@ def preprocess_inputs(
                "Processor does not have `patch_size` attribute. Please fix the processor or provide `patch_size` in the config."
            )

-        inputs = processor(images=image, text=prompt, videos=video, return_tensors="pt")
+        inputs = processor(images=image, text=prompt, videos=[video], return_tensors="pt")
         return inputs

     def get_multimodal_embeddings(
@@ -2556,7 +2556,7 @@ def preprocess_inputs(

         text_prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)

-        inputs = processor(images=image, text=text_prompt, videos=video, return_tensors="pt")
+        inputs = processor(images=image, text=text_prompt, videos=[video], return_tensors="pt")
         return inputs

@@ -2992,7 +2992,7 @@ def preprocess_inputs(

         text_prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)

-        inputs = processor(images=image, text=text_prompt, videos=video, return_tensors="pt")
+        inputs = processor(images=image, text=text_prompt, videos=[video], return_tensors="pt")
         return inputs

     # Copied from https://github.com/huggingface/transformers/blob/v4.45.2/src/transformers/models/qwen2_vl/modeling_qwen2_vl.py#L1602
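
Context for the change above, as a minimal sketch: `transformers` multimodal processors treat the `videos` argument as a batch, i.e. a list of videos where each video is itself a sequence of frames, so a single clip has to be wrapped in a list. The checkpoint name below is only illustrative; any video-capable processor follows the same calling pattern.

```python
import numpy as np
from transformers import AutoProcessor

# Illustrative checkpoint, not the one this diff targets.
processor = AutoProcessor.from_pretrained("llava-hf/LLaVA-NeXT-Video-7B-hf")

# A single video is a sequence of frames: (num_frames, height, width, channels).
video = np.zeros((8, 336, 336, 3), dtype=np.uint8)
prompt = "USER: <video> What is happening in this clip? ASSISTANT:"

# `videos` expects a batch (a list of videos), hence the wrapping: videos=[video].
inputs = processor(text=prompt, videos=[video], return_tensors="pt")
```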