Skip to content

Commit b275cff

Browse files
Tweaks
1 parent 4aa12d5 commit b275cff

File tree

1 file changed

+8
-11
lines changed

1 file changed

+8
-11
lines changed

optimum/intel/openvino/quantization.py

+8 -11
Original file line number | Diff line number | Diff line change
@@ -747,26 +747,22 @@ def _prepare_visual_causal_lm_dataset(self, config: OVWeightQuantizationConfig,
747747
pbar = tqdm(desc="Collecting calibration dataset", total=num_samples)
748748
for item in dataset:
749749
image_url = item[dataset_metadata["inputs"]["image_url"]]
750-
instruction = item[dataset_metadata["inputs"]["instruction"]]
751750
image = Image.open(requests.get(image_url, stream=True).raw)
752751

752+
instruction = item[dataset_metadata["inputs"]["instruction"]]
753753
chat_template = [{"role": "user", "content": [{"type": "text", "text": instruction}, {"type": "image"}]}]
754754
prompt = processor.apply_chat_template(chat_template, add_generation_prompt=True)
755-
756755
inputs = processor(images=image, text=prompt, return_tensors="pt")
757-
if inputs.input_ids.size(1) > max_tokens:
758-
continue
759756
input_ids = inputs.input_ids
760-
attention_mask = inputs.attention_mask
761-
position_ids = torch.arange(attention_mask.size(1)).unsqueeze(0).to(attention_mask.device)
762-
pixel_values = inputs.pixel_values
763-
image_sizes = inputs.image_sizes
757+
if input_ids.size(1) > max_tokens:
758+
continue
764759

760+
position_ids = torch.arange(inputs.input_ids.size(1)).unsqueeze(0).to(inputs.input_ids.device)
765761
inputs_embeds, attention_mask, position_ids = self.model.get_multimodal_embeddings(
766762
input_ids,
767-
pixel_values,
768-
image_sizes=image_sizes,
769-
attention_mask=attention_mask,
763+
inputs.pixel_values,
764+
image_sizes=inputs.image_sizes,
765+
attention_mask=inputs.attention_mask,
770766
position_ids=position_ids,
771767
)
772768

@@ -776,6 +772,7 @@ def _prepare_visual_causal_lm_dataset(self, config: OVWeightQuantizationConfig,
776772
position_ids=position_ids,
777773
inputs_embeds=inputs_embeds,
778774
)
775+
779776
pbar.update(1)
780777
calibration_dataset.append(language_model_inputs)
781778
if len(calibration_dataset) == num_samples:

0 commit comments

Comments
 (0)