Skip to content

Commit ef558f9

Browse files
Add compression tests to internvl2 and phi3v (#999)
* Fix NanoLLava quantization
* Add internvl2 compression tests
* Revert "Fix NanoLLava quantization" (reverts commit 3eba1de)
* Add phi3 compression tests; fix phi3 preprocessors saving with optimum-cli quantization
* Trigger Tests
* Trigger Tests
* Trigger Tests
1 parent 040ee12 commit ef558f9

File tree

4 files changed: +96 / -34 lines changed

optimum/commands/export/openvino.py

+4-5
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,10 @@
2121
from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
2222

2323
from ...exporters import TasksManager
24+
from ...exporters.openvino.convert import save_preprocessors
2425
from ...intel.utils.import_utils import DIFFUSERS_IMPORT_ERROR, is_diffusers_available
2526
from ...intel.utils.modeling_utils import _infer_library_from_model_name_or_path
26-
from ...utils.save_utils import maybe_load_preprocessors, maybe_save_preprocessors
27+
from ...utils.save_utils import maybe_load_preprocessors
2728
from ..base import BaseOptimumCLICommand, CommandInfo
2829

2930

@@ -350,11 +351,9 @@ def run(self):
350351
)
351352
model.save_pretrained(self.args.output)
352353

353-
maybe_save_preprocessors(self.args.model, self.args.output, trust_remote_code=self.args.trust_remote_code)
354+
preprocessors = maybe_load_preprocessors(self.args.model, trust_remote_code=self.args.trust_remote_code)
355+
save_preprocessors(preprocessors, model.config, self.args.output, self.args.trust_remote_code)
354356
if not self.args.disable_convert_tokenizer:
355-
preprocessors = maybe_load_preprocessors(
356-
self.args.model, trust_remote_code=self.args.trust_remote_code
357-
)
358357
maybe_convert_tokenizers(library_name, self.args.output, preprocessors=preprocessors, task=task)
359358
else:
360359
# TODO : add input shapes

optimum/exporters/openvino/convert.py

+24-13
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union
2222

2323
import onnx
24+
from transformers import PretrainedConfig
2425
from transformers.generation import GenerationMixin
2526
from transformers.utils import is_tf_available, is_torch_available
2627

@@ -711,19 +712,7 @@ def export_from_model(
711712
f"The generation config will not be saved, saving failed with following error:\n{exception}"
712713
)
713714

714-
model_name_or_path = model.config._name_or_path
715-
if preprocessors is not None:
716-
# phi3-vision processor does not have chat_template attribute that breaks Processor saving on disk
717-
if is_transformers_version(">=", "4.45") and model_type == "phi3-v" and len(preprocessors) > 1:
718-
if not hasattr(preprocessors[1], "chat_template"):
719-
preprocessors[1].chat_template = getattr(preprocessors[0], "chat_template", None)
720-
for processor in preprocessors:
721-
try:
722-
processor.save_pretrained(output)
723-
except Exception as ex:
724-
logger.error(f"Saving {type(processor)} failed with {ex}")
725-
else:
726-
maybe_save_preprocessors(model_name_or_path, output, trust_remote_code=trust_remote_code)
715+
save_preprocessors(preprocessors, model.config, output, trust_remote_code)
727716

728717
files_subpaths = ["openvino_" + model_name + ".xml" for model_name in models_and_export_configs.keys()]
729718

@@ -838,6 +827,28 @@ def export_tokenizer(
838827
save_model(model, output / file_name.format(suffix))
839828

840829

830+
def save_preprocessors(
831+
preprocessors: List, config: PretrainedConfig, output: Union[str, Path], trust_remote_code: bool
832+
):
833+
model_name_or_path = config._name_or_path
834+
if hasattr(config, "export_model_type"):
835+
model_type = config.export_model_type.replace("_", "-")
836+
else:
837+
model_type = config.model_type.replace("_", "-")
838+
if preprocessors is not None:
839+
# phi3-vision processor does not have chat_template attribute that breaks Processor saving on disk
840+
if is_transformers_version(">=", "4.45") and model_type == "phi3-v" and len(preprocessors) > 1:
841+
if not hasattr(preprocessors[1], "chat_template"):
842+
preprocessors[1].chat_template = getattr(preprocessors[0], "chat_template", None)
843+
for processor in preprocessors:
844+
try:
845+
processor.save_pretrained(output)
846+
except Exception as ex:
847+
logger.error(f"Saving {type(processor)} failed with {ex}")
848+
else:
849+
maybe_save_preprocessors(model_name_or_path, output, trust_remote_code=trust_remote_code)
850+
851+
841852
def _add_runtime_options_to_rt_info(model: Model, options: Dict):
842853
"""
843854
Add runtime optinos

tests/openvino/test_exporters_cli.py

+20
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,26 @@ class OVCLIExportTestCase(unittest.TestCase):
152152
]
153153
)
154154

155+
if is_transformers_version(">=", "4.45.0"):
156+
TEST_4BIT_CONFIGURATIONS.extend(
157+
[
158+
(
159+
"image-text-to-text",
160+
"internvl2",
161+
'int4 --group-size 4 --ratio 0.9 --sensitivity-metric "hessian_input_activation" '
162+
"--dataset contextual --num-samples 1 --trust-remote-code",
163+
{"int8": 6, "int4": 24},
164+
),
165+
(
166+
"image-text-to-text",
167+
"phi3_v",
168+
'int4 --group-size 4 --ratio 0.9 --sensitivity-metric "mean_activation_magnitude" '
169+
"--dataset contextual --num-samples 1 --trust-remote-code",
170+
{"int8": 4, "int4": 14},
171+
),
172+
]
173+
)
174+
155175
def _openvino_export(self, model_name: str, task: str):
156176
with TemporaryDirectory() as tmpdir:
157177
main_export(

tests/openvino/test_quantization.py

+48-16
Original file line numberDiff line numberDiff line change
@@ -347,23 +347,55 @@ class OVWeightCompressionTest(unittest.TestCase):
347347
)
348348

349349
if is_transformers_version(">=", "4.45.0"):
350-
LOAD_IN_4_BITS_SCOPE.append(
351-
(
352-
OVModelForVisualCausalLM,
353-
"minicpmv",
354-
True,
355-
dict(
356-
bits=4,
357-
group_size=16,
358-
dataset="contextual",
359-
ratio=0.8,
360-
sensitivity_metric="mean_activation_magnitude",
361-
num_samples=1,
362-
processor=MODEL_NAMES["minicpmv"],
363-
trust_remote_code=True,
350+
LOAD_IN_4_BITS_SCOPE.extend(
351+
[
352+
(
353+
OVModelForVisualCausalLM,
354+
"minicpmv",
355+
True,
356+
dict(
357+
bits=4,
358+
group_size=16,
359+
dataset="contextual",
360+
ratio=0.8,
361+
sensitivity_metric="mean_activation_magnitude",
362+
num_samples=1,
363+
processor=MODEL_NAMES["minicpmv"],
364+
trust_remote_code=True,
365+
),
366+
{"int4": 22, "int8": 8},
364367
),
365-
{"int4": 22, "int8": 8},
366-
)
368+
(
369+
OVModelForVisualCausalLM,
370+
"internvl2",
371+
True,
372+
dict(
373+
bits=4,
374+
group_size=4,
375+
dataset="contextual",
376+
ratio=0.8,
377+
sensitivity_metric="mean_activation_magnitude",
378+
num_samples=1,
379+
trust_remote_code=True,
380+
),
381+
{"int4": 22, "int8": 8},
382+
),
383+
(
384+
OVModelForVisualCausalLM,
385+
"phi3_v",
386+
True,
387+
dict(
388+
bits=4,
389+
group_size=16,
390+
dataset="contextual",
391+
ratio=0.8,
392+
sensitivity_metric="mean_activation_magnitude",
393+
num_samples=1,
394+
trust_remote_code=True,
395+
),
396+
{"int4": 14, "int8": 4},
397+
),
398+
]
367399
)
368400

369401
SUPPORTED_ARCHITECTURES_WITH_AUTO_COMPRESSION = [

0 commit comments

Comments (0)