Commit cdbff81

Merge branch 'main' into bump-release
2 parents 8fb8cfe + bc5051f

14 files changed (+698 −168 lines)

optimum/commands/export/openvino.py (+11)

@@ -119,6 +119,15 @@ def parse_args_openvino(parser: "ArgumentParser"):
             "or ['conceptual_captions','laion/220k-GPT4Vision-captions-from-LIVIS','laion/filtered-wit'] for diffusion models."
         ),
     )
+    optional_group.add_argument(
+        "--all-layers",
+        action="store_true",
+        default=None,
+        help=(
+            "Whether embeddings and last MatMul layers should be compressed to INT4. If not provided and weight "
+            "compression is applied, they are compressed to INT8."
+        ),
+    )
     optional_group.add_argument(
         "--disable-stateful",
         action="store_true",
@@ -198,6 +207,7 @@ def run(self):
             and self.args.ratio is None
             and self.args.group_size is None
             and self.args.sym is None
+            and self.args.all_layers is None
             and self.args.model in _DEFAULT_4BIT_CONFIGS
         ):
             quantization_config = _DEFAULT_4BIT_CONFIGS[self.args.model]
@@ -207,6 +217,7 @@
                 "ratio": 1 if is_int8 else (self.args.ratio or 0.8),
                 "sym": self.args.sym or False,
                 "group_size": -1 if is_int8 else self.args.group_size,
+                "all_layers": None if is_int8 else self.args.all_layers,
             }

         if self.args.weight_format in {"int4_sym_g128", "int4_asym_g128", "int4_sym_g64", "int4_asym_g64"}:
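For context, a hypothetical invocation of the new flag through the export CLI. The model id and output directory are placeholders, and the plain "int4" weight-format alias is assumed to be available in this version; when --all-layers is passed with a 4-bit format, embeddings and last MatMul layers are compressed to INT4 as well instead of staying at INT8:

optimum-cli export openvino --model meta-llama/Llama-2-7b-chat-hf --weight-format int4 --all-layers ov_llama_int4

Leaving the flag at its default of None (rather than False) is what lets the run() check above fall back to a model-specific entry in _DEFAULT_4BIT_CONFIGS when no quantization option was set explicitly.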

optimum/exporters/openvino/convert.py (+6 −1)

@@ -614,7 +614,12 @@ def export_from_model(
     model.config.save_pretrained(output)
     generation_config = getattr(model, "generation_config", None)
     if generation_config is not None:
-        generation_config.save_pretrained(output)
+        try:
+            generation_config.save_pretrained(output)
+        except Exception as exception:
+            logger.warning(
+                f"The generation config will not be saved, saving failed with the following error:\n{exception}"
+            )

     model_name_or_path = model.config._name_or_path
     maybe_save_preprocessors(model_name_or_path, output, trust_remote_code=trust_remote_code)
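A minimal sketch of the failure mode this guards against, assuming (as in recent transformers releases) that GenerationConfig.save_pretrained() validates the config and refuses to write one holding conflicting values; the triggering values below are illustrative, not taken from this commit:

from transformers import GenerationConfig

# Illustrative conflict: a sampling knob is set while sampling is disabled.
generation_config = GenerationConfig(do_sample=False, temperature=0.4)

try:
    generation_config.save_pretrained("exported_model")
except Exception as exception:  # e.g. ValueError raised by config validation
    print(f"The generation config will not be saved, saving failed with the following error:\n{exception}")

With the try/except in place, a checkpoint shipping an invalid generation config no longer aborts the whole export; the OpenVINO model is still written and only the generation config is skipped, with a warning.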

optimum/exporters/openvino/model_configs.py (+148 −2)

@@ -42,15 +42,18 @@
 from optimum.utils.normalized_config import NormalizedTextConfig

 from .model_patcher import (
+    AquilaModelPatcher,
     BaichuanModelPatcher,
     ChatGLMModelPatcher,
     GemmaModelPatcher,
-    InternLMPatcher,
+    InternLM2Patcher,
+    InternLMModelPatcher,
     LlamaModelPatcher,
     MixtralModelPatcher,
     MPTModelPatcher,
     Phi3ModelPatcher,
     QwenModelPatcher,
+    XverseModelPatcher,
 )


@@ -461,7 +464,7 @@ class InternLM2OpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig):
     def patch_model_for_export(
         self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
     ) -> "ModelPatcher":
-        return InternLMPatcher(self, model, model_kwargs=model_kwargs)
+        return InternLM2Patcher(self, model, model_kwargs=model_kwargs)


 @register_in_tasks_manager("orion", *["text-generation", "text-generation-with-past"], library_name="transformers")
@@ -501,6 +504,12 @@ def patch_model_for_export(
     library_name="transformers",
 )
 class Phi3OpenVINOConfig(PhiOnnxConfig):
+    DUMMY_INPUT_GENERATOR_CLASSES = (
+        MistralDummyPastKeyValuesGenerator,
+    ) + TextDecoderOnnxConfig.DUMMY_INPUT_GENERATOR_CLASSES
+    DUMMY_PKV_GENERATOR_CLASS = MistralDummyPastKeyValuesGenerator
+    NORMALIZED_CONFIG_CLASS = NormalizedTextConfig.with_args(num_key_value_heads="num_key_value_heads", allow_new=True)
+
     def patch_model_for_export(
         self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
     ) -> "ModelPatcher":
@@ -608,3 +617,140 @@ def outputs(self) -> Dict[str, Dict[int, str]]:
         return {
             "sample": {0: "batch_size", 2: "height", 3: "width"},
         }
+
+
+@register_in_tasks_manager(
+    "persimmon",
+    *[
+        "feature-extraction",
+        "feature-extraction-with-past",
+        "text-generation",
+        "text-generation-with-past",
+        "text-classification",
+    ],
+    library_name="transformers",
+)
+class PersimmonOpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig):
+    DEFAULT_ONNX_OPSET = 14
+    NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
+
+
+@register_in_tasks_manager("biogpt", *["text-generation", "text-generation-with-past"], library_name="transformers")
+class BioGPTOpenVINOConfig(TextDecoderOnnxConfig):
+    # BioGPT does not require position_ids input.
+    DEFAULT_ONNX_OPSET = 13
+    NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
+
+
+@register_in_tasks_manager(
+    "gpt-neox-japanese", *["text-generation", "text-generation-with-past"], library_name="transformers"
+)
+class GPTNeoxJapaneseOpenVINOConfig(TextDecoderOnnxConfig):
+    # GPTNeoxJapanese does not require position_ids input.
+    DEFAULT_ONNX_OPSET = 13
+    NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
+
+
+@register_in_tasks_manager(
+    "cohere",
+    *[
+        "feature-extraction",
+        "feature-extraction-with-past",
+        "text-generation",
+        "text-generation-with-past",
+        "text-classification",
+    ],
+    library_name="transformers",
+)
+class CohereOpenVINOConfig(LlamaOpenVINOConfig):
+    pass
+
+
+@register_in_tasks_manager("xglm", *["text-generation", "text-generation-with-past"], library_name="transformers")
+class XGLMConfig(TextDecoderWithPositionIdsOnnxConfig):
+    DEFAULT_ONNX_OPSET = 13
+    NORMALIZED_CONFIG_CLASS = NormalizedTextConfig.with_args(
+        num_attention_heads="attention_heads", hidden_size="d_model"
+    )
+
+
+class AquilaDummyPastKeyValuesGenerator(DummyPastKeyValuesGenerator):
+    def __init__(
+        self,
+        task: str,
+        normalized_config: NormalizedTextConfig,
+        batch_size: int = DEFAULT_DUMMY_SHAPES["batch_size"],
+        sequence_length: int = DEFAULT_DUMMY_SHAPES["sequence_length"],
+        random_batch_size_range: Optional[Tuple[int, int]] = None,
+        random_sequence_length_range: Optional[Tuple[int, int]] = None,
+        **kwargs,
+    ):
+        super().__init__(
+            task,
+            normalized_config,
+            batch_size,
+            sequence_length,
+            random_batch_size_range,
+            random_sequence_length_range,
+            **kwargs,
+        )
+        self.num_key_value_heads = getattr(
+            normalized_config, "num_key_value_heads", normalized_config.num_attention_heads
+        )
+
+    def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"):
+        shape = (
+            self.batch_size,
+            self.num_key_value_heads,
+            self.sequence_length,
+            self.hidden_size // self.num_attention_heads,
+        )
+        return [
+            (
+                self.random_float_tensor(shape, framework=framework, dtype=float_dtype),
+                self.random_float_tensor(shape, framework=framework, dtype=float_dtype),
+            )
+            for _ in range(self.num_layers)
+        ]
+
+
+@register_in_tasks_manager("aquila", *["text-generation", "text-generation-with-past"], library_name="transformers")
+class AquilaMOpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig):
+    DEFAULT_ONNX_OPSET = 14
+
+    DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, AquilaDummyPastKeyValuesGenerator)
+    DUMMY_PKV_GENERATOR_CLASS = AquilaDummyPastKeyValuesGenerator
+    NORMALIZED_CONFIG_CLASS = NormalizedTextConfig.with_args(num_key_value_heads="num_key_value_heads", allow_new=True)
+
+    def patch_model_for_export(
+        self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
+    ) -> "ModelPatcher":
+        return AquilaModelPatcher(self, model, model_kwargs=model_kwargs)
+
+
+@register_in_tasks_manager("xverse", *["text-generation", "text-generation-with-past"], library_name="transformers")
+class XverseMOpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig):
+    DEFAULT_ONNX_OPSET = 14
+
+    DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, DummyPastKeyValuesGenerator)
+    DUMMY_PKV_GENERATOR_CLASS = DummyPastKeyValuesGenerator
+    NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
+
+    def patch_model_for_export(
+        self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
+    ) -> "ModelPatcher":
+        return XverseModelPatcher(self, model, model_kwargs=model_kwargs)
+
+
+@register_in_tasks_manager("internlm", *["text-generation", "text-generation-with-past"], library_name="transformers")
+class InternLMOpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig):
+    DEFAULT_ONNX_OPSET = 14
+
+    DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, DummyPastKeyValuesGenerator)
+    DUMMY_PKV_GENERATOR_CLASS = DummyPastKeyValuesGenerator
+    NORMALIZED_CONFIG_CLASS = NormalizedTextConfig

    def patch_model_for_export(
        self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
    ) -> "ModelPatcher":
        return InternLMModelPatcher(self, model, model_kwargs=model_kwargs)
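With these registrations in place, the newly covered decoder architectures should be exportable through the regular optimum-intel Python API. A hypothetical sketch follows; the checkpoint id is a placeholder for any of the newly registered architectures (persimmon, biogpt, gpt-neox-japanese, cohere, xglm, aquila, xverse, internlm), and OVModelForCausalLM with export=True is the standard entry point rather than anything introduced by this commit:

from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer

# Placeholder checkpoint id for one of the newly registered architectures.
model_id = "adept/persimmon-8b-chat"

# export=True runs the OpenVINO export path that the configs above register
# in the tasks manager for their respective model types.
model = OVModelForCausalLM.from_pretrained(model_id, export=True)
tokenizer = AutoTokenizer.from_pretrained(model_id)

inputs = tokenizer("Hello, my name is", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=16)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))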
