From be4498c006de7b8192aa7b65cb5abca3bdc8815f Mon Sep 17 00:00:00 2001
From: eaidova
Date: Mon, 17 Feb 2025 22:20:33 +0400
Subject: [PATCH] avoid extra reshaping to max_model_length for unet

---
 optimum/exporters/openvino/convert.py       |  3 ++
 optimum/exporters/openvino/model_configs.py | 33 ++++++++++++++++++++-
 2 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py
index 6a70c3b5ad..05672bdeb4 100644
--- a/optimum/exporters/openvino/convert.py
+++ b/optimum/exporters/openvino/convert.py
@@ -1025,6 +1025,9 @@ def get_diffusion_models_for_export_ext(
     is_lcm = pipeline.__class__.__name__.startswith("LatentConsistencyModel")
 
     if is_sd or is_sdxl or is_lcm:
+        tokenizer = pipeline.tokenizer_2 if is_sdxl else pipeline.tokenizer
+        model_max_length = getattr(tokenizer, "model_max_length", None)
+        pipeline.unet.config.model_max_length = model_max_length
         models_for_export = get_diffusion_models_for_export(pipeline, int_dtype, float_dtype, exporter)
         if is_sdxl and pipeline.vae.config.force_upcast:
             models_for_export["vae_encoder"][1].runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "128.0"}
diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py
index 6807644b9e..f6d6f1eae5 100644
--- a/optimum/exporters/openvino/model_configs.py
+++ b/optimum/exporters/openvino/model_configs.py
@@ -1864,18 +1864,49 @@ def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int
         return self.random_int_tensor(shape, max_value=self.vocab_size, framework=framework, dtype=int_dtype)
 
 
+class DummyUnetEncoderInputGenerator(DummySeq2SeqDecoderTextInputGenerator):
+    def __init__(
+        self,
+        task: str,
+        normalized_config: NormalizedTextConfig,
+        batch_size: int = DEFAULT_DUMMY_SHAPES["batch_size"],
+        sequence_length: int = DEFAULT_DUMMY_SHAPES["sequence_length"],
+        num_choices: int = DEFAULT_DUMMY_SHAPES["num_choices"],
+        random_batch_size_range: Optional[Tuple[int, int]] = None,
+        random_sequence_length_range: Optional[Tuple[int, int]] = None,
+        random_num_choices_range: Optional[Tuple[int, int]] = None,
+        **kwargs,
+    ):
+        super().__init__(
+            task,
+            normalized_config,
+            batch_size=batch_size,
+            sequence_length=sequence_length,
+            num_choices=num_choices,
+            random_batch_size_range=random_batch_size_range,
+            random_sequence_length_range=random_sequence_length_range,
+            random_num_choices_range=random_num_choices_range,
+            **kwargs,
+        )
+        if hasattr(normalized_config.config, "model_max_length"):
+            self.sequence_length = normalized_config.config.model_max_length
+
+
 @register_in_tasks_manager("unet", *["semantic-segmentation"], library_name="diffusers")
 @register_in_tasks_manager("unet-2d-condition", *["semantic-segmentation"], library_name="diffusers")
 class UNetOpenVINOConfig(UNetOnnxConfig):
     DUMMY_INPUT_GENERATOR_CLASSES = (
         DummyUnetVisionInputGenerator,
         DummyUnetTimestepInputGenerator,
-    ) + UNetOnnxConfig.DUMMY_INPUT_GENERATOR_CLASSES[2:]
+        DummyUnetEncoderInputGenerator,
+    )
 
     @property
     def inputs(self) -> Dict[str, Dict[int, str]]:
         common_inputs = super().inputs
         common_inputs["timestep"] = {0: "batch_size"}
+        if hasattr(self._normalized_config.config, "model_max_length"):
+            common_inputs["encoder_hidden_states"] = {0: "batch_size"}
         return common_inputs
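
The patch copies the text tokenizer's `model_max_length` onto the UNet config before export, so the dummy `encoder_hidden_states` input is generated with a static sequence length and only the batch axis stays dynamic, which avoids a later reshape of the exported UNet to the maximum model length. Below is a minimal, self-contained sketch (not part of the patch) of that generator behavior; the class and method names are simplified stand-ins for `DummyUnetEncoderInputGenerator`, and the `hidden_size` default of 768 and `model_max_length` of 77 are just the usual CLIP text-encoder values used as assumptions:

```python
# Sketch only: simplified stand-in for DummyUnetEncoderInputGenerator showing
# how a config-provided model_max_length pins the sequence axis at export time.
from typing import Optional, Tuple


class SketchEncoderInputGenerator:
    def __init__(self, sequence_length: int = 16, model_max_length: Optional[int] = None):
        self.sequence_length = sequence_length
        # Same override as in the patch: prefer the tokenizer-derived length
        # over the generic default when the config provides one.
        if model_max_length is not None:
            self.sequence_length = model_max_length

    def shape(self, batch_size: int = 2, hidden_size: int = 768) -> Tuple[int, int, int]:
        # encoder_hidden_states is (batch, sequence, hidden); with a pinned
        # sequence length, only the batch axis remains dynamic in the export.
        return (batch_size, self.sequence_length, hidden_size)


gen = SketchEncoderInputGenerator(model_max_length=77)  # 77 = CLIP text length
assert gen.shape() == (2, 77, 768)
```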