Commit 52f40af

compatibility with diffusers 0.31.0
1 parent 5df09e1 commit 52f40af

3 files changed (+29 -11)

optimum/exporters/openvino/model_configs.py (+17 -5)
@@ -55,7 +55,7 @@
 )
 from optimum.utils.normalized_config import NormalizedConfig, NormalizedTextConfig, NormalizedVisionConfig
 
-from ...intel.utils.import_utils import _transformers_version, is_transformers_version
+from ...intel.utils.import_utils import _transformers_version, is_diffusers_version, is_transformers_version
 from .model_patcher import (
     AquilaModelPatcher,
     ArcticModelPatcher,
@@ -1681,7 +1681,9 @@ def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int
             img_ids_height = self.height // 2
             img_ids_width = self.width // 2
             return self.random_int_tensor(
-                [self.batch_size, img_ids_height * img_ids_width, 3],
+                [self.batch_size, img_ids_height * img_ids_width, 3]
+                if is_diffusers_version("<", "0.31.0")
+                else [img_ids_height * img_ids_width, 3],
                 min_value=0,
                 max_value=min(img_ids_height, img_ids_width),
                 framework=framework,
@@ -1704,7 +1706,11 @@ def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int
         if input_name == "txt_ids":
             import torch
 
-            shape = [self.batch_size, self.sequence_length, 3]
+            shape = (
+                [self.batch_size, self.sequence_length, 3]
+                if is_diffusers_version("<", "0.31.0")
+                else [self.sequence_length, 3]
+            )
             dtype = DTYPE_MAPPER.pt(float_dtype)
             return torch.full(shape, 0, dtype=dtype)
         return super().generate(input_name, framework, int_dtype, float_dtype)
@@ -1724,8 +1730,14 @@ def inputs(self):
         common_inputs = super().inputs
         common_inputs.pop("sample", None)
         common_inputs["hidden_states"] = {0: "batch_size", 1: "packed_height_width"}
-        common_inputs["txt_ids"] = {0: "batch_size", 1: "sequence_length"}
-        common_inputs["img_ids"] = {0: "batch_size", 1: "packed_height_width"}
+        common_inputs["txt_ids"] = (
+            {0: "batch_size", 1: "sequence_length"} if is_diffusers_version("<", "0.31.0") else {0: "sequence_length"}
+        )
+        common_inputs["img_ids"] = (
+            {0: "batch_size", 1: "packed_height_width"}
+            if is_diffusers_version("<", "0.31.0")
+            else {0: "packed_height_width"}
+        )
         if getattr(self._normalized_config, "guidance_embeds", False):
             common_inputs["guidance"] = {0: "batch_size"}
         return common_inputs
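
For context: diffusers 0.31.0 dropped the leading batch dimension from the Flux transformer's txt_ids and img_ids inputs, which is why the dummy-input generators above switch between 3D and 2D shapes. Below is a minimal standalone sketch of the same version gate; the simplified is_diffusers_version here is an assumption for illustration (the real helper lives in optimum.intel.utils.import_utils), and it assumes torch, diffusers, and packaging are installed.

import torch
import diffusers
from packaging import version


def is_diffusers_version(op: str, ref: str) -> bool:
    # Simplified stand-in for optimum.intel's helper; only "<" is needed here.
    assert op == "<"
    return version.parse(diffusers.__version__) < version.parse(ref)


batch_size, seq_len = 2, 77
# diffusers < 0.31.0: per-sample ids of shape (batch, seq, 3);
# diffusers >= 0.31.0: one shared 2D tensor of shape (seq, 3).
shape = [batch_size, seq_len, 3] if is_diffusers_version("<", "0.31.0") else [seq_len, 3]
txt_ids = torch.zeros(shape, dtype=torch.float32)
print(txt_ids.shape)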

optimum/exporters/openvino/model_patcher.py (+6 -4)
@@ -29,6 +29,7 @@
     _openvino_version,
     _torch_version,
     _transformers_version,
+    is_diffusers_version,
     is_openvino_version,
     is_torch_version,
     is_transformers_version,
@@ -2734,10 +2735,11 @@ def rope(pos: torch.Tensor, dim: int, theta: int) -> torch.Tensor:
 class FluxTransfromerModelPatcher(ModelPatcher):
     def __enter__(self):
         super().__enter__()
-        self._model.pos_embed._orig_forward = self._model.pos_embed.forward
-        self._model.pos_embed.forward = types.MethodType(_embednb_forward, self._model.pos_embed)
+        if is_diffusers_version("<", "0.31.0"):
+            self._model.pos_embed._orig_forward = self._model.pos_embed.forward
+            self._model.pos_embed.forward = types.MethodType(_embednb_forward, self._model.pos_embed)
 
     def __exit__(self, exc_type, exc_value, traceback):
         super().__exit__(exc_type, exc_value, traceback)
-
-        self._model.pos_embed.forward = self._model.pos_embed._orig_forward
+        if hasattr(self._model.pos_embed, "_orig_forward"):
+            self._model.pos_embed.forward = self._model.pos_embed._orig_forward
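
The patcher change follows a reversible monkey-patching pattern: stash the bound method in _orig_forward only when the patch is applied, and restore it in __exit__ only if the stash exists. A self-contained sketch of that pattern, assuming a toy Embed class and a no-op replacement in place of the real _embednb_forward:

import types


class Embed:
    def forward(self, pos):
        return pos


def _patched_forward(self, pos):
    # Illustrative no-op; the real patcher swaps in _embednb_forward.
    return pos


embed = Embed()
needs_patch = True  # stands in for is_diffusers_version("<", "0.31.0")

# __enter__: patch only on old diffusers, remembering the original method.
if needs_patch:
    embed._orig_forward = embed.forward
    embed.forward = types.MethodType(_patched_forward, embed)

# __exit__: restore only if a patch was actually applied (the hasattr guard).
if hasattr(embed, "_orig_forward"):
    embed.forward = embed._orig_forward

The hasattr guard matters because on diffusers >= 0.31.0 __enter__ never sets _orig_forward, so an unconditional restore would raise AttributeError.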

optimum/intel/openvino/modeling_diffusion.py (+6 -2)
@@ -681,9 +681,13 @@ def _reshape_transformer(
             elif inputs.get_any_name() == "pooled_projections":
                 shapes[inputs] = [batch_size, self.transformer.config["pooled_projection_dim"]]
             elif inputs.get_any_name() == "img_ids":
-                shapes[inputs] = [batch_size, packed_height_width, 3]
+                shapes[inputs] = (
+                    [batch_size, packed_height_width, 3]
+                    if is_diffusers_version("<", "0.31.0")
+                    else [packed_height_width, 3]
+                )
             elif inputs.get_any_name() == "txt_ids":
-                shapes[inputs] = [batch_size, -1, 3]
+                shapes[inputs] = [batch_size, -1, 3] if is_diffusers_version("<", "0.31.0") else [-1, 3]
             else:
                 shapes[inputs][0] = batch_size
                 shapes[inputs][1] = -1  # text_encoder_3 may have vary input length
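
The same rule carries over to static reshaping of the exported OpenVINO graph: on diffusers >= 0.31.0 the ids inputs are 2D, and -1 keeps the token dimension dynamic. A hedged sketch of feeding such shapes to openvino's Model.reshape, assuming a serialized Flux transformer at the hypothetical path transformer.xml:

import openvino as ov

core = ov.Core()
model = core.read_model("transformer.xml")  # hypothetical path, for illustration

batch_size, packed_height_width = 1, 4096
legacy = False  # stands in for is_diffusers_version("<", "0.31.0")

shapes = {}
for inputs in model.inputs:
    name = inputs.get_any_name()
    if name == "img_ids":
        shapes[inputs] = [batch_size, packed_height_width, 3] if legacy else [packed_height_width, 3]
    elif name == "txt_ids":
        # -1 leaves the prompt-length dimension dynamic in either layout.
        shapes[inputs] = [batch_size, -1, 3] if legacy else [-1, 3]

model.reshape(shapes)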
