Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix generation for statically reshaped diffusion pipeline #1199

Merged
merged 4 commits into from
Mar 17, 2025
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 58 additions & 3 deletions optimum/intel/openvino/modeling_diffusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -889,9 +889,7 @@ def reshape(
)

if self.text_encoder_3 is not None:
self.text_encoder_3.model = self._reshape_text_encoder(
self.text_encoder_3.model, batch_size, getattr(self.tokenizer_3, "model_max_length", -1)
)
self.text_encoder_3.model = self._reshape_text_encoder(self.text_encoder_3.model, batch_size, -1)

self.clear_requests()
return self
Expand Down Expand Up @@ -973,6 +971,63 @@ def __call__(self, *args, **kwargs):
for k, v in kwargs.items():
kwargs[k] = np_to_pt_generators(v, self.device)

height, width = None, None
height_idx, width_idx = None, None
shapes_overriden = False
sig = inspect.signature(self.auto_model_class.__call__)
sig_height_idx = list(sig.parameters).index("height") if "height" in sig.parameters else len(sig.parameters)
sig_width_idx = list(sig.parameters).index("width") if "width" in sig.parameters else len(sig.parameters)
if "height" in kwargs:
height = kwargs["height"]
elif len(args) > sig_height_idx:
height = args[sig_height_idx]
height_idx = sig_height_idx

if "width" in kwargs:
width = kwargs["width"]
elif len(args) > sig_width_idx:
width = args[sig_width_idx]
width_idx = sig_width_idx

if self.height != -1:
if height is not None and height != self.height:
logger.warning(f"Incompatible height argument provided {height}. Pipeline only support {self.height}.")
height = self.height
else:
height = self.height

if height_idx is not None:
args[height_idx] = height
else:
kwargs["height"] = height

shapes_overriden = True

if self.width != -1:
if width is not None and width != self.width:
logger.warning(f"Incompatible widtth argument provided {width}. Pipeline only support {self.width}.")
width = self.width
else:
width = self.width

if width_idx is not None:
args[width_idx] = width
else:
kwargs["width"] = width
shapes_overriden = True

# By default, Sana generates images on a grid of specific resolutions and then resizes them to the requested size, which may conflict with the pipeline's static height / width.
# Disable this behavior for static-shape pipelines.
if self.auto_model_class.__name__.startswith("Sana") and shapes_overriden:
sig_resolution_bining_idx = (
list(sig.parameters).index("use_resolution_binning")
if "use_resolution_binning" in sig.parameters
else len(sig.parameters)
)
if len(args) > sig_resolution_bining_idx:
args[sig_resolution_bining_idx] = False
else:
kwargs["use_resolution_binning"] = False
# we use auto_model_class.__call__ here because we can't call super().__call__
# as OptimizedModel already defines a __call__ which is the first in the MRO
return self.auto_model_class.__call__(self, *args, **kwargs)
Expand Down
18 changes: 18 additions & 0 deletions tests/openvino/test_diffusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,24 @@ def test_load_custom_weight_variant(self):

np.testing.assert_allclose(ov_images, diffusers_images, atol=1e-4, rtol=1e-2)

@parameterized.expand(SUPPORTED_ARCHITECTURES)
@require_diffusers
def test_static_shape_image_generation(self, model_arch):
    """Check that a statically reshaped pipeline always generates at its fixed size.

    The pipeline is reshaped to height=64 / width=32 before compilation and is
    then called twice: once with a conflicting height / width request and once
    with no height / width at all. Both calls must produce an image whose
    ``size`` equals ``(32, 64)``.
    """
    static_height, static_width = 64, 32
    # the ``size`` of the generated image is compared in (width, height) order
    expected_size = (static_width, static_height)

    ov_pipeline = self.OVMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], compile=False)
    ov_pipeline.reshape(batch_size=1, height=static_height, width=static_width)
    ov_pipeline.compile()

    # first call: the requested 64x64 disagrees with the static width of 32
    call_kwargs = self.generate_inputs(height=64, width=64, batch_size=1)
    call_kwargs["output_type"] = "pil"
    first_image = ov_pipeline(**call_kwargs).images[0]
    self.assertTupleEqual(first_image.size, expected_size)

    # second call: same inputs with height / width removed entirely
    for size_key in ("height", "width"):
        call_kwargs.pop(size_key)
    second_image = ov_pipeline(**call_kwargs).images[0]
    self.assertTupleEqual(second_image.size, expected_size)


class OVPipelineForImage2ImageTest(unittest.TestCase):
SUPPORTED_ARCHITECTURES = ["stable-diffusion", "stable-diffusion-xl", "latent-consistency"]
Expand Down
Loading