From b963c545ac859d6d0fea94e9b9cd41d917900b93 Mon Sep 17 00:00:00 2001
From: eaidova <ekaterina.aidova@intel.com>
Date: Fri, 14 Mar 2025 10:43:45 +0400
Subject: [PATCH 1/4] fix generation for statically reshaped diffusion pipeline

---
 optimum/intel/openvino/modeling_diffusion.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py
index da105c5e64..517754c3af 100644
--- a/optimum/intel/openvino/modeling_diffusion.py
+++ b/optimum/intel/openvino/modeling_diffusion.py
@@ -962,7 +962,7 @@ def components(self) -> Dict[str, Any]:
         components = {k: v for k, v in components.items() if v is not None}
         return components
 
-    def __call__(self, *args, **kwargs):
+    def __call__(self, *args, height=None, width=None, **kwargs):
         # we do this to keep numpy random states support for now
         # TODO: deprecate and add warnings when a random state is passed
 
@@ -972,10 +972,24 @@ def __call__(self, *args, **kwargs):
 
         for k, v in kwargs.items():
             kwargs[k] = np_to_pt_generators(v, self.device)
+        
+        if self.height != -1:
+            if height is not None and height != self.height:
+                logger.warning(f"Incompatible height argument provided {height}. Pipeline only support {self.height}.")
+                height = self.height
+            else:
+                height = self.height
+
+        if self.width != -1:
+            if width is not None and width != self.width:
+                logger.warning(f"Incompatible widtth argument provided {width}. Pipeline only support {self.width}.")
+                width = self.width
+            else:
+                width = self.width
 
         # we use auto_model_class.__call__ here because we can't call super().__call__
         # as OptimizedModel already defines a __call__ which is the first in the MRO
-        return self.auto_model_class.__call__(self, *args, **kwargs)
+        return self.auto_model_class.__call__(self, *args, height=height, width=width, **kwargs)
 
 
 class OVPipelinePart(ConfigMixin):

From 047b43d8f0dd75e6e3c776a02fdde30d66ac3eb5 Mon Sep 17 00:00:00 2001
From: eaidova <ekaterina.aidova@intel.com>
Date: Fri, 14 Mar 2025 11:11:52 +0400
Subject: [PATCH 2/4] add test

---
 optimum/intel/openvino/modeling_diffusion.py |  2 +-
 tests/openvino/test_diffusion.py             | 17 +++++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py
index 517754c3af..7a4ffe2b84 100644
--- a/optimum/intel/openvino/modeling_diffusion.py
+++ b/optimum/intel/openvino/modeling_diffusion.py
@@ -972,7 +972,7 @@ def __call__(self, *args, height=None, width=None, **kwargs):
 
         for k, v in kwargs.items():
             kwargs[k] = np_to_pt_generators(v, self.device)
-        
+
         if self.height != -1:
             if height is not None and height != self.height:
                 logger.warning(f"Incompatible height argument provided {height}. Pipeline only support {self.height}.")
diff --git a/tests/openvino/test_diffusion.py b/tests/openvino/test_diffusion.py
index b302229cd7..1216ffbf44 100644
--- a/tests/openvino/test_diffusion.py
+++ b/tests/openvino/test_diffusion.py
@@ -438,6 +438,23 @@ def test_load_custom_weight_variant(self):
 
         np.testing.assert_allclose(ov_images, diffusers_images, atol=1e-4, rtol=1e-2)
 
+    @parameterized.expand(SUPPORTED_ARCHITECTURES)
+    @require_diffusers
+    def test_static_shape_image_generation(self, model_arch):
+        pipeline = self.OVMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], compile=False)
+        pipeline.reshape(batch_size=-1, height=40, width=32)
+        pipeline.compile()
+        # generation with incompatible size
+        height, width, batch_size = 64, 64, 1
+        inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size)
+        image = pipeline(**inputs, num_inference_steps=2).images[0]
+        self.assertTupleEqual(image.size, (32, 40))
+        # generation without height / width provided
+        inputs.pop("height")
+        inputs.pop("width")
+        image = pipeline(**inputs, num_inference_steps=2).images[0]
+        self.assertTupleEqual(image.size, (32, 40))
+
 
 class OVPipelineForImage2ImageTest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = ["stable-diffusion", "stable-diffusion-xl", "latent-consistency"]

From 2a0c957d6ea3fd59fcba82837570d867cb59edd0 Mon Sep 17 00:00:00 2001
From: eaidova <ekaterina.aidova@intel.com>
Date: Fri, 14 Mar 2025 13:47:51 +0400
Subject: [PATCH 3/4] fix sana

---
 optimum/intel/openvino/modeling_diffusion.py | 51 ++++++++++++++++++--
 tests/openvino/test_diffusion.py             | 11 +++--
 2 files changed, 52 insertions(+), 10 deletions(-)

diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py
index 7a4ffe2b84..3703e85a8a 100644
--- a/optimum/intel/openvino/modeling_diffusion.py
+++ b/optimum/intel/openvino/modeling_diffusion.py
@@ -889,9 +889,7 @@ def reshape(
             )
 
         if self.text_encoder_3 is not None:
-            self.text_encoder_3.model = self._reshape_text_encoder(
-                self.text_encoder_3.model, batch_size, getattr(self.tokenizer_3, "model_max_length", -1)
-            )
+            self.text_encoder_3.model = self._reshape_text_encoder(self.text_encoder_3.model, batch_size, -1)
 
         self.clear_requests()
         return self
@@ -962,7 +960,7 @@ def components(self) -> Dict[str, Any]:
         components = {k: v for k, v in components.items() if v is not None}
         return components
 
-    def __call__(self, *args, height=None, width=None, **kwargs):
+    def __call__(self, *args, **kwargs):
         # we do this to keep numpy random states support for now
         # TODO: deprecate and add warnings when a random state is passed
 
@@ -973,6 +971,24 @@ def __call__(self, *args, height=None, width=None, **kwargs):
         for k, v in kwargs.items():
             kwargs[k] = np_to_pt_generators(v, self.device)
 
+        height, width = None, None
+        height_idx, width_idx = None, None
+        shapes_overriden = False
+        sig = inspect.signature(self.auto_model_class.__call__)
+        sig_height_idx = list(sig.parameters).index("height") if "height" in sig.parameters else len(sig.parameters)
+        sig_width_idx = list(sig.parameters).index("width") if "width" in sig.parameters else len(sig.parameters)
+        if "height" in kwargs:
+            height = kwargs["height"]
+        elif len(args) > sig_height_idx:
+            height = args[sig_height_idx]
+            height_idx = sig_height_idx
+
+        if "width" in kwargs:
+            width = kwargs["width"]
+        elif len(args) > sig_width_idx:
+            width = args[sig_width_idx]
+            width_idx = sig_width_idx
+
         if self.height != -1:
             if height is not None and height != self.height:
                 logger.warning(f"Incompatible height argument provided {height}. Pipeline only support {self.height}.")
@@ -980,6 +996,13 @@ def __call__(self, *args, height=None, width=None, **kwargs):
             else:
                 height = self.height
 
+            if height_idx is not None:
+                args[height_idx] = height
+            else:
+                kwargs["height"] = height
+
+            shapes_overriden = True
+
         if self.width != -1:
             if width is not None and width != self.width:
                 logger.warning(f"Incompatible widtth argument provided {width}. Pipeline only support {self.width}.")
@@ -987,9 +1010,27 @@ def __call__(self, *args, height=None, width=None, **kwargs):
             else:
                 width = self.width
 
+            if width_idx is not None:
+                args[width_idx] = width
+            else:
+                kwargs["width"] = width
+            shapes_overriden = True
+
+        # By default, Sana generates images at a specific resolution grid size and then resizes them to the requested size, which may conflict with the pipeline's static height / width.
+        # Disable this behavior (resolution binning) for statically reshaped pipelines.
+        if self.auto_model_class.__name__.startswith("Sana") and shapes_overriden:
+            sig_resolution_bining_idx = (
+                list(sig.parameters).index("use_resolution_binning")
+                if "use_resolution_binning" in sig.parameters
+                else len(sig.parameters)
+            )
+            if len(args) > sig_resolution_bining_idx:
+                args[sig_resolution_bining_idx] = False
+            else:
+                kwargs["use_resolution_binning"] = False
         # we use auto_model_class.__call__ here because we can't call super().__call__
         # as OptimizedModel already defines a __call__ which is the first in the MRO
-        return self.auto_model_class.__call__(self, *args, height=height, width=width, **kwargs)
+        return self.auto_model_class.__call__(self, *args, **kwargs)
 
 
 class OVPipelinePart(ConfigMixin):
diff --git a/tests/openvino/test_diffusion.py b/tests/openvino/test_diffusion.py
index 1216ffbf44..83bc6a9515 100644
--- a/tests/openvino/test_diffusion.py
+++ b/tests/openvino/test_diffusion.py
@@ -442,18 +442,19 @@ def test_load_custom_weight_variant(self):
     @require_diffusers
     def test_static_shape_image_generation(self, model_arch):
         pipeline = self.OVMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], compile=False)
-        pipeline.reshape(batch_size=-1, height=40, width=32)
+        pipeline.reshape(batch_size=1, height=64, width=32)
         pipeline.compile()
         # generation with incompatible size
         height, width, batch_size = 64, 64, 1
         inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size)
-        image = pipeline(**inputs, num_inference_steps=2).images[0]
-        self.assertTupleEqual(image.size, (32, 40))
+        inputs["output_type"] = "pil"
+        image = pipeline(**inputs).images[0]
+        self.assertTupleEqual(image.size, (32, 64))
         # generation without height / width provided
         inputs.pop("height")
         inputs.pop("width")
-        image = pipeline(**inputs, num_inference_steps=2).images[0]
-        self.assertTupleEqual(image.size, (32, 40))
+        image = pipeline(**inputs).images[0]
+        self.assertTupleEqual(image.size, (32, 64))
 
 
 class OVPipelineForImage2ImageTest(unittest.TestCase):

From 9a02e4bc61b780fcd3ca6e839460df9930d9938d Mon Sep 17 00:00:00 2001
From: eaidova <ekaterina.aidova@intel.com>
Date: Mon, 17 Mar 2025 16:17:56 +0400
Subject: [PATCH 4/4] add check warnings

---
 tests/openvino/test_diffusion.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/tests/openvino/test_diffusion.py b/tests/openvino/test_diffusion.py
index 83bc6a9515..c4dca50ea3 100644
--- a/tests/openvino/test_diffusion.py
+++ b/tests/openvino/test_diffusion.py
@@ -13,6 +13,7 @@
 #  limitations under the License.
 
 import json
+import logging
 import unittest
 from pathlib import Path
 
@@ -442,19 +443,28 @@ def test_load_custom_weight_variant(self):
     @require_diffusers
     def test_static_shape_image_generation(self, model_arch):
         pipeline = self.OVMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], compile=False)
-        pipeline.reshape(batch_size=1, height=64, width=32)
+        pipeline.reshape(batch_size=1, height=32, width=32)
         pipeline.compile()
         # generation with incompatible size
         height, width, batch_size = 64, 64, 1
         inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size)
         inputs["output_type"] = "pil"
-        image = pipeline(**inputs).images[0]
-        self.assertTupleEqual(image.size, (32, 64))
+        from optimum.intel.openvino.modeling_diffusion import logger as diffusers_logger
+
+        with self.assertLogs(diffusers_logger, logging.WARN) as warning_log:
+            image = pipeline(**inputs).images[0]
+            self.assertTrue(
+                any(
+                    "Incompatible width argument provided" in log or "Incompatible height argument provided" in log
+                    for log in warning_log.output
+                )
+            )
+        self.assertTupleEqual(image.size, (32, 32))
         # generation without height / width provided
         inputs.pop("height")
         inputs.pop("width")
         image = pipeline(**inputs).images[0]
-        self.assertTupleEqual(image.size, (32, 64))
+        self.assertTupleEqual(image.size, (32, 32))
 
 
 class OVPipelineForImage2ImageTest(unittest.TestCase):