Fix Sana generation with static shapes (disable resolution binning)

eaidova · eaidova · commit 97308d232b30 · 2025-03-14T13:47:51.000+04:00
diff --git a/optimum/intel/openvino/modeling_diffusion.py b/optimum/intel/openvino/modeling_diffusion.py
@@ -889,9 +889,7 @@ def reshape(
             )
 
         if self.text_encoder_3 is not None:
-            self.text_encoder_3.model = self._reshape_text_encoder(
-                self.text_encoder_3.model, batch_size, getattr(self.tokenizer_3, "model_max_length", -1)
-            )
+            self.text_encoder_3.model = self._reshape_text_encoder(self.text_encoder_3.model, batch_size, -1)
 
         self.clear_requests()
         return self
@@ -962,7 +960,7 @@ def components(self) -> Dict[str, Any]:
         components = {k: v for k, v in components.items() if v is not None}
         return components
 
-    def __call__(self, *args, height=None, width=None, **kwargs):
+    def __call__(self, *args, **kwargs):
         # we do this to keep numpy random states support for now
         # TODO: deprecate and add warnings when a random state is passed
 
@@ -973,23 +971,62 @@ def __call__(self, *args, height=None, width=None, **kwargs):
         for k, v in kwargs.items():
             kwargs[k] = np_to_pt_generators(v, self.device)
 
+        height, width = None, None
+        height_idx, width_idx = None, None
+        shapes_overriden = False
+        sig = inspect.signature(self.auto_model_class.__call__)
+        sig_height_idx = list(sig.parameters).index("height")
+        sig_width_idx = list(sig.parameters).index("width")
+        if "height" in kwargs:
+            height = kwargs["height"]
+        elif len(args) > sig_height_idx:
+            height = args[sig_height_idx]
+            height_idx = sig_height_idx
+
+        if "width" in kwargs:
+            width = kwargs["width"]
+        elif len(args) > sig_width_idx:
+            width = args[sig_width_idx]
+            width_idx = sig_width_idx
+
         if self.height != -1:
             if height is not None and height != self.height:
                 logger.warning(f"Incompatible height argument provided {height}. Pipeline only support {self.height}.")
                 height = self.height
             else:
                 height = self.height
 
+            if height_idx is not None:
+                args[height_idx] = height
+            else:
+                kwargs["height"] = height
+
+            shapes_overriden = True
+
         if self.width != -1:
             if width is not None and width != self.width:
                 logger.warning(f"Incompatible widtth argument provided {width}. Pipeline only support {self.width}.")
                 width = self.width
             else:
                 width = self.width
 
+            if width_idx is not None:
+                args[width_idx] = width
+            else:
+                kwargs["width"] = width
+            shapes_overriden = True
+
+        # By default, Sana generates images on a specific resolution grid (resolution binning) and then resizes them to the requested size, which may conflict with the pipeline's static height / width.
+        # Disable this behavior for static-shape pipelines.
+        if self.auto_model_class.__name__.startswith("Sana") and shapes_overriden:
+            sig_resolution_bining_idx = list(sig.parameters).index("use_resolution_binning")
+            if len(args) > sig_resolution_bining_idx:
+                args[sig_resolution_bining_idx] = False
+            else:
+                kwargs["use_resolution_binning"] = False
         # we use auto_model_class.__call__ here because we can't call super().__call__
         # as OptimizedModel already defines a __call__ which is the first in the MRO
-        return self.auto_model_class.__call__(self, *args, height=height, width=width, **kwargs)
+        return self.auto_model_class.__call__(self, *args, **kwargs)
 
 
 class OVPipelinePart(ConfigMixin):
diff --git a/tests/openvino/test_diffusion.py b/tests/openvino/test_diffusion.py
@@ -442,18 +442,19 @@ def test_load_custom_weight_variant(self):
     @require_diffusers
     def test_static_shape_image_generation(self, model_arch):
         pipeline = self.OVMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch], compile=False)
-        pipeline.reshape(batch_size=-1, height=40, width=32)
+        pipeline.reshape(batch_size=1, height=64, width=32)
         pipeline.compile()
         # generation with incompatible size
         height, width, batch_size = 64, 64, 1
         inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size)
-        image = pipeline(**inputs, num_inference_steps=2).images[0]
-        self.assertTupleEqual(image.size, (32, 40))
+        inputs["output_type"] = "pil"
+        image = pipeline(**inputs).images[0]
+        self.assertTupleEqual(image.size, (32, 64))
         # generation without height / width provided
         inputs.pop("height")
         inputs.pop("width")
-        image = pipeline(**inputs, num_inference_steps=2).images[0]
-        self.assertTupleEqual(image.size, (32, 40))
+        image = pipeline(**inputs).images[0]
+        self.assertTupleEqual(image.size, (32, 64))
 
 
 class OVPipelineForImage2ImageTest(unittest.TestCase):