compute image inputs using config

eaidova · eaidova · commit 28bed9dad926 · 2025-02-14T13:34:34.000+04:00
diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py
@@ -431,7 +431,7 @@ class StoreAttr(object):
             logger.info(f"Automatic task detection to {task}{possible_synonyms}.")
 
         preprocessors = load_preprocessors(
-            model_name_or_path, subfolder=subfolder, trust_remote_code=trust_remote_code
+            model_name_or_path, subfolder=subfolder, trust_remote_code=trust_remote_code, model_type=model_type
         )
 
         submodel_paths = export_from_model(
diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py
@@ -617,8 +617,7 @@ def export_from_model(
         )
 
     library_name = _infer_library_from_model_or_model_class(model, library_name=library_name)
-    if library_name != "open_clip":
-        TasksManager.standardize_model_attributes(model, library_name=library_name)
+    TasksManager.standardize_model_attributes(model, library_name=library_name)
 
     if hasattr(model.config, "export_model_type"):
         model_type = model.config.export_model_type.replace("_", "-")
diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py
@@ -14,6 +14,7 @@
 
 import enum
 import importlib
+import math
 from copy import deepcopy
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
 
@@ -2862,17 +2863,33 @@ def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int
                 dtype=int_dtype,
             )
         if input_name == "code_b":
+            # default value from https://github.com/deepseek-ai/Janus/blob/1daa72fa409002d40931bd7b36a9280362469ead/janus/models/vq_model.py#L42
+            z_channels = getattr(self.normalized_config.config.params, "z_channels", 256)
+            patch_size = int(math.sqrt(z_channels))
+            # default value from https://github.com/deepseek-ai/Janus/blob/1daa72fa409002d40931bd7b36a9280362469ead/generation_inference.py#L63
+            generated_image_size = getattr(self.normalized_config.config.params, "img_size", 384)
+            latent_heigh = int(generated_image_size // patch_size)
+            latent_width = int(generated_image_size // patch_size)
             return self.random_int_tensor(
-                [self.batch_size, 576],
+                [self.batch_size, int(latent_heigh * latent_width)],
                 max_value=self.normalized_config.config.params.image_token_size,
                 framework=framework,
                 dtype=int_dtype,
             )
         if input_name == "image_shape":
             import torch
 
+            # default value from https://github.com/deepseek-ai/Janus/blob/1daa72fa409002d40931bd7b36a9280362469ead/janus/models/vq_model.py#L42
+            z_channels = getattr(self.normalized_config.config.params, "z_channels", 256)
+            patch_size = int(math.sqrt(z_channels))
+            # default value from https://github.com/deepseek-ai/Janus/blob/1daa72fa409002d40931bd7b36a9280362469ead/generation_inference.py#L63
+            generated_image_size = getattr(self.normalized_config.config.params, "img_size", 384)
+            latent_heigh = int(generated_image_size // patch_size)
+            latent_width = int(generated_image_size // patch_size)
+
             return torch.tensor(
-                [self.batch_size, self.normalized_config.config.params.n_embed, 24, 24], dtype=torch.int64
+                [self.batch_size, self.normalized_config.config.params.n_embed, latent_heigh, latent_width],
+                dtype=torch.int64,
             )
         if input_name == "hidden_state":
             return self.random_float_tensor(
diff --git a/optimum/exporters/openvino/utils.py b/optimum/exporters/openvino/utils.py
@@ -315,18 +315,21 @@ def save_preprocessors(
         maybe_save_preprocessors(model_name_or_path, output, trust_remote_code=trust_remote_code)
 
 
-def load_preprocessors(src_name_or_path: Union[str, Path], subfolder: str = "", trust_remote_code: bool = False):
+def load_preprocessors(
+    src_name_or_path: Union[str, Path], subfolder: str = "", trust_remote_code: bool = False, model_type: str = None
+):
     preprocessors = maybe_load_preprocessors(
         src_name_or_path, subfolder=subfolder, trust_remote_code=trust_remote_code
     )
-    if importlib.util.find_spec("janus") is not None:
-        from janus.models import VLChatProcessor
-
-        try:
-            processor = VLChatProcessor.from_pretrained(
-                src_name_or_path, subfolder=subfolder, trust_remote_code=trust_remote_code
-            )
-            preprocessors.append(processor)
-        except Exception:
-            pass
+    if model_type == "janus":
+        if importlib.util.find_spec("janus") is not None:
+            from janus.models import VLChatProcessor
+
+            try:
+                processor = VLChatProcessor.from_pretrained(
+                    src_name_or_path, subfolder=subfolder, trust_remote_code=trust_remote_code
+                )
+                preprocessors.append(processor)
+            except Exception:
+                pass
     return preprocessors

Original file line number	Diff line number	Diff line change
`@@ -431,7 +431,7 @@ class StoreAttr(object):`
`431`	`431`	`logger.info(f"Automatic task detection to {task}{possible_synonyms}.")`
`432`	`432`
`433`	`433`	`preprocessors = load_preprocessors(`
`434`		`- model_name_or_path, subfolder=subfolder, trust_remote_code=trust_remote_code`
	`434`	`+ model_name_or_path, subfolder=subfolder, trust_remote_code=trust_remote_code, model_type=model_type`
`435`	`435`	`)`
`436`	`436`
`437`	`437`	`submodel_paths = export_from_model(`
Original file line number	Diff line number	Diff line change
`@@ -617,8 +617,7 @@ def export_from_model(`
`617`	`617`	`)`
`618`	`618`
`619`	`619`	`library_name = _infer_library_from_model_or_model_class(model, library_name=library_name)`
`620`		`- if library_name != "open_clip":`
`621`		`- TasksManager.standardize_model_attributes(model, library_name=library_name)`
	`620`	`+ TasksManager.standardize_model_attributes(model, library_name=library_name)`
`622`	`621`
`623`	`622`	`if hasattr(model.config, "export_model_type"):`
`624`	`623`	`model_type = model.config.export_model_type.replace("_", "-")`