Skip to content

Commit 28bed9d

Browse files
committed
compute image inputs using config
1 parent cd3e6b6 commit 28bed9d

File tree

4 files changed: +35 -16 lines changed

optimum/exporters/openvino/__main__.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -431,7 +431,7 @@ class StoreAttr(object):
431431
logger.info(f"Automatic task detection to {task}{possible_synonyms}.")
432432

433433
preprocessors = load_preprocessors(
434-
model_name_or_path, subfolder=subfolder, trust_remote_code=trust_remote_code
434+
model_name_or_path, subfolder=subfolder, trust_remote_code=trust_remote_code, model_type=model_type
435435
)
436436

437437
submodel_paths = export_from_model(

optimum/exporters/openvino/convert.py

+1 -2
Original file line number | Diff line number | Diff line change
@@ -617,8 +617,7 @@ def export_from_model(
617617
)
618618

619619
library_name = _infer_library_from_model_or_model_class(model, library_name=library_name)
620-
if library_name != "open_clip":
621-
TasksManager.standardize_model_attributes(model, library_name=library_name)
620+
TasksManager.standardize_model_attributes(model, library_name=library_name)
622621

623622
if hasattr(model.config, "export_model_type"):
624623
model_type = model.config.export_model_type.replace("_", "-")

optimum/exporters/openvino/model_configs.py

+19 -2
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414

1515
import enum
1616
import importlib
17+
import math
1718
from copy import deepcopy
1819
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
1920

@@ -2862,17 +2863,33 @@ def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int
28622863
dtype=int_dtype,
28632864
)
28642865
if input_name == "code_b":
2866+
# default value from https://github.com/deepseek-ai/Janus/blob/1daa72fa409002d40931bd7b36a9280362469ead/janus/models/vq_model.py#L42
2867+
z_channels = getattr(self.normalized_config.config.params, "z_channels", 256)
2868+
patch_size = int(math.sqrt(z_channels))
2869+
# default value from https://github.com/deepseek-ai/Janus/blob/1daa72fa409002d40931bd7b36a9280362469ead/generation_inference.py#L63
2870+
generated_image_size = getattr(self.normalized_config.config.params, "img_size", 384)
2871+
latent_heigh = int(generated_image_size // patch_size)
2872+
latent_width = int(generated_image_size // patch_size)
28652873
return self.random_int_tensor(
2866-
[self.batch_size, 576],
2874+
[self.batch_size, int(latent_heigh * latent_width)],
28672875
max_value=self.normalized_config.config.params.image_token_size,
28682876
framework=framework,
28692877
dtype=int_dtype,
28702878
)
28712879
if input_name == "image_shape":
28722880
import torch
28732881

2882+
# default value from https://github.com/deepseek-ai/Janus/blob/1daa72fa409002d40931bd7b36a9280362469ead/janus/models/vq_model.py#L42
2883+
z_channels = getattr(self.normalized_config.config.params, "z_channels", 256)
2884+
patch_size = int(math.sqrt(z_channels))
2885+
# default value from https://github.com/deepseek-ai/Janus/blob/1daa72fa409002d40931bd7b36a9280362469ead/generation_inference.py#L63
2886+
generated_image_size = getattr(self.normalized_config.config.params, "img_size", 384)
2887+
latent_heigh = int(generated_image_size // patch_size)
2888+
latent_width = int(generated_image_size // patch_size)
2889+
28742890
return torch.tensor(
2875-
[self.batch_size, self.normalized_config.config.params.n_embed, 24, 24], dtype=torch.int64
2891+
[self.batch_size, self.normalized_config.config.params.n_embed, latent_heigh, latent_width],
2892+
dtype=torch.int64,
28762893
)
28772894
if input_name == "hidden_state":
28782895
return self.random_float_tensor(

optimum/exporters/openvino/utils.py

+14 -11
Original file line number | Diff line number | Diff line change
@@ -315,18 +315,21 @@ def save_preprocessors(
315315
maybe_save_preprocessors(model_name_or_path, output, trust_remote_code=trust_remote_code)
316316

317317

318-
def load_preprocessors(src_name_or_path: Union[str, Path], subfolder: str = "", trust_remote_code: bool = False):
318+
def load_preprocessors(
319+
src_name_or_path: Union[str, Path], subfolder: str = "", trust_remote_code: bool = False, model_type: str = None
320+
):
319321
preprocessors = maybe_load_preprocessors(
320322
src_name_or_path, subfolder=subfolder, trust_remote_code=trust_remote_code
321323
)
322-
if importlib.util.find_spec("janus") is not None:
323-
from janus.models import VLChatProcessor
324-
325-
try:
326-
processor = VLChatProcessor.from_pretrained(
327-
src_name_or_path, subfolder=subfolder, trust_remote_code=trust_remote_code
328-
)
329-
preprocessors.append(processor)
330-
except Exception:
331-
pass
324+
if model_type == "janus":
325+
if importlib.util.find_spec("janus") is not None:
326+
from janus.models import VLChatProcessor
327+
328+
try:
329+
processor = VLChatProcessor.from_pretrained(
330+
src_name_or_path, subfolder=subfolder, trust_remote_code=trust_remote_code
331+
)
332+
preprocessors.append(processor)
333+
except Exception:
334+
pass
332335
return preprocessors

0 commit comments

Comments
 (0)