From f6bc561341ece708c4cafcf914baa0f9a871322b Mon Sep 17 00:00:00 2001
From: eaidova <ekaterina.aidova@intel.com>
Date: Tue, 28 Jan 2025 15:23:08 +0400
Subject: [PATCH 01/12] deepseek moe support

---
 optimum/exporters/openvino/convert.py       |   2 +-
 optimum/exporters/openvino/model_configs.py |  15 +
 optimum/exporters/openvino/model_patcher.py | 300 ++++++++++++++++++++
 optimum/exporters/openvino/utils.py         |   1 +
 4 files changed, 317 insertions(+), 1 deletion(-)

diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py
index 6a70c3b5ad..e37d66ea02 100644
--- a/optimum/exporters/openvino/convert.py
+++ b/optimum/exporters/openvino/convert.py
@@ -437,7 +437,7 @@ def ts_patched_forward(*args, **kwargs):
         patcher.patched_forward = ts_patched_forward
 
         ts_decoder_kwargs = {}
-        if library_name == "diffusers" and is_openvino_version(">=", "2025.0"):
+        if is_openvino_version(">=", "2025.0"):
             ts_decoder_kwargs["trace_kwargs"] = {"check_trace": False}
 
         with patcher:
diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py
index 6807644b9e..81263c15e0 100644
--- a/optimum/exporters/openvino/model_configs.py
+++ b/optimum/exporters/openvino/model_configs.py
@@ -75,6 +75,7 @@
     CodeGenModelPatcher,
     DBRXModelPatcher,
     DeciLMModelPatcher,
+    DeepseekPatcher,
     FalconModelPatcher,
     FluxTransfromerModelPatcher,
     Gemma2ModelPatcher,
@@ -2782,3 +2783,17 @@ class MT5OpenVINOConfig(T5OpenVINOConfig):
 )
 class LongT5OpenVINOConfig(T5OpenVINOConfig):
     pass
+
+
+@register_in_tasks_manager(
+    "deepseek-v3", *["text-generation", "text-generation-with-past"], library_name="transformers"
+)
+@register_in_tasks_manager(
+    "deepseek-v2", *["text-generation", "text-generation-with-past"], library_name="transformers"
+)
+@register_in_tasks_manager("deepseek", *["text-generation", "text-generation-with-past"], library_name="transformers")
+class DeepseekOpenVINOConfig(MiniCPM3OpenVINOConfig):
+    def patch_model_for_export(
+        self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
+    ) -> "ModelPatcher":
+        return DeepseekPatcher(self, model, model_kwargs=model_kwargs)
diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py
index b524f91485..614e39854b 100644
--- a/optimum/exporters/openvino/model_patcher.py
+++ b/optimum/exporters/openvino/model_patcher.py
@@ -3575,6 +3575,306 @@ def __exit__(self, exc_type, exc_value, traceback):
             block.self_attn.forward = block.self_attn._orig_forward
 
 
+class DeepseekPatcher(DecoderModelPatcher):
+    def __enter__(self):
+        super().__enter__()
+        self_attn = {
+            "deepseek_v3": deepseek_v3_attn_forward,
+            "deepseek_v2": deepseek_v2_attn_forward,
+            "deepseek": minicpm3_attn_forward,
+        }
+
+        self_attn_fwd = self_attn.get(self._model.config.model_type)
+        for block in self._model.model.layers:
+            if self_attn_fwd is not None:
+                block.self_attn._orig_forward = block.self_attn.forward
+                block.self_attn.forward = types.MethodType(self_attn_fwd, block.self_attn)
+            if hasattr(block.mlp, "moe_infer"):
+                block.mlp._orig_moe_infer = block.mlp.moe_infer
+                block.mlp.moe_infer = types.MethodType(deepseek_moe_infer, block.mlp)
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        super().__exit__(exc_type, exc_value, traceback)
+        for block in self._model.model.layers:
+            block.self_attn.forward = block.self_attn._orig_forward
+            if hasattr(block.mlp, "_orig_moe_infer"):
+                block.mlp.moe_infer = block.mlp._orig_moe_infer
+
+
+def deepseek_v3_attn_forward(
+    self,
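+    # TorchScript-friendly rewrite of DeepseekV3Attention.forward: uses SDPA and
+    # avoids new_empty so tracing does not bake constant tensors into the graph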
+ hidden_states: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_value=None, + output_attentions: bool = False, + use_cache: bool = False, +) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: + def rotate_half(x): + """Rotates half the hidden dims of the input.""" + x1 = x[..., : x.shape[-1] // 2] + x2 = x[..., x.shape[-1] // 2 :] + return torch.cat((-x2, x1), dim=-1) + + def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): + orig_dtype = k.dtype + cos = cos[position_ids].unsqueeze(unsqueeze_dim) # [bs, 1, seq_len, dim] + sin = sin[position_ids].unsqueeze(unsqueeze_dim) # [bs, 1, seq_len, dim] + q_fp32 = q.to(dtype=torch.float32, device=q.device) + k_fp32 = k.to(dtype=torch.float32, device=k.device) + q_embed = (q_fp32 * cos) + (rotate_half(q_fp32) * sin) + k_embed = (k_fp32 * cos) + (rotate_half(k_fp32) * sin) + return q_embed.to(dtype=orig_dtype), k_embed.to(dtype=orig_dtype) + + if output_attentions: + return self._orig_forward( + hidden_states=hidden_states, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_value=past_key_value, + output_attentions=output_attentions, + use_cache=use_cache, + ) + + bsz, q_len, _ = hidden_states.size() + + if self.q_lora_rank is None: + q = self.q_proj(hidden_states) + else: + q = self.q_b_proj(self.q_a_layernorm(self.q_a_proj(hidden_states))) + q = q.view(bsz, q_len, self.num_heads, self.q_head_dim).transpose(1, 2) + q_nope, q_pe = torch.split( + q, [self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1 + ) + + compressed_kv = self.kv_a_proj_with_mqa(hidden_states) + compressed_kv, k_pe = torch.split( + compressed_kv, [self.kv_lora_rank, self.qk_rope_head_dim], dim=-1 + ) + k_pe = k_pe.view(bsz, q_len, 1, self.qk_rope_head_dim).transpose(1, 2) + kv = ( + self.kv_b_proj(self.kv_a_layernorm(compressed_kv)) + .view(bsz, q_len, self.num_heads, self.qk_nope_head_dim + self.v_head_dim) + .transpose(1, 2) + ) + + k_nope, value_states = torch.split( + kv, [self.qk_nope_head_dim, self.v_head_dim], dim=-1 + ) + kv_seq_len = value_states.shape[-2] + if past_key_value is not None: + if self.layer_idx is None: + raise ValueError( + f"The cache structure has changed since version v4.36. If you are using {self.__class__.__name__} " + "for auto-regressive decoding with k/v caching, please make sure to initialize the attention class " + "with a layer index." 
+            )
+        kv_seq_len += past_key_value.get_usable_length(kv_seq_len, self.layer_idx)
+    cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len)
+
+    q_pe, k_pe = apply_rotary_pos_emb(q_pe, k_pe, cos, sin, position_ids)
+
+    # Difference from the original code: k_pe.new_empty creates a constant tensor in TorchScript
+    query_states = torch.concat([q_nope, q_pe], dim=-1)
+    # query_states = k_pe.new_empty(bsz, self.num_heads, q_len, self.q_head_dim)
+    # query_states[:, :, :, : self.qk_nope_head_dim] = q_nope
+    # query_states[:, :, :, self.qk_nope_head_dim :] = q_pe
+    key_states = torch.concat([k_nope, k_pe.expand(-1, self.num_heads, -1, -1)], dim=-1)
+    # key_states = k_pe.new_empty(bsz, self.num_heads, q_len, self.q_head_dim)
+    # key_states[:, :, :, : self.qk_nope_head_dim] = k_nope
+    # key_states[:, :, :, self.qk_nope_head_dim :] = k_pe
+    if past_key_value is not None:
+        cache_kwargs = {"sin": sin, "cos": cos}  # Specific to RoPE models
+        key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs)
+
+    if attention_mask is not None:
+        if attention_mask.size() != (bsz, 1, q_len, kv_seq_len):
+            raise ValueError(
+                f"Attention mask should be of size {(bsz, 1, q_len, kv_seq_len)}, but is {attention_mask.size()}"
+            )
+
+    # SDPA with memory-efficient backend is currently (torch==2.1.2) bugged with non-contiguous inputs with custom attn_mask,
+    # Reference: https://github.com/pytorch/pytorch/issues/112577.
+    if query_states.device.type == "cuda" and attention_mask is not None:
+        query_states = query_states.contiguous()
+        key_states = key_states.contiguous()
+        value_states = value_states.contiguous()
+
+    attn_output = torch.nn.functional.scaled_dot_product_attention(
+        query_states,
+        key_states,
+        value_states,
+        attn_mask=attention_mask,
+        dropout_p=self.attention_dropout if self.training else 0.0,
+        # The q_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case q_len == 1.
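+        # when a 4D attention_mask is supplied, `attention_mask is None` is False, so
+        # is_causal folds to a trace-time constant and masking comes from attn_mask alone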
+ is_causal=self.is_causal and attention_mask is None and q_len > 1, + ) + + attn_output = attn_output.transpose(1, 2).contiguous() + + attn_output = attn_output.reshape(bsz, q_len, self.num_heads * self.v_head_dim) + + attn_output = self.o_proj(attn_output) + + + return attn_output, None, past_key_value + + +def deepseek_v2_attn_forward( + self, + hidden_states: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_value=None, + output_attentions: bool = False, + use_cache: bool = False, + **kwargs, +) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: + def rotate_half(x): + """Rotates half the hidden dims of the input.""" + x1 = x[..., : x.shape[-1] // 2] + x2 = x[..., x.shape[-1] // 2 :] + return torch.cat((-x2, x1), dim=-1) + + def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): + cos = cos[position_ids].unsqueeze(unsqueeze_dim) + sin = sin[position_ids].unsqueeze(unsqueeze_dim) + + b, h, s, d = q.shape + q = q.view(b, h, s, d // 2, 2).transpose(4, 3).reshape(b, h, s, d) + + b, h, s, d = k.shape + k = k.view(b, h, s, d // 2, 2).transpose(4, 3).reshape(b, h, s, d) + + q_embed = (q * cos) + (rotate_half(q) * sin) + k_embed = (k * cos) + (rotate_half(k) * sin) + return q_embed, k_embed + + if output_attentions: + return self._orig_forward( + hidden_states=hidden_states, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_value=past_key_value, + output_attentions=output_attentions, + use_cache=use_cache, + ) + + bsz, q_len, _ = hidden_states.shape + + if self.q_lora_rank is None: + q = self.q_proj(hidden_states) + else: + q = self.q_b_proj(self.q_a_layernorm(self.q_a_proj(hidden_states))) + q = q.view(bsz, q_len, self.num_heads, self.q_head_dim).transpose(1, 2) + q_nope, q_pe = torch.split(q, [self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1) + + compressed_kv = self.kv_a_proj_with_mqa(hidden_states) + compressed_kv, k_pe = torch.split(compressed_kv, [self.kv_lora_rank, self.qk_rope_head_dim], dim=-1) + k_pe = k_pe.view(bsz, q_len, 1, self.qk_rope_head_dim).transpose(1, 2) + kv = ( + self.kv_b_proj(self.kv_a_layernorm(compressed_kv)) + .view(bsz, q_len, self.num_heads, self.qk_nope_head_dim + self.v_head_dim) + .transpose(1, 2) + ) + + k_nope, value_states = torch.split(kv, [self.qk_nope_head_dim, self.v_head_dim], dim=-1) + kv_seq_len = value_states.shape[-2] + if past_key_value is not None: + if self.layer_idx is None: + raise ValueError( + f"The cache structure has changed since version v4.36. If you are using {self.__class__.__name__} " + "for auto-regressive decoding with k/v caching, please make sure to initialize the attention class " + "with a layer index." 
+            )
+        kv_seq_len += past_key_value.get_usable_length(kv_seq_len, self.layer_idx)
+    cos, sin = self.rotary_emb(value_states, seq_len=kv_seq_len)
+
+    q_pe, k_pe = apply_rotary_pos_emb(q_pe, k_pe, cos, sin, position_ids)
+
+    # Difference from the original code: k_pe.new_empty creates a constant tensor in TorchScript
+    query_states = torch.concat([q_nope, q_pe], dim=-1)
+    # query_states = k_pe.new_empty(bsz, self.num_heads, q_len, self.q_head_dim)
+    # query_states[:, :, :, : self.qk_nope_head_dim] = q_nope
+    # query_states[:, :, :, self.qk_nope_head_dim :] = q_pe
+    key_states = torch.concat([k_nope, k_pe.expand(-1, self.num_heads, -1, -1)], dim=-1)
+    # key_states = k_pe.new_empty(bsz, self.num_heads, q_len, self.q_head_dim)
+    # key_states[:, :, :, : self.qk_nope_head_dim] = k_nope
+    # key_states[:, :, :, self.qk_nope_head_dim :] = k_pe
+    if past_key_value is not None:
+        cache_kwargs = {"sin": sin, "cos": cos}  # Specific to RoPE models
+        key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs)
+
+    if attention_mask is not None:
+        if attention_mask.size() != (bsz, 1, q_len, kv_seq_len):
+            raise ValueError(
+                f"Attention mask should be of size {(bsz, 1, q_len, kv_seq_len)}, but is {attention_mask.size()}"
+            )
+
+    # SDPA with memory-efficient backend is currently (torch==2.1.2) bugged with non-contiguous inputs with custom attn_mask,
+    # Reference: https://github.com/pytorch/pytorch/issues/112577.
+    if query_states.device.type == "cuda" and attention_mask is not None:
+        query_states = query_states.contiguous()
+        key_states = key_states.contiguous()
+        value_states = value_states.contiguous()
+
+    attn_output = torch.nn.functional.scaled_dot_product_attention(
+        query_states,
+        key_states,
+        value_states,
+        attn_mask=attention_mask,
+        dropout_p=self.attention_dropout if self.training else 0.0,
+        # The q_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case q_len == 1.
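+        # self.training is False during export, so dropout_p folds to 0.0 and the
+        # traced graph stays deterministic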
+ is_causal=self.is_causal and attention_mask is None and q_len > 1, + ) + attn_output = attn_output.transpose(1, 2).contiguous() + + attn_output = attn_output.reshape(bsz, q_len, self.num_heads * self.v_head_dim) + + attn_output = self.o_proj(attn_output) + + return attn_output, None, past_key_value + + +def deepseek_moe_infer(self, x, topk_ids, topk_weight): + cnts = torch.zeros((topk_ids.shape[0], len(self.experts))) + cnts.scatter_(1, topk_ids, 1) + tokens_per_expert = cnts.sum(dim=0).to(torch.long) + idxs = torch.argsort(topk_ids.view(-1)) + sorted_tokens = x[idxs // topk_ids.shape[1]] + + outputs = [] + start_idx = torch.tensor(0, dtype=torch.long) + for i, num_tokens in enumerate(tokens_per_expert): + end_idx = start_idx + num_tokens + # difference with original: removed skiping expert if empty num_tokens + expert_id = i + self.ep_rank * self.experts_per_rank + expert = self.experts[expert_id] + tokens_for_this_expert = sorted_tokens[start_idx:end_idx] + expert_out = expert(tokens_for_this_expert) + outputs.append(expert_out) + start_idx = end_idx + + # difference with original: removed usage torch.new_empty if outputs empty + outs = torch.cat(outputs, dim=0) + + new_x = torch.zeros_like(outs) + new_x[idxs] = outs + final_out = ( + new_x.view(*topk_ids.shape, -1) + .to(topk_weight.dtype) + .mul_(topk_weight.unsqueeze(dim=-1)) + .sum(dim=1) + .to(new_x.dtype) + ) + return final_out + + class Qwen2VLLanguageModelPatcher(DecoderModelPatcher): def __init__( self, diff --git a/optimum/exporters/openvino/utils.py b/optimum/exporters/openvino/utils.py index 1ab9e1051e..70e43807a8 100644 --- a/optimum/exporters/openvino/utils.py +++ b/optimum/exporters/openvino/utils.py @@ -48,6 +48,7 @@ OV_XML_FILE_NAME = "openvino_model.xml" _MAX_UNCOMPRESSED_SIZE = 1e9 +SKIP_CHECK_TRACE_MODELS = ["deepseek", "deepseek_v2", "deepseek_v3"] def is_torch_model(model: Union["PreTrainedModel", "ModelMixin"]): From 11ac5d83a59c6818ab8ce0b09cc721ceb2586eec Mon Sep 17 00:00:00 2001 From: eaidova <ekaterina.aidova@intel.com> Date: Tue, 11 Feb 2025 08:30:35 +0400 Subject: [PATCH 02/12] add tests --- tests/openvino/test_modeling.py | 8 ++++++++ tests/openvino/utils_tests.py | 2 ++ 2 files changed, 10 insertions(+) diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py index 60b7576973..d5c6783acf 100644 --- a/tests/openvino/test_modeling.py +++ b/tests/openvino/test_modeling.py @@ -1001,6 +1001,12 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): if is_transformers_version(">=", "4.46.0"): SUPPORTED_ARCHITECTURES += ("glm", "mistral-nemo", "minicpm3") + # openvino 2025.0 required for disabling check_trace + if is_openvino_version(">=", "2025.0"): + SUPPORTED_ARCHITECTURES += ( + "deepseek-v2", + "deepseek-v3" + ) # gptq and awq install disabled for windows test environment if platform.system() != "Windows": @@ -1030,6 +1036,8 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): "exaone", "decilm", "minicpm3", + "deepseek-v2", + "deepseek-v3" ) @parameterized.expand(SUPPORTED_ARCHITECTURES) diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py index ac39b065ca..2526449494 100644 --- a/tests/openvino/utils_tests.py +++ b/tests/openvino/utils_tests.py @@ -51,6 +51,8 @@ "deberta": "hf-internal-testing/tiny-random-deberta", "deberta_v2": "hf-internal-testing/tiny-random-DebertaV2Model", "decilm": "katuni4ka/tiny-random-decilm", + "deepseek-v3": "katuni4ka/tiny-random-deepseek-v3", + "deepseek-v2": "yujiepan/deepseek-v2-tiny-random", "deit": 
"hf-internal-testing/tiny-random-DeiTModel", "convnext": "hf-internal-testing/tiny-random-convnext", "convnextv2": "hf-internal-testing/tiny-random-ConvNextV2Model", From c4717449bc3e586a917dfa69ac764cf2b7d4922c Mon Sep 17 00:00:00 2001 From: eaidova <ekaterina.aidova@intel.com> Date: Tue, 11 Feb 2025 08:42:30 +0400 Subject: [PATCH 03/12] improve disabling trace check --- optimum/exporters/openvino/convert.py | 3 ++- optimum/exporters/openvino/model_patcher.py | 13 +++---------- optimum/exporters/openvino/utils.py | 10 ++++++++++ tests/openvino/test_modeling.py | 7 ++----- 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py index e37d66ea02..35c7a0a137 100644 --- a/optimum/exporters/openvino/convert.py +++ b/optimum/exporters/openvino/convert.py @@ -67,6 +67,7 @@ OV_XML_FILE_NAME, _get_input_info, _get_open_clip_submodels_fn_and_export_configs, + allow_skip_tracing_check, clear_class_registry, remove_none_from_dummy_inputs, save_config, @@ -437,7 +438,7 @@ def ts_patched_forward(*args, **kwargs): patcher.patched_forward = ts_patched_forward ts_decoder_kwargs = {} - if is_openvino_version(">=", "2025.0"): + if library_name == "diffusers" or allow_skip_tracing_check(model) and is_openvino_version(">=", "2025.0"): ts_decoder_kwargs["trace_kwargs"] = {"check_trace": False} with patcher: diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py index 614e39854b..b1c36521e3 100644 --- a/optimum/exporters/openvino/model_patcher.py +++ b/optimum/exporters/openvino/model_patcher.py @@ -3643,14 +3643,10 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): else: q = self.q_b_proj(self.q_a_layernorm(self.q_a_proj(hidden_states))) q = q.view(bsz, q_len, self.num_heads, self.q_head_dim).transpose(1, 2) - q_nope, q_pe = torch.split( - q, [self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1 - ) + q_nope, q_pe = torch.split(q, [self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1) compressed_kv = self.kv_a_proj_with_mqa(hidden_states) - compressed_kv, k_pe = torch.split( - compressed_kv, [self.kv_lora_rank, self.qk_rope_head_dim], dim=-1 - ) + compressed_kv, k_pe = torch.split(compressed_kv, [self.kv_lora_rank, self.qk_rope_head_dim], dim=-1) k_pe = k_pe.view(bsz, q_len, 1, self.qk_rope_head_dim).transpose(1, 2) kv = ( self.kv_b_proj(self.kv_a_layernorm(compressed_kv)) @@ -3658,9 +3654,7 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): .transpose(1, 2) ) - k_nope, value_states = torch.split( - kv, [self.qk_nope_head_dim, self.v_head_dim], dim=-1 - ) + k_nope, value_states = torch.split(kv, [self.qk_nope_head_dim, self.v_head_dim], dim=-1) kv_seq_len = value_states.shape[-2] if past_key_value is not None: if self.layer_idx is None: @@ -3716,7 +3710,6 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): attn_output = self.o_proj(attn_output) - return attn_output, None, past_key_value diff --git a/optimum/exporters/openvino/utils.py b/optimum/exporters/openvino/utils.py index 70e43807a8..83b2c2cca2 100644 --- a/optimum/exporters/openvino/utils.py +++ b/optimum/exporters/openvino/utils.py @@ -345,3 +345,13 @@ def set_simplified_chat_template(ov_tokenizer_model, processor_chat_template=Non if tokenizer_chat_template is not None and tokenizer_chat_template in COMPLEX_CHAT_TEMPLATES: ov_tokenizer_model.set_rt_info(COMPLEX_CHAT_TEMPLATES[tokenizer_chat_template], "simplified_chat_template") return 
ov_tokenizer_model + + + +SKIP_CHECK_TRACE_MODELS = ("deepseek", "deepseek-v2", "deepseek-v3") + + +def allow_skip_tracing_check(model): + config = getattr(model, "config", {}) + model_type = getattr(config, "model_type", "").replace("_", "-") + return model_type in SKIP_CHECK_TRACE_MODELS \ No newline at end of file diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py index d5c6783acf..78e1d1e1b8 100644 --- a/tests/openvino/test_modeling.py +++ b/tests/openvino/test_modeling.py @@ -1003,10 +1003,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): SUPPORTED_ARCHITECTURES += ("glm", "mistral-nemo", "minicpm3") # openvino 2025.0 required for disabling check_trace if is_openvino_version(">=", "2025.0"): - SUPPORTED_ARCHITECTURES += ( - "deepseek-v2", - "deepseek-v3" - ) + SUPPORTED_ARCHITECTURES += ("deepseek-v2", "deepseek-v3") # gptq and awq install disabled for windows test environment if platform.system() != "Windows": @@ -1037,7 +1034,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): "decilm", "minicpm3", "deepseek-v2", - "deepseek-v3" + "deepseek-v3", ) @parameterized.expand(SUPPORTED_ARCHITECTURES) From 3c7b30749971a18fd80493c947c52a3d556e96dc Mon Sep 17 00:00:00 2001 From: Ekaterina Aidova <ekaterina.aidova@intel.com> Date: Tue, 11 Feb 2025 08:43:24 +0400 Subject: [PATCH 04/12] Update optimum/exporters/openvino/convert.py --- optimum/exporters/openvino/convert.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py index 35c7a0a137..8a4647808d 100644 --- a/optimum/exporters/openvino/convert.py +++ b/optimum/exporters/openvino/convert.py @@ -438,7 +438,9 @@ def ts_patched_forward(*args, **kwargs): patcher.patched_forward = ts_patched_forward ts_decoder_kwargs = {} - if library_name == "diffusers" or allow_skip_tracing_check(model) and is_openvino_version(">=", "2025.0"): + if (library_name == "diffusers" or allow_skip_tracing_check(model)) and is_openvino_version( + ">=", "2025.0" + ): ts_decoder_kwargs["trace_kwargs"] = {"check_trace": False} with patcher: From 906a18f802b0c61fbe5f82f7c6b2c2b2bd44c6f8 Mon Sep 17 00:00:00 2001 From: eaidova <ekaterina.aidova@intel.com> Date: Tue, 11 Feb 2025 09:32:32 +0400 Subject: [PATCH 05/12] add deepseek to docs --- docs/source/openvino/models.mdx | 3 +++ tests/openvino/test_modeling.py | 5 ++--- tests/openvino/utils_tests.py | 3 +-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/source/openvino/models.mdx b/docs/source/openvino/models.mdx index e1c8c7864e..7b696e617f 100644 --- a/docs/source/openvino/models.mdx +++ b/docs/source/openvino/models.mdx @@ -43,6 +43,9 @@ Here is the list of the supported architectures : - Deberta-v2 - DeciLM - Deit +- Deepseek +- Deepseek_v2 +- Deepseek_v3 - DistilBert - Electra - Encoder Decoder diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py index 78e1d1e1b8..b0feee8b3e 100644 --- a/tests/openvino/test_modeling.py +++ b/tests/openvino/test_modeling.py @@ -1003,7 +1003,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): SUPPORTED_ARCHITECTURES += ("glm", "mistral-nemo", "minicpm3") # openvino 2025.0 required for disabling check_trace if is_openvino_version(">=", "2025.0"): - SUPPORTED_ARCHITECTURES += ("deepseek-v2", "deepseek-v3") + SUPPORTED_ARCHITECTURES += "deepseek" # gptq and awq install disabled for windows test environment if platform.system() != "Windows": @@ -1033,8 +1033,7 @@ class 
OVModelForCausalLMIntegrationTest(unittest.TestCase): "exaone", "decilm", "minicpm3", - "deepseek-v2", - "deepseek-v3", + "deepseek", ) @parameterized.expand(SUPPORTED_ARCHITECTURES) diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py index 2526449494..b313621e85 100644 --- a/tests/openvino/utils_tests.py +++ b/tests/openvino/utils_tests.py @@ -51,8 +51,7 @@ "deberta": "hf-internal-testing/tiny-random-deberta", "deberta_v2": "hf-internal-testing/tiny-random-DebertaV2Model", "decilm": "katuni4ka/tiny-random-decilm", - "deepseek-v3": "katuni4ka/tiny-random-deepseek-v3", - "deepseek-v2": "yujiepan/deepseek-v2-tiny-random", + "deepseek": "yujiepan/deepseek-v2-tiny-random", "deit": "hf-internal-testing/tiny-random-DeiTModel", "convnext": "hf-internal-testing/tiny-random-convnext", "convnextv2": "hf-internal-testing/tiny-random-ConvNextV2Model", From e507a1115a942aa77f985a94cf58540522aa1b29 Mon Sep 17 00:00:00 2001 From: Ekaterina Aidova <ekaterina.aidova@intel.com> Date: Tue, 11 Feb 2025 09:57:22 +0400 Subject: [PATCH 06/12] Update tests/openvino/test_modeling.py --- tests/openvino/test_modeling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/openvino/test_modeling.py b/tests/openvino/test_modeling.py index b0feee8b3e..8122ed3de1 100644 --- a/tests/openvino/test_modeling.py +++ b/tests/openvino/test_modeling.py @@ -1003,7 +1003,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase): SUPPORTED_ARCHITECTURES += ("glm", "mistral-nemo", "minicpm3") # openvino 2025.0 required for disabling check_trace if is_openvino_version(">=", "2025.0"): - SUPPORTED_ARCHITECTURES += "deepseek" + SUPPORTED_ARCHITECTURES += ("deepseek",) # gptq and awq install disabled for windows test environment if platform.system() != "Windows": From cd36115714635c1a8944dfdf4d00b320251e387a Mon Sep 17 00:00:00 2001 From: eaidova <ekaterina.aidova@intel.com> Date: Tue, 28 Jan 2025 15:23:08 +0400 Subject: [PATCH 07/12] deepseek moe support --- optimum/exporters/openvino/model_patcher.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py index b1c36521e3..89c6648aa9 100644 --- a/optimum/exporters/openvino/model_patcher.py +++ b/optimum/exporters/openvino/model_patcher.py @@ -3643,10 +3643,14 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): else: q = self.q_b_proj(self.q_a_layernorm(self.q_a_proj(hidden_states))) q = q.view(bsz, q_len, self.num_heads, self.q_head_dim).transpose(1, 2) - q_nope, q_pe = torch.split(q, [self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1) + q_nope, q_pe = torch.split( + q, [self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1 + ) compressed_kv = self.kv_a_proj_with_mqa(hidden_states) - compressed_kv, k_pe = torch.split(compressed_kv, [self.kv_lora_rank, self.qk_rope_head_dim], dim=-1) + compressed_kv, k_pe = torch.split( + compressed_kv, [self.kv_lora_rank, self.qk_rope_head_dim], dim=-1 + ) k_pe = k_pe.view(bsz, q_len, 1, self.qk_rope_head_dim).transpose(1, 2) kv = ( self.kv_b_proj(self.kv_a_layernorm(compressed_kv)) @@ -3654,7 +3658,10 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): .transpose(1, 2) ) - k_nope, value_states = torch.split(kv, [self.qk_nope_head_dim, self.v_head_dim], dim=-1) + + k_nope, value_states = torch.split( + kv, [self.qk_nope_head_dim, self.v_head_dim], dim=-1 + ) kv_seq_len = value_states.shape[-2] if past_key_value is not None: if 
self.layer_idx is None: From 831766973869e5676205b290be76771b0f39a530 Mon Sep 17 00:00:00 2001 From: eaidova <ekaterina.aidova@intel.com> Date: Tue, 11 Feb 2025 08:30:35 +0400 Subject: [PATCH 08/12] add tests --- tests/openvino/utils_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/openvino/utils_tests.py b/tests/openvino/utils_tests.py index b313621e85..140f8f771e 100644 --- a/tests/openvino/utils_tests.py +++ b/tests/openvino/utils_tests.py @@ -51,7 +51,7 @@ "deberta": "hf-internal-testing/tiny-random-deberta", "deberta_v2": "hf-internal-testing/tiny-random-DebertaV2Model", "decilm": "katuni4ka/tiny-random-decilm", - "deepseek": "yujiepan/deepseek-v2-tiny-random", + "deepseek": "katuni4ka/tiny-random-deepseek-v3", "deit": "hf-internal-testing/tiny-random-DeiTModel", "convnext": "hf-internal-testing/tiny-random-convnext", "convnextv2": "hf-internal-testing/tiny-random-ConvNextV2Model", From 7f96289bf3058ed02657a0ce6096913fef46b19c Mon Sep 17 00:00:00 2001 From: eaidova <ekaterina.aidova@intel.com> Date: Tue, 11 Feb 2025 08:42:30 +0400 Subject: [PATCH 09/12] improve disabling trace check --- optimum/exporters/openvino/model_patcher.py | 13 +++---------- optimum/exporters/openvino/utils.py | 4 ++-- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py index 89c6648aa9..b1c36521e3 100644 --- a/optimum/exporters/openvino/model_patcher.py +++ b/optimum/exporters/openvino/model_patcher.py @@ -3643,14 +3643,10 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): else: q = self.q_b_proj(self.q_a_layernorm(self.q_a_proj(hidden_states))) q = q.view(bsz, q_len, self.num_heads, self.q_head_dim).transpose(1, 2) - q_nope, q_pe = torch.split( - q, [self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1 - ) + q_nope, q_pe = torch.split(q, [self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1) compressed_kv = self.kv_a_proj_with_mqa(hidden_states) - compressed_kv, k_pe = torch.split( - compressed_kv, [self.kv_lora_rank, self.qk_rope_head_dim], dim=-1 - ) + compressed_kv, k_pe = torch.split(compressed_kv, [self.kv_lora_rank, self.qk_rope_head_dim], dim=-1) k_pe = k_pe.view(bsz, q_len, 1, self.qk_rope_head_dim).transpose(1, 2) kv = ( self.kv_b_proj(self.kv_a_layernorm(compressed_kv)) @@ -3658,10 +3654,7 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids, unsqueeze_dim=1): .transpose(1, 2) ) - - k_nope, value_states = torch.split( - kv, [self.qk_nope_head_dim, self.v_head_dim], dim=-1 - ) + k_nope, value_states = torch.split(kv, [self.qk_nope_head_dim, self.v_head_dim], dim=-1) kv_seq_len = value_states.shape[-2] if past_key_value is not None: if self.layer_idx is None: diff --git a/optimum/exporters/openvino/utils.py b/optimum/exporters/openvino/utils.py index 83b2c2cca2..b7e37a04b5 100644 --- a/optimum/exporters/openvino/utils.py +++ b/optimum/exporters/openvino/utils.py @@ -314,6 +314,7 @@ def save_preprocessors( maybe_save_preprocessors(model_name_or_path, output, trust_remote_code=trust_remote_code) +<<<<<<< HEAD COMPLEX_CHAT_TEMPLATES = { # minicpm3 "{%- macro json_to_python_type(param_name, json_spec) %}\n{%- set basic_type_map = {\n 'string': 'str',\n 'number': 'float',\n 'integer': 'int',\n 'boolean': 'bool',\n 'null': 'None'\n} %}\n\n{%- if json_spec.enum %}\n {{- param_name|title }}\n{%- elif basic_type_map[json_spec.type] is defined %}\n {{- basic_type_map[json_spec.type] }}\n{%- elif json_spec.type == 'array' %}\n {{- 'List[' + 
json_to_python_type(param_name, json_spec['items']) + ']' }}\n{%- elif json_spec.type == 'object' %}\n {{- 'Dict[str, ' + json_to_python_type(param_name, json_spec.additionalProperties if json_spec.additionalProperties else 'Any') + ']' if not json_spec.properties else param_name|title }}\n{%- elif json_spec.type is iterable %}\n {{- 'Union[' }}\n {%- for t in json_spec.type %}\n {{- json_to_python_type(param_name, {'type': t}) }}\n {{- ', ' if not loop.last }}\n {%- endfor %}\n {{- ']' }}\n{%- else %}\n {{- 'Any' }}\n{%- endif %}\n{%- endmacro %}\n\n{%- macro object_to_fields(json_spec, field_indent) %}\n {%- set o_ns = namespace(f = caller()) %}\n {%- for param_name, param_fields in json_spec.properties|items %}\n {%- if param_fields.enum %}\n {{- '\\n\\nclass ' + param_name|title + '(Enum):\\n' }}\n {%- for enum_option in param_fields.enum %}\n {{- ' enum_' + loop.index0|string + ' = ' + enum_option|tojson + '\\n' }}\n {%- endfor %}\n {%- elif param_fields.type == 'object' and param_fields.properties %}\n {%- call object_to_fields(param_fields, ' ') %}\n {{- '\\n\\nclass ' + param_name|title + '(BaseModel):\\n' }}\n {%- endcall %}\n {%- elif param_fields.type == 'array' and param_fields['items'] and param_fields['items'].type == 'object' and param_fields['items'].properties %}\n {%- call object_to_fields(param_fields['items'], ' ') %}\n {{- '\\n\\nclass ' + param_name|title + '(BaseModel):\\n' }}\n {%- endcall %}\n {%- endif %}\n {%- set param_default = param_fields.default|tojson if param_fields.default is string else param_fields.default|string if param_fields.default is defined else 'None' %}\n {%- set o_ns.f = o_ns.f + field_indent + param_name + ': ' %}\n {%- set o_ns.f = o_ns.f + ('Optional[' + json_to_python_type(param_name, param_fields) + ']' if param_name not in json_spec.required else json_to_python_type(param_name, param_fields)) %}\n {%- if not param_fields.title and not param_fields.description and not param_fields.pattern %}\n {%- set o_ns.f = o_ns.f + (' = ' + param_default if param_name not in json_spec.required else '') %}\n {%- else %}\n {%- set o_ns.f = o_ns.f + (' = Field(...' 
if param_name in json_spec.required else ' = Field(' + param_default) %}\n {%- set o_ns.f = o_ns.f + (', description=' + param_fields.description|tojson if param_fields.description else '') %}\n {%- set o_ns.f = o_ns.f + (', regex=' + param_fields.pattern|tojson if param_fields.pattern else '') %}\n {%- set o_ns.f = o_ns.f + (', title=' + param_fields.title|tojson if param_fields.title else '') %}\n {%- set o_ns.f = o_ns.f + ')' %}\n {%- endif %}\n {%- set o_ns.f = o_ns.f + '\\n' %}\n {%- endfor %}\n {{- o_ns.f }}\n{%- endmacro %}\n\n{%- macro tool_parser(tools) %}\n{%- for tool in tools %}\n {%- if tool.type is not defined or tool.type == 'function' %}\n {%- if tool.function is defined %}\n {%- set tool = tool.function %}\n {%- endif %}\n {%- set tool_params = tool.parameters if tool.parameters is defined else none %}\n {%- call object_to_fields(tool_params, ' ') %}\n {{- '\\n\\ndef ' + tool.name + '(' }}\n {%- if tool_params %}\n {%- for param_name, param_fields in tool_params.properties|items %}\n {%- set param_default = param_fields.default|tojson if param_fields.default is string else param_fields.default|string if param_fields.default is defined else 'None' %}\n {{- ', ' if loop.index0 != 0 }}\n {{- param_name }}\n {{- '=' + param_default if param_name not in tool_params.required }}\n {%- endfor %}\n {%- endif %}\n {{- '):\\n \"\"\"' }}\n {{- tool.description }}\n {{- '\\n\\n Args:\\n' if tool_params else '\\n' }}\n {%- endcall %}\n {{- ' \"\"\"\\n' }}\n {%- endif %}\n{%- endfor %}\n{%- endmacro %}\n\n{%- if messages[0]['role'] == 'system' %}\n {%- set loop_messages = messages[1:] %}\n {%- set system_message = messages[0]['content'] %}\n{%- else %}\n {%- set loop_messages = messages %}\n {%- set system_message = '' %}\n{%- endif %}\n{{- '<|im_start|>system\\n' + system_message if system_message or tools }}\n{%- if tools %}\n {{- '\\n# Functions\\nHere is a list of functions that you can invoke:\\n```python\\nfrom enum import Enum\\nfrom typing import List, Dict, Optional\\nfrom pydantic import BaseModel, Field\\n\\n' }}\n {{- tool_parser(tools) }}\n {{- \"\\n```\\n\\n# Function Call Rule and Output Format\\n- If the user's question can be answered without calling any function, please answer the user's question directly. In this situation, you should return your thought and answer the user's question directly.\\n- If the user cannot be answered without calling any function, and the user does not provide enough information to call functions, please ask the user for more information. In this situation, you should return your thought and ask the user for more information.\\n- If the user's question cannot be answered without calling any function, and the user has provided enough information to call functions to solve it, you should call the functions. 
In this situation, the assistant should return your thought and call the functions.\\n- Use default parameters unless the user has specified otherwise.\\n- You should answer in the following format:\\n\\n<|thought_start|>\\n{explain why the user's question can be answered without calling a function or why you should ask the user for more information or why you should call one or more functions and your plan to solve the user's question.}\\n<|thought_end|>\\n<|tool_call_start|>\\n```python\\nfunc1(params_name=params_value, params_name2=params_value2...)\\nfunc2(params)\\n```\\n<|tool_call_end|>\\n{answer the user's question directly or ask the user for more information}\" }}\n{%- endif %}\n{{- '<|im_end|>\\n' if system_message or tools }}\n{%- for message in loop_messages %}\n {%- set content = message.content %}\n {%- if message.role == 'assistant' and message.tool_calls %}\n {{- '<|im_start|>' + message.role + '\\n' }}\n {{- '<|thought_start|>\\n' + message.thought + '\\n<|thought_end|>\\n' if message.thought }}\n {{- '<|tool_call_start|>\\n```python\\n' }}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- tool_call.name + '(' }}\n {%- if tool_call.arguments is defined and tool_call.arguments|length > 0 %}\n {%- for param_name, param_value in tool_call.arguments|items %}\n {{- param_name + '=' + param_value|tojson }}\n {{- ',' if not loop.last }}\n {%- endfor %}\n {%- endif %}\n {{- ')\\n' }}\n {%- endfor %}\n {{- '```\\n<|tool_call_end|>\\n' }}\n {{- content if content and not content.startswith('<|tool_call_start|>') }}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == 'assistant' and message.thought %}\n {{- '<|im_start|>' + message.role + '\\n' + '<|thought_start|>\\n' + message.thought + '\\n<|thought_end|>\\n' + content + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endfor %}\n\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}": "{% for message in messages %}{% if message['role'] == 'system' %}{{ '<|im_start|>system\\n' + message['content'] + '<|im_end|>\\n' }}{% elif message['role'] == 'user' %}{{ '<|im_start|>user\\n' + message['content'] + '<|im_end|>\\n' }}{% elif message['role'] == 'assistant' %}{{ '<|im_start|>assistant\\n ' + message['content'] + '<|im_end|>\\n' }}{% endif %}{% if loop.last and add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% endif %}{% endfor %}", @@ -347,11 +348,10 @@ def set_simplified_chat_template(ov_tokenizer_model, processor_chat_template=Non return ov_tokenizer_model - SKIP_CHECK_TRACE_MODELS = ("deepseek", "deepseek-v2", "deepseek-v3") def allow_skip_tracing_check(model): config = getattr(model, "config", {}) model_type = getattr(config, "model_type", "").replace("_", "-") - return model_type in SKIP_CHECK_TRACE_MODELS \ No newline at end of file + return model_type in SKIP_CHECK_TRACE_MODELS From b0e5242f377129ea3a1b9b07a811dd915c34f425 Mon Sep 17 00:00:00 2001 From: Ekaterina Aidova <ekaterina.aidova@intel.com> Date: Tue, 11 Feb 2025 10:39:11 +0400 Subject: [PATCH 10/12] Update optimum/exporters/openvino/utils.py --- optimum/exporters/openvino/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/optimum/exporters/openvino/utils.py b/optimum/exporters/openvino/utils.py index b7e37a04b5..3212276618 100644 --- a/optimum/exporters/openvino/utils.py +++ b/optimum/exporters/openvino/utils.py @@ -48,7 +48,6 @@ 
OV_XML_FILE_NAME = "openvino_model.xml" _MAX_UNCOMPRESSED_SIZE = 1e9 -SKIP_CHECK_TRACE_MODELS = ["deepseek", "deepseek_v2", "deepseek_v3"] def is_torch_model(model: Union["PreTrainedModel", "ModelMixin"]): From acc3390fd5a5d5212218c54d3e143e49d3ef488d Mon Sep 17 00:00:00 2001 From: eaidova <ekaterina.aidova@intel.com> Date: Mon, 17 Feb 2025 13:05:27 +0400 Subject: [PATCH 11/12] move all conditions to allow_skip_check_trace --- optimum/exporters/openvino/convert.py | 5 +---- optimum/exporters/openvino/utils.py | 8 ++++++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py index 8a4647808d..06c061375c 100644 --- a/optimum/exporters/openvino/convert.py +++ b/optimum/exporters/openvino/convert.py @@ -47,7 +47,6 @@ _transformers_version, compare_versions, is_openvino_tokenizers_version, - is_openvino_version, is_tokenizers_version, is_transformers_version, ) @@ -438,9 +437,7 @@ def ts_patched_forward(*args, **kwargs): patcher.patched_forward = ts_patched_forward ts_decoder_kwargs = {} - if (library_name == "diffusers" or allow_skip_tracing_check(model)) and is_openvino_version( - ">=", "2025.0" - ): + if allow_skip_tracing_check(library_name, model): ts_decoder_kwargs["trace_kwargs"] = {"check_trace": False} with patcher: diff --git a/optimum/exporters/openvino/utils.py b/optimum/exporters/openvino/utils.py index 3212276618..1b3de340c6 100644 --- a/optimum/exporters/openvino/utils.py +++ b/optimum/exporters/openvino/utils.py @@ -26,7 +26,7 @@ from optimum.exporters import TasksManager from optimum.exporters.onnx.base import OnnxConfig from optimum.intel.utils import is_transformers_version -from optimum.intel.utils.import_utils import is_safetensors_available +from optimum.intel.utils.import_utils import is_openvino_version, is_safetensors_available from optimum.utils import is_diffusers_available from optimum.utils.save_utils import maybe_save_preprocessors @@ -350,7 +350,11 @@ def set_simplified_chat_template(ov_tokenizer_model, processor_chat_template=Non SKIP_CHECK_TRACE_MODELS = ("deepseek", "deepseek-v2", "deepseek-v3") -def allow_skip_tracing_check(model): +def allow_skip_tracing_check(library_name, model): + if is_openvino_version("<", "2025.0.0"): + return False + if library_name == "diffusers": + return True config = getattr(model, "config", {}) model_type = getattr(config, "model_type", "").replace("_", "-") return model_type in SKIP_CHECK_TRACE_MODELS From 2adf01e241e31390adb38e02b212e86b3fffd854 Mon Sep 17 00:00:00 2001 From: eaidova <ekaterina.aidova@intel.com> Date: Mon, 17 Feb 2025 18:48:31 +0400 Subject: [PATCH 12/12] add info about source --- optimum/exporters/openvino/convert.py | 4 +++- optimum/exporters/openvino/model_patcher.py | 2 ++ optimum/exporters/openvino/utils.py | 5 +---- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py index 06c061375c..12561408db 100644 --- a/optimum/exporters/openvino/convert.py +++ b/optimum/exporters/openvino/convert.py @@ -437,7 +437,9 @@ def ts_patched_forward(*args, **kwargs): patcher.patched_forward = ts_patched_forward ts_decoder_kwargs = {} - if allow_skip_tracing_check(library_name, model): + model_config = getattr(model, "config", {}) + model_type = getattr(model_config, "model_type", "").replace("_", "-") + if allow_skip_tracing_check(library_name, model_type): ts_decoder_kwargs["trace_kwargs"] = {"check_trace": False} with patcher: diff --git 
a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py index b1c36521e3..87154f7ff9 100644 --- a/optimum/exporters/openvino/model_patcher.py +++ b/optimum/exporters/openvino/model_patcher.py @@ -3610,6 +3610,7 @@ def deepseek_v3_attn_forward( output_attentions: bool = False, use_cache: bool = False, ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: + # modified from https://huggingface.co/deepseek-ai/DeepSeek-V3/blob/main/modeling_deepseek.py#L751 def rotate_half(x): """Rotates half the hidden dims of the input.""" x1 = x[..., : x.shape[-1] // 2] @@ -3723,6 +3724,7 @@ def deepseek_v2_attn_forward( use_cache: bool = False, **kwargs, ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: + # modified from https://huggingface.co/deepseek-ai/DeepSeek-V2-Lite/blob/main/modeling_deepseek.py#L806 def rotate_half(x): """Rotates half the hidden dims of the input.""" x1 = x[..., : x.shape[-1] // 2] diff --git a/optimum/exporters/openvino/utils.py b/optimum/exporters/openvino/utils.py index 1b3de340c6..6eef614799 100644 --- a/optimum/exporters/openvino/utils.py +++ b/optimum/exporters/openvino/utils.py @@ -313,7 +313,6 @@ def save_preprocessors( maybe_save_preprocessors(model_name_or_path, output, trust_remote_code=trust_remote_code) -<<<<<<< HEAD COMPLEX_CHAT_TEMPLATES = { # minicpm3 "{%- macro json_to_python_type(param_name, json_spec) %}\n{%- set basic_type_map = {\n 'string': 'str',\n 'number': 'float',\n 'integer': 'int',\n 'boolean': 'bool',\n 'null': 'None'\n} %}\n\n{%- if json_spec.enum %}\n {{- param_name|title }}\n{%- elif basic_type_map[json_spec.type] is defined %}\n {{- basic_type_map[json_spec.type] }}\n{%- elif json_spec.type == 'array' %}\n {{- 'List[' + json_to_python_type(param_name, json_spec['items']) + ']' }}\n{%- elif json_spec.type == 'object' %}\n {{- 'Dict[str, ' + json_to_python_type(param_name, json_spec.additionalProperties if json_spec.additionalProperties else 'Any') + ']' if not json_spec.properties else param_name|title }}\n{%- elif json_spec.type is iterable %}\n {{- 'Union[' }}\n {%- for t in json_spec.type %}\n {{- json_to_python_type(param_name, {'type': t}) }}\n {{- ', ' if not loop.last }}\n {%- endfor %}\n {{- ']' }}\n{%- else %}\n {{- 'Any' }}\n{%- endif %}\n{%- endmacro %}\n\n{%- macro object_to_fields(json_spec, field_indent) %}\n {%- set o_ns = namespace(f = caller()) %}\n {%- for param_name, param_fields in json_spec.properties|items %}\n {%- if param_fields.enum %}\n {{- '\\n\\nclass ' + param_name|title + '(Enum):\\n' }}\n {%- for enum_option in param_fields.enum %}\n {{- ' enum_' + loop.index0|string + ' = ' + enum_option|tojson + '\\n' }}\n {%- endfor %}\n {%- elif param_fields.type == 'object' and param_fields.properties %}\n {%- call object_to_fields(param_fields, ' ') %}\n {{- '\\n\\nclass ' + param_name|title + '(BaseModel):\\n' }}\n {%- endcall %}\n {%- elif param_fields.type == 'array' and param_fields['items'] and param_fields['items'].type == 'object' and param_fields['items'].properties %}\n {%- call object_to_fields(param_fields['items'], ' ') %}\n {{- '\\n\\nclass ' + param_name|title + '(BaseModel):\\n' }}\n {%- endcall %}\n {%- endif %}\n {%- set param_default = param_fields.default|tojson if param_fields.default is string else param_fields.default|string if param_fields.default is defined else 'None' %}\n {%- set o_ns.f = o_ns.f + field_indent + param_name + ': ' %}\n {%- set o_ns.f = o_ns.f + ('Optional[' + json_to_python_type(param_name, 
param_fields) + ']' if param_name not in json_spec.required else json_to_python_type(param_name, param_fields)) %}\n {%- if not param_fields.title and not param_fields.description and not param_fields.pattern %}\n {%- set o_ns.f = o_ns.f + (' = ' + param_default if param_name not in json_spec.required else '') %}\n {%- else %}\n {%- set o_ns.f = o_ns.f + (' = Field(...' if param_name in json_spec.required else ' = Field(' + param_default) %}\n {%- set o_ns.f = o_ns.f + (', description=' + param_fields.description|tojson if param_fields.description else '') %}\n {%- set o_ns.f = o_ns.f + (', regex=' + param_fields.pattern|tojson if param_fields.pattern else '') %}\n {%- set o_ns.f = o_ns.f + (', title=' + param_fields.title|tojson if param_fields.title else '') %}\n {%- set o_ns.f = o_ns.f + ')' %}\n {%- endif %}\n {%- set o_ns.f = o_ns.f + '\\n' %}\n {%- endfor %}\n {{- o_ns.f }}\n{%- endmacro %}\n\n{%- macro tool_parser(tools) %}\n{%- for tool in tools %}\n {%- if tool.type is not defined or tool.type == 'function' %}\n {%- if tool.function is defined %}\n {%- set tool = tool.function %}\n {%- endif %}\n {%- set tool_params = tool.parameters if tool.parameters is defined else none %}\n {%- call object_to_fields(tool_params, ' ') %}\n {{- '\\n\\ndef ' + tool.name + '(' }}\n {%- if tool_params %}\n {%- for param_name, param_fields in tool_params.properties|items %}\n {%- set param_default = param_fields.default|tojson if param_fields.default is string else param_fields.default|string if param_fields.default is defined else 'None' %}\n {{- ', ' if loop.index0 != 0 }}\n {{- param_name }}\n {{- '=' + param_default if param_name not in tool_params.required }}\n {%- endfor %}\n {%- endif %}\n {{- '):\\n \"\"\"' }}\n {{- tool.description }}\n {{- '\\n\\n Args:\\n' if tool_params else '\\n' }}\n {%- endcall %}\n {{- ' \"\"\"\\n' }}\n {%- endif %}\n{%- endfor %}\n{%- endmacro %}\n\n{%- if messages[0]['role'] == 'system' %}\n {%- set loop_messages = messages[1:] %}\n {%- set system_message = messages[0]['content'] %}\n{%- else %}\n {%- set loop_messages = messages %}\n {%- set system_message = '' %}\n{%- endif %}\n{{- '<|im_start|>system\\n' + system_message if system_message or tools }}\n{%- if tools %}\n {{- '\\n# Functions\\nHere is a list of functions that you can invoke:\\n```python\\nfrom enum import Enum\\nfrom typing import List, Dict, Optional\\nfrom pydantic import BaseModel, Field\\n\\n' }}\n {{- tool_parser(tools) }}\n {{- \"\\n```\\n\\n# Function Call Rule and Output Format\\n- If the user's question can be answered without calling any function, please answer the user's question directly. In this situation, you should return your thought and answer the user's question directly.\\n- If the user cannot be answered without calling any function, and the user does not provide enough information to call functions, please ask the user for more information. In this situation, you should return your thought and ask the user for more information.\\n- If the user's question cannot be answered without calling any function, and the user has provided enough information to call functions to solve it, you should call the functions. 
In this situation, the assistant should return your thought and call the functions.\\n- Use default parameters unless the user has specified otherwise.\\n- You should answer in the following format:\\n\\n<|thought_start|>\\n{explain why the user's question can be answered without calling a function or why you should ask the user for more information or why you should call one or more functions and your plan to solve the user's question.}\\n<|thought_end|>\\n<|tool_call_start|>\\n```python\\nfunc1(params_name=params_value, params_name2=params_value2...)\\nfunc2(params)\\n```\\n<|tool_call_end|>\\n{answer the user's question directly or ask the user for more information}\" }}\n{%- endif %}\n{{- '<|im_end|>\\n' if system_message or tools }}\n{%- for message in loop_messages %}\n {%- set content = message.content %}\n {%- if message.role == 'assistant' and message.tool_calls %}\n {{- '<|im_start|>' + message.role + '\\n' }}\n {{- '<|thought_start|>\\n' + message.thought + '\\n<|thought_end|>\\n' if message.thought }}\n {{- '<|tool_call_start|>\\n```python\\n' }}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- tool_call.name + '(' }}\n {%- if tool_call.arguments is defined and tool_call.arguments|length > 0 %}\n {%- for param_name, param_value in tool_call.arguments|items %}\n {{- param_name + '=' + param_value|tojson }}\n {{- ',' if not loop.last }}\n {%- endfor %}\n {%- endif %}\n {{- ')\\n' }}\n {%- endfor %}\n {{- '```\\n<|tool_call_end|>\\n' }}\n {{- content if content and not content.startswith('<|tool_call_start|>') }}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == 'assistant' and message.thought %}\n {{- '<|im_start|>' + message.role + '\\n' + '<|thought_start|>\\n' + message.thought + '\\n<|thought_end|>\\n' + content + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endfor %}\n\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}": "{% for message in messages %}{% if message['role'] == 'system' %}{{ '<|im_start|>system\\n' + message['content'] + '<|im_end|>\\n' }}{% elif message['role'] == 'user' %}{{ '<|im_start|>user\\n' + message['content'] + '<|im_end|>\\n' }}{% elif message['role'] == 'assistant' %}{{ '<|im_start|>assistant\\n ' + message['content'] + '<|im_end|>\\n' }}{% endif %}{% if loop.last and add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% endif %}{% endfor %}", @@ -350,11 +349,9 @@ def set_simplified_chat_template(ov_tokenizer_model, processor_chat_template=Non SKIP_CHECK_TRACE_MODELS = ("deepseek", "deepseek-v2", "deepseek-v3") -def allow_skip_tracing_check(library_name, model): +def allow_skip_tracing_check(library_name, model_type): if is_openvino_version("<", "2025.0.0"): return False if library_name == "diffusers": return True - config = getattr(model, "config", {}) - model_type = getattr(config, "model_type", "").replace("_", "-") return model_type in SKIP_CHECK_TRACE_MODELS
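
Usage sketch (assumptions noted inline): with this series applied and
openvino>=2025.0 installed, a DeepSeek checkpoint exports through the regular
causal-LM path. The tiny checkpoint below is the one registered in
tests/openvino/utils_tests.py; trust_remote_code is required because the
DeepSeek models ship custom modeling code.

    from transformers import AutoTokenizer
    from optimum.intel import OVModelForCausalLM

    model_id = "katuni4ka/tiny-random-deepseek-v3"
    # export=True converts the PyTorch model to OpenVINO IR on the fly,
    # routing through the DeepseekOpenVINOConfig / DeepseekPatcher added in PATCH 01
    model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True)
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

    inputs = tokenizer("DeepSeek MoE export smoke test:", return_tensors="pt")
    print(tokenizer.batch_decode(model.generate(**inputs, max_new_tokens=8)))

The tracing-check gate as of PATCH 12 can also be exercised directly; it
returns False on openvino<2025.0 regardless of the other arguments:

    from optimum.exporters.openvino.utils import allow_skip_tracing_check

    allow_skip_tracing_check("transformers", "deepseek-v3")  # True on openvino>=2025.0
    allow_skip_tracing_check("diffusers", "unet")            # True for any diffusers model
    allow_skip_tracing_check("transformers", "llama")        # False: tracing check kept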