
Commit 40de842

warm up does not work for compiled model
Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
1 parent 6cceb30

File tree: 1 file changed, +22 −13 lines

optimum/intel/ipex/modeling_base.py

+22 −13
@@ -240,11 +240,14 @@ def maybe_apply_torch_compile(self):
         self.compiled = True
 
     def _init_warmup(self):
-        inputs = prepare_jit_inputs(self.model, self.export_feature, False)
-        with torch.no_grad():
-            self.model(**inputs)
-            self.model(**inputs)
-        logger.info("Warm up end")
+        if self.compiled:
+            logger.info("Detected torch.compile is applied, please warm-up by your own case")
+        else:
+            inputs = prepare_jit_inputs(self.model, self.export_feature, False)
+            with torch.no_grad():
+                self.model(**inputs)
+                self.model(**inputs)
+            logger.info("Warm up end")
 
 
 class IPEXModelForSequenceClassification(IPEXModel):
@@ -400,10 +403,13 @@ def generate(self, *args, **kwargs):
         return result
 
     def _init_warmup(self):
-        inputs = prepare_jit_inputs(self.model, self.export_feature, False)
-        self.generate(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"], max_new_tokens=4)
-        self.generate(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"], max_new_tokens=4)
-        logger.info("Warm up end")
+        if self.compiled:
+            logger.info("Detected torch.compile is applied, please warm-up by your own case")
+        else:
+            inputs = prepare_jit_inputs(self.model, self.export_feature, False)
+            self.generate(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"], max_new_tokens=4)
+            self.generate(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"], max_new_tokens=4)
+            logger.info("Warm up end")
 
 
 class IPEXModelForSeq2SeqLM(IPEXModel, GenerationMixin):
@@ -478,10 +484,13 @@ def _supports_num_logits_to_keep(self) -> bool:
         return "num_logits_to_keep" in set(inspect.signature(self.model.forward).parameters.keys())
 
     def _init_warmup(self):
-        inputs = prepare_jit_inputs(self.model, self.export_feature, False)
-        self.generate(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"], max_new_tokens=4)
-        self.generate(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"], max_new_tokens=4)
-        logger.info("Warm up end")
+        if self.compiled:
+            logger.info("Detected torch.compile is applied, please warm-up by your own case")
+        else:
+            inputs = prepare_jit_inputs(self.model, self.export_feature, False)
+            self.generate(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"], max_new_tokens=4)
+            self.generate(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"], max_new_tokens=4)
+            logger.info("Warm up end")
 
 
 def _ipex_crop_past_key_values(model, past_key_values, max_length):
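
With this change, the built-in warm-up is skipped whenever torch.compile has been applied, so users are expected to warm the compiled model up with inputs that match their own workload. Below is a minimal sketch of what that could look like, assuming optimum-intel's IPEXModelForCausalLM; the checkpoint id, prompt, and generation arguments are placeholders, not part of this commit.

from transformers import AutoTokenizer
from optimum.intel import IPEXModelForCausalLM

model_id = "gpt2"  # hypothetical example checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)
# torch.compile may be applied internally (see maybe_apply_torch_compile above)
model = IPEXModelForCausalLM.from_pretrained(model_id)

# Run a couple of generations with inputs shaped like the real workload so that
# compilation happens here instead of on the first user-facing request.
warmup_inputs = tokenizer("a sample prompt", return_tensors="pt")
for _ in range(2):
    model.generate(**warmup_inputs, max_new_tokens=4)

Using workload-shaped inputs matters because a generic warm-up with dummy shapes would not necessarily cover the shapes the compiled model will actually see.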
