
Commit 3a86c40

fix raise import error
1 parent 809542c commit 3a86c40


3 files changed: +13 -12 lines changed

optimum/exporters/ipex/model_patcher.py

+7 -6

@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import intel_extension_for_pytorch as ipex
-from packaging import version
 from transformers.models.llama.modeling_llama import (
     LlamaAttention,
     LlamaDecoderLayer,
@@ -22,6 +20,8 @@
     LlamaRMSNorm,
 )
 
+from optimum.intel.utils.import_utils import is_ipex_version
+
 from .modeling_utils import (
     _IPEXLlamaDecoderLayerRef,
     llama_attn_forward,
@@ -30,10 +30,6 @@
 )
 
 
-if version.parse(ipex.__version__) > version.parse("2.3.0"):
-    from intel_extension_for_pytorch.llm.modules import ApplyRotaryEmbedding, IndirectAccessKVCache
-
-
 _IPEX_EXPORTED_ARCH = ("LlamaForCausalLM",)
 _IPEX_EXPORTED_TASK = ("text-generation",)
 
@@ -66,6 +62,11 @@ def patch_op(m, target_m, new_op_name, new_op):
 
 
 def _patch_llama_model(model):
+    if is_ipex_version("<=", "2.3.0"):
+        raise ImportError("Only ipex version > 2.3.0 supports ApplyRotaryEmbedding and IndirectAccessKVCache")
+
+    from intel_extension_for_pytorch.llm.modules import ApplyRotaryEmbedding, IndirectAccessKVCache
+
     ipex_rope = ApplyRotaryEmbedding(
         model.config.max_position_embeddings,
         model.config.hidden_size // model.config.num_attention_heads,
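
The net effect of this file's change is that the IPEX-only symbols are no longer imported at module import time; the version gate moves into _patch_llama_model, which raises an ImportError before attempting the import. Below is a minimal, self-contained sketch of that guarded lazy-import pattern; it uses importlib.metadata and packaging inline instead of the repository's is_ipex_version helper, and patch_model / _require_min_version are illustrative stand-ins, not functions from this codebase.

import importlib.metadata

from packaging import version


def _require_min_version(package: str, minimum: str) -> None:
    # Raise ImportError when the optional package is missing or too old,
    # mirroring the check added to _patch_llama_model above.
    try:
        installed = importlib.metadata.version(package)
    except importlib.metadata.PackageNotFoundError as exc:
        raise ImportError(f"{package} is required but not installed") from exc
    if version.parse(installed) <= version.parse(minimum):
        raise ImportError(f"Only {package} > {minimum} supports this feature, found {installed}")


def patch_model(model):
    # The optional dependency is only touched when patching is actually requested,
    # so importing this module never fails on machines without the package.
    _require_min_version("intel_extension_for_pytorch", "2.3.0")
    from intel_extension_for_pytorch.llm.modules import ApplyRotaryEmbedding  # noqa: F401

    # ... build the patched modules with ApplyRotaryEmbedding here ...
    return model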

optimum/exporters/ipex/modeling_utils.py

+1 -1

@@ -230,7 +230,7 @@ def llama_model_forward(
 class _IPEXLlamaDecoderLayerRef(nn.Module):
     def __init__(self, module, config, distributed=False):
         if is_ipex_version("<=", "2.3.0"):
-            raise ValueError("Only ipex version > 2.3.0 supports linear2SiluMul and linearAdd")
+            raise ImportError("Only ipex version > 2.3.0 supports linear2SiluMul and linearAdd")
 
         from intel_extension_for_pytorch.llm.modules import linear2SiluMul, linearAdd
 
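
The only change here is the exception type: the version guard in _IPEXLlamaDecoderLayerRef.__init__ now raises ImportError instead of ValueError, matching the new check in _patch_llama_model. One practical consequence, sketched below with a hypothetical try_patch helper that is not part of the repository, is that callers can treat a missing or outdated ipex uniformly as an optional-dependency failure.

def try_patch(model, patch_fn):
    # patch_fn is any IPEX patch helper; fall back to the unpatched model
    # when the optional dependency is missing or too old.
    try:
        return patch_fn(model), True
    except ImportError as exc:
        print(f"IPEX patching skipped: {exc}")
        return model, False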

optimum/intel/ipex/modeling_base.py

+5 -5

@@ -65,7 +65,7 @@ def _is_patched_with_ipex(model, task):
     if isinstance(model, torch.jit.ScriptModule):
         for node in model.graph.nodes():
             # Jit will record the codes position so we can check if the node use ipex exporter.
-            if "optimum/exporters/ipex/modeling_utils.py" in node.__str__():
+            if "torch_ipex::rotary_position_embedding" in node.__str__():
                 return True
         return False
     else:
@@ -123,7 +123,7 @@ def __init__(
         self._dtype = self.config.torch_dtype if self.config.torch_dtype is not None else torch.float32
         self.model.to(self._device)
         self.model_save_dir = model_save_dir
-        self.is_ipex_exported = _is_patched_with_ipex(model, self.export_feature)
+        self._is_ipex_exported = _is_patched_with_ipex(model, self.export_feature)
 
         self.input_names = {
             inputs.debugName().split(".")[0] for inputs in model.graph.inputs() if inputs.debugName() != "self"
@@ -285,7 +285,7 @@ def _init_warmup(self):
         # warmup, the first 2 forwards of an IPEX model include some preprocessing steps and
         # the results of the compute are unpredictable
         # TODO : add warmup for IPEX exported model
-        if not self.is_ipex_exported:
+        if not self._is_ipex_exported:
             use_cache = "past_key_values" in self.input_names
             dummy_inputs = prepare_jit_inputs(self, self.export_feature, use_cache)
             for _ in range(2):
@@ -409,7 +409,7 @@ def __init__(
         except AttributeError:
             self.model_cls = get_model_class(self.config, AutoModelForCausalLM._model_mapping)
 
-        if self.is_ipex_exported:
+        if self._is_ipex_exported:
             self._reorder_cache = _ipex_reorder_cache
         else:
             # Check if _reorder_cache is a static method
@@ -442,7 +442,7 @@ def _prepare_past_key_values(self, input_ids):
         else:
             num_attention_heads = self.normalized_config.num_attention_heads
 
-        if self.is_ipex_exported:
+        if self._is_ipex_exported:
             # Indirect access kv cache has a different data layout compared with most transformers model,
             # see https://intel.github.io/intel-extension-for-pytorch/cpu/latest/tutorials/llm.html#indirect-access-kv-cache
             beam_idx_tmp = torch.zeros(
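
Detection of IPEX-exported models now keys off an operator name (torch_ipex::rotary_position_embedding) in the TorchScript graph rather than a source-file path recorded by JIT, and the flag is stored as the private attribute _is_ipex_exported. The sketch below shows the underlying graph-scanning mechanism with a stock aten op so it runs without IPEX installed; graph_contains_op and Tiny are illustrative stand-ins, not code from the repository.

import torch


def graph_contains_op(scripted: torch.jit.ScriptModule, op_substring: str) -> bool:
    # Each graph node's string form includes the op's qualified name, e.g. "aten::relu",
    # which is what the patched _is_patched_with_ipex check relies on.
    return any(op_substring in str(node) for node in scripted.graph.nodes())


class Tiny(torch.nn.Module):
    def forward(self, x):
        return torch.relu(x)


scripted = torch.jit.script(Tiny())
print(graph_contains_op(scripted, "aten::relu"))                              # True
print(graph_contains_op(scripted, "torch_ipex::rotary_position_embedding"))   # False without IPEX patching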
