Fix import: gate IPEX llm.modules imports on IPEX version > 2.3.0

jiqing-feng · jiqing-feng · commit e5b7afd83849 · 2024-02-28T11:03:19.000-05:00
diff --git a/optimum/exporters/ipex/llama_functions.py b/optimum/exporters/ipex/llama_functions.py
@@ -15,14 +15,19 @@
 import math
 from typing import List, Optional, Tuple, Union
 
+import intel_extension_for_pytorch as ipex
 import torch
-from intel_extension_for_pytorch.llm.modules import linear2SiluMul, linearAdd
+from packaging import version
 from torch import nn
 from transformers.modeling_attn_mask_utils import _prepare_4d_causal_attention_mask
 from transformers.modeling_outputs import BaseModelOutputWithPast
 from transformers.models.llama.modeling_llama import repeat_kv
 
 
+if version.parse(ipex.__version__) > version.parse("2.3.0"):
+    from intel_extension_for_pytorch.llm.modules import linear2SiluMul, linearAdd
+
+
 def llama_layer_norm_forward(self, hidden_states):
     return torch.ops.torch_ipex.rmsnorm(hidden_states, self.weight, self.variance_epsilon)
 
diff --git a/optimum/exporters/ipex/model_patcher.py b/optimum/exporters/ipex/model_patcher.py
@@ -12,7 +12,8 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 
-from intel_extension_for_pytorch.llm.modules import ApplyRotaryEmbedding, IndirectAccessKVCache
+import intel_extension_for_pytorch as ipex
+from packaging import version
 from transformers.models.llama.modeling_llama import (
     LlamaAttention,
     LlamaDecoderLayer,
@@ -29,6 +30,10 @@
 )
 
 
+if version.parse(ipex.__version__) > version.parse("2.3.0"):
+    from intel_extension_for_pytorch.llm.modules import ApplyRotaryEmbedding, IndirectAccessKVCache
+
+
 IPEX_EXPORTED_ARCH = ("LlamaForCausalLM",)
 IPEX_EXPORTED_TASK = ("text-generation",)