
Commit 356d51d

fix falcon linear fusion
Signed-off-by: jiqing-feng <jiqing.feng@intel.com>
1 parent 6bf3b8b · commit 356d51d

File tree

2 files changed: +5 -121 lines changed

optimum/exporters/ipex/modeling_utils.py (+5 -1)
@@ -869,7 +869,11 @@ def forward(
         residual: torch.Tensor = None,
         **kwargs,
     ):
-        mlp_hidden_states = self.linear_gelu(hidden_states)
+        if hasattr(self, "linear_gelu"):
+            mlp_hidden_states = self.linear_gelu(hidden_states)
+        else:
+            mlp_hidden_states = self.act(self.dense_h_to_4h(hidden_states))
+
         if hasattr(self, "linear_add_add"):
             output = self.linear_add_add(mlp_hidden_states, attention_output, residual)
         else:
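
For context, the guard above lets Falcon's MLP forward run even when the fused linear+GELU op was never attached to the module, presumably because fusion was skipped for those layers (for example, when the linear weights are quantized); it falls back to the eager dense_h_to_4h + activation path instead of raising an AttributeError. Below is a minimal sketch of that pattern, assuming a Falcon-style MLP with dense_h_to_4h, act, and dense_4h_to_h attributes; the class and its fuse flag are hypothetical illustrations, not the optimum-intel implementation.

# Minimal sketch (hypothetical class, not the optimum-intel code) of the
# guarded-fusion pattern used in the patched forward above.
import torch
import torch.nn as nn


class FalconStyleMLP(nn.Module):
    def __init__(self, hidden_size: int, fuse: bool = True):
        super().__init__()
        # The eager layers always exist, mirroring the original module.
        self.dense_h_to_4h = nn.Linear(hidden_size, 4 * hidden_size)
        self.act = nn.GELU()
        self.dense_4h_to_h = nn.Linear(4 * hidden_size, hidden_size)
        if fuse:
            # Stand-in for a fused linear+GELU kernel; it is attached only
            # when fusion is possible, so forward() must not assume it exists.
            self.linear_gelu = lambda x: self.act(self.dense_h_to_4h(x))

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        # Same guard as the patch: use the fused op if it was attached,
        # otherwise fall back to the eager layers.
        if hasattr(self, "linear_gelu"):
            mlp_hidden_states = self.linear_gelu(hidden_states)
        else:
            mlp_hidden_states = self.act(self.dense_h_to_4h(hidden_states))
        return self.dense_4h_to_h(mlp_hidden_states)

Either branch computes the same result; the guard only decides whether a fused kernel or the eager layers produce it, which is what makes the fallback safe for modules where fusion was skipped.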

tests/ipex/test_modeling.py (-120)
@@ -130,40 +130,6 @@ def test_pipeline(self, model_arch):
         _ = pipe(text)
         self.assertEqual(pipe.device, model.device)

-    @parameterized.expand(SUPPORTED_ARCHITECTURES)
-    @unittest.skipIf(not is_bitsandbytes_available(), reason="Test requires bitsandbytes")
-    def test_bnb(self, model_arch):
-        model_id = MODEL_NAMES[model_arch]
-        quantization_config = BitsAndBytesConfig(load_in_8bit=True)
-        ipex_model = self.IPEX_MODEL_CLASS.from_pretrained(
-            model_id, device_map=DEVICE, quantization_config=quantization_config
-        )
-        transformers_model = self.IPEX_MODEL_CLASS.auto_model_class.from_pretrained(
-            model_id, device_map=DEVICE, quantization_config=quantization_config
-        )
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        inputs = "This is a sample input"
-        tokens = tokenizer(inputs, return_tensors="pt").to(DEVICE)
-        with torch.no_grad():
-            transformers_outputs = transformers_model(**tokens)
-        outputs = ipex_model(**tokens)
-
-        # Test re-load model
-        with tempfile.TemporaryDirectory() as tmpdirname:
-            ipex_model.save_pretrained(tmpdirname)
-            loaded_model = self.IPEX_MODEL_CLASS.from_pretrained(tmpdirname, device_map=DEVICE)
-            loaded_model_outputs = loaded_model(**tokens)
-        # Test init method
-        init_model = self.IPEX_MODEL_CLASS(transformers_model)
-        init_model_outputs = init_model(**tokens)
-
-        # Compare tensor outputs
-        for output_name in {"logits", "last_hidden_state"}:
-            if output_name in transformers_outputs:
-                self.assertTrue(torch.allclose(outputs[output_name], transformers_outputs[output_name], atol=1e-3))
-                self.assertTrue(torch.allclose(outputs[output_name], loaded_model_outputs[output_name]))
-                self.assertTrue(torch.allclose(outputs[output_name], init_model_outputs[output_name]))
-

 class IPEXModelForSequenceClassificationTest(IPEXModelTest):
     IPEX_MODEL_CLASS = IPEXModelForSequenceClassification
@@ -248,46 +214,6 @@ def test_patched_model(self):
         self.assertTrue(torch.allclose(outputs.start_logits, transformers_outputs.start_logits, atol=1e-4))
         self.assertTrue(torch.allclose(outputs.end_logits, transformers_outputs.end_logits, atol=1e-4))

-    @parameterized.expand(SUPPORTED_ARCHITECTURES)
-    @unittest.skipIf(not is_bitsandbytes_available(), reason="Test requires bitsandbytes")
-    def test_bnb(self, model_arch):
-        model_id = MODEL_NAMES[model_arch]
-        set_seed(SEED)
-        quantization_config = BitsAndBytesConfig(load_in_8bit=True)
-        ipex_model = IPEXModelForQuestionAnswering.from_pretrained(
-            model_id, device_map=DEVICE, quantization_config=quantization_config
-        )
-        self.assertIsInstance(ipex_model.config, PretrainedConfig)
-        transformers_model = AutoModelForQuestionAnswering.from_pretrained(
-            model_id, device_map=DEVICE, quantization_config=quantization_config
-        )
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        inputs = "This is a sample input"
-        tokens = tokenizer(inputs, return_tensors="pt").to(DEVICE)
-        with torch.no_grad():
-            transformers_outputs = transformers_model(**tokens)
-        outputs = ipex_model(**tokens)
-
-        # Test re-load model
-        with tempfile.TemporaryDirectory() as tmpdirname:
-            ipex_model.save_pretrained(tmpdirname)
-            loaded_model = self.IPEX_MODEL_CLASS.from_pretrained(tmpdirname, device_map=DEVICE)
-            loaded_model_outputs = loaded_model(**tokens)
-
-        # Test init method
-        init_model = self.IPEX_MODEL_CLASS(transformers_model)
-        init_model_outputs = init_model(**tokens)
-
-        self.assertIn("start_logits", outputs)
-        self.assertIn("end_logits", outputs)
-        # Compare tensor outputs
-        self.assertTrue(torch.allclose(outputs.start_logits, transformers_outputs.start_logits, atol=1e-4))
-        self.assertTrue(torch.allclose(outputs.end_logits, transformers_outputs.end_logits, atol=1e-4))
-        self.assertTrue(torch.equal(outputs.start_logits, loaded_model_outputs.start_logits))
-        self.assertTrue(torch.equal(outputs.end_logits, loaded_model_outputs.end_logits))
-        self.assertTrue(torch.equal(outputs.start_logits, init_model_outputs.start_logits))
-        self.assertTrue(torch.equal(outputs.end_logits, init_model_outputs.end_logits))
-

 class IPEXModelForCausalLMTest(unittest.TestCase):
     IPEX_MODEL_CLASS = IPEXModelForCausalLM
@@ -799,52 +725,6 @@ def test_ipex_beam_search(self, test_name, model_arch, use_cache):
         self.assertIsInstance(outputs, torch.Tensor)
         self.assertTrue(torch.equal(outputs, transformers_outputs))

-    @parameterized.expand(SUPPORTED_ARCHITECTURES)
-    @unittest.skipIf(not is_bitsandbytes_available(), reason="Test requires bitsandbytes")
-    def test_bnb(self, model_arch):
-        model_id = MODEL_NAMES[model_arch]
-        set_seed(SEED)
-        dtype = torch.float16 if IS_XPU_AVAILABLE else torch.float32
-        quantization_config = BitsAndBytesConfig(load_in_8bit=True)
-        # Test model forward do not need cache.
-        ipex_model = self.IPEX_MODEL_CLASS.from_pretrained(
-            model_id, torch_dtype=dtype, quantization_config=quantization_config
-        )
-        transformers_model = AutoModelForSeq2SeqLM.from_pretrained(
-            model_id, torch_dtype=dtype, quantization_config=quantization_config
-        )
-        self.assertIsInstance(ipex_model.config, PretrainedConfig)
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        tokens = tokenizer(
-            "This is a sample",
-            return_tensors="pt",
-            return_token_type_ids=False if model_arch in ("llama", "llama2") else None,
-        )
-        decoder_start_token_id = transformers_model.config.decoder_start_token_id if model_arch != "mbart" else 2
-        decoder_inputs = {"decoder_input_ids": torch.ones((1, 1), dtype=torch.long) * decoder_start_token_id}
-        outputs = ipex_model(**tokens, **decoder_inputs)
-
-        self.assertIsInstance(outputs.logits, torch.Tensor)
-
-        with torch.no_grad():
-            transformers_outputs = transformers_model(**tokens, **decoder_inputs)
-
-        # Test re-load model
-        with tempfile.TemporaryDirectory() as tmpdirname:
-            ipex_model.save_pretrained(tmpdirname)
-            loaded_model = self.IPEX_MODEL_CLASS.from_pretrained(tmpdirname, torch_dtype=dtype)
-            loaded_model_outputs = loaded_model(**tokens, **decoder_inputs)
-
-        # Test init method
-        init_model = self.IPEX_MODEL_CLASS(transformers_model)
-        init_model_outputs = init_model(**tokens, **decoder_inputs)
-
-        # Compare tensor outputs
-        self.assertTrue(torch.allclose(outputs.logits, transformers_outputs.logits, atol=1e-4))
-        # To avoid float pointing error
-        self.assertTrue(torch.allclose(outputs.logits, loaded_model_outputs.logits, atol=1e-7))
-        self.assertTrue(torch.allclose(outputs.logits, init_model_outputs.logits, atol=1e-7))
-

 class IPEXSTModel(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = (

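The removed bitsandbytes tests above compared fused IPEX model outputs against their eager transformers counterparts with torch.allclose. As a quick illustration of the same comparison style applied to the Falcon fix, here is a hypothetical spot-check, built on the FalconStyleMLP sketch from earlier rather than on code from this commit, that both branches of the new guard agree:

# Force each branch of the guard in turn and compare results
# (hypothetical check; FalconStyleMLP is the sketch class defined above).
import torch

torch.manual_seed(0)
mlp = FalconStyleMLP(hidden_size=8)
x = torch.randn(2, 8)
fused_out = mlp(x)       # hasattr(mlp, "linear_gelu") is True: fused branch
del mlp.linear_gelu      # simulate a module where fusion was never attached
eager_out = mlp(x)       # the guard now takes the eager fallback branch
assert torch.allclose(fused_out, eager_out)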