@@ -16,9 +16,13 @@
 import time
 import unittest
 
+import numpy as np
+import requests
 import torch
 from parameterized import parameterized
+from PIL import Image
 from transformers import (
+    AutoFeatureExtractor,
     AutoModelForCausalLM,
     AutoModelForQuestionAnswering,
     AutoTokenizer,
@@ -30,7 +34,9 @@
 from optimum.exporters.onnx import MODEL_TYPES_REQUIRING_POSITION_IDS
 from optimum.intel import (
     IPEXModel,
+    IPEXModelForAudioClassification,
     IPEXModelForCausalLM,
+    IPEXModelForImageClassification,
     IPEXModelForMaskedLM,
     IPEXModelForQuestionAnswering,
     IPEXModelForSequenceClassification,
@@ -42,13 +48,15 @@
 
 MODEL_NAMES = {
     "albert": "hf-internal-testing/tiny-random-albert",
+    "beit": "hf-internal-testing/tiny-random-BeitForImageClassification",
     "bert": "hf-internal-testing/tiny-random-bert",
     "bart": "hf-internal-testing/tiny-random-bart",
     "blenderbot-small": "hf-internal-testing/tiny-random-BlenderbotModel",
     "blenderbot": "hf-internal-testing/tiny-random-BlenderbotModel",
     "bloom": "hf-internal-testing/tiny-random-BloomModel",
     "convbert": "hf-internal-testing/tiny-random-ConvBertForSequenceClassification",
     "codegen": "hf-internal-testing/tiny-random-CodeGenForCausalLM",
+    "convnext": "hf-internal-testing/tiny-random-convnext",
     "distilbert": "hf-internal-testing/tiny-random-distilbert",
     "electra": "hf-internal-testing/tiny-random-electra",
     "flaubert": "hf-internal-testing/tiny-random-flaubert",
@@ -57,17 +65,25 @@
     "gpt_neo": "hf-internal-testing/tiny-random-GPTNeoModel",
     "gpt_neox": "hf-internal-testing/tiny-random-GPTNeoXForCausalLM",
     "gptj": "hf-internal-testing/tiny-random-GPTJModel",
+    "levit": "hf-internal-testing/tiny-random-LevitModel",
     "llama": "fxmarty/tiny-llama-fast-tokenizer",
     "opt": "hf-internal-testing/tiny-random-OPTModel",
     "marian": "sshleifer/tiny-marian-en-de",
     "mbart": "hf-internal-testing/tiny-random-mbart",
     "mistral": "echarlaix/tiny-random-mistral",
+    "mobilenet_v1": "google/mobilenet_v1_0.75_192",
+    "mobilenet_v2": "hf-internal-testing/tiny-random-MobileNetV2Model",
+    "mobilevit": "hf-internal-testing/tiny-random-mobilevit",
     "mpt": "hf-internal-testing/tiny-random-MptForCausalLM",
     "mt5": "stas/mt5-tiny-random",
+    "resnet": "hf-internal-testing/tiny-random-resnet",
     "roberta": "hf-internal-testing/tiny-random-roberta",
     "roformer": "hf-internal-testing/tiny-random-roformer",
     "squeezebert": "hf-internal-testing/tiny-random-squeezebert",
     "t5": "hf-internal-testing/tiny-random-t5",
+    "unispeech": "hf-internal-testing/tiny-random-unispeech",
+    "vit": "hf-internal-testing/tiny-random-vit",
+    "wav2vec2": "anton-l/wav2vec2-random-tiny-classifier",
     "xlm": "hf-internal-testing/tiny-random-xlm",
 }
 
@@ -266,3 +282,84 @@ def test_compare_with_and_without_past_key_values(self):
             f"With pkv latency: {with_pkv_timer.elapsed:.3f} ms, without pkv latency: {without_pkv_timer.elapsed:.3f} ms,"
             f" speedup: {without_pkv_timer.elapsed / with_pkv_timer.elapsed:.3f}",
         )
+
+
+class IPEXModelForAudioClassificationTest(unittest.TestCase):
+    IPEX_MODEL_CLASS = IPEXModelForAudioClassification
+    SUPPORTED_ARCHITECTURES = (
+        "unispeech",
+        "wav2vec2",
+    )
+
+    def _generate_random_audio_data(self):
+        np.random.seed(10)
+        t = np.linspace(0, 5.0, int(5.0 * 22050), endpoint=False)
+        # generate pure sine wave at 220 Hz
+        audio_data = 0.5 * np.sin(2 * np.pi * 220 * t)
+        return audio_data
+
+    @parameterized.expand(SUPPORTED_ARCHITECTURES)
+    def test_compare_to_transformers(self, model_arch):
+        model_id = MODEL_NAMES[model_arch]
+        ipex_model = self.IPEX_MODEL_CLASS.from_pretrained(model_id, export=True)
+        self.assertIsInstance(ipex_model.config, PretrainedConfig)
+        transformers_model = self.IPEX_MODEL_CLASS.auto_model_class.from_pretrained(model_id)
+        preprocessor = AutoFeatureExtractor.from_pretrained(model_id)
+        inputs = preprocessor(self._generate_random_audio_data(), return_tensors="pt")
+        with torch.no_grad():
+            transformers_outputs = transformers_model(**inputs)
+        outputs = ipex_model(**inputs)
+        # Compare tensor outputs
+        self.assertTrue(torch.allclose(outputs.logits, transformers_outputs.logits, atol=1e-3))
+
+    @parameterized.expand(SUPPORTED_ARCHITECTURES)
+    def test_pipeline(self, model_arch):
+        model_id = MODEL_NAMES[model_arch]
+        model = self.IPEX_MODEL_CLASS.from_pretrained(model_id, export=True)
+        preprocessor = AutoFeatureExtractor.from_pretrained(model_id)
+        pipe = pipeline("audio-classification", model=model, feature_extractor=preprocessor)
+        outputs = pipe([np.random.random(16000)])
+        self.assertEqual(pipe.device, model.device)
+        self.assertTrue(all(item["score"] > 0.0 for item in outputs[0]))
+
+
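The audio tests above synthesize a seeded 220 Hz sine wave instead of downloading audio, keeping the comparison against transformers deterministic. A minimal sketch of the same user-facing flow outside the test harness, reusing the wav2vec2 checkpoint registered above (illustrative, not part of the change):

    import numpy as np
    from transformers import AutoFeatureExtractor, pipeline
    from optimum.intel import IPEXModelForAudioClassification

    model_id = "anton-l/wav2vec2-random-tiny-classifier"  # MODEL_NAMES["wav2vec2"]
    model = IPEXModelForAudioClassification.from_pretrained(model_id, export=True)
    extractor = AutoFeatureExtractor.from_pretrained(model_id)

    # one second of silence at 16 kHz stands in for real audio
    pipe = pipeline("audio-classification", model=model, feature_extractor=extractor)
    print(pipe(np.zeros(16000, dtype=np.float32)))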
+class IPEXModelForImageClassificationIntegrationTest(unittest.TestCase):
+    SUPPORTED_ARCHITECTURES = (
+        "beit",
+        # "levit",
+        "mobilenet_v1",
+        "mobilenet_v2",
+        "mobilevit",
+        "resnet",
+        "vit",
+    )
+    IPEX_MODEL_CLASS = IPEXModelForImageClassification
+
+    @parameterized.expand(SUPPORTED_ARCHITECTURES)
+    def test_compare_to_transformers(self, model_arch):
+        model_id = MODEL_NAMES[model_arch]
+        set_seed(SEED)
+        ipex_model = self.IPEX_MODEL_CLASS.from_pretrained(model_id, export=True)
+        self.assertIsInstance(ipex_model.config, PretrainedConfig)
+        transformers_model = self.IPEX_MODEL_CLASS.auto_model_class.from_pretrained(model_id)
+        preprocessor = AutoFeatureExtractor.from_pretrained(model_id)
+        url = "http://images.cocodataset.org/val2017/000000039769.jpg"
+        image = Image.open(requests.get(url, stream=True).raw)
+        inputs = preprocessor(images=image, return_tensors="pt")
+        with torch.no_grad():
+            transformers_outputs = transformers_model(**inputs)
+        outputs = ipex_model(**inputs)
+        self.assertIn("logits", outputs)
+        # Compare tensor outputs
+        self.assertTrue(torch.allclose(outputs.logits, transformers_outputs.logits, atol=1e-4))
+
+    @parameterized.expand(SUPPORTED_ARCHITECTURES)
+    def test_pipeline(self, model_arch):
+        model_id = MODEL_NAMES[model_arch]
+        model = self.IPEX_MODEL_CLASS.from_pretrained(model_id, export=True)
+        preprocessor = AutoFeatureExtractor.from_pretrained(model_id)
+        pipe = pipeline("image-classification", model=model, feature_extractor=preprocessor)
+        outputs = pipe("http://images.cocodataset.org/val2017/000000039769.jpg")
+        self.assertEqual(pipe.device, model.device)
+        self.assertGreaterEqual(outputs[0]["score"], 0.0)
+        self.assertTrue(isinstance(outputs[0]["label"], str))
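The image tests follow the same pattern; since the image-classification pipeline accepts a URL directly (as test_pipeline above shows), the COCO sample image can be classified in one call. A sketch under the same assumptions as the audio example:

    from transformers import AutoFeatureExtractor, pipeline
    from optimum.intel import IPEXModelForImageClassification

    model_id = "hf-internal-testing/tiny-random-vit"  # MODEL_NAMES["vit"]
    model = IPEXModelForImageClassification.from_pretrained(model_id, export=True)
    extractor = AutoFeatureExtractor.from_pretrained(model_id)

    pipe = pipeline("image-classification", model=model, feature_extractor=extractor)
    # each prediction carries a string label and a float score, as asserted above
    print(pipe("http://images.cocodataset.org/val2017/000000039769.jpg"))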