@@ -63,6 +63,7 @@
     OVModelForAudioXVector,
     OVModelForCausalLM,
     OVModelForCTC,
+    OVModelForCustomTasks,
     OVModelForFeatureExtraction,
     OVModelForImageClassification,
     OVModelForMaskedLM,
@@ -1525,3 +1526,96 @@ def test_pipeline_image_to_text(self, model_arch: str):
         self.assertIsInstance(outputs[0]["generated_text"], str)
 
         gc.collect()
+
+
+class OVModelForCustomTasksIntegrationTest(unittest.TestCase):
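+    # ViT test checkpoints whose exported graphs also return attentions or
+    # hidden states as extra model outputs.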
+    SUPPORTED_ARCHITECTURES_WITH_ATTENTION = ["vit-with-attentions"]
+    SUPPORTED_ARCHITECTURES_WITH_HIDDEN_STATES = ["vit-with-hidden-states"]
+
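+    # Standard COCO validation image (two cats on a couch), a lightweight
+    # sample input for vision tests.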
+    def _get_sample_image(self):
+        url = "http://images.cocodataset.org/val2017/000000039769.jpg"
+        image = Image.open(requests.get(url, stream=True).raw)
+        return image
+
+    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_ATTENTION)
+    def test_compare_output_attentions(self, model_arch):
+        model_id = MODEL_NAMES[model_arch]
+
+        image = self._get_sample_image()
+        preprocessor = AutoFeatureExtractor.from_pretrained(model_id)
+        inputs = preprocessor(images=image, return_tensors="pt")
+
+        transformers_model = AutoModelForImageClassification.from_pretrained(model_id)
+        transformers_model.eval()
+        with torch.no_grad():
+            transformers_outputs = transformers_model(**inputs, output_attentions=True)
+
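+        # F32_CONFIG pins inference precision to f32 so the tolerances below hold.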
+        ov_model = OVModelForCustomTasks.from_pretrained(model_id, ov_config=F32_CONFIG)
+        self.assertIsInstance(ov_model.config, PretrainedConfig)
+
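+        # Exercise both torch ("pt") and numpy ("np") inputs; OpenVINO outputs
+        # are cast to torch tensors before comparison with the PyTorch reference.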
+        for input_type in ["pt", "np"]:
+            inputs = preprocessor(images=image, return_tensors=input_type)
+            ov_outputs = ov_model(**inputs)
+            self.assertIn("logits", ov_outputs)
+            self.assertIsInstance(ov_outputs.logits, TENSOR_ALIAS_TO_TYPE[input_type])
+            self.assertTrue(torch.allclose(torch.Tensor(ov_outputs.logits), transformers_outputs.logits, atol=1e-4))
+            self.assertEqual(len(ov_outputs.attentions), len(transformers_outputs.attentions))
+            for i in range(len(ov_outputs.attentions)):
+                self.assertTrue(
+                    torch.allclose(
+                        torch.Tensor(ov_outputs.attentions[i]),
+                        transformers_outputs.attentions[i],
+                        atol=1e-4,  # attentions match the PyTorch reference closely
+                        rtol=1e-4,
+                    ),
+                    f"Attention mismatch at layer {i}",
+                )
+
+        del transformers_model
+        del ov_model
+        gc.collect()
+
+    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_HIDDEN_STATES)
+    def test_compare_output_hidden_states(self, model_arch):
+        model_id = MODEL_NAMES[model_arch]
+
+        image = self._get_sample_image()
+        preprocessor = AutoFeatureExtractor.from_pretrained(model_id)
+        inputs = preprocessor(images=image, return_tensors="pt")
+
+        transformers_model = AutoModelForImageClassification.from_pretrained(model_id)
+        transformers_model.eval()
+        with torch.no_grad():
+            transformers_outputs = transformers_model(**inputs, output_hidden_states=True)
+
+        ov_model = OVModelForCustomTasks.from_pretrained(model_id, ov_config=F32_CONFIG)
+        self.assertIsInstance(ov_model.config, PretrainedConfig)
+
+        for input_type in ["pt", "np"]:
+            inputs = preprocessor(images=image, return_tensors=input_type)
+            ov_outputs = ov_model(**inputs)
+            self.assertIn("logits", ov_outputs)
+            self.assertIsInstance(ov_outputs.logits, TENSOR_ALIAS_TO_TYPE[input_type])
+            self.assertTrue(torch.allclose(torch.Tensor(ov_outputs.logits), transformers_outputs.logits, atol=1e-4))
+            self.assertEqual(len(ov_outputs.hidden_states), len(transformers_outputs.hidden_states))
+            for i in range(len(ov_outputs.hidden_states)):
+                self.assertTrue(
+                    torch.allclose(
+                        torch.Tensor(ov_outputs.hidden_states[i]),
+                        transformers_outputs.hidden_states[i],
+                        atol=1e-3,  # hidden states accumulate more numerical error
+                        rtol=1e-2,
+                    ),
+                    f"Hidden states mismatch at layer {i}",
+                )
+
+        del transformers_model
+        del ov_model
+        gc.collect()
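For context, the class these tests exercise returns whatever outputs the exported OpenVINO graph exposes (here, logits plus attentions or hidden states), rather than a fixed task-specific set. Below is a minimal usage sketch, not part of the diff: the checkpoint id is hypothetical, the import path follows the import hunk above, and whether `attentions` appears in the outputs depends on how the model was exported.

```python
import requests
from PIL import Image
from transformers import AutoFeatureExtractor

from optimum.intel import OVModelForCustomTasks

# Hypothetical checkpoint id: a ViT exported with attentions as extra outputs.
model_id = "my-org/vit-with-attentions"

preprocessor = AutoFeatureExtractor.from_pretrained(model_id)
model = OVModelForCustomTasks.from_pretrained(model_id)

# Same COCO sample image the tests above download.
url = "http://images.cocodataset.org/val2017/000000039769.jpg"
image = Image.open(requests.get(url, stream=True).raw)

inputs = preprocessor(images=image, return_tensors="np")
outputs = model(**inputs)

print(outputs.logits.shape)     # classification logits, as asserted in the tests
print(len(outputs.attentions))  # one attention tensor per transformer layer
```

Note the asymmetric tolerances in the tests: attentions are compared at atol/rtol 1e-4, while hidden states are relaxed to atol 1e-3 / rtol 1e-2, reflecting the original comments that intermediate activations accumulate more numerical drift across backends than attention maps do.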