@@ -4,15 +4,14 @@
 import warnings
 from abc import abstractmethod
 from pathlib import Path
-from typing import Dict, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union
 
 import numpy as np
 import openvino as ov
 import torch
 from huggingface_hub import hf_hub_download
 from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
 from openvino._offline_transformations import apply_moc_transformations, compress_model_transformation
-from PIL.Image import Image
 from transformers import (
     AutoConfig,
     AutoImageProcessor,
@@ -50,6 +49,10 @@
 LlavaNextForConditionalGeneration = None
 
 
+if TYPE_CHECKING:
+    from PIL.Image import Image
+
+
 logger = logging.getLogger(__name__)
 
 core = ov.Core()
@@ -790,7 +793,7 @@ def can_generate(self):
     @abstractmethod
     def preprocess_inputs(
         text: str,
-        image: Optional[Image] = None,
+        image: Optional["Image"] = None,
         processor: Optional[AutoImageProcessor] = None,
         tokenizer: Optional[PreTrainedTokenizer] = None,
         config: Optional[PretrainedConfig] = None,
@@ -967,7 +970,7 @@ def _filter_unattended_tokens(self, input_ids, attention_mask, past_key_values):
     @staticmethod
     def preprocess_inputs(
         text: str,
-        image: Optional[Image] = None,
+        image: Optional["Image"] = None,
         processor: Optional[AutoImageProcessor] = None,
         tokenizer: Optional[PreTrainedTokenizer] = None,
         config: Optional[PretrainedConfig] = None,
@@ -1287,7 +1290,7 @@ def merge_vision_text_embeddings(
     @staticmethod
     def preprocess_inputs(
         text: str,
-        image: Optional[Image] = None,
+        image: Optional["Image"] = None,
         processor: Optional[AutoImageProcessor] = None,
         tokenizer: Optional[PreTrainedTokenizer] = None,
         config: Optional[PretrainedConfig] = None,
@@ -1662,7 +1665,7 @@ def merge_vision_text_embeddings(
     @staticmethod
     def preprocess_inputs(
         text: str,
-        image: Optional[Image] = None,
+        image: Optional["Image"] = None,
         processor: Optional[AutoImageProcessor] = None,
         tokenizer: Optional[PreTrainedTokenizer] = None,
         config: Optional[PretrainedConfig] = None,
@@ -1857,7 +1860,7 @@ def get_multimodal_embeddings(
     @staticmethod
     def preprocess_inputs(
         text: str,
-        image: Optional[Image] = None,
+        image: Optional["Image"] = None,
         processor: Optional[AutoImageProcessor] = None,
         tokenizer: Optional[PreTrainedTokenizer] = None,
         config: Optional[PretrainedConfig] = None,
@@ -2017,7 +2020,7 @@ def get_multimodal_embeddings(
     @staticmethod
     def preprocess_inputs(
         text: str,
-        image: Optional[Image] = None,
+        image: Optional["Image"] = None,
         processor: Optional[AutoImageProcessor] = None,
         tokenizer: Optional[PreTrainedTokenizer] = None,
         config: Optional[PretrainedConfig] = None,
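The change above defers the PIL import to static type-checking time: the import now sits under `typing.TYPE_CHECKING`, and every `preprocess_inputs` signature uses the string forward reference `Optional["Image"]`, so the annotation is never evaluated at runtime and Pillow does not need to be importable just to load the module. A minimal self-contained sketch of the same idiom (the `describe_input` helper is illustrative, not part of this diff):

```python
from typing import TYPE_CHECKING, Optional

if TYPE_CHECKING:
    # Only evaluated by static type checkers (mypy, pyright); at runtime
    # this branch is skipped, so Pillow stays an optional dependency.
    from PIL.Image import Image


def describe_input(text: str, image: Optional["Image"] = None) -> str:
    # "Image" is a string forward reference, so Python never resolves it
    # at import time; PIL attributes are touched only if an image is given.
    if image is None:
        return f"text-only prompt: {text!r}"
    return f"prompt {text!r} with a {image.width}x{image.height} image"


# Works even without Pillow installed, as long as no image is passed:
print(describe_input("describe the scene"))
```

The same effect can also be obtained with `from __future__ import annotations` (PEP 563), which makes every annotation in the module lazy without quoting each one individually.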