Skip to content

Commit b51ca3f

Browse files
Transformers 4.44 support (huggingface#877)
* test
* fix ipex bloom
* fix bloom
* style
* fix
* use bloom specific modeling when export version is lower than 4.44
* fix
1 parent 2696e6f commit b51ca3f

File tree

9 files changed

+32
-19
lines changed

9 files changed

+32
-19
lines changed

.github/workflows/test_ipex.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ jobs:
2222
fail-fast: false
2323
matrix:
2424
python-version: [3.9]
25-
transformers-version: ["4.39.0", "4.43.*"]
25+
transformers-version: ["4.39.0", "4.44.*"]
2626
ipex-version: ["2.2.0", "2.3.*"]
2727
include:
2828
- python-version: 3.8

.github/workflows/test_openvino.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ jobs:
2121
fail-fast: false
2222
matrix:
2323
python-version: ["3.8", "3.12"]
24-
transformers-version: ["4.36.0", "4.43.*"]
24+
transformers-version: ["4.36.0", "4.44.*"]
2525
os: [ubuntu-latest]
2626

2727
runs-on: ${{ matrix.os }}

.github/workflows/test_openvino_basic.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ jobs:
2424
# This also ensures that the test fails if dependencies break for Python 3.7
2525
python-version: ["3.8", "3.12"]
2626
os: ["ubuntu-22.04", "windows-latest"]
27-
transformers-version: ["4.43.*"]
27+
transformers-version: ["4.44.*"]
2828
include:
2929
- python-version: "3.12"
3030
os: "ubuntu-22.04"

optimum/exporters/ipex/model_patcher.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040

4141
# Please also update in the setup.py and .github/workflows/test_ipex.yml if you change the transformers version
4242
_TRANSFORMERS_MIN_VERSION = "4.39.0"
43-
_TRANSFORMERS_MAX_VERSION = "4.43.99"
43+
_TRANSFORMERS_MAX_VERSION = "4.44.99"
4444

4545
_IPEX_EXPORTED_GENERATION_TASKS = ("text-generation",)
4646

optimum/exporters/openvino/stateful.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
import openvino as ov
2222
from openvino.runtime import opset13
2323
from optimum.exporters import TasksManager
24-
from optimum.intel.utils.import_utils import _openvino_version, is_openvino_version
24+
from optimum.intel.utils.import_utils import _openvino_version, is_openvino_version, is_transformers_version
2525

2626

2727
def model_has_state(ov_model: ov.Model):
@@ -216,7 +216,9 @@ def patch_stateful(config: PretrainedConfig, ov_model: ov.Model):
216216
batch_dim = 1 if config.model_type == "chatglm" and not hasattr(config, "rope_ratio") else 0
217217

218218
fuse_cache_reorder(ov_model, not_kv_inputs, key_value_input_names, batch_dim)
219-
num_attention_heads = config.num_attention_heads if config.model_type == "bloom" else 1
219+
num_attention_heads = (
220+
config.num_attention_heads if (config.model_type == "bloom" and is_transformers_version("<", "4.44")) else 1
221+
)
220222
make_stateful(
221223
ov_model, not_kv_inputs, key_value_input_names, key_value_output_names, batch_dim, num_attention_heads, None
222224
)

optimum/intel/ipex/modeling_base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -564,7 +564,7 @@ def _prepare_past_key_values(self, input_ids):
564564
]
565565
)
566566
return past_key_values
567-
elif model_type == "bloom":
567+
elif model_type == "bloom" and is_transformers_version("<", "4.44"):
568568
shape_key = (batch_size * num_attention_heads, d_k, 0)
569569
shape_value = (batch_size * num_attention_heads, 0, d_k)
570570
key = torch.empty(size=shape_key, dtype=self.model_dtype, device=self._device)

optimum/intel/openvino/modeling_base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ def dtype(self) -> Optional[torch.dtype]:
140140
def load_model(
141141
file_name: Union[str, Path],
142142
quantization_config: Union[OVWeightQuantizationConfig, Dict] = None,
143-
):
143+
) -> openvino.runtime.Model:
144144
"""
145145
Loads the model.
146146

optimum/intel/openvino/modeling_decoder.py

+19-8
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
2626
from openvino.preprocess import PrePostProcessor
2727
from openvino.runtime import Core, Tensor, Type
28+
from packaging.version import Version
2829
from transformers import AutoModelForCausalLM, PretrainedConfig
2930
from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward
3031
from transformers.generation import GenerationMixin
@@ -38,7 +39,7 @@
3839

3940
from ...exporters.openvino import ensure_stateful_is_available, main_export, patch_stateful
4041
from ...exporters.openvino.stateful import model_has_state
41-
from ..utils.import_utils import is_nncf_available, is_transformers_version
42+
from ..utils.import_utils import compare_versions, is_nncf_available, is_transformers_version
4243
from ..utils.modeling_utils import MULTI_QUERY_ATTN_MODELS
4344
from .configuration import (
4445
OVConfig,
@@ -51,8 +52,8 @@
5152

5253

5354
if TYPE_CHECKING:
55+
from transformers.generation.streamers import BaseStreamer
5456
from transformers.modeling_utils import PreTrainedModel
55-
from transformers.streamers import BaseStreamer
5657

5758

5859
logger = logging.getLogger(__name__)
@@ -404,7 +405,10 @@ def prepare_inputs(
404405
**kwargs,
405406
) -> Dict:
406407
batch_size = input_ids.shape[0]
407-
if self.config.model_type == "bloom":
408+
model_transformers_version = Version(
409+
self.model.rt_info["optimum"]["transformers_version"].value if "optimum" in self.model.rt_info else "0.0.0"
410+
)
411+
if self.config.model_type == "bloom" and compare_versions(model_transformers_version, "<", "4.44"):
408412
batch_size *= self.config.num_attention_heads
409413

410414
inputs = {}
@@ -619,7 +623,10 @@ def _deduplicate_inputs(self, model_inputs: Dict):
619623
shape = input_tensor.shape if isinstance(input_tensor, Tensor) else list(input_tensor.shape)
620624
dtype = input_tensor.element_type if isinstance(input_tensor, Tensor) else Type(input_tensor.dtype)
621625
upd_batch_size = indicies.shape[0]
622-
if self.config.model_type == "bloom":
626+
export_transformers_version = Version(self.model.rt_info["optimum"]["transformers_version"].value)
627+
if self.config.model_type == "bloom" and compare_versions(
628+
export_transformers_version, "<", "4.44"
629+
):
623630
upd_batch_size *= self.config.num_attention_heads
624631
shape[
625632
(
@@ -631,10 +638,11 @@ def _deduplicate_inputs(self, model_inputs: Dict):
631638
upd_model_inputs[input_name] = Tensor(dtype, shape)
632639
upd_model_inputs["input_ids"] = unique_input_ids
633640
if "beam_idx" in model_inputs:
641+
export_transformers_version = Version(self.model.rt_info["optimum"]["transformers_version"].value)
634642
beam_range = (
635-
unique_input_ids.shape[0]
636-
if self.config.model_type != "bloom"
637-
else unique_input_ids.shape[0] * self.config.num_attention_heads
643+
unique_input_ids.shape[0] * self.config.num_attention_heads
644+
if (self.config.model_type == "bloom" and compare_versions(export_transformers_version, "<", "4.44"))
645+
else unique_input_ids.shape[0]
638646
)
639647
beam_idx = np.arange(beam_range, dtype=int)
640648
upd_model_inputs["beam_idx"] = beam_idx
@@ -781,7 +789,10 @@ def _from_pretrained(
781789
model = cls.load_model(model_cache_path)
782790

783791
model_type = config.model_type.replace("_", "-")
784-
if model_type == "bloom":
792+
export_transformers_version = Version(
793+
model.rt_info["optimum"]["transformers_version"].value if "optimum" in model.rt_info else "0.0.0"
794+
)
795+
if model_type == "bloom" and compare_versions(export_transformers_version, "<", "4.44"):
785796
init_cls = OVBloomForCausalLM
786797
elif model_type == "gpt-bigcode":
787798
init_cls = OVGPTBigCodeForCausalLM

setup.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828

2929
INSTALL_REQUIRE = [
3030
"torch>=1.11",
31-
"transformers>=4.36.0,<4.44.0",
31+
"transformers>=4.36,<4.45",
3232
"optimum@git+https://github.com/huggingface/optimum.git",
3333
"datasets>=1.4.0",
3434
"sentencepiece",
@@ -59,10 +59,10 @@
5959
QUALITY_REQUIRE = ["black~=23.1", "ruff==0.4.4"]
6060

6161
EXTRAS_REQUIRE = {
62-
"neural-compressor": ["neural-compressor>=2.2.0,<3.0", "accelerate", "transformers<4.43.0"],
62+
"neural-compressor": ["neural-compressor>=2.2.0,<3.0", "accelerate", "transformers<4.43"],
6363
"openvino": ["openvino>=2023.3", "nncf>=2.11.0", "openvino-tokenizers[transformers]"],
6464
"nncf": ["nncf>=2.11.0"],
65-
"ipex": ["intel-extension-for-pytorch", "transformers>=4.39.0,<4.44.0"],
65+
"ipex": ["intel-extension-for-pytorch", "transformers>=4.39,<4.45"],
6666
"diffusers": ["diffusers"],
6767
"quality": QUALITY_REQUIRE,
6868
"tests": TESTS_REQUIRE,

0 commit comments

Comments (0)