
Commit 17f912a

update aquila to support v1 and v2

1 parent: 3db832a

4 files changed: +61, -13 lines


optimum/exporters/openvino/model_configs.py (+45, -5)
@@ -334,11 +334,11 @@ def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int
         past_key_shape = (self.batch_size, self.sequence_length, self.num_attention_heads, self.kv_channels)
         past_value_shape = (self.batch_size, self.sequence_length, self.num_attention_heads, self.kv_channels)
         return [
             (
                 self.random_float_tensor(past_key_shape, framework=framework, dtype=float_dtype),
                 self.random_float_tensor(past_value_shape, framework=framework, dtype=float_dtype),
             )
             for _ in range(self.num_layers)
         ]


@@ -658,13 +658,53 @@ class XGLMConfig(TextDecoderWithPositionIdsOnnxConfig):
     )


+class AquilaDummyPastKeyValuesGenerator(DummyPastKeyValuesGenerator):
+    def __init__(
+        self,
+        task: str,
+        normalized_config: NormalizedTextConfig,
+        batch_size: int = DEFAULT_DUMMY_SHAPES["batch_size"],
+        sequence_length: int = DEFAULT_DUMMY_SHAPES["sequence_length"],
+        random_batch_size_range: Optional[Tuple[int, int]] = None,
+        random_sequence_length_range: Optional[Tuple[int, int]] = None,
+        **kwargs,
+    ):
+        super().__init__(
+            task,
+            normalized_config,
+            batch_size,
+            sequence_length,
+            random_batch_size_range,
+            random_sequence_length_range,
+            **kwargs,
+        )
+        self.num_key_value_heads = getattr(
+            normalized_config, "num_key_value_heads", normalized_config.num_attention_heads
+        )
+
+    def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"):
+        shape = (
+            self.batch_size,
+            self.num_key_value_heads,
+            self.sequence_length,
+            self.hidden_size // self.num_attention_heads,
+        )
+        return [
+            (
+                self.random_float_tensor(shape, framework=framework, dtype=float_dtype),
+                self.random_float_tensor(shape, framework=framework, dtype=float_dtype),
+            )
+            for _ in range(self.num_layers)
+        ]
+
+
 @register_in_tasks_manager("aquila", *["text-generation", "text-generation-with-past"], library_name="transformers")
 class AquilaMOpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig):
     DEFAULT_ONNX_OPSET = 14

-    DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, MistralDummyPastKeyValuesGenerator)
-    DUMMY_PKV_GENERATOR_CLASS = MistralDummyPastKeyValuesGenerator
-    NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
+    DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, AquilaDummyPastKeyValuesGenerator)
+    DUMMY_PKV_GENERATOR_CLASS = AquilaDummyPastKeyValuesGenerator
+    NORMALIZED_CONFIG_CLASS = NormalizedTextConfig.with_args(num_key_value_heads="num_key_value_heads", allow_new=True)

     def patch_model_for_export(
         self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None
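For reference, the new generator builds per-layer (key, value) pairs in the grouped-query-attention layout (batch, num_key_value_heads, seq_len, head_dim), falling back to num_attention_heads when the config has no num_key_value_heads, which is the Aquila v1 case. Below is a minimal standalone sketch of that shape logic; the hyperparameters are made up for illustration, not real Aquila values.

# Illustrative sketch of the per-layer dummy KV-cache shape built by
# AquilaDummyPastKeyValuesGenerator. The hyperparameters are made up;
# real values come from the exported model's config.
from types import SimpleNamespace

import torch

config = SimpleNamespace(num_attention_heads=8, hidden_size=256, num_hidden_layers=4)  # v1-style: no num_key_value_heads

# Aquila 2 configs define num_key_value_heads; v1 configs do not, so fall back to num_attention_heads.
num_key_value_heads = getattr(config, "num_key_value_heads", config.num_attention_heads)

batch_size, sequence_length = 2, 16
shape = (
    batch_size,
    num_key_value_heads,
    sequence_length,
    config.hidden_size // config.num_attention_heads,
)
past_key_values = [
    (torch.rand(shape), torch.rand(shape))  # one (key, value) pair per decoder layer
    for _ in range(config.num_hidden_layers)
]
print(shape)  # (2, 8, 16, 32)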

optimum/exporters/openvino/model_patcher.py (+12, -7)
@@ -1099,7 +1099,7 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids):
     )
     bsz, q_len, _ = hidden_states.size()

-    if self.config.pretraining_tp > 1:
+    if hasattr(self.config, "pretraining_tp") and self.config.pretraining_tp > 1:
         key_value_slicing = (self.num_key_value_heads * self.head_dim) // self.config.pretraining_tp
         query_slices = self.q_proj.weight.split((self.num_heads * self.head_dim) // self.config.pretraining_tp, dim=0)
         key_slices = self.k_proj.weight.split(key_value_slicing, dim=0)
@@ -1120,8 +1120,12 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids):
         value_states = self.v_proj(hidden_states)

     query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim).transpose(1, 2)
-    key_states = key_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
-    value_states = value_states.view(bsz, q_len, self.num_key_value_heads, self.head_dim).transpose(1, 2)
+    key_states = key_states.view(
+        bsz, q_len, getattr(self, "num_key_value_heads", self.num_heads), self.head_dim
+    ).transpose(1, 2)
+    value_states = value_states.view(
+        bsz, q_len, getattr(self, "num_key_value_heads", self.num_heads), self.head_dim
+    ).transpose(1, 2)

     kv_seq_len = key_states.shape[-2]
     if past_key_value is not None:
@@ -1136,9 +1140,10 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids):

     past_key_value = (key_states, value_states) if use_cache else None

-    # repeat k/v heads if n_kv_heads < n_heads
-    key_states = repeat_kv(key_states, self.num_key_value_groups)
-    value_states = repeat_kv(value_states, self.num_key_value_groups)
+    if hasattr(self, "num_key_value_groups"):
+        # repeat k/v heads if n_kv_heads < n_heads
+        key_states = repeat_kv(key_states, self.num_key_value_groups)
+        value_states = repeat_kv(value_states, self.num_key_value_groups)

     attn_output = torch.nn.functional.scaled_dot_product_attention(
         query_states, key_states, value_states, attention_mask, scale=(1 / math.sqrt(self.head_dim))
@@ -1148,7 +1153,7 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids):
     attn_output = attn_output.transpose(1, 2).contiguous()
     attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)

-    if self.config.pretraining_tp > 1:
+    if hasattr(self.config, "pretraining_tp") and self.config.pretraining_tp > 1:
         attn_output = attn_output.split(self.hidden_size // self.config.pretraining_tp, dim=2)
         o_proj_slices = self.o_proj.weight.split(self.hidden_size // self.config.pretraining_tp, dim=1)
         attn_output = sum([F.linear(attn_output[i], o_proj_slices[i]) for i in range(self.config.pretraining_tp)])
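The patcher changes above all follow the same guard pattern, so a single patched forward can serve both Aquila v1 attention modules (which lack num_key_value_heads, num_key_value_groups, and config.pretraining_tp) and the llama-style Aquila 2 modules. A tiny self-contained sketch of that pattern; DummyAttnV1 is a made-up stand-in, not a class from this repository.

# Toy illustration of the attribute guards used in the patched attention:
# the same forward must run for Aquila v1 modules (no num_key_value_heads,
# num_key_value_groups, or config.pretraining_tp) and llama-style Aquila 2 modules.
class DummyAttnV1:
    num_heads = 8  # v1 module: attention heads only, no GQA attributes


attn = DummyAttnV1()

# getattr with a default keeps the key/value reshape valid for both variants.
kv_heads = getattr(attn, "num_key_value_heads", attn.num_heads)
assert kv_heads == 8

# hasattr skips the repeat_kv step entirely when the module predates GQA.
if hasattr(attn, "num_key_value_groups"):
    print("GQA module: key/value heads would be repeated with repeat_kv")
else:
    print("v1 module: the repeat_kv step is skipped")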

tests/openvino/test_modeling.py (+2, -0)
@@ -558,6 +558,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
         "cohere",
         "xglm",
         "aquila",
+        "aquila2",
         "xverse",
         "internlm",
     )
@@ -573,6 +574,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
         "orion",
         "phi3",
         "aquila",
+        "aquila2",
         "xverse",
         "internlm",
     )

tests/openvino/utils_tests.py (+2, -1)
@@ -18,7 +18,8 @@

 MODEL_NAMES = {
     "albert": "hf-internal-testing/tiny-random-albert",
-    "aquila": "katuni4ka/tiny-random-aquila",
+    "aquila": "katuni4ka/tiny-random-aquilachat",
+    "aquila2": "katuni4ka/tiny-random-aquila2",
     "audio_spectrogram_transformer": "Ericwang/tiny-random-ast",
     "bge": "BAAI/bge-small-en-v1.5",
     "beit": "hf-internal-testing/tiny-random-BeitForImageClassification",
