Support qwen2 family model (qwen1.5) (#1746)

uniartisan · fxmarty · web-flow · commit e6641b0bc62c · 2024-03-20T16:22:58.000+08:00
* Support qwen2 family model (qwen1.5)

* update docs

* add tests for qwen2

* fix test

* ordering

---------

Co-authored-by: fxmarty <9808326+fxmarty@users.noreply.github.com>
diff --git a/docs/source/exporters/onnx/overview.mdx b/docs/source/exporters/onnx/overview.mdx
@@ -77,6 +77,7 @@ Supported architectures from [🤗 Transformers](https://huggingface.co/docs/tra
 - Phi
 - Pix2Struct
 - PoolFormer
+- Qwen2(Qwen1.5)
 - RegNet
 - ResNet
 - Roberta
diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py
@@ -241,6 +241,10 @@ class LlamaOnnxConfig(TextDecoderWithPositionIdsOnnxConfig):
     NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
 
 
+class Qwen2OnnxConfig(LlamaOnnxConfig):
+    pass
+
+
 class GemmaOnnxConfig(LlamaOnnxConfig):
     DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, GemmaDummyPastKeyValuesGenerator)
     DUMMY_PKV_GENERATOR_CLASS = GemmaDummyPastKeyValuesGenerator
diff --git a/optimum/exporters/onnx/utils.py b/optimum/exporters/onnx/utils.py
@@ -82,8 +82,9 @@
     "gptj",
     "imagegpt",
     "llama",
-    "phi",
     "mistral",
+    "phi",
+    "qwen2",
 }
 
 
diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py
@@ -853,6 +853,14 @@ class TasksManager:
             "text-classification",
             onnx="OPTOnnxConfig",
         ),
+        "qwen2": supported_tasks_mapping(
+            "feature-extraction",
+            "feature-extraction-with-past",
+            "text-generation",
+            "text-generation-with-past",
+            "text-classification",
+            onnx="Qwen2OnnxConfig",
+        ),
         "llama": supported_tasks_mapping(
             "feature-extraction",
             "feature-extraction-with-past",
diff --git a/optimum/onnxruntime/modeling_decoder.py b/optimum/onnxruntime/modeling_decoder.py
@@ -338,7 +338,7 @@ def prepare_past_key_values(
             if self.model_type == "gemma":
                 num_attention_heads = self.normalized_config.num_key_value_heads
                 embed_size_per_head = self.normalized_config.head_dim
-            elif self.model_type in {"gemma", "mistral", "llama"}:
+            elif self.model_type in {"mistral", "llama", "qwen2"}:
                 num_attention_heads = self.normalized_config.num_key_value_heads
             else:
                 num_attention_heads = self.normalized_config.num_attention_heads
diff --git a/optimum/utils/normalized_config.py b/optimum/utils/normalized_config.py
@@ -264,6 +264,7 @@ class NormalizedConfigManager:
         "whisper": WhisperLikeNormalizedTextConfig,
         "xlm-roberta": NormalizedTextConfig,
         "yolos": NormalizedVisionConfig,
+        "qwen2": NormalizedTextConfig,
     }
 
     @classmethod
diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py
@@ -133,6 +133,7 @@
     "pix2struct": "fxmarty/pix2struct-tiny-random",
     # "rembert": "google/rembert",
     "poolformer": "hf-internal-testing/tiny-random-PoolFormerModel",
+    "qwen2": "fxmarty/tiny-dummy-qwen2",
     "regnet": "hf-internal-testing/tiny-random-RegNetModel",
     "resnet": "hf-internal-testing/tiny-random-resnet",
     "roberta": "hf-internal-testing/tiny-random-RobertaModel",
diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py
@@ -2258,6 +2258,7 @@ class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin):
         "llama",
         "mistral",
         "mpt",
+        "qwen2",
     ]
 
     FULL_GRID = {
diff --git a/tests/onnxruntime/utils_onnxruntime_tests.py b/tests/onnxruntime/utils_onnxruntime_tests.py
@@ -130,6 +130,7 @@
     "perceiver_vision": "hf-internal-testing/tiny-random-vision_perceiver_conv",
     "pix2struct": "fxmarty/pix2struct-tiny-random",
     "poolformer": "hf-internal-testing/tiny-random-PoolFormerModel",
+    "qwen2": "fxmarty/tiny-dummy-qwen2",
     "resnet": "hf-internal-testing/tiny-random-resnet",
     "roberta": "hf-internal-testing/tiny-random-RobertaModel",
     "roformer": "hf-internal-testing/tiny-random-RoFormerModel",

Original file line number	Diff line number	Diff line change
`@@ -82,8 +82,9 @@`
`82`	`82`	`"gptj",`
`83`	`83`	`"imagegpt",`
`84`	`84`	`"llama",`
`85`		`- "phi",`
`86`	`85`	`"mistral",`
	`86`	`+ "phi",`
	`87`	`+ "qwen2",`
`87`	`88`	`}`
`88`	`89`
`89`	`90`
Original file line number	Diff line number	Diff line change
`@@ -264,6 +264,7 @@ class NormalizedConfigManager:`
`264`	`264`	`"whisper": WhisperLikeNormalizedTextConfig,`
`265`	`265`	`"xlm-roberta": NormalizedTextConfig,`
`266`	`266`	`"yolos": NormalizedVisionConfig,`
	`267`	`+ "qwen2": NormalizedTextConfig,`
`267`	`268`	`}`
`268`	`269`
`269`	`270`	`@classmethod`
Original file line number	Diff line number	Diff line change
`@@ -2258,6 +2258,7 @@ class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin):`
`2258`	`2258`	`"llama",`
`2259`	`2259`	`"mistral",`
`2260`	`2260`	`"mpt",`
	`2261`	`+ "qwen2",`
`2261`	`2262`	`]`
`2262`	`2263`
`2263`	`2264`	`FULL_GRID = {`