
Commit e7fd50f

Updated real models
1 parent b715e15 commit e7fd50f

5 files changed: +59 -42 lines changed

text_generation/causal_lm/cpp/continuous_batching/library/src/paged_attention_transformations.cpp (-4)

@@ -21,10 +21,6 @@ void apply_paged_attention_transformations(std::shared_ptr<ov::Model> model, Dev
 
     const ov::ParameterVector& parameters = model->get_parameters();
 
-    for (auto param : parameters) {
-        std::cout << param->get_friendly_name() << " " << param->get_partial_shape() << std::endl;
-    }
-
     // extract num_kv_heads and head_size
     size_t kv_caches_inputs_offset = 2;
     ov::PartialShape k_shape = parameters[kv_caches_inputs_offset]->get_partial_shape();

text_generation/causal_lm/cpp/continuous_batching/python/tests/common.py (+15 -2)

@@ -17,6 +17,7 @@ def get_greedy() -> GenerationConfig:
     generation_config.num_return_sequences = 1
     return generation_config
 
+
 def get_beam_search() -> GenerationConfig:
     generation_config = GenerationConfig()
     generation_config.num_groups = 3
@@ -25,6 +26,7 @@ def get_beam_search() -> GenerationConfig:
     generation_config.num_return_sequences = generation_config.num_groups * generation_config.group_size
     return generation_config
 
+
 def get_test_dataset() -> Tuple[List[str], List[GenerationConfig]]:
     prompts = [
         "What is OpenVINO?",
@@ -40,6 +42,7 @@ def get_test_dataset() -> Tuple[List[str], List[GenerationConfig]]:
     ]
     return (prompts, generation_configs)
 
+
 def get_scheduler_config(scheduler_params: dict = None) -> SchedulerConfig:
     scheduler_config = SchedulerConfig()
     if scheduler_params is None:
@@ -54,6 +57,7 @@ def get_scheduler_config(scheduler_params: dict = None) -> SchedulerConfig:
 
     return scheduler_config
 
+
 def convert_to_hf(
     default_generation_config : HFGenerationConfig,
     generation_config : GenerationConfig
@@ -91,6 +95,7 @@ def convert_to_hf(
     hf_generation_config = HFGenerationConfig(**kwargs)
     return hf_generation_config
 
+
 def run_hugging_face(
     model_id : str,
     prompts: List[str],
@@ -117,7 +122,7 @@
         inputs = hf_tokenizer(prompt, return_tensors="pt")
         prompt_len = len(inputs['input_ids'][0])
         generate_outputs = model.generate(**inputs, generation_config=convert_to_hf(model.generation_config, generation_config), return_dict_in_generate=True)
-        all_text_batch = hf_tokenizer.batch_decode([generated_ids[prompt_len:] for generated_ids in generate_outputs.sequences])
+        all_text_batch = hf_tokenizer.batch_decode([generated_ids[prompt_len:] for generated_ids in generate_outputs.sequences], skip_special_tokens=True)
 
         generation_result = GenerationResult()
         generation_result.m_generation_ids = all_text_batch
@@ -126,16 +131,23 @@
             generation_result.m_scores = [score for score in generate_outputs.sequences_scores]
         generation_results.append(generation_result)
 
+    del hf_tokenizer
+    del model
+
     return (generation_results, model_path)
 
+
 def run_continuous_batching(
     model_path : Path,
     scheduler_config : SchedulerConfig,
     prompts: List[str],
     generation_configs : List[GenerationConfig]
 ) -> List[GenerationResult]:
     pipe = ContinuousBatchingPipeline(model_path.absolute().as_posix(), scheduler_config)
-    return pipe.generate(prompts, generation_configs)
+    output = pipe.generate(prompts, generation_configs)
+    del pipe
+    return output
+
 
 def get_models_list(file_name: str):
     models = []
@@ -148,6 +160,7 @@ def get_models_list(file_name: str):
         models.append(model_name)
     return models
 
+
 def compare_results(hf_result, ov_result, generation_config):
     if generation_config.is_beam_search:
         assert len(hf_result.m_scores) == len(ov_result.m_scores)
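
Two behavioral changes stand out in common.py: batch_decode now passes skip_special_tokens=True, so special tokens emitted by the Hugging Face reference run (such as an end-of-sequence token) no longer leak into the text compared against the continuous-batching output, and the tokenizer, model, and pipeline objects are deleted explicitly so each parametrized test releases its memory before the next model loads. A minimal sketch of the decoding effect, assuming the transformers package is installed and using gpt2 purely as an illustrative tokenizer:

# Illustrative sketch (not part of the commit): the effect of
# skip_special_tokens on decoded text. "gpt2" is an assumed example model.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("gpt2")
ids = tok("What is OpenVINO?")["input_ids"]
ids.append(tok.eos_token_id)  # simulate a generated end-of-sequence token

print(tok.decode(ids, skip_special_tokens=False))  # ends with <|endoftext|>
print(tok.decode(ids, skip_special_tokens=True))   # special token stripped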

text_generation/causal_lm/cpp/continuous_batching/python/tests/models/real_models (+37 -35)

@@ -4,71 +4,73 @@ microsoft/phi-2
 microsoft/phi-1_5
 EleutherAI/gpt-neo-125m
 EleutherAI/gpt-neo-125m
+EleutherAI/gpt-neo-1.3B
+EleutherAI/gpt-j-6b
+# EleutherAI/gpt-neox-20b
 BAAI/AquilaChat2-7B
 BAAI/Aquila-7B
 BAAI/AquilaChat-7B
-baichuan-inc/Baichuan2-13B-Chat
+baichuan-inc/Baichuan2-7B-Chat
 baichuan-inc/Baichuan-7B
-bigscience/bloomz-7b1
-bigscience/bloomz
+bigscience/bloomz-1b7
+bigscience/bloomz-560m
 THUDM/chatglm2-6b
 THUDM/chatglm3-6b
-CohereForAI/c4ai-command-r-v01
-databricks/dbrx-base
-databricks/dbrx-instruct
+databricks/dolly-v2-3b
 tiiuae/falcon-7b
 tiiuae/falcon-rw-7b
 google/gemma-2b
 google/gemma-7b
+openai-community/gpt2
+openai-community/gpt2-xl
 gpt2
 gpt2-xl
-bigcode/starcoder
+bigcode/starcoderbase-3b
+bigcode/starcoder2-3b
 bigcode/gpt_bigcode-santacoder
-EleutherAI/gpt-j-6b
 nomic-ai/gpt4all-j
-EleutherAI/gpt-neox-20b
-databricks/dolly-v2-12b
-stabilityai/stablelm-tuned-alpha-7b
-internlm/internlm-7b
+nomic-ai/gpt4all-mpt
+nomic-ai/gpt4all-falcon
+stabilityai/stablelm-3b-4e1t
+stabilityai/stablelm-2-zephyr-1_6b
 internlm/internlm-chat-7b
 internlm/internlm2-7b
-internlm/internlm2-chat-7b
-core42/jais-13b
-core42/jais-13b-chat
+# core42/jais-13b
+# core42/jais-13b-chat
+meta-llama/Llama-2-7b-hf
 meta-llama/Meta-Llama-3-8B-Instruct
-lmsys/vicuna-13b-v1.3
-young-geng/koala
-openlm-research/open_llama_13b
+lmsys/vicuna-7b-v1.3
+lmsys/vicuna-7b-v1.5
+# young-geng/koala
+openlm-research/open_llama_3b
+openlm-research/open_llama_3b_v2
+openbmb/MiniCPM-V-2
 openbmb/MiniCPM-2B-sft-bf16
 openbmb/MiniCPM-2B-dpo-bf16
 mistralai/Mistral-7B-v0.1
 mistralai/Mistral-7B-Instruct-v0.1
-mistralai/Mixtral-8x7B-v0.1
-mistralai/Mixtral-8x7B-Instruct-v0.1
+# mistralai/Mixtral-8x7B-v0.1
+# mistralai/Mixtral-8x7B-Instruct-v0.1
+mosaicml/mpt-1b-redpajama-200b
 mosaicml/mpt-7b
-mosaicml/mpt-30b
+# mosaicml/mpt-30b
 allenai/OLMo-1B-hf
 allenai/OLMo-7B-hf
-OrionStarAI/Orion-14B-Base
-OrionStarAI/Orion-14B-Chat
+# OrionStarAI/Orion-14B-Base
+# OrionStarAI/Orion-14B-Chat
 Qwen/Qwen-7B
 Qwen/Qwen-7B-Chat
-Qwen/Qwen1.5-7B
+Qwen/Qwen1.5-0.5B
 Qwen/Qwen1.5-7B-Chat
-Qwen/Qwen1.5-MoE-A2.7B
-Qwen/Qwen1.5-MoE-A2.7B-Chat
-stabilityai/stablelm-3b-4e1t
-stabilityai/stablelm-base-alpha-7b-v2
-bigcode/starcoder2-3b
-bigcode/starcoder2-7b
-bigcode/starcoder2-15b
+# Qwen/Qwen1.5-MoE-A2.7B
+# Qwen/Qwen1.5-MoE-A2.7B-Chat
 xverse/XVERSE-7B-Chat
-xverse/XVERSE-13B-Chat
+# xverse/XVERSE-MoE-A4.2B
 01-ai/Yi-6B
-01-ai/Yi-34B
 Salesforce/codegen-350M-multi
-EleutherAI/gpt-j-6b
-EleutherAI/gpt-neo-125m
+Salesforce/codegen-350M-nl
 rinna/bilingual-gpt-neox-4b
 facebook/opt-350m
+facebook/incoder-1B
+google/pegasus-big_patent
 google/pegasus-large
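
The list edits follow one pattern: multi-billion-parameter checkpoints are swapped for smaller variants of the same family (Baichuan2-13B-Chat to Baichuan2-7B-Chat, vicuna-13b to vicuna-7b, open_llama_13b to open_llama_3b), and models that are very large or problematic are kept but disabled with a leading '#'. A hedged sketch of how such a list can be filtered, assuming get_models_list in common.py skips comment lines (its full body is not shown in this diff):

# Hedged sketch: parse a model-list file, skipping blank and '#' lines.
# This mirrors the assumed behavior of get_models_list in common.py.
def read_model_list(path: str) -> list:
    models = []
    with open(path) as f:
        for line in f:
            name = line.strip()
            if name and not name.startswith("#"):
                models.append(name)
    return models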

text_generation/causal_lm/cpp/continuous_batching/python/tests/requirements.txt (+5 -1)

@@ -7,4 +7,8 @@ git+https://github.com/huggingface/optimum-intel.git@main
 pytest
 pytest-html
 # set 'export HF_HUB_ENABLE_HF_TRANSFER=1' to benefits from hf_transfer
-hf_transfer
+hf_transfer
+
+# requirements for specific models
+# - hf-tiny-model-private/tiny-random-RoFormerForCausalLM
+rjieba
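
As the comment in requirements.txt notes, hf_transfer only takes effect when HF_HUB_ENABLE_HF_TRANSFER=1 is set in the environment. A small sketch of enabling it from Python, assuming huggingface_hub is installed; the model id is just an example taken from the test list:

# Illustrative sketch: enable the hf_transfer download backend, then fetch
# a snapshot. The env var must be set before huggingface_hub is imported.
import os
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

from huggingface_hub import snapshot_download
snapshot_download("facebook/opt-350m")  # example model id from real_models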

text_generation/causal_lm/cpp/continuous_batching/python/tests/test_sampling.py (+2)

@@ -16,11 +16,13 @@
 def test_sampling_precommit(tmp_path, model_id):
     run_test_pipeline(tmp_path, model_id)
 
+
 @pytest.mark.nightly
 @pytest.mark.parametrize("model_id", get_models_list(os.path.join(os.path.dirname(os.path.realpath(__file__)), "models", "nightly")))
 def test_sampling_nightly(tmp_path, model_id):
     run_test_pipeline(tmp_path, model_id)
 
+
 @pytest.mark.parametrize("model_id", get_models_list(os.path.join(os.path.dirname(os.path.realpath(__file__)), "models", "real_models")))
 def test_real_models(tmp_path, model_id):
     run_test_pipeline(tmp_path, model_id)
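
Each id returned by get_models_list becomes one parametrized pytest case, so test_real_models runs the full pipeline once per entry in models/real_models. A hedged way to run only those cases from Python, assuming the working directory is the tests folder (the flags are assumptions, not from the commit):

# Illustrative sketch: select only the real-model cases in this file.
import pytest

pytest.main(["test_sampling.py", "-k", "test_real_models", "-v"])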
