Skip to content

Commit 20238a5

Browse files
authored
Added WWB tests (openvinotoolkit#880)
1 parent b092408 commit 20238a5

File tree

3 files changed

+142
-1
lines changed

3 files changed

+142
-1
lines changed

.github/workflows/llm_bench-python.yml

+12
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ name: llm_bench Python Test
55

66
env:
77
LLM_BENCH_PYPATH: llm_bench/python
8+
WWB_PATH: llm_bench/python/who_what_benchmark
89

910
on:
1011
push:
@@ -40,6 +41,8 @@ jobs:
4041
python -m pip install flake8 pytest black
4142
GIT_CLONE_PROTECTION_ACTIVE=false pip install -r ${{ env.LLM_BENCH_PYPATH }}/requirements.txt
4243
pip install openvino-nightly
44+
GIT_CLONE_PROTECTION_ACTIVE=false pip install -r ${{ env.WWB_PATH }}/requirements.txt
45+
GIT_CLONE_PROTECTION_ACTIVE=false pip install ${{ env.WWB_PATH }}
4346
4447
- name: Lint with flake8
4548
run: |
@@ -68,6 +71,9 @@ jobs:
6871
run: |
6972
python ./llm_bench/python/convert.py --model_id segmind/tiny-sd --output_dir ./ov_models/tiny-sd --precision FP16
7073
python ./llm_bench/python/benchmark.py -m ./ov_models/tiny-sd/pytorch/dldt/FP16/ -pf ./llm_bench/python/prompts/stable-diffusion.jsonl -d cpu -n 1
74+
- name: WWB Tests
75+
run: |
76+
python -m pytest ./llm_bench/python/who_what_benchmark/tests
7177
stateful:
7278
runs-on: ubuntu-20.04
7379
steps:
@@ -82,3 +88,9 @@ jobs:
8288
python -m pip install openvino-nightly
8389
python llm_bench/python/convert.py --model_id TinyLlama/TinyLlama-1.1B-Chat-v1.0 --output_dir . --stateful
8490
grep beam_idx pytorch/dldt/FP32/openvino_model.xml
91+
- name: WWB Tests
92+
run: |
93+
GIT_CLONE_PROTECTION_ACTIVE=false pip install -r llm_bench/python/who_what_benchmark/requirements.txt
94+
GIT_CLONE_PROTECTION_ACTIVE=false pip install llm_bench/python/who_what_benchmark/
95+
pip install pytest
96+
python -m pytest llm_bench/python/who_what_benchmark/tests

bandit.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ any_other_function_with_shell_equals_true:
131131
- subprocess.check_output
132132
- subprocess.run
133133
assert_used:
134-
skips: []
134+
skips: ["llm_bench/python/who_what_benchmark/tests/test_*.py"]
135135
hardcoded_tmp_directory:
136136
tmp_dirs:
137137
- /tmp
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
import subprocess # nosec B404
2+
import os
3+
import shutil
4+
import tempfile
5+
import pandas as pd
6+
import pytest
7+
import logging
8+
9+
from transformers import AutoTokenizer
10+
from optimum.intel.openvino import OVModelForCausalLM, OVWeightQuantizationConfig
11+
12+
13+
# Emit INFO-level records so the wwb command lines and results logged by the
# helpers in this module are visible in the test output.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
15+
16+
17+
def run_wwb(args):
    """Invoke the ``wwb`` command-line tool and return the finished process.

    Args:
        args: List of command-line arguments appended after ``wwb``.

    Returns:
        The ``subprocess.CompletedProcess`` of the call, with stdout and
        stderr captured as text. A non-zero exit status does not raise here;
        callers inspect ``returncode`` themselves.
    """
    command = ["wwb"] + args
    logger.info(" ".join(command))
    completed = subprocess.run(command, capture_output=True, text=True)
    logger.info(completed)
    return completed
26+
27+
28+
# Model identifier handed to from_pretrained() when the test models are created.
model_id = "facebook/opt-125m"
# Scratch directory created at import time; teardown_module() deletes it.
tmp_dir = tempfile.mkdtemp()
# Locations of the base model and of its 8-bit weight-quantized counterpart.
base_model_path, target_model_path = (
    os.path.join(tmp_dir, leaf) for leaf in ("opt125m", "opt125m_int8")
)
32+
33+
34+
def setup_module():
    """Create the base and the 8-bit weight-quantized model used by the tests.

    Both models are loaded from ``model_id`` and saved, each together with the
    tokenizer, to ``base_model_path`` and ``target_model_path`` respectively.
    The name follows pytest's module-level setup hook convention.
    """
    logger.info("Create models")
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    def _save_with_tokenizer(model, destination):
        # Every model directory also gets its own copy of the tokenizer files.
        model.save_pretrained(destination)
        tokenizer.save_pretrained(destination)

    _save_with_tokenizer(
        OVModelForCausalLM.from_pretrained(model_id),
        base_model_path,
    )

    int8_weights = OVWeightQuantizationConfig(bits=8)
    _save_with_tokenizer(
        OVModelForCausalLM.from_pretrained(
            model_id, quantization_config=int8_weights
        ),
        target_model_path,
    )
46+
47+
48+
def teardown_module():
    """Delete the scratch directory ``tmp_dir`` together with the saved models."""
    logger.info("Remove models")
    shutil.rmtree(tmp_dir)
51+
52+
53+
def test_target_model():
    """Compare the target model against the base model in non-verbose mode.

    Expects ``wwb`` to succeed and print the metrics summary, while the
    per-sample ``## Reference text`` section stays out of stdout.
    """
    cli_args = [
        "--base-model", base_model_path,
        "--target-model", target_model_path,
        "--num-samples", "2",
        "--device", "CPU",
    ]
    completed = run_wwb(cli_args)

    assert completed.returncode == 0
    stdout = completed.stdout
    assert "Metrics for model" in stdout
    assert "## Reference text" not in stdout
63+
64+
65+
def test_gt_data():
    """Check that ``wwb`` writes ground-truth data for the base model to a CSV.

    Runs ``wwb`` with ``--gt-data`` pointing at a path that does not exist yet
    and with an explicit dataset, field and split, then verifies that the run
    succeeded and that the CSV holds one ``questions`` entry per requested
    sample.
    """
    # A private directory provides a unique CSV path that does not exist until
    # wwb creates it, and it is removed even when an assertion below fails.
    with tempfile.TemporaryDirectory() as temp_dir:
        gt_file = os.path.join(temp_dir, "gt.csv")

        result = run_wwb([
            "--base-model", base_model_path,
            "--gt-data", gt_file,
            "--dataset", "EleutherAI/lambada_openai,en",
            "--dataset-field", "text",
            "--split", "test",
            "--num-samples", "2",
            "--device", "CPU",
        ])

        # Check the exit status first so a failed run is reported as such
        # instead of as a missing-file error from the CSV reader.
        assert result.returncode == 0
        data = pd.read_csv(gt_file)

    assert len(data["questions"].values) == 2
86+
87+
88+
def test_output_directory():
    """Check that ``--output`` makes ``wwb`` write its metric CSV files.

    Expects a successful run that prints the metrics summary and leaves both
    report files in the requested directory.
    """
    with tempfile.TemporaryDirectory() as out_dir:
        cli_args = [
            "--base-model", base_model_path,
            "--target-model", target_model_path,
            "--num-samples", "2",
            "--device", "CPU",
            "--output", out_dir,
        ]
        completed = run_wwb(cli_args)

        assert completed.returncode == 0
        assert "Metrics for model" in completed.stdout
        # NOTE(review): "qustion" is spelled this way on purpose here;
        # presumably it matches the file name wwb writes — confirm before
        # changing it.
        for report_name in ("metrics_per_qustion.csv", "metrics.csv"):
            assert os.path.exists(os.path.join(out_dir, report_name))
101+
102+
103+
def test_verbose():
    """Check that ``--verbose`` adds the ``## Reference text`` section to stdout."""
    cli_args = [
        "--base-model", base_model_path,
        "--target-model", target_model_path,
        "--num-samples", "2",
        "--device", "CPU",
        "--verbose",
    ]
    completed = run_wwb(cli_args)

    assert completed.returncode == 0
    assert "## Reference text" in completed.stdout
113+
114+
115+
def test_language_autodetect():
    """Check the questions ``wwb`` generates when no dataset is specified.

    Runs ``wwb`` with only ``Qwen/Qwen2-0.5B`` as base model and a
    ``--gt-data`` path that does not exist yet, then expects the first entry
    of the ``questions`` column in the written CSV to contain the Chinese
    text "马克".
    """
    # NOTE(review): presumably wwb picks Chinese prompts because it detects
    # the model's language — confirm against the wwb implementation.
    # A private directory provides a unique CSV path that does not exist until
    # wwb creates it, and it is removed even when an assertion below fails.
    with tempfile.TemporaryDirectory() as temp_dir:
        gt_file = os.path.join(temp_dir, "gt.csv")

        result = run_wwb([
            "--base-model", "Qwen/Qwen2-0.5B",
            "--gt-data", gt_file,
            "--num-samples", "2",
            "--device", "CPU",
        ])

        # Check the exit status first so a failed run is reported as such
        # instead of as a missing-file error from the CSV reader.
        assert result.returncode == 0
        data = pd.read_csv(gt_file)

    assert "马克" in data["questions"].values[0]

0 commit comments

Comments
 (0)