Skip to content

Commit 724b27f

Browse files
authored
Merge branch 'master' into vlm_python_bindings
2 parents 26009c4 + 09c5742 commit 724b27f

File tree

5 files changed

+23
-13
lines changed

5 files changed

+23
-13
lines changed

llm_bench/python/benchmark.py

+8 -7
Original file line number | Diff line number | Diff line change
@@ -308,13 +308,14 @@ def run_text_generation_genai(input_text, num, model, tokenizer, args, iter_data
308308
log.warning(f"[{num}] Prompt[{prompt_index}]'s md5 {result_md5_list} "
309309
f"is different from md5 of the {num - 1} iteration {prev_md5}")
310310
llm_bench_utils.metrics_print.print_generated(num, warm_up=(num == 0), generated=generated_text[0])
311-
if num == 1:
312-
# if the device is CPU, throw exception
313-
if args['devices'].lower().startswith('cpu') is True:
311+
if not args.get("use_cb", False):
312+
if num == 1:
313+
# if the device is CPU, throw exception
314+
if args['devices'].lower().startswith('cpu') is True:
315+
assert (result_md5_list == prev_md5)
316+
else:
317+
# throw exception
314318
assert (result_md5_list == prev_md5)
315-
else:
316-
# throw exception
317-
assert (result_md5_list == prev_md5)
318319
else:
319320
llm_bench_utils.metrics_print.print_generated(num, warm_up=(num == 0), generated=generated_text[0])
320321

@@ -814,7 +815,7 @@ def get_argprser():
814815
llm_bench_utils.model_utils.add_stateful_model_arguments(parser)
815816
parser.add_argument("--genai", action="store_true", help="Use OpenVINO GenAI optimized pipelines for benchmarking")
816817
parser.add_argument("--use_cb", action="store_true", help="Use Continuous Batching inference mode")
817-
parser.add_argument("--cb_config", required=False, default=None, help="Path to file with Continuous Batching Scheduler settings")
818+
parser.add_argument("--cb_config", required=False, default=None, help="Path to file with Continuous Batching Scheduler settings or dict")
818819
parser.add_argument(
819820
'--end_token_stopping',
820821
action='store_true',

llm_bench/python/llm_bench_utils/model_utils.py

+9 -3
Original file line number | Diff line number | Diff line change
@@ -204,11 +204,17 @@ def get_use_case(model_name_or_path):
204204

205205

206206
def get_config(config):
207-
with open(config, 'r') as f:
207+
if Path(config).is_file():
208+
with open(config, 'r') as f:
209+
try:
210+
ov_config = json.load(f)
211+
except Exception:
212+
raise RuntimeError(f'==Parse file:{config} failiure, json format is incorrect ==')
213+
else:
208214
try:
209-
ov_config = json.load(f)
215+
ov_config = json.loads(config)
210216
except Exception:
211-
raise RuntimeError(f'==Parse file:{config} failiure, json format is incorrect ==')
217+
raise RuntimeError(f'==Parse config:{config} failiure, json format is incorrect ==')
212218
return ov_config
213219

214220

llm_bench/python/llm_bench_utils/ov_utils.py

+4 -1
Original file line number | Diff line number | Diff line change
@@ -189,8 +189,11 @@ def create_genai_text_gen_model(model_path, device, ov_config, **kwargs):
189189
cb = kwargs.get("use_cb", False)
190190
if cb:
191191
log.info("Continuous Batching mode activated")
192+
default_cb_config = {"cache_size": 1}
193+
if "GPU" in device:
194+
default_cb_config["block_size"] = 16
192195
scheduler_config = openvino_genai.SchedulerConfig()
193-
scheduler_params = kwargs.get("cb_config") or {"cache_size": 1}
196+
scheduler_params = kwargs.get("cb_config") or default_cb_config
194197
if scheduler_params:
195198
log.info(f"Scheduler parameters:\n{scheduler_params}")
196199

samples/cpp/whisper_speech_recognition/README.md

+1 -1
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ Prepare audio file in wav format with sampling rate 16k Hz.
2323

2424
Output: text transcription of `sample.wav`
2525

26-
Models can be downloaded from [OpenAI HiggingFace](https://huggingface.co/openai).
26+
Models can be downloaded from [OpenAI HuggingFace](https://huggingface.co/openai).
2727

2828
Supported Models:
2929
[openai/whisper-tiny](https://huggingface.co/openai/whisper-tiny)

samples/python/whisper_speech_recognition/README.md

+1 -1
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ Prepare audio file in wav format with sampling rate 16k Hz.
2323

2424
Output: text transcription of `sample.wav`
2525

26-
Models can be downloaded from [OpenAI HiggingFace](https://huggingface.co/openai).
26+
Models can be downloaded from [OpenAI HuggingFace](https://huggingface.co/openai).
2727

2828
Supported Models:
2929
[openai/whisper-tiny](https://huggingface.co/openai/whisper-tiny)

0 commit comments

Comments
 (0)