Skip to content

Commit f58216d

Browse files
committed
add support for continuous batching (CB)
1 parent ee5aa1e commit f58216d

File tree

3 files changed

+24
-3
lines changed

3 files changed

+24
-3
lines changed

llm_bench/python/benchmark.py

+3-1
Original file line number | Diff line number | Diff line change
@@ -703,7 +703,9 @@ def get_argprser():
703703
)
704704
parser.add_argument('-od', '--output_dir', help='Save the input text and generated text, images to files')
705705
llm_bench_utils.model_utils.add_stateful_model_arguments(parser)
706-
parser.add_argument("--genai", action="store_true")
706+
parser.add_argument("--genai", action="store_true", help="Use OpenVINO GenAI optimized pipelines for benchmarking")
707+
parser.add_argument("--use_cb", action="store_true", help="Use Continious Batching inference mode")
708+
parser.add_argument("--cb_config", required=False, default=None, help="Path to file with Continious batching settings")
707709
parser.add_argument(
708710
'--end_token_stopping',
709711
action='store_true',

llm_bench/python/llm_bench_utils/model_utils.py

+8
Original file line number | Diff line number | Diff line change
@@ -136,6 +136,7 @@ def analyze_args(args):
136136
model_args['subsequent'] = args.subsequent
137137
model_args['output_dir'] = args.output_dir
138138
model_args['genai'] = args.genai
139+
model_args["use_cb"] = args.use_cb
139140
model_args['devices'] = args.device
140141
model_args['prompt_index'] = [] if args.prompt_index is not None else None
141142
if model_args['prompt_index'] is not None:
@@ -164,6 +165,13 @@ def analyze_args(args):
164165
log.info(f"PT Config={model_args['config']}")
165166
model_args['model_type'] = get_model_type(model_name, use_case, model_framework)
166167
model_args['model_name'] = model_name
168+
169+
if args.use_cb and not args.genai:
170+
raise RuntimeError("Continious batching mode supported only via OpenVINO GenAI")
171+
cb_config = None
172+
if args.cb_config:
173+
cb_config = get_config(args.cb_config)
174+
model_args["cb_config"] = cb_config
167175
return model_path, model_framework, model_args, model_name
168176

169177

llm_bench/python/llm_bench_utils/ov_utils.py

+13-2
Original file line number | Diff line number | Diff line change
@@ -144,7 +144,7 @@ def create_text_gen_model(model_path, device, **kwargs):
144144
raise RuntimeError(f'==Failure ==: model path:{model_path} does not exist')
145145
else:
146146
if kwargs.get("genai", False) and is_genai_available(log_msg=True):
147-
if model_class not in [OV_MODEL_CLASSES_MAPPING[default_model_type], OV_MODEL_CLASSES_MAPPING["mpt"]]:
147+
if model_class not in [OV_MODEL_CLASSES_MAPPING[default_model_type], OV_MODEL_CLASSES_MAPPING["mpt"], OV_MODEL_CLASSES_MAPPING["chatglm"]]:
148148
log.warning("OpenVINO GenAI based benchmarking is not available for {model_type}. Will be switched to default bencmarking")
149149
else:
150150
return create_genai_text_gen_model(model_path, device, ov_config, **kwargs)
@@ -185,8 +185,19 @@ def create_genai_text_gen_model(model_path, device, ov_config, **kwargs):
185185
convert_ov_tokenizer(model_path)
186186

187187
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
188-
start = time.perf_counter()
189188

189+
cb = kwargs.get("use_cb", False)
190+
if cb:
191+
log.info("Continius Batching mode activated")
192+
scheduler_config = openvino_genai.SchedulerConfig()
193+
scheduler_params = kwargs.get("cb_config") or {"cache_size": 1}
194+
if scheduler_params:
195+
log.info(f"Scheduler parameters:\n{scheduler_params}")
196+
197+
for param, value in scheduler_params.items():
198+
setattr(scheduler_config, param, value)
199+
ov_config["scheduler_config"] = scheduler_config
200+
start = time.perf_counter()
190201
llm_pipe = openvino_genai.LLMPipeline(str(model_path), device.upper(), ov_config)
191202
end = time.perf_counter()
192203
log.info(f'Pipeline initialization time: {end - start:.2f}s')

0 commit comments

Comments
 (0)