@@ -308,13 +308,14 @@ def run_text_generation_genai(input_text, num, model, tokenizer, args, iter_data
308
308
log .warning (f"[{ num } ] Prompt[{ prompt_index } ]'s md5 { result_md5_list } "
309
309
f"is different from md5 of the { num - 1 } iteration { prev_md5 } " )
310
310
llm_bench_utils .metrics_print .print_generated (num , warm_up = (num == 0 ), generated = generated_text [0 ])
311
- if num == 1 :
312
- # if the device is CPU, throw exception
313
- if args ['devices' ].lower ().startswith ('cpu' ) is True :
311
+ if not args .get ("use_cb" , False ):
312
+ if num == 1 :
313
+ # if the device is CPU, throw exception
314
+ if args ['devices' ].lower ().startswith ('cpu' ) is True :
315
+ assert (result_md5_list == prev_md5 )
316
+ else :
317
+ # throw exception
314
318
assert (result_md5_list == prev_md5 )
315
- else :
316
- # throw exception
317
- assert (result_md5_list == prev_md5 )
318
319
else :
319
320
llm_bench_utils .metrics_print .print_generated (num , warm_up = (num == 0 ), generated = generated_text [0 ])
320
321
@@ -814,7 +815,7 @@ def get_argprser():
814
815
llm_bench_utils .model_utils .add_stateful_model_arguments (parser )
815
816
parser .add_argument ("--genai" , action = "store_true" , help = "Use OpenVINO GenAI optimized pipelines for benchmarking" )
816
817
parser .add_argument ("--use_cb" , action = "store_true" , help = "Use Continuous Batching inference mode" )
817
- parser .add_argument ("--cb_config" , required = False , default = None , help = "Path to file with Continuous Batching Scheduler settings" )
818
+ parser .add_argument ("--cb_config" , required = False , default = None , help = "Path to file with Continuous Batching Scheduler settings or dict " )
818
819
parser .add_argument (
819
820
'--end_token_stopping' ,
820
821
action = 'store_true' ,
0 commit comments