@@ -144,7 +144,7 @@ def create_text_gen_model(model_path, device, **kwargs):
         raise RuntimeError(f'==Failure ==: model path:{model_path} does not exist')
     else:
         if kwargs.get("genai", False) and is_genai_available(log_msg=True):
-            if model_class not in [OV_MODEL_CLASSES_MAPPING[default_model_type], OV_MODEL_CLASSES_MAPPING["mpt"]]:
+            if model_class not in [OV_MODEL_CLASSES_MAPPING[default_model_type], OV_MODEL_CLASSES_MAPPING["mpt"], OV_MODEL_CLASSES_MAPPING["chatglm"]]:
                 log.warning(f"OpenVINO GenAI based benchmarking is not available for {model_type}. Will be switched to default benchmarking")
             else:
                 return create_genai_text_gen_model(model_path, device, ov_config, **kwargs)
@@ -185,8 +185,19 @@ def create_genai_text_gen_model(model_path, device, ov_config, **kwargs):
         convert_ov_tokenizer(model_path)

     tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
-    start = time.perf_counter()

+    cb = kwargs.get("use_cb", False)
+    if cb:
+        log.info("Continuous Batching mode activated")
+        scheduler_config = openvino_genai.SchedulerConfig()
+        scheduler_params = kwargs.get("cb_config") or {"cache_size": 1}
+        if scheduler_params:
+            log.info(f"Scheduler parameters:\n{scheduler_params}")
+
+        for param, value in scheduler_params.items():
+            setattr(scheduler_config, param, value)
+        ov_config["scheduler_config"] = scheduler_config
+    start = time.perf_counter()
     llm_pipe = openvino_genai.LLMPipeline(str(model_path), device.upper(), ov_config)
     end = time.perf_counter()
     log.info(f'Pipeline initialization time: {end - start:.2f}s')
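
For context, a minimal standalone sketch of how the scheduler plumbing added above is consumed: a SchedulerConfig is filled from a dict via setattr and handed to LLMPipeline through the "scheduler_config" entry of ov_config. The model path, prompt, and max_new_tokens value are illustrative, and cache_size is assumed to be the KV-cache budget field of openvino_genai.SchedulerConfig rather than a value taken from this PR.

# Sketch only: mirrors the setattr pattern from the diff above.
# Assumption: "/path/to/ov_model" is an exported OpenVINO GenAI model directory.
import openvino_genai

scheduler_config = openvino_genai.SchedulerConfig()
for param, value in {"cache_size": 1}.items():  # same pattern as the cb_config loop
    setattr(scheduler_config, param, value)

ov_config = {"scheduler_config": scheduler_config}
pipe = openvino_genai.LLMPipeline("/path/to/ov_model", "CPU", ov_config)
print(pipe.generate("Hello, OpenVINO!", max_new_tokens=16))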