
Commit 29ea577

[llm_bench] update optimum bench hook for transformer-based imagegen (openvinotoolkit#1525)
Co-authored-by: Ilya Lavrenov <ilya.lavrenov@intel.com>
1 parent b3096c3 commit 29ea577

File tree

4 files changed, +19 −11 lines


tools/llm_bench/llm_bench_utils/hook_forward.py

+5 −2
@@ -9,6 +9,7 @@ def __init__(self):
         self.text_encoder_step_count = 0
         self.unet_step_count = 0
         self.vae_decoder_step_count = 0
+        self.main_model_name = "unet"
 
     def get_text_encoder_latency(self):
         return (self.text_encoder_time / self.text_encoder_step_count) * 1000 if self.text_encoder_step_count > 0 else 0
@@ -56,7 +57,9 @@ def my_text_encoder(inputs, share_inputs=True, **kwargs):
         pipe.text_encoder.request = my_text_encoder
 
     def new_unet(self, pipe):
-        old_unet = pipe.unet.request
+        main_model = pipe.unet if pipe.unet is not None else pipe.transformer
+        self.main_model_name = "unet" if pipe.unet is not None else "transformer"
+        old_unet = main_model.request
 
         def my_unet(inputs, share_inputs=True, **kwargs):
             t1 = time.time()
@@ -66,7 +69,7 @@ def my_unet(inputs, share_inputs=True, **kwargs):
             self.unet_time_list.append(unet_time)
             self.unet_step_count += 1
             return r
-        pipe.unet.request = my_unet
+        main_model.request = my_unet
 
     def new_vae_decoder(self, pipe):
         old_vae_decoder = pipe.vae_decoder.request
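The hook instruments the pipeline by monkey-patching the compiled model's `request` callable, which is invoked once per scheduler step. A minimal sketch of the pattern, reduced to the denoiser timing (the real hook wraps the text encoder and VAE decoder the same way; `pipe` is assumed to be an optimum-intel diffusion pipeline exposing a `unet` or `transformer` component with a callable `request`):

    import time


    class StableDiffusionHook:
        """Reduced sketch of the benchmark hook: time each denoising step."""

        def __init__(self):
            self.unet_time_list = []
            self.unet_step_count = 0
            self.main_model_name = "unet"

        def new_unet(self, pipe):
            # SD/SDXL pipelines carry a UNet; SD3/FLUX-style pipelines carry
            # a transformer instead, so hook whichever component is present.
            main_model = pipe.unet if pipe.unet is not None else pipe.transformer
            self.main_model_name = "unet" if pipe.unet is not None else "transformer"
            old_request = main_model.request

            def timed_request(inputs, share_inputs=True, **kwargs):
                t1 = time.time()
                result = old_request(inputs, share_inputs=share_inputs, **kwargs)
                # One call per scheduler step, so this is per-step latency.
                self.unet_time_list.append(time.time() - t1)
                self.unet_step_count += 1
                return result

            main_model.request = timed_request

After `hook.new_unet(pipe)`, every pipeline run accumulates per-step latencies in `unet_time_list`, and `main_model_name` feeds the log labels used below.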

tools/llm_bench/llm_bench_utils/metrics_print.py

+4 −4
@@ -97,17 +97,17 @@ def print_stable_diffusion_infer_latency(iter_str, iter_data, stable_diffusion,
     iter_data['first_token_infer_latency'] = iter_data['first_token_latency']
     iter_data['other_tokens_infer_avg_latency'] = iter_data['other_tokens_avg_latency']
     prefix = f'[{iter_str}][P{prompt_idx}]'
-    log.info(f"{prefix} First step of unet latency: {iter_data['first_token_latency']:.2f} ms/step, "
-             f"other steps of unet latency: {iter_data['other_tokens_avg_latency']:.2f} ms/step",)
+    log.info(f"{prefix} First step of {stable_diffusion.main_model_name} latency: {iter_data['first_token_latency']:.2f} ms/step, "
+             f"other steps of {stable_diffusion.main_model_name} latency: {iter_data['other_tokens_avg_latency']:.2f} ms/step",)
     has_text_encoder_time = stable_diffusion.get_text_encoder_step_count() != -1
     log_str = (
         f"{prefix} Text encoder latency: {stable_diffusion.get_text_encoder_latency():.2f}" if has_text_encoder_time else f"{prefix} Text encoder latency: N/A "
-        f"unet latency: {stable_diffusion.get_unet_latency():.2f} ms/step, "
+        f"{stable_diffusion.main_model_name} latency: {stable_diffusion.get_unet_latency():.2f} ms/step, "
         f"vae decoder latency: {stable_diffusion.get_vae_decoder_latency():.2f} ms/step, ")
     if has_text_encoder_time:
         log_str += f"text encoder step count: {stable_diffusion.get_text_encoder_step_count()}, "
     log_str += (
-        f"unet step count: {stable_diffusion.get_unet_step_count()}, "
+        f"{stable_diffusion.main_model_name} step count: {stable_diffusion.get_unet_step_count()}, "
         f"vae decoder step count: {stable_diffusion.get_vae_decoder_step_count()}")
     log.info(log_str)
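With a transformer-based pipeline (e.g. SD3 or FLUX) the per-step summary now names the actual denoiser; an illustrative line built from the f-string above (the numbers are made up):

    [1][P0] First step of transformer latency: 940.12 ms/step, other steps of transformer latency: 310.45 ms/step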

tools/llm_bench/llm_bench_utils/ov_utils.py

+7 −3
@@ -363,10 +363,11 @@ def create_genai_image_gen_model(model_path, device, ov_config, **kwargs):
     import openvino_genai
 
     class PerfCollector:
-        def __init__(self) -> types.NoneType:
+        def __init__(self, main_model_name="unet") -> types.NoneType:
             self.iteration_time = []
             self.start_time = time.perf_counter()
             self.duration = -1
+            self.main_model_name = main_model_name
 
         def __call__(self, step, num_steps, latents):
             self.iteration_time.append(time.perf_counter() - self.start_time)
@@ -405,8 +406,6 @@ def get_unet_step_count(self):
         def get_vae_decoder_step_count(self):
             return 1
 
-    callback = PerfCollector()
-
     adapter_config = get_lora_config(kwargs.get("lora", None), kwargs.get("lora_alphas", []))
     if adapter_config:
         ov_config['adapters'] = adapter_config
@@ -416,6 +415,11 @@ def get_vae_decoder_step_count(self):
         data = json.load(f)
 
     model_class_name = data.get("_class_name", "")
+    main_model_name = "unet" if "unet" in data else "transformer"
+    callback = PerfCollector(main_model_name)
+
+    orig_tokenizer = AutoTokenizer.from_pretrained(model_path, subfolder="tokenizer")
+    callback.orig_tokenizer = orig_tokenizer
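The detection keys off model_index.json: in a diffusers-format export the pipeline components appear as top-level keys, so the presence of "unet" separates UNet pipelines from transformer-based ones. A minimal sketch under that assumption (the model path is hypothetical; PerfCollector is the class from the diff above):

    import json
    from pathlib import Path

    from transformers import AutoTokenizer

    model_path = Path("/path/to/exported/model")  # hypothetical export location
    with (model_path / "model_index.json").open() as f:
        data = json.load(f)

    # Diffusers-style pipelines list their components as top-level keys.
    main_model_name = "unet" if "unet" in data else "transformer"
    callback = PerfCollector(main_model_name)

    # Keep the original HF tokenizer so the benchmark can count prompt tokens later.
    callback.orig_tokenizer = AutoTokenizer.from_pretrained(model_path, subfolder="tokenizer")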

tools/llm_bench/task/image_generation.py

+3 −2
@@ -123,6 +123,7 @@ def run_image_generation(image_param, num, image_id, pipe, args, iter_data_list,
 def run_image_generation_genai(image_param, num, image_id, pipe, args, iter_data_list, proc_id, mem_consumption, callback=None):
     set_seed(args['seed'])
     input_text = image_param['prompt']
+    input_token_size = callback.orig_tokenizer(input_text, return_tensors="pt").input_ids.numel()
     input_args = collects_input_args(image_param, args['model_type'], args['model_name'], args["num_steps"], args.get("height"), args.get("width"), callback)
     out_str = f"Input params: Batch_size={args['batch_size']}, " \
         f"steps={input_args['num_inference_steps']}, width={input_args['width']}, height={input_args['height']}"
@@ -157,6 +158,7 @@ def run_image_generation_genai(image_param, num, image_id, pipe, args, iter_data
     generation_time = end - start
     iter_data = gen_output_data.gen_iterate_data(
         iter_idx=num,
+        in_size=input_token_size * args['batch_size'],
         infer_count=input_args["num_inference_steps"],
         gen_time=generation_time,
         res_md5=result_md5_list,
@@ -230,8 +232,7 @@ def run_image_generation_benchmark(model_path, framework, device, args, num_iter
         prefix = '[warm-up]' if num == 0 else '[{}]'.format(num)
         log.info(f"{prefix}[P{p_idx}] start: {iter_timestamp[num][p_idx]['start']}, end: {iter_timestamp[num][p_idx]['end']}")
 
-    if not use_genai:
-        metrics_print.print_average(iter_data_list, prompt_idx_list, args['batch_size'], False)
+    metrics_print.print_average(iter_data_list, prompt_idx_list, args['batch_size'], False)
     return iter_data_list, pretrain_time, iter_timestamp
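The new in_size metric counts prompt tokens with the Hugging Face tokenizer stashed on the callback. A standalone sketch of the counting step (the CLIP tokenizer here is illustrative; the benchmark reuses callback.orig_tokenizer loaded from the model's own tokenizer subfolder):

    from transformers import AutoTokenizer

    # Illustrative tokenizer; SD 1.x pipelines ship a CLIP text tokenizer.
    tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-large-patch14")

    input_text = "a photo of an astronaut riding a horse"
    # input_ids has shape (batch, sequence_length); numel() is the token count.
    input_token_size = tokenizer(input_text, return_tensors="pt").input_ids.numel()
    print(input_token_size)  # prompt length in tokens, including special tokens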
