Commit 311766e

print [Px][Lx]
1 parent ac263eb commit 311766e
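
This commit threads the prompt index through the whisper benchmarking path so that each latency line is tagged with both the prompt index (P) and the position of the entry in the hook's time_data list (L), matching the "[Px][Lx]" commit title. A minimal sketch, using hypothetical values, of how the old and new log prefixes render:

# Illustrative only: the values below are hypothetical stand-ins.
iter_str = '0'    # iteration label passed down from print_metrics
prompt_idx = 1    # prompt index, newly threaded through from benchmark.py
idx = 2           # position of the entry in the hook's time_data list

# Before this commit:
print(f"[{iter_str}][{idx}] encoder token latency: ...")                  # -> [0][2] ...
# After this commit:
print(f"[{iter_str}][P{prompt_idx}][L{idx}] encoder token latency: ...")  # -> [0][P1][L2] ...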

3 files changed: +8 -7 lines changed


llm_bench/python/benchmark.py (+2 -1)
@@ -752,7 +752,8 @@ def run_speech_2txt_generation(pipe, args, num, md5_list, prompt_id, audio_promp
         max_rss_mem=max_rss_mem_consumption,
         max_shared_mem=max_shared_mem_consumption,
         max_uss_mem=max_uss_mem_consumption,
-        whisper=whisper_hook
+        whisper=whisper_hook,
+        prompt_idx=prompt_id
     )
     if num > 0:
         prev_md5 = md5_list[num - 1][prompt_id]

llm_bench/python/llm_bench_utils/hook_forward_whisper.py (+2 -2)
@@ -27,15 +27,15 @@ def get_time_infer_list(self):
         time_infer_list.insert(0, self.time_data[0]['enc_infer_time'])
         return time_infer_list
 
-    def get_whisper_latency(self, iter):
+    def get_whisper_latency(self, iter, prompt_idx):
         str = ''
         for idx, data in enumerate(self.time_data):
             enc_infer_time = data['enc_infer_time'] * 1000
             dec_token_count = len(data['dec_token_time'])
             dec_infer_count = len(data['dec_infer_time'])
             dec_token_time = sum(data['dec_token_time']) / dec_token_count * 1000 if dec_token_count > 1 else 0
             dec_infer_time = sum(data['dec_infer_time']) / dec_infer_count * 1000 if dec_infer_count > 1 else 0
-            str += f"[{iter}][{idx}] encoder token latency: {enc_infer_time:.2f} ms/token, " \
+            str += f"[{iter}][P{prompt_idx}][L{idx}] encoder token latency: {enc_infer_time:.2f} ms/token, " \
                    f"decoder tokens latency: {dec_token_time:.2f} ms/token, " \
                    f"decoder infers latency: {dec_infer_time:.2f} ms/infer, " \
                    f"decoder tokens count: {dec_token_count}, " \

llm_bench/python/llm_bench_utils/metrics_print.py (+4 -4)
@@ -6,7 +6,7 @@
 
 def print_metrics(
         iter_num, iter_data, tms=None, tms_infer=None, warm_up=False, max_rss_mem=-1, max_shared_mem=-1,
-        max_uss_mem=-1, stable_diffusion=None, tokenization_time=None, batch_size=1, whisper = None
+        max_uss_mem=-1, stable_diffusion=None, tokenization_time=None, batch_size=1, whisper = None, prompt_idx=-1
 ):
     iter_str = str(iter_num)
     if warm_up:
@@ -57,7 +57,7 @@ def print_metrics(
     if stable_diffusion is not None:
         print_stable_diffusion_infer_latency(iter_str, iter_data, stable_diffusion)
     if whisper is not None:
-        print_whisper_infer_latency(iter_str, whisper)
+        print_whisper_infer_latency(iter_str, whisper, prompt_idx)
     output_str = ''
     if max_rss_mem != '' and max_rss_mem > -1:
         output_str += 'Max rss memory cost: {:.2f}MBytes, '.format(max_rss_mem)
@@ -102,8 +102,8 @@ def print_stable_diffusion_infer_latency(iter_str, iter_data, stable_diffusion):
           f"vae decoder step count: {stable_diffusion.get_vae_decoder_step_count()}",)
 
 
-def print_whisper_infer_latency(iter_str, whisper):
-    print(f'{whisper.get_whisper_latency(iter_str)}')
+def print_whisper_infer_latency(iter_str, whisper, prompt_idx):
+    print(f'{whisper.get_whisper_latency(iter_str, prompt_idx)}')
 
 
 def print_ldm_unet_vqvae_infer_latency(iter_num, iter_data, tms=None, warm_up=False):
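
Taken together, the three diffs thread one new parameter end to end: benchmark.py passes prompt_idx=prompt_id into print_metrics, which forwards it to print_whisper_infer_latency, which hands it to the hook's get_whisper_latency. A minimal sketch of that chain, using a hypothetical stub in place of the real whisper hook:

# Hypothetical stub standing in for the real whisper hook; only the one
# method used by metrics_print is modelled here.
class StubWhisperHook:
    def get_whisper_latency(self, iter, prompt_idx):
        return f"[{iter}][P{prompt_idx}][L0] encoder token latency: 12.34 ms/token"

def print_whisper_infer_latency(iter_str, whisper, prompt_idx):
    print(f'{whisper.get_whisper_latency(iter_str, prompt_idx)}')

# As in benchmark.py, the prompt index rides along as a keyword argument:
print_whisper_infer_latency('0', StubWhisperHook(), prompt_idx=1)
# -> [0][P1][L0] encoder token latency: 12.34 ms/token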
