@@ -8,10 +8,6 @@ def print_metrics(
         iter_num, iter_data, tms=None, tms_infer=None, warm_up=False, max_rss_mem=-1, max_shared_mem=-1,
         max_uss_mem=-1, stable_diffusion=None, tokenization_time=None, batch_size=1
 ):
-    if tms is None:
-        tms = []
-    if tms_infer is None:
-        tms_infer = []
     iter_str = str(iter_num)
     if warm_up:
         iter_str = 'warm-up'
@@ -36,25 +32,27 @@ def print_metrics(
     if output_str != '':
         output_str = ' '.join(['[{}]'.format(iter_str), output_str])
         log.info(output_str)
-    if len(tms) > 0:
+    if tms is not None:
         iter_data['first_token_latency'] = tms[0] * 1000 if len(tms) > 0 else -1
         iter_data['other_tokens_avg_latency'] = sum(tms[1:]) / (len(tms) - 1) * 1000 if len(tms) > 1 else -1
+        first_token_latency = 'NA' if iter_data['first_token_latency'] == -1 else f"{iter_data['first_token_latency']:.2f} ms/{latency_unit}"
+        other_token_latency = 'NA' if iter_data['other_tokens_avg_latency'] == -1 else f"{iter_data['other_tokens_avg_latency']:.2f} ms/{latency_unit}"
         log.info(
-            f"[{iter_str}] First token latency: {iter_data['first_token_latency']:.2f} ms/{latency_unit}, "
-            f"other tokens latency: {iter_data['other_tokens_avg_latency']:.2f} ms/{latency_unit}, len of tokens: {len(tms)} * {batch_size}",
+            f"[{iter_str}] First token latency: {first_token_latency}, "
+            f"other tokens latency: {other_token_latency}, len of tokens: {len(tms)} * {batch_size}",
         )
-    else:
-        if tokenization_time:
+        if len(tms) == 0:
             log.warning(f'[{iter_str}] No hook data output for first token latency and other tokens latency')
-    if len(tms_infer) > 0:
+    if tms_infer is not None:
         iter_data['first_token_infer_latency'] = tms_infer[0] * 1000 if len(tms_infer) > 0 else -1
         iter_data['other_tokens_infer_avg_latency'] = sum(tms_infer[1:]) / (len(tms_infer) - 1) * 1000 if len(tms_infer) > 1 else -1
+        first_infer_latency = 'NA' if iter_data['first_token_infer_latency'] == -1 else f"{iter_data['first_token_infer_latency']:.2f} ms/infer"
+        other_infer_latency = 'NA' if iter_data['other_tokens_infer_avg_latency'] == -1 else f"{iter_data['other_tokens_infer_avg_latency']:.2f} ms/infer"
         log.info(
-            f"[{iter_str}] First infer latency: {iter_data['first_token_infer_latency']:.2f} ms/infer, "
-            f"other infers latency: {iter_data['other_tokens_infer_avg_latency']:.2f} ms/infer, inference count: {len(tms_infer)}",
+            f"[{iter_str}] First infer latency: {first_infer_latency}, "
+            f"other infers latency: {other_infer_latency}, inference count: {len(tms_infer)}",
         )
-    else:
-        if tokenization_time:
+        if len(tms_infer) == 0:
             log.warning(f'[{iter_str}] No hook data output for first infer latency and other infers latency')
     if stable_diffusion is not None:
         print_stable_diffusion_infer_latency(iter_str, iter_data, stable_diffusion)
@@ -112,8 +110,10 @@ def print_ldm_unet_vqvae_infer_latency(iter_num, iter_data, tms=None, warm_up=Fa
         iter_data['first_token_infer_latency'] = iter_data['first_token_latency']
         iter_data['other_tokens_infer_avg_latency'] = iter_data['other_tokens_avg_latency']
 
-    log.info(f"[{iter_str}] First step of unet latency: {iter_data['first_token_latency']:.2f} ms/step, "
-             f"other steps of unet latency: {iter_data['other_tokens_avg_latency']:.2f} ms/step",)
+    first_token_latency = 'NA' if iter_data['first_token_latency'] == -1 else f"{iter_data['first_token_latency']:.2f} ms/step"
+    other_token_latency = 'NA' if iter_data['other_tokens_avg_latency'] == -1 else f"{iter_data['other_tokens_avg_latency']:.2f} ms/step"
+    log.info(f"[{iter_str}] First step of unet latency: {first_token_latency}, "
+             f"other steps of unet latency: {other_token_latency}",)
     if len_tms > 1:
         log.info(f"[{iter_str}] Unet latency: {(sum(tms[0:(len_tms - 1)]) / (len_tms - 1)) * 1000:.2f} ms/step, "
                  f"vqvae decoder latency: {tms[len_tms - 1] * 1000:.2f} ms/step, "
@@ -149,14 +149,17 @@ def output_avg_statis_tokens(prompt_dict, prompt_idx_list, iter_data_list, batch
         latency_unit = '{}tokens'.format(batch_size)
     else:
         latency_unit = '{}steps'.format(batch_size)
+    avg_1st_token_latency = 'NA' if avg_1st_token_latency < 0 else f'{avg_1st_token_latency:.2f} ms/{latency_unit}'
+    avg_2nd_tokens_latency = 'NA' if avg_2nd_tokens_latency < 0 else f'{avg_2nd_tokens_latency:.2f} ms/{latency_unit}'
+    avg_2nd_token_tput = 'NA' if avg_2nd_tokens_latency == 'NA' else f'{avg_2nd_token_tput:.2f} {latency_unit}s/s'
     if is_text_gen is True:
-        prompt_dict[p_idx] = '\n[ INFO ] [Average] Prompt[{}] Input token size: {}, 1st token lantency: {:.2f} ms/{}, ' \
-                             '2nd tokens latency: {:.2f} ms/{}, 2nd tokens throughput: {:.2f} tokens/s' \
-                             .format(p_idx, avg_input_size, avg_1st_token_latency, latency_unit, avg_2nd_tokens_latency, latency_unit, avg_2nd_token_tput)
+        prompt_dict[p_idx] = '\n[ INFO ] [Average] Prompt[{}] Input token size: {}, 1st token latency: {}, ' \
+                             '2nd tokens latency: {}, 2nd tokens throughput: {}' \
+                             .format(p_idx, avg_input_size, avg_1st_token_latency, avg_2nd_tokens_latency, avg_2nd_token_tput)
     else:
-        prompt_dict[p_idx] = '\n[ INFO ] [Average] Prompt[{}] 1st step of unet latency {:.2f} ms/{}, ' \
-                             '2nd steps of unet latency: {:.2f} ms/{}, 2nd steps throughput: {:.2f} steps/s' \
-                             .format(p_idx, avg_1st_token_latency, latency_unit, avg_2nd_tokens_latency, latency_unit, avg_2nd_token_tput)
+        prompt_dict[p_idx] = '\n[ INFO ] [Average] Prompt[{}] 1st step of unet latency: {}, ' \
+                             '2nd steps of unet latency: {}, 2nd steps throughput: {}' \
+                             .format(p_idx, avg_1st_token_latency, avg_2nd_tokens_latency, avg_2nd_token_tput)
 
 
 def print_average(iter_data_list, prompt_idx_list, batch_size, is_text_gen=False):
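
Usage note (not part of the diff): every hunk above applies the same guard, formatting a latency only when it is a real measurement and printing 'NA' when the -1 sentinel means no hook data was collected. A minimal standalone sketch of that pattern, with a hypothetical helper name format_latency that does not appear in the PR:

def format_latency(value_ms, unit='token'):
    # Mirror the diff's convention: -1 is the "no measurement" sentinel,
    # so report 'NA' instead of formatting it as '-1.00 ms/...'.
    return 'NA' if value_ms == -1 else f'{value_ms:.2f} ms/{unit}'

print(format_latency(12.345))  # -> 12.35 ms/token
print(format_latency(-1))      # -> NA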