@@ -101,19 +101,18 @@ def test_perf_metrics(cache):
101
101
102
102
assert perf_metrics is not None
103
103
104
- assert perf_metrics .get_load_time () > 0
105
- assert perf_metrics .get_num_generated_tokens () > 0
106
- assert perf_metrics .get_num_input_tokens () > 0
107
- assert perf_metrics .get_ttft ().mean > 0
108
- assert perf_metrics .get_tpot ().mean > 0
109
- assert perf_metrics .get_ipot ().mean > 0
110
- assert perf_metrics .get_throughput ().mean > 0
111
- assert perf_metrics .get_inference_duration ().mean > 0
112
- assert perf_metrics .get_generate_duration ().mean > 0
113
- assert perf_metrics .get_tokenization_duration ().mean > 0
114
- assert perf_metrics .get_detokenization_duration ().mean > 0
115
- assert perf_metrics .get_detokenization_duration ().mean > 0
116
- assert perf_metrics .get_prepare_embeddings_duration ().mean > 0
104
+ assert 0 < perf_metrics .get_load_time () < 2000
105
+ assert 0 < perf_metrics .get_num_generated_tokens () < 100
106
+ assert 0 < perf_metrics .get_num_input_tokens () < 100
107
+ assert 0 < perf_metrics .get_ttft ().mean < 1000
108
+ assert 0 < perf_metrics .get_tpot ().mean < 100
109
+ assert 0 < perf_metrics .get_ipot ().mean < 100
110
+ assert 0 < perf_metrics .get_throughput ().mean < 1000
111
+ assert 0 < perf_metrics .get_inference_duration ().mean < 1000
112
+ assert 0 < perf_metrics .get_generate_duration ().mean < 1000
113
+ assert 0 < perf_metrics .get_tokenization_duration ().mean < 100
114
+ assert 0 < perf_metrics .get_detokenization_duration ().mean < 10
115
+ assert 0 < perf_metrics .get_prepare_embeddings_duration ().mean < 100
117
116
118
117
# assert that statistics calculated manually from the raw counters match the results reported by PerfMetrics
119
118
vlm_raw_metrics = perf_metrics .vlm_raw_metrics
0 commit comments