9 | 9 | from openvino import get_version
10 | 10 | import torch
11 | 11 | import traceback
12 | | -from llm_bench_utils.memory_profile import MemConsumption
| 12 | +from llm_bench_utils.memory_profile import MemMonitorWrapper
13 | 13 | import llm_bench_utils.output_csv
14 | 14 | import llm_bench_utils.output_json
15 | 15 | import task.visual_language_generation as bench_vlm
19 | 19 | import task.speech_to_text_generation as bench_speech
20 | 20 |
21 | 21 | DEFAULT_TORCH_THREAD_NUMS = 16
22 | | -mem_consumption = MemConsumption()
23 | | -
| 22 | +memory_monitor = MemMonitorWrapper()
24 | 23 |
25 | 24 | def num_iters_type(x):
26 | 25 |     x = int(x)
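The removed `start_collect_mem_consumption_thread()` / `end_collect_mem_consumption_thread()` names (see the hunks below) imply the old `MemConsumption` helper polled memory on a background thread. As a minimal sketch of that background-sampling pattern, assuming `psutil` for RSS reads and a made-up class name and interval (this is not the actual `llm_bench_utils.memory_profile` code):

```python
import threading
import time

import psutil  # assumption: psutil available for RSS sampling


class PeakRssMonitor:
    """Hypothetical stand-in: samples this process's RSS on a daemon thread."""

    def __init__(self, interval_s=0.5):
        self.interval_s = interval_s
        self.max_rss = 0
        self._stop_event = threading.Event()
        self._thread = None

    def start(self):
        # Launch the sampler; daemon=True so it never blocks interpreter exit.
        self._stop_event.clear()
        self._thread = threading.Thread(target=self._sample, daemon=True)
        self._thread.start()

    def _sample(self):
        proc = psutil.Process()
        while not self._stop_event.is_set():
            # Track the peak resident set size seen so far.
            self.max_rss = max(self.max_rss, proc.memory_info().rss)
            time.sleep(self.interval_s)

    def stop(self):
        # Signal the sampler to finish and return the observed peak in bytes.
        self._stop_event.set()
        if self._thread is not None:
            self._thread.join()
        return self.max_rss
```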
@@ -85,6 +84,14 @@ def get_argprser():
85 | 84 |         help='if the value is 1, output the maximum memory consumption in warm-up iterations. If the value is 2,'
86 | 85 |         ' output the maximum memory consumption in all iterations.',
87 | 86 |     )
| 87 | +    parser.add_argument(
| 88 | +        '-mc_dir',
| 89 | +        '--memory_consumption_dir',
| 90 | +        default=None,
| 91 | +        required=False,
| 92 | +        type=str,
| 93 | +        help='Path to store memory consumption logs and chart.',
| 94 | +    )
88 | 95 |     parser.add_argument('-bs', '--batch_size', type=int, default=1, required=False, help='Batch size value')
89 | 96 |     parser.add_argument('--num_beams', type=int, default=1, help='Number of beams in the decoding strategy, activates beam_search if greater than 1')
90 | 97 |     parser.add_argument(
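The new flag lands on `args.memory_consumption_dir`, which is what `main()` checks in the next hunk. A quick self-contained check of the parsing behavior (standalone `argparse`, not the full `get_argprser()`; the `./mem_logs` path is only an example value):

```python
import argparse

parser = argparse.ArgumentParser()
# Mirrors the option added above; names copied from the diff.
parser.add_argument('-mc_dir', '--memory_consumption_dir', default=None,
                    required=False, type=str,
                    help='Path to store memory consumption logs and chart.')

# The dest is derived from the first long option name.
args = parser.parse_args(['-mc_dir', './mem_logs'])
assert args.memory_consumption_dir == './mem_logs'
```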
@@ -226,16 +233,17 @@ def main():
226 | 233 |                      f'{original_torch_thread_nums} to {torch.get_num_threads()}, avoid to use the CPU cores for OpenVINO inference.')
227 | 234 |         log.info(out_str)
228 | 235 |     if args.memory_consumption:
229 | | -        mem_consumption.start_collect_mem_consumption_thread()
| 236 | +        memory_monitor.create_monitors()
| 237 | +        if args.memory_consumption_dir:
| 238 | +            memory_monitor.set_dir(args.memory_consumption_dir)
230 | 239 |     try:
231 | 240 |         if model_args['use_case'] in ['text_gen', 'code_gen']:
232 | 241 |             iter_data_list, pretrain_time, iter_timestamp = CASE_TO_BENCH[model_args['use_case']](
233 | 242 |                 model_path, framework, args.device, args.tokens_len, args.streaming, model_args,
234 | | -                args.num_iters, mem_consumption)
| 243 | +                args.num_iters, memory_monitor)
235 | 244 |         else:
236 | 245 |             iter_data_list, pretrain_time, iter_timestamp = CASE_TO_BENCH[model_args['use_case']](
237 | | -                model_path, framework, args.device, model_args, args.num_iters,
238 | | -                mem_consumption)
| 246 | +                model_path, framework, args.device, model_args, args.num_iters, memory_monitor)
239 | 247 |         if args.report is not None or args.report_json is not None:
240 | 248 |             model_precision = ''
241 | 249 |             if framework == 'ov':
@@ -276,7 +284,7 @@ def main():
276 | 284 |         exit(1)
277 | 285 |     finally:
278 | 286 |         if args.memory_consumption:
279 | | -            mem_consumption.end_collect_mem_consumption_thread()
| 287 | +            memory_monitor.stop()
280 | 288 |
281 | 289 |
282 | 290 | if __name__ == '__main__':