@@ -9,7 +9,7 @@
 from openvino import get_version
 import torch
 import traceback
-from llm_bench_utils.memory_profile import MemConsumption
+from llm_bench_utils.memory_profile import MemMonitorWrapper
 import llm_bench_utils.output_csv
 import llm_bench_utils.output_json
 import task.visual_language_generation as bench_vlm
@@ -19,7 +19,7 @@
 import task.speech_to_text_generation as bench_speech

 DEFAULT_TORCH_THREAD_NUMS = 16
-mem_consumption = MemConsumption()
+memory_monitor = MemMonitorWrapper()


 def num_iters_type(x):
@@ -85,6 +85,14 @@ def get_argprser():
         help='if the value is 1, output the maximum memory consumption in warm-up iterations. If the value is 2,'
         ' output the maximum memory consumption in all iterations.',
     )
+    parser.add_argument(
+        '-mc_dir',
+        '--memory_consumption_dir',
+        default=None,
+        required=False,
+        type=str,
+        help='Path to store memory consumption logs and chart.',
+    )
     parser.add_argument('-bs', '--batch_size', type=int, default=1, required=False, help='Batch size value')
     parser.add_argument('--num_beams', type=int, default=1, help='Number of beams in the decoding strategy, activates beam_search if greater than 1')
     parser.add_argument(
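A minimal sketch of how the new `-mc_dir` option parses alongside the existing memory-consumption switch. The `-mc_dir` registration mirrors the lines added above; the `-mc`/`--memory_consumption` registration is assumed from the surrounding help text, and the argv values are illustrative only:

```python
import argparse

# Reduced parser holding just the two memory-related options from this file.
parser = argparse.ArgumentParser()
parser.add_argument('-mc', '--memory_consumption', type=int, default=0, required=False)
parser.add_argument('-mc_dir', '--memory_consumption_dir', default=None, required=False, type=str)

# Illustrative invocation: track memory across all iterations and write
# the logs/chart under ./mem_logs (directory name is made up).
args = parser.parse_args(['-mc', '2', '-mc_dir', './mem_logs'])
assert args.memory_consumption == 2
assert args.memory_consumption_dir == './mem_logs'
```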
@@ -221,21 +229,23 @@ def main():
         if half_nums_of_torch_threads > DEFAULT_TORCH_THREAD_NUMS:
             torch.set_num_threads(DEFAULT_TORCH_THREAD_NUMS)
         else:
+            half_nums_of_torch_threads = int(half_nums_of_torch_threads) if int(half_nums_of_torch_threads) else 1
             torch.set_num_threads(int(half_nums_of_torch_threads))
         log.info(f"The num_beams is {model_args['num_beams']}, update Torch thread num from "
                  f'{original_torch_thread_nums} to {torch.get_num_threads()}, avoid to use the CPU cores for OpenVINO inference.')
     log.info(out_str)
     if args.memory_consumption:
-        mem_consumption.start_collect_mem_consumption_thread()
+        memory_monitor.create_monitors()
+        if args.memory_consumption_dir:
+            memory_monitor.set_dir(args.memory_consumption_dir)
     try:
         if model_args['use_case'] in ['text_gen', 'code_gen']:
             iter_data_list, pretrain_time, iter_timestamp = CASE_TO_BENCH[model_args['use_case']](
                 model_path, framework, args.device, args.tokens_len, args.streaming, model_args,
-                args.num_iters, mem_consumption)
+                args.num_iters, memory_monitor)
         else:
             iter_data_list, pretrain_time, iter_timestamp = CASE_TO_BENCH[model_args['use_case']](
-                model_path, framework, args.device, model_args, args.num_iters,
-                mem_consumption)
+                model_path, framework, args.device, model_args, args.num_iters, memory_monitor)
         if args.report is not None or args.report_json is not None:
             model_precision = ''
             if framework == 'ov':
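The one-line guard added in the `else` branch above deserves a note: `torch.set_num_threads()` requires a positive count, so when half the detected core count truncates to zero (a single-core host), it is bumped to 1 first. A standalone sketch of the same clamp, with made-up core counts:

```python
import torch

for cores in (1, 3, 16):
    half_threads = cores / 2
    # Guard from the diff: fall back to 1 when int(half_threads) is 0,
    # since torch.set_num_threads() rejects non-positive values.
    half_threads = int(half_threads) if int(half_threads) else 1
    torch.set_num_threads(half_threads)
    print(f'{cores} cores -> torch threads: {torch.get_num_threads()}')
```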
@@ -276,7 +286,7 @@ def main():
             exit(1)
     finally:
         if args.memory_consumption:
-            mem_consumption.end_collect_mem_consumption_thread()
+            memory_monitor.stop()


 if __name__ == '__main__':
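Read together, the four hunks replace the old start/stop thread pair with the new wrapper's lifecycle: construct at module scope, `create_monitors()` once monitoring is requested, optionally `set_dir()`, then `stop()` in the `finally` block. A condensed sketch using only the calls visible in this diff; the directory name and the `run_benchmark` placeholder are illustrative:

```python
from llm_bench_utils.memory_profile import MemMonitorWrapper

memory_monitor = MemMonitorWrapper()


def run_benchmark(monitor):
    """Placeholder for the CASE_TO_BENCH dispatch in main()."""
    pass


memory_monitor.create_monitors()      # was: start_collect_mem_consumption_thread()
memory_monitor.set_dir('./mem_logs')  # optional; driven by --memory_consumption_dir
try:
    run_benchmark(memory_monitor)
finally:
    memory_monitor.stop()             # was: end_collect_mem_consumption_thread()
```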