Skip to content

Commit 991a7f1

Browse files
committed
update
1 parent c1fc02a commit 991a7f1

10 files changed

+141
-102
lines changed

tools/llm_bench/benchmark.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from openvino import get_version
1010
import torch
1111
import traceback
12-
from llm_bench_utils.memory_profile import MemMonitorWrapper
12+
from llm_bench_utils.memory_monitor import MemMonitorWrapper
1313
import llm_bench_utils.output_csv
1414
import llm_bench_utils.output_json
1515
import task.visual_language_generation as bench_vlm

tools/llm_bench/llm_bench_utils/gen_output_data.py

+4
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@ def gen_iterate_data(
1212
latency='',
1313
res_md5='',
1414
max_rss_mem='',
15+
max_rss_mem_increase='',
1516
max_sys_mem='',
17+
max_sys_mem_increase='',
1618
prompt_idx='',
1719
tokenization_time=[],
1820
mm_embeddings_preparation_time=''
@@ -30,7 +32,9 @@ def gen_iterate_data(
3032
iter_data['first_token_infer_latency'] = -1
3133
iter_data['other_tokens_infer_avg_latency'] = -1
3234
iter_data['max_rss_mem_consumption'] = max_rss_mem
35+
iter_data['max_rss_mem_increase'] = max_rss_mem_increase
3336
iter_data['max_sys_mem_consumption'] = max_sys_mem
37+
iter_data['max_sys_mem_increase'] = max_sys_mem_increase
3438
iter_data['prompt_idx'] = prompt_idx
3539
iter_data['tokenization_time'] = tokenization_time[0] if len(tokenization_time) > 0 else ''
3640
iter_data['detokenization_time'] = tokenization_time[1] if len(tokenization_time) > 1 else ''

tools/llm_bench/llm_bench_utils/memory_monitor.py

+85-8
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
# limitations under the License.
1111
import argparse
1212
import atexit
13-
import logging
1413
import queue
1514
import subprocess
1615
import threading
@@ -26,7 +25,7 @@
2625
import psutil
2726
from tabulate import tabulate
2827

29-
logger = logging.getLogger("memory_monitor")
28+
import logging as log
3029

3130
# CUSTOM FIX TO AVOID ISSUE: RuntimeError: main thread is not in main loop
3231
matplotlib.use('Agg')
@@ -50,7 +49,7 @@ class MemoryUnit(Enum):
5049
@lru_cache
5150
def system_memory_warning():
5251
# Log once
53-
logger.warning(
52+
log.warning(
5453
"Please note that MemoryType.SYSTEM in general is affected by other processes that change RAM availability."
5554
)
5655

@@ -265,7 +264,7 @@ def _monitor_memory(self):
265264
class memory_monitor_context:
266265
def __init__(
267266
self,
268-
interval: Optional[float] = 0.1,
267+
interval: Optional[float] = 0.01,
269268
memory_unit: Optional[MemoryUnit] = MemoryUnit.MiB,
270269
return_max_value: Optional[bool] = True,
271270
save_dir: Optional[Path] = None,
@@ -296,7 +295,7 @@ def __init__(
296295
self.return_max_value = return_max_value
297296
self.save_dir = save_dir
298297

299-
self.memory_data = {}
298+
self.memory_data = {'full_mem': {}, 'from_zero': {}}
300299

301300
def __enter__(self):
302301
for mm in self.memory_monitors.values():
@@ -312,18 +311,96 @@ def __exit__(self, exc_type, exc_val, exc_tb):
312311
mm.stop()
313312
for fz in [False, True]:
314313
time_values, memory_values = mm.get_data(memory_from_zero=fz)
315-
if fz:
316-
self.memory_data[mt] = max(memory_values) if self.return_max_value else (time_values, memory_values)
314+
315+
mm_measure_type = 'from_zero' if fz else 'full_mem'
316+
self.memory_data[mm_measure_type][mt] = max(memory_values) if self.return_max_value else (time_values, memory_values)
317317

318318
if self.save_dir:
319319
mm.save_memory_logs(
320320
time_values,
321321
memory_values,
322322
save_dir=self.save_dir,
323-
filename_suffix="_from-zero" if fz else "",
323+
filename_suffix="_mem_increase" if fz else "",
324324
)
325325

326326

327+
class MemMonitorWrapper():
328+
def __init__(self):
329+
self.save_dir = None
330+
331+
self.interval = 0.01
332+
self.memory_unit = MemoryUnit.MiB
333+
334+
self.memory_types = [MemoryType.RSS, MemoryType.SYSTEM]
335+
336+
self.memory_monitors = {}
337+
self.memory_data = {'full_mem': {}, 'from_zero': {}}
338+
339+
def create_monitors(self):
340+
for memory_type in self.memory_types:
341+
self.memory_monitors[memory_type] = MemoryMonitor(
342+
interval=self.interval, memory_type=memory_type, memory_unit=self.memory_unit
343+
)
344+
345+
def set_dir(self, dir):
346+
if not Path(dir).exists():
347+
log.warning(f"Path to dir for memory consamption data is not exists {dir}, run without it.")
348+
else:
349+
self.save_dir = Path(dir)
350+
351+
def start(self, delay=None):
352+
self.memory_data = {'full_mem': {}, 'from_zero': {}}
353+
for mm in self.memory_monitors.values():
354+
mm.start()
355+
356+
# compilation could be very fast, apply delay
357+
if delay:
358+
time.sleep(delay)
359+
else:
360+
time.sleep(self.interval * 3)
361+
362+
def stop_and_collect_data(self, dir_name='mem_monitor_log'):
363+
self.stop()
364+
365+
for mt, mm in self.memory_monitors.items():
366+
if not mm._memory_values_queue or len(mm._memory_values_queue.queue) == 0:
367+
continue
368+
369+
for from_zero in [False, True]:
370+
time_values, memory_values = mm.get_data(memory_from_zero=from_zero)
371+
372+
mm_measure_type = 'from_zero' if from_zero else 'full_mem'
373+
self.memory_data[mm_measure_type][mt] = max(memory_values)
374+
375+
if self.save_dir:
376+
mm.save_memory_logs(
377+
time_values,
378+
memory_values,
379+
save_dir=self.save_dir / dir_name,
380+
filename_suffix="_mem_increase" if from_zero else "",
381+
)
382+
383+
def stop(self):
384+
# Stop addition of new values as soon as possible
385+
for mm in self.memory_monitors.values():
386+
mm._monitoring_thread_should_stop = True
387+
388+
for mm in self.memory_monitors.values():
389+
mm.stop()
390+
391+
def get_data(self):
392+
return (self.memory_data['full_mem'].get(MemoryType.RSS, -1), self.memory_data['from_zero'].get(MemoryType.RSS, -1),
393+
self.memory_data['full_mem'].get(MemoryType.SYSTEM, -1), self.memory_data['from_zero'].get(MemoryType.SYSTEM, -1))
394+
395+
def log_data(self, comment):
396+
max_rss_mem, max_rss_increase, max_sys_mem, max_sys_increase = self.get_data()
397+
msg = (f"Max rss memory cost {comment}: {max_rss_mem:.2f}{self.memory_unit.value}, "
398+
f"rss memory increase {comment}: {max_rss_increase:.2f}{self.memory_unit.value}, "
399+
f"max system memory cost {comment}: {max_sys_mem:.2f}{self.memory_unit.value}, "
400+
f"system memory increase {comment}: {max_sys_increase:.2f}{self.memory_unit.value}")
401+
log.info(msg)
402+
403+
327404
def _cast_bytes_to(bytes, memory_unit, round_to_int=False):
328405
memory_unit_divisors = {
329406
MemoryUnit.B: 1,

tools/llm_bench/llm_bench_utils/memory_profile.py

-79
This file was deleted.

tools/llm_bench/llm_bench_utils/metrics_print.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,12 @@ def print_metrics(
6363
output_str = ''
6464
if iter_data['max_rss_mem_consumption'] != '' and iter_data['max_rss_mem_consumption'] > -1:
6565
output_str += f"Max rss memory cost: {iter_data['max_rss_mem_consumption']:.2f}MBytes, "
66+
if iter_data['max_rss_mem_increase'] != '' and iter_data['max_rss_mem_increase'] > -1:
67+
output_str += f"rss memory increase: {iter_data['max_rss_mem_increase']:.2f}MBytes, "
6668
if iter_data['max_sys_mem_consumption'] != '' and iter_data['max_sys_mem_consumption'] > -1:
67-
output_str += f"max system memory memory cost: {iter_data['max_sys_mem_consumption']:.2f}MBytes"
69+
output_str += f"max system memory memory cost: {iter_data['max_sys_mem_consumption']:.2f}MBytes, "
70+
if iter_data['max_sys_mem_increase'] != '' and iter_data['max_sys_mem_increase'] > -1:
71+
output_str += f"system memory increase: {iter_data['max_sys_mem_increase']:.2f}MBytes "
6872
if output_str != '':
6973
output_str = ' '.join([prefix, output_str])
7074
log.info(output_str)

tools/llm_bench/task/image_generation.py

+11-6
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,8 @@ def run_image_generation(image_param, num, image_id, pipe, args, iter_data_list,
9191
result_md5_list = []
9292
max_rss_mem_consumption = ''
9393
max_sys_mem_consumption = ''
94-
print("MEM mem_consumption MEASURES ")
94+
max_rss_mem_increase = ''
95+
max_sys_mem_increase = ''
9596
if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
9697
mem_consumption.start()
9798

@@ -109,9 +110,7 @@ def run_image_generation(image_param, num, image_id, pipe, args, iter_data_list,
109110
end = time.perf_counter()
110111
if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
111112
mem_consumption.stop_and_collect_data(f"{'P' + str(num) if num > 0 else 'warm-up'}_{proc_id}")
112-
print("MEM mem_consumption MEASURES FINISH 1")
113-
max_rss_mem_consumption, max_sys_mem_consumption = mem_consumption.get_data()
114-
print("MEM mem_consumption MEASURES FINISH 2")
113+
max_rss_mem_consumption, max_rss_mem_increase, max_sys_mem_consumption, max_sys_mem_increase = mem_consumption.get_data()
115114
for bs_idx in range(args['batch_size']):
116115
rslt_img_fn = llm_bench_utils.output_file.output_gen_image(res[bs_idx], args, image_id, num, bs_idx, proc_id, '.png')
117116
result_md5_list.append(hashlib.md5(Image.open(rslt_img_fn).tobytes(), usedforsecurity=False).hexdigest())
@@ -123,7 +122,9 @@ def run_image_generation(image_param, num, image_id, pipe, args, iter_data_list,
123122
gen_time=generation_time,
124123
res_md5=result_md5_list,
125124
max_rss_mem=max_rss_mem_consumption,
125+
max_rss_mem_increase=max_rss_mem_increase,
126126
max_sys_mem=max_sys_mem_consumption,
127+
max_sys_mem_increase=max_sys_mem_increase,
127128
prompt_idx=image_id,
128129
)
129130
iter_data_list.append(iter_data)
@@ -158,6 +159,8 @@ def run_image_generation_genai(image_param, num, image_id, pipe, args, iter_data
158159
result_md5_list = []
159160
max_rss_mem_consumption = ''
160161
max_sys_mem_consumption = ''
162+
max_rss_mem_increase = ''
163+
max_sys_mem_increase = ''
161164
if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
162165
mem_consumption.start()
163166

@@ -180,7 +183,7 @@ def run_image_generation_genai(image_param, num, image_id, pipe, args, iter_data
180183

181184
if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
182185
mem_consumption.stop_and_collect_data(f"{'P' + str(num) if num > 0 else 'warm-up'}_{proc_id}")
183-
max_rss_mem_consumption, max_sys_mem_consumption = mem_consumption.get_data()
186+
max_rss_mem_consumption, max_rss_mem_increase, max_sys_mem_consumption, max_sys_mem_increase = mem_consumption.get_data()
184187
for bs_idx in range(args['batch_size']):
185188
image = Image.fromarray(res[bs_idx])
186189
rslt_img_fn = llm_bench_utils.output_file.output_gen_image(image, args, image_id, num, bs_idx, proc_id, '.png')
@@ -193,7 +196,9 @@ def run_image_generation_genai(image_param, num, image_id, pipe, args, iter_data
193196
gen_time=generation_time,
194197
res_md5=result_md5_list,
195198
max_rss_mem=max_rss_mem_consumption,
199+
max_rss_mem_increase=max_rss_mem_increase,
196200
max_sys_mem=max_sys_mem_consumption,
201+
max_sys_mem_increase=max_sys_mem_increase,
197202
prompt_idx=image_id,
198203
)
199204
iter_data_list.append(iter_data)
@@ -233,7 +238,7 @@ def run_image_generation_benchmark(model_path, framework, device, args, num_iter
233238
if "guidance_scale" in static_input_args:
234239
args["guidance_scale"] = static_input_args["guidance_scale"]
235240

236-
pipe, pretrain_time, use_genai, callback = FW_UTILS[framework].create_image_gen_model(model_path, device, **args)
241+
pipe, pretrain_time, use_genai, callback = FW_UTILS[framework].create_image_gen_model(model_path, device, mem_consumption, **args)
237242
iter_data_list = []
238243

239244
if framework == "ov" and not use_genai:

tools/llm_bench/task/speech_to_text_generation.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ def run_speech_2_txt_generation(input_param, args, md5_list, iter_data_list):
2525
result_md5_list = []
2626
max_rss_mem_consumption = ''
2727
max_sys_mem_consumption = ''
28+
max_rss_mem_increase = ''
29+
max_sys_mem_increase = ''
2830
pipe = input_param['pipe']
2931
raw_speech = input_param['raw_speech']
3032
num = input_param['iter_idx']
@@ -85,15 +87,17 @@ def run_speech_2_txt_generation(input_param, args, md5_list, iter_data_list):
8587
md5_list[num][speech_id] = result_md5_list
8688
if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
8789
mem_consumption.stop_and_collect_data(f"{'P' + str(num) if num > 0 else 'warm-up'}")
88-
max_rss_mem_consumption, max_sys_mem_consumption = mem_consumption.get_data()
90+
max_rss_mem_consumption, max_rss_mem_increase, max_sys_mem_consumption, max_sys_mem_increase = mem_consumption.get_data()
8991

9092
iter_data = gen_output_data.gen_iterate_data(
9193
iter_idx=num,
9294
out_size=out_token_size,
9395
gen_time=generation_time,
9496
res_md5=result_md5_list,
9597
max_rss_mem=max_rss_mem_consumption,
98+
max_rss_mem_increase=max_rss_mem_increase,
9699
max_sys_mem=max_sys_mem_consumption,
100+
max_sys_mem_increase=max_sys_mem_increase,
97101
prompt_idx=speech_id,
98102
)
99103
iter_data_list.append(iter_data)

tools/llm_bench/task/super_resolution_generation.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -34,14 +34,16 @@ def run_ldm_super_resolution(img, num, pipe, args, framework, iter_data_list, im
3434
low_res_img = low_res_img.resize((resize_image_width, resize_image_height))
3535
max_rss_mem_consumption = ''
3636
max_sys_mem_consumption = ''
37+
max_rss_mem_increase = ''
38+
max_sys_mem_increase = ''
3739
if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
3840
mem_consumption.start()
3941
start = time.perf_counter()
4042
res = pipe(low_res_img, num_inference_steps=nsteps, tm_list=tm_list)
4143
end = time.perf_counter()
4244
if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
4345
mem_consumption.stop_and_collect_data(f"{'P' + str(num) if num > 0 else 'warm-up'}_{proc_id}")
44-
max_rss_mem_consumption, max_sys_mem_consumption = mem_consumption.get_data()
46+
max_rss_mem_consumption, max_rss_mem_increase, max_sys_mem_consumption, max_sys_mem_increase = mem_consumption.get_data()
4547
result_md5_list = []
4648
if framework == 'ov':
4749
rslt_img_fn = llm_bench_utils.output_file.output_gen_image(res[0], args, image_id, num, None, proc_id, '.png')
@@ -54,7 +56,9 @@ def run_ldm_super_resolution(img, num, pipe, args, framework, iter_data_list, im
5456
gen_time=generation_time,
5557
res_md5=result_md5_list,
5658
max_rss_mem=max_rss_mem_consumption,
59+
max_rss_mem_increase=max_rss_mem_increase,
5760
max_sys_mem=max_sys_mem_consumption,
61+
max_sys_mem_increase=max_sys_mem_increase,
5862
prompt_idx=image_id,
5963
)
6064
iter_data_list.append(iter_data)

0 commit comments

Comments
 (0)