Skip to content

Commit f37e4ec

Browse files
committed
update
1 parent c1fc02a commit f37e4ec

10 files changed

+142
-147
lines changed

tools/llm_bench/benchmark.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from openvino import get_version
1010
import torch
1111
import traceback
12-
from llm_bench_utils.memory_profile import MemMonitorWrapper
12+
from llm_bench_utils.memory_monitor import MemMonitorWrapper
1313
import llm_bench_utils.output_csv
1414
import llm_bench_utils.output_json
1515
import task.visual_language_generation as bench_vlm

tools/llm_bench/llm_bench_utils/gen_output_data.py

+4
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@ def gen_iterate_data(
1212
latency='',
1313
res_md5='',
1414
max_rss_mem='',
15+
max_rss_mem_increase='',
1516
max_sys_mem='',
17+
max_sys_mem_increase='',
1618
prompt_idx='',
1719
tokenization_time=[],
1820
mm_embeddings_preparation_time=''
@@ -30,7 +32,9 @@ def gen_iterate_data(
3032
iter_data['first_token_infer_latency'] = -1
3133
iter_data['other_tokens_infer_avg_latency'] = -1
3234
iter_data['max_rss_mem_consumption'] = max_rss_mem
35+
iter_data['max_rss_mem_increase'] = max_rss_mem_increase
3336
iter_data['max_sys_mem_consumption'] = max_sys_mem
37+
iter_data['max_sys_mem_increase'] = max_sys_mem_increase
3438
iter_data['prompt_idx'] = prompt_idx
3539
iter_data['tokenization_time'] = tokenization_time[0] if len(tokenization_time) > 0 else ''
3640
iter_data['detokenization_time'] = tokenization_time[1] if len(tokenization_time) > 1 else ''

tools/llm_bench/llm_bench_utils/memory_monitor.py

+86-53
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,8 @@
88
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
99
# See the License for the specific language governing permissions and
1010
# limitations under the License.
11-
import argparse
1211
import atexit
13-
import logging
1412
import queue
15-
import subprocess
1613
import threading
1714
import time
1815
from enum import Enum
@@ -21,12 +18,11 @@
2118
from pathlib import Path
2219
from typing import Callable, List, Optional, Tuple
2320

21+
import psutil
2422
import matplotlib
2523
import matplotlib.pyplot as plt
26-
import psutil
27-
from tabulate import tabulate
24+
import logging as log
2825

29-
logger = logging.getLogger("memory_monitor")
3026

3127
# CUSTOM FIX TO AVOID ISSUE: RuntimeError: main thread is not in main loop
3228
matplotlib.use('Agg')
@@ -50,7 +46,7 @@ class MemoryUnit(Enum):
5046
@lru_cache
5147
def system_memory_warning():
5248
# Log once
53-
logger.warning(
49+
log.warning(
5450
"Please note that MemoryType.SYSTEM in general is affected by other processes that change RAM availability."
5551
)
5652

@@ -265,7 +261,7 @@ def _monitor_memory(self):
265261
class memory_monitor_context:
266262
def __init__(
267263
self,
268-
interval: Optional[float] = 0.1,
264+
interval: Optional[float] = 0.01,
269265
memory_unit: Optional[MemoryUnit] = MemoryUnit.MiB,
270266
return_max_value: Optional[bool] = True,
271267
save_dir: Optional[Path] = None,
@@ -296,7 +292,7 @@ def __init__(
296292
self.return_max_value = return_max_value
297293
self.save_dir = save_dir
298294

299-
self.memory_data = {}
295+
self.memory_data = {'full_mem': {}, 'from_zero': {}}
300296

301297
def __enter__(self):
302298
for mm in self.memory_monitors.values():
@@ -312,18 +308,96 @@ def __exit__(self, exc_type, exc_val, exc_tb):
312308
mm.stop()
313309
for fz in [False, True]:
314310
time_values, memory_values = mm.get_data(memory_from_zero=fz)
315-
if fz:
316-
self.memory_data[mt] = max(memory_values) if self.return_max_value else (time_values, memory_values)
311+
312+
mm_measure_type = 'from_zero' if fz else 'full_mem'
313+
self.memory_data[mm_measure_type][mt] = max(memory_values) if self.return_max_value else (time_values, memory_values)
317314

318315
if self.save_dir:
319316
mm.save_memory_logs(
320317
time_values,
321318
memory_values,
322319
save_dir=self.save_dir,
323-
filename_suffix="_from-zero" if fz else "",
320+
filename_suffix="_mem_increase" if fz else "",
324321
)
325322

326323

324+
class MemMonitorWrapper:
    """Drive one ``MemoryMonitor`` per memory type and collect their peak values.

    Typical sequence, as far as this class shows: ``create_monitors()`` once,
    then ``start()`` / ``stop_and_collect_data()`` around the measured code,
    and finally ``get_data()`` or ``log_data()`` to read the results.
    """

    def __init__(self):
        # Directory for saved memory logs; None means logs are not saved.
        self.save_dir = None

        # Sampling interval handed to every MemoryMonitor (also used to
        # derive the default start-up delay in start()).
        self.interval = 0.01
        self.memory_unit = MemoryUnit.MiB

        self.memory_types = [MemoryType.RSS, MemoryType.SYSTEM]

        # memory type -> MemoryMonitor, filled by create_monitors().
        self.memory_monitors = {}
        # 'full_mem' holds maxima of get_data(memory_from_zero=False),
        # 'from_zero' holds maxima of get_data(memory_from_zero=True)
        # (presumably values relative to the first sample - confirm against
        # MemoryMonitor.get_data).
        self.memory_data = {'full_mem': {}, 'from_zero': {}}

    def create_monitors(self):
        """Create a ``MemoryMonitor`` for every entry of ``self.memory_types``."""
        for memory_type in self.memory_types:
            self.memory_monitors[memory_type] = MemoryMonitor(
                interval=self.interval, memory_type=memory_type, memory_unit=self.memory_unit
            )

    def set_dir(self, dir):
        """Set the directory used for saving memory logs.

        The path is accepted only if it is an existing directory. Otherwise a
        warning is logged and ``self.save_dir`` is left unchanged, so
        monitoring runs without saving logs.
        """
        target = Path(dir)
        # is_dir() rather than exists(): an existing regular file cannot be
        # used as the parent in `self.save_dir / dir_name` later on.
        if not target.is_dir():
            log.warning(
                f"Path for memory consumption data is not an existing directory: {dir}, "
                "running without saving memory logs."
            )
        else:
            self.save_dir = target

    def start(self, delay=None):
        """Reset collected data, start all monitors and wait briefly.

        Args:
            delay: seconds to sleep after starting the monitors. When it is
                falsy (``None`` or ``0``), three sampling intervals are used.
        """
        self.memory_data = {'full_mem': {}, 'from_zero': {}}
        for mm in self.memory_monitors.values():
            mm.start()

        # compilation could be very fast, apply delay
        time.sleep(delay if delay else self.interval * 3)

    def stop_and_collect_data(self, dir_name='mem_monitor_log'):
        """Stop all monitors and store the maximum of their recorded values.

        Monitors whose value queue is missing or empty are skipped. When a
        save directory was set, the raw series are passed to
        ``save_memory_logs`` with ``save_dir / dir_name`` as the target.

        Args:
            dir_name: sub-directory name appended to ``self.save_dir``.
        """
        self.stop()

        for mt, mm in self.memory_monitors.items():
            if not mm._memory_values_queue or len(mm._memory_values_queue.queue) == 0:
                continue

            for from_zero in [False, True]:
                time_values, memory_values = mm.get_data(memory_from_zero=from_zero)

                mm_measure_type = 'from_zero' if from_zero else 'full_mem'
                self.memory_data[mm_measure_type][mt] = max(memory_values)

                if self.save_dir:
                    mm.save_memory_logs(
                        time_values,
                        memory_values,
                        save_dir=self.save_dir / dir_name,
                        filename_suffix="_mem_increase" if from_zero else "",
                    )

    def stop(self):
        """Stop every monitor, raising all stop flags before calling ``stop()``."""
        # Stop addition of new values as soon as possible
        for mm in self.memory_monitors.values():
            mm._monitoring_thread_should_stop = True

        for mm in self.memory_monitors.values():
            mm.stop()

    def get_data(self):
        """Return ``(rss_max, rss_increase, sys_max, sys_increase)``.

        Any value that was not collected is reported as ``-1``.
        """
        full_mem = self.memory_data['full_mem']
        from_zero = self.memory_data['from_zero']
        return (full_mem.get(MemoryType.RSS, -1), from_zero.get(MemoryType.RSS, -1),
                full_mem.get(MemoryType.SYSTEM, -1), from_zero.get(MemoryType.SYSTEM, -1))

    def log_data(self, comment):
        """Log the four values from ``get_data()``, each labelled with ``comment``."""
        max_rss_mem, max_rss_increase, max_sys_mem, max_sys_increase = self.get_data()
        msg = (f"Max rss memory cost {comment}: {max_rss_mem:.2f}{self.memory_unit.value}, "
               f"rss memory increase {comment}: {max_rss_increase:.2f}{self.memory_unit.value}, "
               f"max system memory cost {comment}: {max_sys_mem:.2f}{self.memory_unit.value}, "
               f"system memory increase {comment}: {max_sys_increase:.2f}{self.memory_unit.value}")
        log.info(msg)
399+
400+
327401
def _cast_bytes_to(bytes, memory_unit, round_to_int=False):
328402
memory_unit_divisors = {
329403
MemoryUnit.B: 1,
@@ -343,44 +417,3 @@ def _subtract_first_element(data):
343417
data[i] = data[i] - data[0]
344418
data[0] = 0
345419
return data
346-
347-
348-
if __name__ == "__main__":
349-
parser = argparse.ArgumentParser(
350-
description="Memory Monitor Tool. Monitors memory for an executable and saves logs at specified location.",
351-
epilog="Examples:\n"
352-
" python memory_monitor.py --log-dir ./allocation_logs python allocate.py\n"
353-
" python memory_monitor.py optimum-cli export openvino ...",
354-
formatter_class=argparse.RawTextHelpFormatter,
355-
)
356-
parser.add_argument(
357-
"--log-dir", type=str, default="memory_logs", help="A directory to save logs at. './memory_logs' by default."
358-
)
359-
parser.add_argument("executable", type=str, nargs="+", help="Target executable to monitor memory for.")
360-
args = parser.parse_args()
361-
362-
memory_monitors = [
363-
MemoryMonitor(memory_type=mt, include_child_processes=True).start()
364-
for mt in (MemoryType.RSS, MemoryType.SYSTEM)
365-
]
366-
367-
with subprocess.Popen(" ".join(args.executable), shell=True) as p:
368-
p.wait()
369-
370-
# Stop addition of new values as soon as possible
371-
for mm in memory_monitors:
372-
mm._monitoring_thread_should_stop = True
373-
374-
summary_data = []
375-
for mm in memory_monitors:
376-
mm.stop()
377-
for fz in (True, False):
378-
time_values, memory_values = mm.get_data(memory_from_zero=fz)
379-
# Most probably the last value is recorded once the child process has already died
380-
time_values, memory_values = time_values[:-1], memory_values[:-1]
381-
mm.save_memory_logs(
382-
time_values, memory_values, save_dir=Path(args.log_dir), filename_suffix="_from-zero" if fz else ""
383-
)
384-
summary_data.append([mm.memory_type.value, fz, f"{int(max(memory_values))} {mm.memory_unit.value}"])
385-
print("\nMemory summary:")
386-
print(tabulate(summary_data, headers=["Memory type", "From zero", "Peak value"]))

tools/llm_bench/llm_bench_utils/memory_profile.py

-79
This file was deleted.

tools/llm_bench/llm_bench_utils/metrics_print.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,12 @@ def print_metrics(
6363
output_str = ''
6464
if iter_data['max_rss_mem_consumption'] != '' and iter_data['max_rss_mem_consumption'] > -1:
6565
output_str += f"Max rss memory cost: {iter_data['max_rss_mem_consumption']:.2f}MBytes, "
66+
if iter_data['max_rss_mem_increase'] != '' and iter_data['max_rss_mem_increase'] > -1:
67+
output_str += f"rss memory increase: {iter_data['max_rss_mem_increase']:.2f}MBytes, "
6668
if iter_data['max_sys_mem_consumption'] != '' and iter_data['max_sys_mem_consumption'] > -1:
67-
output_str += f"max system memory memory cost: {iter_data['max_sys_mem_consumption']:.2f}MBytes"
69+
output_str += f"max system memory memory cost: {iter_data['max_sys_mem_consumption']:.2f}MBytes, "
70+
if iter_data['max_sys_mem_increase'] != '' and iter_data['max_sys_mem_increase'] > -1:
71+
output_str += f"system memory increase: {iter_data['max_sys_mem_increase']:.2f}MBytes "
6872
if output_str != '':
6973
output_str = ' '.join([prefix, output_str])
7074
log.info(output_str)

tools/llm_bench/task/image_generation.py

+11-6
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,8 @@ def run_image_generation(image_param, num, image_id, pipe, args, iter_data_list,
9191
result_md5_list = []
9292
max_rss_mem_consumption = ''
9393
max_sys_mem_consumption = ''
94-
print("MEM mem_consumption MEASURES ")
94+
max_rss_mem_increase = ''
95+
max_sys_mem_increase = ''
9596
if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
9697
mem_consumption.start()
9798

@@ -109,9 +110,7 @@ def run_image_generation(image_param, num, image_id, pipe, args, iter_data_list,
109110
end = time.perf_counter()
110111
if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
111112
mem_consumption.stop_and_collect_data(f"{'P' + str(num) if num > 0 else 'warm-up'}_{proc_id}")
112-
print("MEM mem_consumption MEASURES FINISH 1")
113-
max_rss_mem_consumption, max_sys_mem_consumption = mem_consumption.get_data()
114-
print("MEM mem_consumption MEASURES FINISH 2")
113+
max_rss_mem_consumption, max_rss_mem_increase, max_sys_mem_consumption, max_sys_mem_increase = mem_consumption.get_data()
115114
for bs_idx in range(args['batch_size']):
116115
rslt_img_fn = llm_bench_utils.output_file.output_gen_image(res[bs_idx], args, image_id, num, bs_idx, proc_id, '.png')
117116
result_md5_list.append(hashlib.md5(Image.open(rslt_img_fn).tobytes(), usedforsecurity=False).hexdigest())
@@ -123,7 +122,9 @@ def run_image_generation(image_param, num, image_id, pipe, args, iter_data_list,
123122
gen_time=generation_time,
124123
res_md5=result_md5_list,
125124
max_rss_mem=max_rss_mem_consumption,
125+
max_rss_mem_increase=max_rss_mem_increase,
126126
max_sys_mem=max_sys_mem_consumption,
127+
max_sys_mem_increase=max_sys_mem_increase,
127128
prompt_idx=image_id,
128129
)
129130
iter_data_list.append(iter_data)
@@ -158,6 +159,8 @@ def run_image_generation_genai(image_param, num, image_id, pipe, args, iter_data
158159
result_md5_list = []
159160
max_rss_mem_consumption = ''
160161
max_sys_mem_consumption = ''
162+
max_rss_mem_increase = ''
163+
max_sys_mem_increase = ''
161164
if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
162165
mem_consumption.start()
163166

@@ -180,7 +183,7 @@ def run_image_generation_genai(image_param, num, image_id, pipe, args, iter_data
180183

181184
if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
182185
mem_consumption.stop_and_collect_data(f"{'P' + str(num) if num > 0 else 'warm-up'}_{proc_id}")
183-
max_rss_mem_consumption, max_sys_mem_consumption = mem_consumption.get_data()
186+
max_rss_mem_consumption, max_rss_mem_increase, max_sys_mem_consumption, max_sys_mem_increase = mem_consumption.get_data()
184187
for bs_idx in range(args['batch_size']):
185188
image = Image.fromarray(res[bs_idx])
186189
rslt_img_fn = llm_bench_utils.output_file.output_gen_image(image, args, image_id, num, bs_idx, proc_id, '.png')
@@ -193,7 +196,9 @@ def run_image_generation_genai(image_param, num, image_id, pipe, args, iter_data
193196
gen_time=generation_time,
194197
res_md5=result_md5_list,
195198
max_rss_mem=max_rss_mem_consumption,
199+
max_rss_mem_increase=max_rss_mem_increase,
196200
max_sys_mem=max_sys_mem_consumption,
201+
max_sys_mem_increase=max_sys_mem_increase,
197202
prompt_idx=image_id,
198203
)
199204
iter_data_list.append(iter_data)
@@ -233,7 +238,7 @@ def run_image_generation_benchmark(model_path, framework, device, args, num_iter
233238
if "guidance_scale" in static_input_args:
234239
args["guidance_scale"] = static_input_args["guidance_scale"]
235240

236-
pipe, pretrain_time, use_genai, callback = FW_UTILS[framework].create_image_gen_model(model_path, device, **args)
241+
pipe, pretrain_time, use_genai, callback = FW_UTILS[framework].create_image_gen_model(model_path, device, mem_consumption, **args)
237242
iter_data_list = []
238243

239244
if framework == "ov" and not use_genai:

0 commit comments

Comments
 (0)