diff --git a/tools/llm_bench/benchmark.py b/tools/llm_bench/benchmark.py index 819879e9f4..1884dba15c 100644 --- a/tools/llm_bench/benchmark.py +++ b/tools/llm_bench/benchmark.py @@ -9,7 +9,7 @@ from openvino import get_version import torch import traceback -from llm_bench_utils.memory_profile import MemConsumption +from llm_bench_utils.memory_monitor import MemMonitorWrapper import llm_bench_utils.output_csv import llm_bench_utils.output_json import task.visual_language_generation as bench_vlm @@ -19,7 +19,7 @@ import task.speech_to_text_generation as bench_speech DEFAULT_TORCH_THREAD_NUMS = 16 -mem_consumption = MemConsumption() +memory_monitor = MemMonitorWrapper() def num_iters_type(x): @@ -87,11 +87,19 @@ def get_argprser(): ) parser.add_argument( "--memory_consumption_delay", - default=0.5, + default=None, required=False, type=float, help="delay for memory consumption check in seconds, smaller value will lead to more precised memory consumption, but may affects performance." - "It is not recommended to run memory consumption and performance benchmarking in the same time" + "It is not recommended to run memory consumption and performance benchmarking in the same time", + ) + parser.add_argument( + '-mc_dir', + '--memory_consumption_dir', + default=None, + required=False, + type=str, + help='Path to store memory consumption logs and chart.', ) parser.add_argument('-bs', '--batch_size', type=int, default=1, required=False, help='Batch size value') parser.add_argument('--num_beams', type=int, default=1, help='Number of beams in the decoding strategy, activates beam_search if greater than 1') @@ -233,22 +241,25 @@ def main(): if half_nums_of_torch_threads > DEFAULT_TORCH_THREAD_NUMS: torch.set_num_threads(DEFAULT_TORCH_THREAD_NUMS) else: + half_nums_of_torch_threads = int(half_nums_of_torch_threads) if int(half_nums_of_torch_threads) else 1 torch.set_num_threads(int(half_nums_of_torch_threads)) log.info(f"The num_beams is {model_args['num_beams']}, update Torch thread num from " f'{original_torch_thread_nums} to {torch.get_num_threads()}, avoid to use the CPU cores for OpenVINO inference.') log.info(out_str) if args.memory_consumption: - mem_consumption.delay = args.memory_consumption_delay - mem_consumption.start_collect_mem_consumption_thread() + if args.memory_consumption_delay: + memory_monitor.interval = args.memory_consumption_delay + memory_monitor.create_monitors() + if args.memory_consumption_dir: + memory_monitor.set_dir(args.memory_consumption_dir) try: if model_args['use_case'] in ['text_gen', 'code_gen']: iter_data_list, pretrain_time, iter_timestamp = CASE_TO_BENCH[model_args['use_case']]( model_path, framework, args.device, args.tokens_len, args.streaming, model_args, - args.num_iters, mem_consumption) + args.num_iters, memory_monitor) else: iter_data_list, pretrain_time, iter_timestamp = CASE_TO_BENCH[model_args['use_case']]( - model_path, framework, args.device, model_args, args.num_iters, - mem_consumption) + model_path, framework, args.device, model_args, args.num_iters, memory_monitor) if args.report is not None or args.report_json is not None: model_precision = '' if framework == 'ov': @@ -289,7 +300,7 @@ def main(): exit(1) finally: if args.memory_consumption: - mem_consumption.end_collect_mem_consumption_thread() + memory_monitor.stop() if __name__ == '__main__': diff --git a/tools/llm_bench/llm_bench_utils/gen_output_data.py b/tools/llm_bench/llm_bench_utils/gen_output_data.py index 594903912d..1287a0fd81 100644 ---
a/tools/llm_bench/llm_bench_utils/gen_output_data.py +++ b/tools/llm_bench/llm_bench_utils/gen_output_data.py @@ -12,8 +12,9 @@ def gen_iterate_data( latency='', res_md5='', max_rss_mem='', - max_shared_mem='', - max_uss_mem='', + max_rss_mem_increase='', + max_sys_mem='', + max_sys_mem_increase='', prompt_idx='', tokenization_time=[], mm_embeddings_preparation_time='' @@ -31,8 +32,9 @@ def gen_iterate_data( iter_data['first_token_infer_latency'] = -1 iter_data['other_tokens_infer_avg_latency'] = -1 iter_data['max_rss_mem_consumption'] = max_rss_mem - iter_data['max_shared_mem_consumption'] = max_shared_mem - iter_data['max_uss_mem_consumption'] = max_uss_mem + iter_data['max_rss_mem_increase'] = max_rss_mem_increase + iter_data['max_sys_mem_consumption'] = max_sys_mem + iter_data['max_sys_mem_increase'] = max_sys_mem_increase iter_data['prompt_idx'] = prompt_idx iter_data['tokenization_time'] = tokenization_time[0] if len(tokenization_time) > 0 else '' iter_data['detokenization_time'] = tokenization_time[1] if len(tokenization_time) > 1 else '' diff --git a/tools/llm_bench/llm_bench_utils/memory_monitor.py b/tools/llm_bench/llm_bench_utils/memory_monitor.py new file mode 100644 index 0000000000..db6f5fefe9 --- /dev/null +++ b/tools/llm_bench/llm_bench_utils/memory_monitor.py @@ -0,0 +1,419 @@ +# Copyright (c) 2025 Intel Corporation +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import atexit +import queue +import threading +import time +from enum import Enum +from functools import lru_cache +from functools import partial +from pathlib import Path +from typing import Callable, List, Optional, Tuple + +import psutil +import matplotlib +import matplotlib.pyplot as plt +import logging as log + + +# CUSTOM FIX TO AVOID ISSUE: RuntimeError: main thread is not in main loop +matplotlib.use('Agg') + + +class MemoryType(Enum): + RSS = "rss" + SYSTEM = "system" + + +class MemoryUnit(Enum): + B = "B" # byte + KiB = "KiB" # Kibibyte + MiB = "MiB" # Mibibyte + GiB = "GiB" # Gibibyte + KB = "KB" # Kilobyte + MB = "MB" # Megabyte + GB = "GB" # Gigabyte + + +@lru_cache +def system_memory_warning(): + # Log once + log.warning( + "Please note that MemoryType.SYSTEM in general is affected by other processes that change RAM availability." + ) + + +class MemoryMonitor: + def __init__( + self, + interval: Optional[float] = 0.1, + memory_type: Optional[MemoryType] = MemoryType.RSS, + memory_unit: Optional[MemoryUnit] = MemoryUnit.MiB, + include_child_processes: Optional[bool] = None, + ): + """ + Memory monitoring utility to measure python process memory footprint. After start() is called, it + creates a thread which runs in parallel and takes memory measurements every *interval* seconds using the + specified *memory_type* approach. When stop() is called, the memory measuring thread is stopped. The results + can be obtained by calling get_data(). Memory logs can be saved by calling save_memory_logs(). There are two + log files: one with data values in a .txt format and another one in a form of a 2D time-memory plot. 
+ + The memory monitor itself allocates some memory, especially during figure saving. It is advised to use it + for measuring large memory processes. + + :param interval: How frequently to take memory measurements (in seconds). + :param memory_type: Type of memory to log. Accepts two possible values: + - MemoryType.RSS: Resident Set Size is the portion of memory occupied by a process that is held in RAM. + Values are obtained through psutil library. If some data is read using mmap, RSS will report this data + as allocated, however this is not necessarily the case. + - MemoryType.SYSTEM: This metric is defined as the difference between total system virtual memory + and system available memory. Be aware that this way it is affected by other processes that can change + RAM availability. It is advised to call get_data(memory_from_zero=True) for this type of memory logging, + if one is interested in memory footprint for a certain process. This subtracts the starting memory from + all values. + + RSS and SYSTEM behave differently when mmap is used, e.g. during OV model loading. RSS will report data + which was read with mmap enabled as allocated, however this is not necessarily the case. SYSTEM does not + report memory loaded with mmap. So it can be used to analyze "pure" memory usage without contribution of + mmap pages which are actually free, but are reported as allocated by RSS. + :param memory_unit: Unit to report memory in. + :param include_child_processes: For MemoryType.RSS only: whether to include memory of child processes. If not + provided, child processes are counted. + """ + self.interval = interval + self.memory_type = memory_type + if memory_type == MemoryType.SYSTEM: + system_memory_warning() + elif memory_type == MemoryType.RSS: + if include_child_processes is None: + include_child_processes = True + else: + raise ValueError("Unknown memory type to log") + self.memory_unit = memory_unit + self.include_child_processes = include_child_processes + + self._monitoring_thread_should_stop = False + self._monitoring_in_progress = False + + self._memory_monitor_thread = None + self._memory_values_queue = None + self._stop_logging_atexit_fn = None + + def start(self, at_exit_fn: Optional[Callable] = None) -> "MemoryMonitor": + """ + Start memory monitoring. + + :param at_exit_fn: A callable to execute at program exit. Useful for providing a logs saving routine, e.g. + ``` + at_exit_fn = lambda: memory_monitor.save_memory_logs(*memory_monitor.get_data(), save_dir) + memory_monitor.start(at_exit_fn=at_exit_fn) + ``` + """ + if self._monitoring_in_progress: + raise Exception("Monitoring already in progress") + + self._memory_values_queue = queue.Queue() + self._monitoring_thread_should_stop = False + + self._memory_monitor_thread = threading.Thread(target=self._monitor_memory) + self._memory_monitor_thread.daemon = True + self._memory_monitor_thread.start() + if at_exit_fn: + self._stop_logging_atexit_fn = at_exit_fn + atexit.register(self._stop_logging_atexit_fn) + + self._monitoring_in_progress = True + + return self + + def stop(self): + """ + Stop memory monitoring. 
+ """ + if not self._monitoring_in_progress: + return + self._monitoring_thread_should_stop = True + self._monitoring_in_progress = False + self._memory_monitor_thread.join() + if self._stop_logging_atexit_fn is not None: + atexit.unregister(self._stop_logging_atexit_fn) + self._stop_logging_atexit_fn = None + + def get_data(self, memory_from_zero: Optional[bool] = False) -> Tuple[List, List]: + """ + :param memory_from_zero: Whether to normalize memory measurements by subtracting the first value. This way + the measurements will start with 0. Hence, is not very reliable and may actually result in negative values. + :returns: A tuple of list where the first element corresponds to measurements timestamps and the second one -- + to memory values. + """ + memory_usage_data = list(self._memory_values_queue.queue) + if len(memory_usage_data) == 0: + return [], [] + time_values, memory_values = tuple(zip(*memory_usage_data)) + time_values = _subtract_first_element(list(time_values)) + if memory_from_zero: + memory_values = _subtract_first_element(list(memory_values)) + + # Convert to target memory unit + memory_values = list(map(partial(_cast_bytes_to, memory_unit=self.memory_unit), memory_values)) + + return time_values, memory_values + + def save_memory_logs( + self, + time_values: List[float], + memory_values: List[float], + save_dir: Path, + plot_title: Optional[str] = "", + filename_suffix: Optional[str] = "", + ): + """ + Save memory logs as a text file and a 2D plot. + + :param time_values: Timestamps of the memory measurements. + :param memory_values: Memory measurements. + :param save_dir: Directory to save logs into. + :param plot_title: A title for a plot. + :param filename_suffix: A string suffix to give to the saved files. + """ + if not save_dir.exists(): + save_dir.mkdir(parents=True) + + filename_label = f"{self.memory_type.value}_memory_usage{filename_suffix}" + # Save measurements to text file + log_filepath = save_dir / f"{filename_label}.txt" + with open(log_filepath, "w") as log_file: + if len(time_values) == 0: + log_file.write("No measurements recorded.\nPlease make sure logging duration or interval were enough.") + return + for timestamp, memory_usage in zip(time_values, memory_values): + log_file.write(f"{timestamp} {memory_usage:.3f}\n") + + log_file.writelines( + [ + f"Total time: {time_values[-1] - time_values[0]}\n", + f"Max memory: {max(memory_values):.3f} ({self.memory_unit.value})", + ] + ) + + # Save measurements plot + self.save_memory_plot(log_filepath, plot_title) + + def save_memory_plot(self, log_filepath: Path, plot_title: Optional[str] = "", filename_suffix: Optional[str] = ""): + """ + Parse pre-saved txt file logs and plot a new figure based on this data. May be useful for re-plotting with + different title. + + :param log_filepath: A path to a .txt log file. + :param plot_title: A title to give to a plot. + :param filename_suffix: A string suffix to give to the saved figure. 
+ """ + with open(log_filepath, "r") as f: + lines = f.readlines() + time_values, memory_values = [], [] + for line in lines[:-2]: + time_value, memory_value = tuple(map(float, line.split(" "))) + time_values.append(time_value) + memory_values.append(memory_value) + + fig = plt.figure(figsize=(10, 6)) + plt.plot(time_values, memory_values) + plt.xlabel("Time (seconds)") + plt.ylabel(f"Memory Usage ({self.memory_type.value}, {self.memory_unit.value})") + plt.title(f"{plot_title} Max_{self.memory_type.value}: {max(memory_values):.2f} {self.memory_unit.value}") + plt.grid(True) + plt.tight_layout() + plt.savefig(str(log_filepath).replace(".txt", f"{filename_suffix}.png")) + plt.close(fig) + + def __enter__(self) -> "MemoryMonitor": + return self.start() + + def __exit__(self, exc_type, exc_val, exc_tb): + self.stop() + + def _monitor_memory(self): + while not self._monitoring_thread_should_stop: + _last_measurement_time = time.perf_counter() + if self.memory_type == MemoryType.RSS: + bytes_used = psutil.Process().memory_info().rss + if self.include_child_processes: + for child_process in psutil.Process().children(recursive=True): + bytes_used += psutil.Process(child_process.pid).memory_info().rss + elif self.memory_type == MemoryType.SYSTEM: + bytes_used = psutil.virtual_memory().total - psutil.virtual_memory().available + else: + raise Exception("Unknown memory type to log") + if self._monitoring_thread_should_stop: + break + self._memory_values_queue.put((time.perf_counter(), bytes_used)) + time.sleep(max(0.0, self.interval - (time.perf_counter() - _last_measurement_time))) + + +class memory_monitor_context: + def __init__( + self, + interval: Optional[float] = 0.01, + memory_unit: Optional[MemoryUnit] = MemoryUnit.MiB, + return_max_value: Optional[bool] = True, + save_dir: Optional[Path] = None, + ): + """ + A memory monitor context manager which monitors both RSS and SYSTEM memory types. After, it stores the + result for the maximum memory recorded if `return_max_value=True or the whole time-memory sequences. Works + by subtracting the first memory measurement from all the other ones so that the resulting sequence starts + from 0. Hence, it can actually return negative memory values. + + After exiting, the result is stored at .memory_data field -- a dict with memory types (RSS or SYSTEM) + as keys. The values are either a single float number if return_max_value is provided, or a tuple with time + and memory value lists. + + Additionally, memory logs may be saved by providing save_dir argument. + + :param interval: Interval in seconds to take measurements. + :param memory_unit: Memory unit. + :param return_max_value: Whether to return max value for each memory type or full memory sequences. + :param save_dir: If provided, will save memory logs at this location. 
+ """ + + self.memory_monitors = {} + for memory_type in [MemoryType.RSS, MemoryType.SYSTEM]: + self.memory_monitors[memory_type] = MemoryMonitor( + interval=interval, memory_type=memory_type, memory_unit=memory_unit + ) + self.return_max_value = return_max_value + self.save_dir = save_dir + + self.memory_data = {'full_mem': {}, 'from_zero': {}} + + def __enter__(self): + for mm in self.memory_monitors.values(): + mm.start() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + # Stop addition of new values as soon as possible + for mm in self.memory_monitors.values(): + mm._monitoring_thread_should_stop = True + + for mt, mm in self.memory_monitors.items(): + mm.stop() + for fz in [False, True]: + time_values, memory_values = mm.get_data(memory_from_zero=fz) + + mm_measure_type = 'from_zero' if fz else 'full_mem' + self.memory_data[mm_measure_type][mt] = max(memory_values) if self.return_max_value else (time_values, memory_values) + + if self.save_dir: + mm.save_memory_logs( + time_values, + memory_values, + save_dir=self.save_dir, + filename_suffix="_mem_increase" if fz else "", + ) + + +class MemMonitorWrapper(): + def __init__(self): + self.save_dir = None + + self.interval = 0.01 + self.memory_unit = MemoryUnit.MiB + + self.memory_types = [MemoryType.RSS, MemoryType.SYSTEM] + + self.memory_monitors = {} + self.memory_data = {'full_mem': {}, 'from_zero': {}} + + def create_monitors(self): + for memory_type in self.memory_types: + self.memory_monitors[memory_type] = MemoryMonitor( + interval=self.interval, memory_type=memory_type, memory_unit=self.memory_unit + ) + + def set_dir(self, dir): + if not Path(dir).exists(): + log.warning(f"Path to dir for memory consamption data is not exists {dir}, run without it.") + else: + self.save_dir = Path(dir) + + def start(self, delay=None): + self.memory_data = {'full_mem': {}, 'from_zero': {}} + for mm in self.memory_monitors.values(): + mm.start() + + # compilation could be very fast, apply delay + if delay: + time.sleep(delay) + else: + time.sleep(self.interval * 3) + + def stop_and_collect_data(self, dir_name='mem_monitor_log'): + self.stop() + + for mt, mm in self.memory_monitors.items(): + if not mm._memory_values_queue or len(mm._memory_values_queue.queue) == 0: + continue + + for from_zero in [False, True]: + time_values, memory_values = mm.get_data(memory_from_zero=from_zero) + + mm_measure_type = 'from_zero' if from_zero else 'full_mem' + self.memory_data[mm_measure_type][mt] = max(memory_values) + + if self.save_dir: + mm.save_memory_logs( + time_values, + memory_values, + save_dir=self.save_dir / dir_name, + filename_suffix="_mem_increase" if from_zero else "", + ) + + def stop(self): + # Stop addition of new values as soon as possible + for mm in self.memory_monitors.values(): + mm._monitoring_thread_should_stop = True + + for mm in self.memory_monitors.values(): + mm.stop() + + def get_data(self): + return (self.memory_data['full_mem'].get(MemoryType.RSS, -1), self.memory_data['from_zero'].get(MemoryType.RSS, -1), + self.memory_data['full_mem'].get(MemoryType.SYSTEM, -1), self.memory_data['from_zero'].get(MemoryType.SYSTEM, -1)) + + def log_data(self, comment): + max_rss_mem, max_rss_increase, max_sys_mem, max_sys_increase = self.get_data() + msg = (f"Max rss memory cost {comment}: {max_rss_mem:.2f}{self.memory_unit.value}, " + f"rss memory increase {comment}: {max_rss_increase:.2f}{self.memory_unit.value}, " + f"max system memory cost {comment}: {max_sys_mem:.2f}{self.memory_unit.value}, " + f"system memory increase 
{comment}: {max_sys_increase:.2f}{self.memory_unit.value}") + log.info(msg) + + +def _cast_bytes_to(bytes, memory_unit, round_to_int=False): + memory_unit_divisors = { + MemoryUnit.B: 1, + MemoryUnit.KiB: 2**10, + MemoryUnit.MiB: 2**20, + MemoryUnit.GiB: 2**30, + MemoryUnit.KB: 10**3, + MemoryUnit.MB: 10**6, + MemoryUnit.GB: 10**9, + } + result = bytes / memory_unit_divisors[memory_unit] + return int(result) if round_to_int else result + + +def _subtract_first_element(data): + for i in range(1, len(data)): + data[i] = data[i] - data[0] + data[0] = 0 + return data diff --git a/tools/llm_bench/llm_bench_utils/memory_profile.py b/tools/llm_bench/llm_bench_utils/memory_profile.py deleted file mode 100644 index 813e3e8489..0000000000 --- a/tools/llm_bench/llm_bench_utils/memory_profile.py +++ /dev/null @@ -1,96 +0,0 @@ -# -*- coding: utf-8 -*- -# Copyright (C) 2023-2025 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -from threading import Event, Thread -import psutil -import time -import os -import sys - - -class MemConsumption: - def __init__(self): - """Initialize MemConsumption.""" - self.g_exit_get_mem_thread = False - self.g_end_collect_mem = False - self.g_max_rss_mem_consumption = -1 - self.g_max_uss_mem_consumption = -1 - self.g_max_shared_mem_consumption = -1 - self.g_event = Event() - self.g_data_event = Event() - self.delay = 0.5 - - def collect_memory_consumption(self): - """Collect the data.""" - while self.g_exit_get_mem_thread is False: - self.g_event.wait() - while True: - process = psutil.Process(os.getpid()) - try: - memory_full_info = process.memory_full_info() - rss_mem_data = memory_full_info.rss - if sys.platform.startswith('linux'): - shared_mem_data = memory_full_info.shared - uss_mem_data = rss_mem_data - shared_mem_data - elif sys.platform.startswith('win'): - uss_mem_data = memory_full_info.uss - shared_mem_data = rss_mem_data - uss_mem_data - else: - uss_mem_data = -1 - shared_mem_data = -1 - except Exception: - rss_mem_data = -1 - uss_mem_data = -1 - shared_mem_data = -1 - - if rss_mem_data > self.g_max_rss_mem_consumption: - self.g_max_rss_mem_consumption = rss_mem_data - if shared_mem_data > self.g_max_shared_mem_consumption: - self.g_max_shared_mem_consumption = shared_mem_data - if uss_mem_data > self.g_max_uss_mem_consumption: - self.g_max_uss_mem_consumption = uss_mem_data - self.g_data_event.set() - if self.g_end_collect_mem is True: - self.g_event.set() - self.g_event.clear() - self.g_end_collect_mem = False - break - time.sleep(self.delay) - - def start_collect_memory_consumption(self): - """Start collect.""" - self.g_end_collect_mem = False - self.g_event.set() - - def end_collect_momory_consumption(self): - """Stop collect.""" - self.g_end_collect_mem = True - self.g_event.wait() - - def get_max_memory_consumption(self): - """Return the data.""" - self.g_data_event.wait() - self.g_data_event.clear() - max_rss_mem = self.g_max_rss_mem_consumption / float(2**20) if self.g_max_rss_mem_consumption > -1 else -1 - max_shared_mem = self.g_max_shared_mem_consumption / float(2**20) if self.g_max_shared_mem_consumption > -1 else -1 - max_uss_mem = self.g_max_uss_mem_consumption / float(2**20) if self.g_max_uss_mem_consumption > -1 else -1 - return max_rss_mem, max_shared_mem, max_uss_mem - - def clear_max_memory_consumption(self): - """Clear MemConsumption.""" - self.g_max_rss_mem_consumption = -1 - self.g_max_uss_mem_consumption = -1 - self.g_max_shared_mem_consumption = -1 - - def start_collect_mem_consumption_thread(self): - """Start the thread.""" - 
self.t_mem_thread = Thread(target=self.collect_memory_consumption) - self.t_mem_thread.start() - - def end_collect_mem_consumption_thread(self): - """End the thread.""" - self.g_event.set() - self.g_data_event.set() - self.g_end_collect_mem = True - self.g_exit_get_mem_thread = True - self.t_mem_thread.join() diff --git a/tools/llm_bench/llm_bench_utils/metrics_print.py b/tools/llm_bench/llm_bench_utils/metrics_print.py index 8668e3bba2..ca0fc2d20f 100644 --- a/tools/llm_bench/llm_bench_utils/metrics_print.py +++ b/tools/llm_bench/llm_bench_utils/metrics_print.py @@ -5,8 +5,7 @@ def print_metrics( - iter_num, iter_data, tms=None, tms_infer=None, warm_up=False, max_rss_mem=-1, max_shared_mem=-1, - max_uss_mem=-1, stable_diffusion=None, tokenization_time=None, batch_size=1, prompt_idx=-1, whisper=None + iter_num, iter_data, tms=None, tms_infer=None, warm_up=False, stable_diffusion=None, tokenization_time=None, batch_size=1, prompt_idx=-1, whisper=None ): iter_str = str(iter_num) if warm_up: @@ -62,12 +61,14 @@ def print_metrics( if whisper is not None: print_whisper_infer_latency(iter_str, whisper, prompt_idx) output_str = '' - if max_rss_mem != '' and max_rss_mem > -1: - output_str += 'Max rss memory cost: {:.2f}MBytes, '.format(max_rss_mem) - if max_uss_mem != '' and max_uss_mem > -1: - output_str += 'max uss memory cost: {:.2f}MBytes, '.format(max_uss_mem) - if max_shared_mem != '' and max_shared_mem > -1: - output_str += 'max shared memory cost: {:.2f}MBytes'.format(max_shared_mem) + if iter_data['max_rss_mem_consumption'] != '' and iter_data['max_rss_mem_consumption'] > -1: + output_str += f"Max rss memory cost: {iter_data['max_rss_mem_consumption']:.2f}MBytes, " + if iter_data['max_rss_mem_increase'] != '' and iter_data['max_rss_mem_increase'] > -1: + output_str += f"rss memory increase: {iter_data['max_rss_mem_increase']:.2f}MBytes, " + if iter_data['max_sys_mem_consumption'] != '' and iter_data['max_sys_mem_consumption'] > -1: + output_str += f"max system memory cost: {iter_data['max_sys_mem_consumption']:.2f}MBytes, " + if iter_data['max_sys_mem_increase'] != '' and iter_data['max_sys_mem_increase'] > -1: + output_str += f"system memory increase: {iter_data['max_sys_mem_increase']:.2f}MBytes " if output_str != '': output_str = ' '.join([prefix, output_str]) log.info(output_str) diff --git a/tools/llm_bench/llm_bench_utils/output_csv.py b/tools/llm_bench/llm_bench_utils/output_csv.py index ea1402f82f..72ccb3f7d4 100644 --- a/tools/llm_bench/llm_bench_utils/output_csv.py +++ b/tools/llm_bench/llm_bench_utils/output_csv.py @@ -49,7 +49,7 @@ def output_comments(result, use_case, writer): 'max_rss_mem: max rss memory consumption;' ) comment_list.append( - 'max_shared_mem: max shared memory consumption;' + 'max_sys_mem: max system memory consumption;' ) for comments in comment_list: @@ -95,8 +95,7 @@ def gen_data_to_csv(result, iter_data, pretrain_time, iter_timestamp): first_token_infer_latency = iter_data['first_token_infer_latency'] other_token_infer_latency = iter_data['other_tokens_infer_avg_latency'] rss_mem = iter_data['max_rss_mem_consumption'] - uss_mem = iter_data['max_uss_mem_consumption'] - shared_mem = iter_data['max_shared_mem_consumption'] + sys_mem = iter_data['max_sys_mem_consumption'] token_time = iter_data['tokenization_time'] detoken_time = iter_data['detokenization_time'] result['iteration'] = str(iter_data['iteration']) @@ -124,8 +123,7 @@ def gen_data_to_csv(result, iter_data, pretrain_time, iter_timestamp): else: result['2nd_infer_avg_latency(ms)'] = 
round(other_token_infer_latency, 5) if other_token_infer_latency != '' else other_token_infer_latency result['max_rss_mem(MB)'] = round(rss_mem, 5) if rss_mem != '' else rss_mem - result['max_uss_mem(MB)'] = round(uss_mem, 5) if uss_mem != '' else uss_mem - result['max_shared_mem(MB)'] = round(shared_mem, 5) if shared_mem != '' else shared_mem + result['max_sys_mem(MB)'] = round(sys_mem, 5) if sys_mem != '' else sys_mem result['prompt_idx'] = iter_data['prompt_idx'] result['tokenization_time'] = round(token_time, 5) if token_time != '' else token_time result['detokenization_time'] = round(detoken_time, 5) if detoken_time != '' else detoken_time @@ -148,8 +146,7 @@ def write_result(report_file, model, framework, device, model_args, iter_data_li '2nd_avg_latency(ms)', 'precision', 'max_rss_mem(MB)', - 'max_uss_mem(MB)', - 'max_shared_mem(MB)', + 'max_sys_mem(MB)', 'prompt_idx', '1st_infer_latency(ms)', '2nd_infer_avg_latency(ms)', diff --git a/tools/llm_bench/llm_bench_utils/output_json.py b/tools/llm_bench/llm_bench_utils/output_json.py index 4a95a9e94d..08ea1c8e79 100644 --- a/tools/llm_bench/llm_bench_utils/output_json.py +++ b/tools/llm_bench/llm_bench_utils/output_json.py @@ -15,8 +15,7 @@ def write_result(report_file, model, framework, device, model_args, iter_data_li first_token_infer_latency = iter_data['first_token_infer_latency'] other_token_infer_latency = iter_data['other_tokens_infer_avg_latency'] rss_mem = iter_data['max_rss_mem_consumption'] - uss_mem = iter_data['max_uss_mem_consumption'] - shared_mem = iter_data['max_shared_mem_consumption'] + max_sys_mem = iter_data['max_sys_mem_consumption'] tokenization_time = iter_data['tokenization_time'] detokenization_time = iter_data['detokenization_time'] @@ -39,8 +38,7 @@ def write_result(report_file, model, framework, device, model_args, iter_data_li 'first_infer_latency': round(first_token_infer_latency, 5) if first_token_infer_latency != '' else first_token_infer_latency, 'second_infer_avg_latency': round(other_token_infer_latency, 5) if other_token_infer_latency != '' else other_token_infer_latency, 'max_rss_mem': round(rss_mem, 5) if rss_mem != '' else -1, - 'max_uss_mem': round(uss_mem, 5) if uss_mem != '' else -1, - 'max_shared_mem': round(shared_mem, 5) if shared_mem != '' else -1, + 'max_sys_mem': round(max_sys_mem, 5) if max_sys_mem != '' else -1, 'prompt_idx': iter_data['prompt_idx'], 'tokenization_time': round(tokenization_time, 5) if tokenization_time != '' else tokenization_time, 'detokenization_time': round(detokenization_time, 5) if detokenization_time != '' else detokenization_time, diff --git a/tools/llm_bench/llm_bench_utils/ov_utils.py b/tools/llm_bench/llm_bench_utils/ov_utils.py index 6aa3ec2395..011c56402f 100644 --- a/tools/llm_bench/llm_bench_utils/ov_utils.py +++ b/tools/llm_bench/llm_bench_utils/ov_utils.py @@ -102,7 +102,7 @@ def get_lora_config(lora_paths, lora_alphas, lora_mode=None): return adapter_config -def create_text_gen_model(model_path, device, **kwargs): +def create_text_gen_model(model_path, device, memory_monitor, **kwargs): """Create text generation model. - model_path: can be model_path or IR path @@ -130,7 +130,7 @@ def create_text_gen_model(model_path, device, **kwargs): log.warning(f"OpenVINO GenAI based benchmarking is not available for {model_type}. 
Will be switched to default benchmarking") else: log.info("Selected OpenVINO GenAI for benchmarking") - return create_genai_text_gen_model(model_path, device, ov_config, **kwargs) + return create_genai_text_gen_model(model_path, device, ov_config, memory_monitor, **kwargs) log.info("Selected Optimum Intel for benchmarking") remote_code = False try: @@ -138,6 +138,9 @@ def create_text_gen_model(model_path, device, **kwargs): except Exception: model_config = AutoConfig.from_pretrained(model_path, trust_remote_code=True) remote_code = True + + if kwargs.get("mem_consumption"): + memory_monitor.start() start = time.perf_counter() ov_model = model_class.from_pretrained( model_path, @@ -148,6 +151,9 @@ def create_text_gen_model(model_path, device, **kwargs): trust_remote_code=remote_code ) end = time.perf_counter() + if kwargs.get("mem_consumption"): + memory_monitor.stop_and_collect_data('compilation_phase') + memory_monitor.log_data('for copmpilation phase') bench_hook = get_bench_hook(kwargs['num_beams'], ov_model) from_pretrained_time = end - start log.info(f'From pretrained time: {from_pretrained_time:.2f}s') @@ -173,7 +179,7 @@ def get_scheduler_config_genai(user_config, config_name="CB config"): return scheduler_config -def create_genai_text_gen_model(model_path, device, ov_config, **kwargs): +def create_genai_text_gen_model(model_path, device, ov_config, memory_monitor, **kwargs): import openvino_genai from transformers import AutoTokenizer from packaging.version import parse @@ -213,10 +219,15 @@ def create_genai_text_gen_model(model_path, device, ov_config, **kwargs): if adapter_config: ov_config['adapters'] = adapter_config + if kwargs.get("mem_consumption"): + memory_monitor.start() start = time.perf_counter() llm_pipe = openvino_genai.LLMPipeline(model_path, device.upper(), **ov_config) end = time.perf_counter() log.info(f'Pipeline initialization time: {end - start:.2f}s') + if kwargs.get("mem_consumption"): + memory_monitor.stop_and_collect_data('compilation_phase') + memory_monitor.log_data('for copmpilation phase') class TokenStreamer(openvino_genai.StreamerBase): def __init__(self, tokenizer): @@ -259,7 +270,7 @@ def convert_ov_tokenizer(tokenizer_path): export_tokenizer(hf_tokenizer, tokenizer_path) -def create_image_gen_model(model_path, device, **kwargs): +def create_image_gen_model(model_path, device, memory_monitor, **kwargs): model_index_data = {} with open(str(model_path / "model_index.json"), 'r') as f: model_index_data = json.load(f) @@ -278,8 +289,10 @@ def create_image_gen_model(model_path, device, **kwargs): else: if kwargs.get("genai", True) and is_genai_available(log_msg=True): log.info("Selected OpenVINO GenAI for benchmarking") - return create_genai_image_gen_model(model_path, device, ov_config, model_index_data, **kwargs) + return create_genai_image_gen_model(model_path, device, ov_config, model_index_data, memory_monitor, **kwargs) + if kwargs.get("mem_consumption"): + memory_monitor.start() log.info("Selected Optimum Intel for benchmarking") start = time.perf_counter() if kwargs.get("static_reshape", False): @@ -293,6 +306,9 @@ def create_image_gen_model(model_path, device, **kwargs): else: ov_model = model_class.from_pretrained(model_path, device=device, ov_config=ov_config) end = time.perf_counter() + if kwargs.get("mem_consumption"): + memory_monitor.stop_and_collect_data('compilation_phase') + memory_monitor.log_data('for copmpilation phase') from_pretrained_time = end - start log.info(f'From pretrained time: {from_pretrained_time:.2f}s') return ov_model, 
from_pretrained_time, False, None @@ -331,7 +347,7 @@ def get_genai_unet_model(model_index_data, model_path, device, ov_config): return unet -def create_genai_image_gen_model(model_path, device, ov_config, model_index_data, **kwargs): +def create_genai_image_gen_model(model_path, device, ov_config, model_index_data, memory_monitor, **kwargs): import openvino_genai class PerfCollector: @@ -406,6 +422,8 @@ def raw_metrics(self): orig_tokenizer = AutoTokenizer.from_pretrained(model_path, subfolder="tokenizer") callback.orig_tokenizer = orig_tokenizer + if kwargs.get("mem_consumption"): + memory_monitor.start() start = time.perf_counter() scheduler_type = model_index_data.get("scheduler", ["", ""])[1] @@ -454,11 +472,14 @@ def raw_metrics(self): image_gen_pipe = image_gen_pipeline_class(model_path, device.upper(), **ov_config) end = time.perf_counter() + if kwargs.get("mem_consumption"): + memory_monitor.stop_and_collect_data('compilation_phase') + memory_monitor.log_data('for compilation phase') log.info(f'Pipeline initialization time: {end - start:.2f}s') return image_gen_pipe, end - start, True, callback -def create_ldm_super_resolution_model(model_path, device, **kwargs): +def create_ldm_super_resolution_model(model_path, device, memory_monitor, **kwargs): core = Core() ov_config = kwargs['config'] core.set_property(ov_config) @@ -466,30 +487,40 @@ model_type = kwargs.get('model_type', default_model_type) model_class = OV_MODEL_CLASSES_MAPPING[model_type] model_path = Path(model_path) + if kwargs.get("mem_consumption"): + memory_monitor.start() start = time.perf_counter() ov_model = model_class(model_path, core, device.upper()) end = time.perf_counter() + if kwargs.get("mem_consumption"): + memory_monitor.stop_and_collect_data('compilation_phase') + memory_monitor.log_data('for compilation phase') from_pretrained_time = end - start log.info(f'From pretrained time: {from_pretrained_time:.2f}s') return ov_model, from_pretrained_time -def create_genai_speech_2_txt_model(model_path, device, **kwargs): +def create_genai_speech_2_txt_model(model_path, device, memory_monitor, **kwargs): import openvino_genai as ov_genai if kwargs.get("genai", True) is False: raise RuntimeError('==Failure the command line does not set --genai ==') if is_genai_available(log_msg=True) is False: raise RuntimeError('==Failure genai is not enable ==') + if kwargs.get("mem_consumption"): + memory_monitor.start() start = time.perf_counter() genai_pipe = ov_genai.WhisperPipeline(model_path, device.upper()) end = time.perf_counter() + if kwargs.get("mem_consumption"): + memory_monitor.stop_and_collect_data('compilation_phase') + memory_monitor.log_data('for compilation phase') from_pretrained_time = end - start log.info(f'From pretrained time: {from_pretrained_time:.2f}s') processor = AutoProcessor.from_pretrained(model_path) return genai_pipe, processor, from_pretrained_time, True -def create_speech_2txt_model(model_path, device, **kwargs): +def create_speech_2txt_model(model_path, device, memory_monitor, **kwargs): """Create speech generation model. - model_path: can be model_path or IR path - device: can be CPU @@ -509,14 +540,19 @@ def create_speech_2txt_model(model_path, device, **kwargs): log.warning("OpenVINO GenAI based benchmarking is not available for {model_type}. 
Will be switched to default bencmarking") else: log.info("Selected OpenVINO GenAI for benchmarking") - return create_genai_speech_2_txt_model(model_path, device, **kwargs) + return create_genai_speech_2_txt_model(model_path, device, memory_monitor, **kwargs) log.info("Selected Optimum Intel for benchmarking") + if kwargs.get("mem_consumption"): + memory_monitor.start() start = time.perf_counter() ov_model = model_class.from_pretrained( model_path, device=device ) end = time.perf_counter() + if kwargs.get("mem_consumption"): + memory_monitor.stop_and_collect_data('compilation_phase') + memory_monitor.log_data('for copmpilation phase') from_pretrained_time = end - start log.info(f'From pretrained time: {from_pretrained_time:.2f}s') processor = AutoProcessor.from_pretrained(model_path) @@ -546,7 +582,7 @@ def get_vlm_processor(model_path): return preprocessors -def create_genai_image_text_gen_model(model_path, device, ov_config, **kwargs): +def create_genai_image_text_gen_model(model_path, device, ov_config, memory_monitor, **kwargs): import openvino_genai if not (model_path / "openvino_tokenizer.xml").exists() or not (model_path / "openvino_detokenizer.xml").exists(): @@ -560,16 +596,21 @@ def create_genai_image_text_gen_model(model_path, device, ov_config, **kwargs): log.info("Continuous Batching mode activated") ov_config["scheduler_config"] = get_scheduler_config_genai(cb_config) + if kwargs.get("mem_consumption"): + memory_monitor.start() start = time.perf_counter() llm_pipe = openvino_genai.VLMPipeline(model_path, device.upper(), **ov_config) end = time.perf_counter() log.info("Selected OpenVINO GenAI for benchmarking") + if kwargs.get("mem_consumption"): + memory_monitor.stop_and_collect_data('compilation_phase') + memory_monitor.log_data('for copmpilation phase') log.info(f'Pipeline initialization time: {end - start:.2f}s') return llm_pipe, processor_config, end - start, None, True -def create_image_text_gen_model(model_path, device, **kwargs): +def create_image_text_gen_model(model_path, device, memory_monitor, **kwargs): model_path = Path(model_path) # specify the model path if model_path.name.endswith('xml'): @@ -590,7 +631,7 @@ def create_image_text_gen_model(model_path, device, **kwargs): remote_code = True if kwargs.get("genai", True) and is_genai_available(log_msg=True): try: - return create_genai_image_text_gen_model(model_path, device, ov_config, **kwargs) + return create_genai_image_text_gen_model(model_path, device, ov_config, memory_monitor, **kwargs) except Exception as exp: log.warning( f"Model type `{model_config.model_type}` is not supported by OpenVINO GenAI. 
" @@ -600,6 +641,8 @@ def create_image_text_gen_model(model_path, device, **kwargs): log.info("Selected Optimum Intel for benchmarking") model_class = OV_MODEL_CLASSES_MAPPING.get(DEFAULT_MODEL_CLASSES[kwargs['use_case']]) + if kwargs.get("mem_consumption"): + memory_monitor.start() start = time.perf_counter() ov_model = model_class.from_pretrained( model_path, @@ -609,6 +652,9 @@ def create_image_text_gen_model(model_path, device, **kwargs): trust_remote_code=remote_code ) end = time.perf_counter() + if kwargs.get("mem_consumption"): + memory_monitor.stop_and_collect_data('compilation_phase') + memory_monitor.log_data('for copmpilation phase') bench_hook = get_bench_hook(kwargs['num_beams'], ov_model) from_pretrained_time = end - start log.info(f'From pretrained time: {from_pretrained_time:.2f}s') diff --git a/tools/llm_bench/llm_bench_utils/pt_utils.py b/tools/llm_bench/llm_bench_utils/pt_utils.py index a22dec9578..cdc6a5b447 100644 --- a/tools/llm_bench/llm_bench_utils/pt_utils.py +++ b/tools/llm_bench/llm_bench_utils/pt_utils.py @@ -30,7 +30,9 @@ def torch_compile_child_module(model, child_modules, backend='openvino', dynamic return model -def run_torch_compile(model, backend='openvino', dynamic=None, options=None, child_modules=None): +def run_torch_compile(model, backend='openvino', dynamic=None, options=None, child_modules=None, memory_monitor=None): + if memory_monitor: + memory_monitor.start() if backend == 'pytorch': log.info(f'Running torch.compile() with {backend} backend') start = time.perf_counter() @@ -48,10 +50,13 @@ def run_torch_compile(model, backend='openvino', dynamic=None, options=None, chi end = time.perf_counter() compile_time = end - start log.info(f'Compiling model via torch.compile() took: {compile_time}') + if memory_monitor: + memory_monitor.stop_and_collect_data('compilation_phase') + memory_monitor.log_data('for from torch.compile() phase') return compiled_model -def create_text_gen_model(model_path, device, **kwargs): +def create_text_gen_model(model_path, device, memory_monitor, **kwargs): model_path = Path(model_path) from_pretrain_time = 0 if model_path.exists(): @@ -61,6 +66,8 @@ def create_text_gen_model(model_path, device, **kwargs): model_type = kwargs.get('model_type', default_model_type) model_class = PT_MODEL_CLASSES_MAPPING.get(model_type, PT_MODEL_CLASSES_MAPPING[default_model_type]) token_class = TOKENIZE_CLASSES_MAPPING.get(model_type, TOKENIZE_CLASSES_MAPPING[default_model_type]) + if kwargs.get("mem_consumption"): + memory_monitor.start() start = time.perf_counter() trust_remote_code = False try: @@ -72,6 +79,9 @@ def create_text_gen_model(model_path, device, **kwargs): tokenizer = token_class.from_pretrained(model_path, trust_remote_code=trust_remote_code) end = time.perf_counter() from_pretrain_time = end - start + if kwargs.get("mem_consumption"): + memory_monitor.stop_and_collect_data('from_pretrained_phase') + memory_monitor.log_data('for from pretrained phase') else: raise RuntimeError(f'==Failure ==: model path:{model_path} is not directory or directory is empty') else: @@ -119,12 +129,12 @@ def create_text_gen_model(model_path, device, **kwargs): options = json.loads(kwargs['torch_compile_options']) if kwargs['torch_compile_input_module']: child_modules = kwargs['torch_compile_input_module'].split(".") - compiled_model = run_torch_compile(model, backend, dynamic, options, child_modules) + compiled_model = run_torch_compile(model, backend, dynamic, options, child_modules, memory_monitor if kwargs.get("mem_consumption") else None) model 
= compiled_model return model, tokenizer, from_pretrain_time, bench_hook, False -def create_image_gen_model(model_path, device, **kwargs): +def create_image_gen_model(model_path, device, memory_monitor, **kwargs): model_path = Path(model_path) from_pretrain_time = 0 if model_path.exists(): @@ -132,10 +142,15 @@ log.info(f'Load image model from model path:{model_path}') model_type = DEFAULT_MODEL_CLASSES[kwargs['use_case']] model_class = PT_MODEL_CLASSES_MAPPING[model_type] + if kwargs.get("mem_consumption"): + memory_monitor.start() start = time.perf_counter() pipe = model_class.from_pretrained(model_path) pipe = set_bf16(pipe, device, **kwargs) end = time.perf_counter() + if kwargs.get("mem_consumption"): + memory_monitor.stop_and_collect_data('from_pretrained_phase') + memory_monitor.log_data('for from_pretrained phase') from_pretrain_time = end - start else: raise RuntimeError(f'==Failure ==: model path:{model_path} is not directory or directory is empty') @@ -158,12 +173,12 @@ if kwargs['torch_compile_backend']: backend = kwargs['torch_compile_backend'] - compiled_model = run_torch_compile(pipe, backend) + compiled_model = run_torch_compile(pipe, backend, memory_monitor=memory_monitor if kwargs.get("mem_consumption") else None) pipe = compiled_model return pipe, from_pretrain_time, False, None -def create_ldm_super_resolution_model(model_path, device, **kwargs): +def create_ldm_super_resolution_model(model_path, device, memory_monitor, **kwargs): model_path = Path(model_path) from_pretrain_time = 0 if model_path.exists(): @@ -174,6 +189,9 @@ start = time.perf_counter() pipe = model_class.from_pretrained(model_path) end = time.perf_counter() + if kwargs.get("mem_consumption"): + memory_monitor.stop_and_collect_data('from_pretrained_phase') + memory_monitor.log_data('for from_pretrained phase') from_pretrain_time = end - start else: raise RuntimeError(f'==Failure ==: model path:{model_path} is not directory or directory is empty') @@ -196,6 +214,6 @@ if kwargs['torch_compile_backend']: backend = kwargs['torch_compile_backend'] - compiled_model = run_torch_compile(pipe, backend) + compiled_model = run_torch_compile(pipe, backend, memory_monitor=memory_monitor if kwargs.get("mem_consumption") else None) pipe = compiled_model return pipe, from_pretrain_time diff --git a/tools/llm_bench/task/image_generation.py b/tools/llm_bench/task/image_generation.py index eb36a5496a..3c6246259a 100644 --- a/tools/llm_bench/task/image_generation.py +++ b/tools/llm_bench/task/image_generation.py @@ -90,10 +90,11 @@ def run_image_generation(image_param, num, image_id, pipe, args, iter_data_list, result_md5_list = [] max_rss_mem_consumption = '' - max_uss_mem_consumption = '' - max_shared_mem_consumption = '' + max_sys_mem_consumption = '' + max_rss_mem_increase = '' + max_sys_mem_increase = '' if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2: - mem_consumption.start_collect_memory_consumption() + mem_consumption.start() input_text_list = [input_text] * args['batch_size'] input_data = pipe.tokenizer(input_text, return_tensors='pt') @@ -108,9 +109,8 @@ res = pipe(input_text_list, **input_args, num_images_per_prompt=args['batch_size']).images end = time.perf_counter() if 
(args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2: - mem_consumption.end_collect_momory_consumption() - max_rss_mem_consumption, max_shared_mem_consumption, max_uss_mem_consumption = mem_consumption.get_max_memory_consumption() - mem_consumption.clear_max_memory_consumption() + mem_consumption.stop_and_collect_data(f"{'P' + str(num) if num > 0 else 'warm-up'}_{proc_id}") + max_rss_mem_consumption, max_rss_mem_increase, max_sys_mem_consumption, max_sys_mem_increase = mem_consumption.get_data() for bs_idx in range(args['batch_size']): rslt_img_fn = llm_bench_utils.output_file.output_gen_image(res[bs_idx], args, image_id, num, bs_idx, proc_id, '.png') result_md5_list.append(hashlib.md5(Image.open(rslt_img_fn).tobytes(), usedforsecurity=False).hexdigest()) @@ -122,8 +122,9 @@ def run_image_generation(image_param, num, image_id, pipe, args, iter_data_list, gen_time=generation_time, res_md5=result_md5_list, max_rss_mem=max_rss_mem_consumption, - max_shared_mem=max_shared_mem_consumption, - max_uss_mem=max_uss_mem_consumption, + max_rss_mem_increase=max_rss_mem_increase, + max_sys_mem=max_sys_mem_consumption, + max_sys_mem_increase=max_sys_mem_increase, prompt_idx=image_id, ) iter_data_list.append(iter_data) @@ -131,9 +132,6 @@ def run_image_generation(image_param, num, image_id, pipe, args, iter_data_list, num, iter_data, warm_up=(num == 0), - max_rss_mem=max_rss_mem_consumption, - max_shared_mem=max_shared_mem_consumption, - max_uss_mem=max_uss_mem_consumption, stable_diffusion=stable_diffusion_hook, prompt_idx=image_id ) @@ -160,10 +158,11 @@ def run_image_generation_genai(image_param, num, image_id, pipe, args, iter_data result_md5_list = [] max_rss_mem_consumption = '' - max_uss_mem_consumption = '' - max_shared_mem_consumption = '' + max_sys_mem_consumption = '' + max_rss_mem_increase = '' + max_sys_mem_increase = '' if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2: - mem_consumption.start_collect_memory_consumption() + mem_consumption.start() input_text_list = [input_text] * args['batch_size'] if num == 0 and args["output_dir"] is not None: @@ -183,9 +182,8 @@ def run_image_generation_genai(image_param, num, image_id, pipe, args, iter_data performance_metrics = callback if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2: - mem_consumption.end_collect_momory_consumption() - max_rss_mem_consumption, max_shared_mem_consumption, max_uss_mem_consumption = mem_consumption.get_max_memory_consumption() - mem_consumption.clear_max_memory_consumption() + mem_consumption.stop_and_collect_data(f"{'P' + str(num) if num > 0 else 'warm-up'}_{proc_id}") + max_rss_mem_consumption, max_rss_mem_increase, max_sys_mem_consumption, max_sys_mem_increase = mem_consumption.get_data() for bs_idx in range(args['batch_size']): image = Image.fromarray(res[bs_idx]) rslt_img_fn = llm_bench_utils.output_file.output_gen_image(image, args, image_id, num, bs_idx, proc_id, '.png') @@ -198,8 +196,9 @@ def run_image_generation_genai(image_param, num, image_id, pipe, args, iter_data gen_time=generation_time, res_md5=result_md5_list, max_rss_mem=max_rss_mem_consumption, - max_shared_mem=max_shared_mem_consumption, - max_uss_mem=max_uss_mem_consumption, + max_rss_mem_increase=max_rss_mem_increase, + max_sys_mem=max_sys_mem_consumption, + max_sys_mem_increase=max_sys_mem_increase, prompt_idx=image_id, ) iter_data_list.append(iter_data) @@ -207,9 +206,6 @@ def run_image_generation_genai(image_param, num, image_id, pipe, args, iter_data num, 
iter_data, warm_up=(num == 0), - max_rss_mem=max_rss_mem_consumption, - max_shared_mem=max_shared_mem_consumption, - max_uss_mem=max_uss_mem_consumption, stable_diffusion=performance_metrics, prompt_idx=image_id ) @@ -218,7 +214,6 @@ def run_image_generation_genai(image_param, num, image_id, pipe, args, iter_data def run_image_generation_benchmark(model_path, framework, device, args, num_iters, mem_consumption): - input_image_list = get_image_prompt(args) if args['prompt_index'] is None: prompt_idx_list = [image_id for image_id, input_text in enumerate(input_image_list)] @@ -243,7 +238,7 @@ def run_image_generation_benchmark(model_path, framework, device, args, num_iter if "guidance_scale" in static_input_args: args["guidance_scale"] = static_input_args["guidance_scale"] - pipe, pretrain_time, use_genai, callback = FW_UTILS[framework].create_image_gen_model(model_path, device, **args) + pipe, pretrain_time, use_genai, callback = FW_UTILS[framework].create_image_gen_model(model_path, device, mem_consumption, **args) iter_data_list = [] if framework == "ov" and not use_genai: diff --git a/tools/llm_bench/task/speech_to_text_generation.py b/tools/llm_bench/task/speech_to_text_generation.py index cb34a81a2f..630df3f02b 100644 --- a/tools/llm_bench/task/speech_to_text_generation.py +++ b/tools/llm_bench/task/speech_to_text_generation.py @@ -24,8 +24,9 @@ def run_speech_2_txt_generation(input_param, args, md5_list, iter_data_list): result_md5_list = [] max_rss_mem_consumption = '' - max_uss_mem_consumption = '' - max_shared_mem_consumption = '' + max_sys_mem_consumption = '' + max_rss_mem_increase = '' + max_sys_mem_increase = '' pipe = input_param['pipe'] raw_speech = input_param['raw_speech'] num = input_param['iter_idx'] @@ -38,7 +39,7 @@ def run_speech_2_txt_generation(input_param, args, md5_list, iter_data_list): max_gen_tokens = DEFAULT_OUTPUT_TOKEN_SIZE if args['infer_count'] is None else args['infer_count'] if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2: - mem_consumption.start_collect_memory_consumption() + mem_consumption.start() if use_genai: start = time.perf_counter() result_text = pipe.generate( @@ -85,9 +86,8 @@ def run_speech_2_txt_generation(input_param, args, md5_list, iter_data_list): else: md5_list[num][speech_id] = result_md5_list if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2: - mem_consumption.end_collect_momory_consumption() - max_rss_mem_consumption, max_shared_mem_consumption, max_uss_mem_consumption = mem_consumption.get_max_memory_consumption() - mem_consumption.clear_max_memory_consumption() + mem_consumption.stop_and_collect_data(f"{'P' + str(num) if num > 0 else 'warm-up'}") + max_rss_mem_consumption, max_rss_mem_increase, max_sys_mem_consumption, max_sys_mem_increase = mem_consumption.get_data() iter_data = gen_output_data.gen_iterate_data( iter_idx=num, @@ -95,8 +95,9 @@ def run_speech_2_txt_generation(input_param, args, md5_list, iter_data_list): gen_time=generation_time, res_md5=result_md5_list, max_rss_mem=max_rss_mem_consumption, - max_shared_mem=max_shared_mem_consumption, - max_uss_mem=max_uss_mem_consumption, + max_rss_mem_increase=max_rss_mem_increase, + max_sys_mem=max_sys_mem_consumption, + max_sys_mem_increase=max_sys_mem_increase, prompt_idx=speech_id, ) iter_data_list.append(iter_data) @@ -106,9 +107,6 @@ def run_speech_2_txt_generation(input_param, args, md5_list, iter_data_list): tms=tm_list, tms_infer=tm_infer_list, warm_up=(num == 0), - max_rss_mem=max_rss_mem_consumption, - 
max_shared_mem=max_shared_mem_consumption, - max_uss_mem=max_uss_mem_consumption, prompt_idx=speech_id, whisper=whisper_hook ) @@ -147,7 +145,7 @@ def run_speech_2_txt_benchmark(model_path, framework, device, args, num_iters, m if len(speech_list) == 0: raise RuntimeError('==Failure speech list is empty ==') log.info(f'Benchmarking iter nums(exclude warm-up): {num_iters}, speech file nums: {len(speech_file_list)}, speech idx: {speech_idx_list}') - pipe, processor, pretrain_time, use_genai = FW_UTILS[framework].create_speech_2txt_model(model_path, device, **args) + pipe, processor, pretrain_time, use_genai = FW_UTILS[framework].create_speech_2txt_model(model_path, device, mem_consumption, **args) md5_list = {num : {} for num in range(num_iters + 1)} iter_timestamp = model_utils.init_timestamp(num_iters, speech_list, speech_idx_list) input_param = { diff --git a/tools/llm_bench/task/super_resolution_generation.py b/tools/llm_bench/task/super_resolution_generation.py index c2f3cff6e4..3e2c35c59b 100644 --- a/tools/llm_bench/task/super_resolution_generation.py +++ b/tools/llm_bench/task/super_resolution_generation.py @@ -33,17 +33,17 @@ def run_ldm_super_resolution(img, num, pipe, args, framework, iter_data_list, im low_res_img = Image.open(img['prompt']).convert('RGB') low_res_img = low_res_img.resize((resize_image_width, resize_image_height)) max_rss_mem_consumption = '' - max_uss_mem_consumption = '' - max_shared_mem_consumption = '' + max_sys_mem_consumption = '' + max_rss_mem_increase = '' + max_sys_mem_increase = '' if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2: - mem_consumption.start_collect_memory_consumption() + mem_consumption.start() start = time.perf_counter() res = pipe(low_res_img, num_inference_steps=nsteps, tm_list=tm_list) end = time.perf_counter() if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2: - mem_consumption.end_collect_momory_consumption() - max_rss_mem_consumption, max_shared_mem_consumption, max_uss_mem_consumption = mem_consumption.get_max_memory_consumption() - mem_consumption.clear_max_memory_consumption() + mem_consumption.stop_and_collect_data(f"{'P' + str(num) if num > 0 else 'warm-up'}_{proc_id}") + max_rss_mem_consumption, max_rss_mem_increase, max_sys_mem_consumption, max_sys_mem_increase = mem_consumption.get_data() result_md5_list = [] if framework == 'ov': rslt_img_fn = llm_bench_utils.output_file.output_gen_image(res[0], args, image_id, num, None, proc_id, '.png') @@ -56,8 +56,9 @@ def run_ldm_super_resolution(img, num, pipe, args, framework, iter_data_list, im gen_time=generation_time, res_md5=result_md5_list, max_rss_mem=max_rss_mem_consumption, - max_shared_mem=max_shared_mem_consumption, - max_uss_mem=max_uss_mem_consumption, + max_rss_mem_increase=max_rss_mem_increase, + max_sys_mem=max_sys_mem_consumption, + max_sys_mem_increase=max_sys_mem_increase, prompt_idx=image_id, ) iter_data_list.append(iter_data) @@ -65,9 +66,6 @@ def run_ldm_super_resolution(img, num, pipe, args, framework, iter_data_list, im num, iter_data, warm_up=(num == 0), - max_rss_mem=max_rss_mem_consumption, - max_shared_mem=max_shared_mem_consumption, - max_uss_mem=max_uss_mem_consumption, prompt_idx=image_id ) metrics_print.print_generated(num, warm_up=(num == 0), generated=rslt_img_fn, prompt_idx=image_id) @@ -77,7 +75,7 @@ def run_ldm_super_resolution(img, num, pipe, args, framework, iter_data_list, im def run_ldm_super_resolution_benchmark(model_path, framework, device, args, num_iters, mem_consumption): if 
diff --git a/tools/llm_bench/task/super_resolution_generation.py b/tools/llm_bench/task/super_resolution_generation.py
index c2f3cff6e4..3e2c35c59b 100644
--- a/tools/llm_bench/task/super_resolution_generation.py
+++ b/tools/llm_bench/task/super_resolution_generation.py
@@ -33,17 +33,17 @@ def run_ldm_super_resolution(img, num, pipe, args, framework, iter_data_list, im
     low_res_img = Image.open(img['prompt']).convert('RGB')
     low_res_img = low_res_img.resize((resize_image_width, resize_image_height))
     max_rss_mem_consumption = ''
-    max_uss_mem_consumption = ''
-    max_shared_mem_consumption = ''
+    max_sys_mem_consumption = ''
+    max_rss_mem_increase = ''
+    max_sys_mem_increase = ''
     if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
-        mem_consumption.start_collect_memory_consumption()
+        mem_consumption.start()
     start = time.perf_counter()
     res = pipe(low_res_img, num_inference_steps=nsteps, tm_list=tm_list)
     end = time.perf_counter()
     if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
-        mem_consumption.end_collect_momory_consumption()
-        max_rss_mem_consumption, max_shared_mem_consumption, max_uss_mem_consumption = mem_consumption.get_max_memory_consumption()
-        mem_consumption.clear_max_memory_consumption()
+        mem_consumption.stop_and_collect_data(f"{'P' + str(num) if num > 0 else 'warm-up'}_{proc_id}")
+        max_rss_mem_consumption, max_rss_mem_increase, max_sys_mem_consumption, max_sys_mem_increase = mem_consumption.get_data()
     result_md5_list = []
     if framework == 'ov':
         rslt_img_fn = llm_bench_utils.output_file.output_gen_image(res[0], args, image_id, num, None, proc_id, '.png')
@@ -56,8 +56,9 @@ def run_ldm_super_resolution(img, num, pipe, args, framework, iter_data_list, im
         gen_time=generation_time,
         res_md5=result_md5_list,
         max_rss_mem=max_rss_mem_consumption,
-        max_shared_mem=max_shared_mem_consumption,
-        max_uss_mem=max_uss_mem_consumption,
+        max_rss_mem_increase=max_rss_mem_increase,
+        max_sys_mem=max_sys_mem_consumption,
+        max_sys_mem_increase=max_sys_mem_increase,
         prompt_idx=image_id,
     )
     iter_data_list.append(iter_data)
@@ -65,9 +66,6 @@ def run_ldm_super_resolution(img, num, pipe, args, framework, iter_data_list, im
         num,
         iter_data,
         warm_up=(num == 0),
-        max_rss_mem=max_rss_mem_consumption,
-        max_shared_mem=max_shared_mem_consumption,
-        max_uss_mem=max_uss_mem_consumption,
         prompt_idx=image_id
     )
     metrics_print.print_generated(num, warm_up=(num == 0), generated=rslt_img_fn, prompt_idx=image_id)
@@ -77,7 +75,7 @@ def run_ldm_super_resolution_benchmark(model_path, framework, device, args, num_iters, mem_consumption):
     if args["genai"]:
         log.warning("GenAI pipeline is not supported for this task. Switched on default benchmarking")
-    pipe, pretrain_time = FW_UTILS[framework].create_ldm_super_resolution_model(model_path, device, **args)
+    pipe, pretrain_time = FW_UTILS[framework].create_ldm_super_resolution_model(model_path, device, mem_consumption, **args)
     iter_data_list = []
     tm_list = []
     images = get_ldm_image_prompt(args)
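Illustrative sketch, not part of the patch: every FW_UTILS.create_*_model helper now receives the memory monitor as an extra positional argument before **args, presumably so that memory used while the model is loaded and compiled can be recorded too; the helper bodies are not shown in this diff, so the sketch below is only a guess at that shape, and the function names and label are hypothetical.

    import time

    def load_and_compile(model_path, device, **kwargs):
        # Placeholder for the framework-specific loading code (not shown in this diff).
        return object()

    def create_model_sketch(model_path, device, memory_monitor, **kwargs):
        # Hypothetical shape of a create_*_model helper after this patch: the monitor
        # is started around model load so compile-time memory could be recorded as well.
        memory_monitor.start()
        start = time.perf_counter()
        model = load_and_compile(model_path, device, **kwargs)
        pretrain_time = time.perf_counter() - start
        memory_monitor.stop_and_collect_data("compilation_phase")  # label chosen for the example
        return model, pretrain_time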
diff --git a/tools/llm_bench/task/text_generation.py b/tools/llm_bench/task/text_generation.py
index e83b5eff34..10423f189a 100644
--- a/tools/llm_bench/task/text_generation.py
+++ b/tools/llm_bench/task/text_generation.py
@@ -49,10 +49,11 @@ def run_text_generation(input_text, num, model, tokenizer, args, iter_data_list,
     log.info(out_str)
     max_rss_mem_consumption = ''
-    max_uss_mem_consumption = ''
-    max_shared_mem_consumption = ''
+    max_sys_mem_consumption = ''
+    max_rss_mem_increase = ''
+    max_sys_mem_increase = ''
     if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
-        mem_consumption.start_collect_memory_consumption()
+        mem_consumption.start()
     max_gen_tokens = DEFAULT_OUTPUT_TOKEN_SIZE if args['infer_count'] is None else args['infer_count']
     start = time.perf_counter()
     if streaming:
@@ -99,9 +100,8 @@ def run_text_generation(input_text, num, model, tokenizer, args, iter_data_list,
         )
     end = time.perf_counter()
     if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
-        mem_consumption.end_collect_momory_consumption()
-        max_rss_mem_consumption, max_shared_mem_consumption, max_uss_mem_consumption = mem_consumption.get_max_memory_consumption()
-        mem_consumption.clear_max_memory_consumption()
+        mem_consumption.stop_and_collect_data(f"{'P' + str(num) if num > 0 else 'warm-up'}_{proc_id}")
+        max_rss_mem_consumption, max_rss_mem_increase, max_sys_mem_consumption, max_sys_mem_increase = mem_consumption.get_data()
     generation_time = end - start
     tok_decode_start = time.perf_counter()
@@ -156,8 +156,9 @@ def run_text_generation(input_text, num, model, tokenizer, args, iter_data_list,
         latency=per_token_time,
         res_md5=result_md5_list,
         max_rss_mem=max_rss_mem_consumption,
-        max_shared_mem=max_shared_mem_consumption,
-        max_uss_mem=max_uss_mem_consumption,
+        max_rss_mem_increase=max_rss_mem_increase,
+        max_sys_mem=max_sys_mem_consumption,
+        max_sys_mem_increase=max_sys_mem_increase,
         prompt_idx=prompt_index,
         tokenization_time=(tok_encode_time, tok_decode_time)
     )
@@ -168,9 +169,6 @@ def run_text_generation(input_text, num, model, tokenizer, args, iter_data_list,
         tm_list,
         tm_infer_list,
         warm_up=(num == 0),
-        max_rss_mem=max_rss_mem_consumption,
-        max_shared_mem=max_shared_mem_consumption,
-        max_uss_mem=max_uss_mem_consumption,
         tokenization_time=(tok_encode_time, tok_decode_time),
         batch_size=args['batch_size'],
         prompt_idx=prompt_index
@@ -195,10 +193,12 @@ def run_text_generation_genai(input_text, num, model, tokenizer, args, iter_data
     for bs_index, in_text in enumerate(input_text_list):
         llm_bench_utils.output_file.output_input_text(in_text, args, model_precision, prompt_index, bs_index, proc_id)
     max_rss_mem_consumption = ''
-    max_uss_mem_consumption = ''
-    max_shared_mem_consumption = ''
+    max_sys_mem_consumption = ''
+    max_rss_mem_increase = ''
+    max_sys_mem_increase = ''
     if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
-        mem_consumption.start_collect_memory_consumption()
+        mem_consumption.start()
+
     max_gen_tokens = DEFAULT_OUTPUT_TOKEN_SIZE if args['infer_count'] is None else args['infer_count']
     tokenizer = model.get_tokenizer()
@@ -283,9 +283,8 @@ def token_printer():
     tokenization_time.append((detokenization_end - detokenization_start) * 1000)
     if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
-        mem_consumption.end_collect_momory_consumption()
-        max_rss_mem_consumption, max_shared_mem_consumption, max_uss_mem_consumption = mem_consumption.get_max_memory_consumption()
-        mem_consumption.clear_max_memory_consumption()
+        mem_consumption.stop_and_collect_data(f"{'P' + str(num) if num > 0 else 'warm-up'}_{proc_id}")
+        max_rss_mem_consumption, max_rss_mem_increase, max_sys_mem_consumption, max_sys_mem_increase = mem_consumption.get_data()
     generation_time = end - start
     # Only text_gen need to minus length of input_data, because generated_text may include input_text
@@ -328,8 +327,9 @@ def token_printer():
         latency=per_token_time,
         res_md5=result_md5_list,
         max_rss_mem=max_rss_mem_consumption,
-        max_shared_mem=max_shared_mem_consumption,
-        max_uss_mem=max_uss_mem_consumption,
+        max_rss_mem_increase=max_rss_mem_increase,
+        max_sys_mem=max_sys_mem_consumption,
+        max_sys_mem_increase=max_sys_mem_increase,
         prompt_idx=prompt_index,
         tokenization_time=tokenization_time
     )
@@ -340,9 +340,6 @@ def token_printer():
         tm_list,
         inference_durations,
         warm_up=(num == 0),
-        max_rss_mem=max_rss_mem_consumption,
-        max_shared_mem=max_shared_mem_consumption,
-        max_uss_mem=max_uss_mem_consumption,
         tokenization_time=tokenization_time,
         batch_size=args['batch_size'],
         prompt_idx=prompt_index
@@ -377,10 +374,11 @@ def run_text_generation_genai_with_stream(input_text, num, model, tokenizer, arg
         out_str += 'all max_output_token_size: {} * {}'.format(args['infer_count'], args['batch_size'])
     log.info(out_str)
     max_rss_mem_consumption = ''
-    max_uss_mem_consumption = ''
-    max_shared_mem_consumption = ''
+    max_sys_mem_consumption = ''
+    max_rss_mem_increase = ''
+    max_sys_mem_increase = ''
     if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
-        mem_consumption.start_collect_memory_consumption()
+        mem_consumption.start()
     max_gen_tokens = DEFAULT_OUTPUT_TOKEN_SIZE if args['infer_count'] is None else args['infer_count']
     streamer.reset()
     gen_config = model.get_generation_config()
@@ -423,9 +421,8 @@ def run_text_generation_genai_with_stream(input_text, num, model, tokenizer, arg
         generated_tokens = model.generate(input_data, gen_config, streamer=streamer).tokens
     end = time.perf_counter()
     if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
-        mem_consumption.end_collect_momory_consumption()
-        max_rss_mem_consumption, max_shared_mem_consumption, max_uss_mem_consumption = mem_consumption.get_max_memory_consumption()
-        mem_consumption.clear_max_memory_consumption()
+        mem_consumption.stop_and_collect_data(f"{'P' + str(num) if num > 0 else 'warm-up'}_{proc_id}")
+        max_rss_mem_consumption, max_rss_mem_increase, max_sys_mem_consumption, max_sys_mem_increase = mem_consumption.get_data()
     generation_time = end - start
     tok_decode_start = time.perf_counter()
     generated_text = pipe_tokenizer.decode(generated_tokens)
@@ -464,8 +461,9 @@ def run_text_generation_genai_with_stream(input_text, num, model, tokenizer, arg
         latency=per_token_time,
         res_md5=result_md5_list,
         max_rss_mem=max_rss_mem_consumption,
-        max_shared_mem=max_shared_mem_consumption,
-        max_uss_mem=max_uss_mem_consumption,
+        max_rss_mem_increase=max_rss_mem_increase,
+        max_sys_mem=max_sys_mem_consumption,
+        max_sys_mem_increase=max_sys_mem_increase,
         prompt_idx=prompt_index,
         tokenization_time=(tok_encode_time, tok_decode_time)
     )
@@ -476,9 +474,6 @@ def run_text_generation_genai_with_stream(input_text, num, model, tokenizer, arg
         tms=tm_list,
         tms_infer=None,
         warm_up=(num == 0),
-        max_rss_mem=max_rss_mem_consumption,
-        max_shared_mem=max_shared_mem_consumption,
-        max_uss_mem=max_uss_mem_consumption,
         tokenization_time=(tok_encode_time, tok_decode_time),
         batch_size=args['batch_size'],
         prompt_idx=prompt_index
@@ -495,7 +490,7 @@ def run_text_generation_genai_with_stream(input_text, num, model, tokenizer, arg
 
 
 def run_text_generation_benchmark(model_path, framework, device, tokens_len, streaming, args, num_iters, mem_consumption):
-    model, tokenizer, pretrain_time, bench_hook, use_genai = FW_UTILS[framework].create_text_gen_model(model_path, device, **args)
+    model, tokenizer, pretrain_time, bench_hook, use_genai = FW_UTILS[framework].create_text_gen_model(model_path, device, mem_consumption, **args)
     model_precision = model_utils.get_model_precision(model_path.parts)
     iter_data_list = []
     md5_list = {num : {} for num in range(num_iters + 1)}
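Illustrative sketch, not part of the patch: the string handed to stop_and_collect_data() names the collected sample set, "warm-up" for iteration 0 and "P<n>" afterwards, with the process id appended in the per-process tasks (the speech task above omits the proc_id suffix). The helper below simply evaluates the same f-string used in these hunks.

    def collect_label(num, proc_id):
        # Same expression as in the per-process tasks of this patch.
        return f"{'P' + str(num) if num > 0 else 'warm-up'}_{proc_id}"

    print(collect_label(0, 0))  # -> warm-up_0  (warm-up pass)
    print(collect_label(2, 0))  # -> P2_0       (second measured iteration)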
diff --git a/tools/llm_bench/task/visual_language_generation.py b/tools/llm_bench/task/visual_language_generation.py
index 54b4467c14..6cd72f225c 100644
--- a/tools/llm_bench/task/visual_language_generation.py
+++ b/tools/llm_bench/task/visual_language_generation.py
@@ -59,10 +59,11 @@ def run_visual_language_generation_optimum(
     log.info(out_str)
     max_rss_mem_consumption = ''
-    max_uss_mem_consumption = ''
-    max_shared_mem_consumption = ''
+    max_sys_mem_consumption = ''
+    max_rss_mem_increase = ''
+    max_sys_mem_increase = ''
     if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
-        mem_consumption.start_collect_memory_consumption()
+        mem_consumption.start()
     max_gen_tokens = DEFAULT_OUTPUT_TOKEN_SIZE if args['infer_count'] is None else args['infer_count']
     start = time.perf_counter()
     if args['infer_count'] is not None and args['end_token_stopping'] is False:
@@ -86,9 +87,8 @@ def run_visual_language_generation_optimum(
         )
     end = time.perf_counter()
     if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
-        mem_consumption.end_collect_momory_consumption()
-        max_rss_mem_consumption, max_shared_mem_consumption, max_uss_mem_consumption = mem_consumption.get_max_memory_consumption()
-        mem_consumption.clear_max_memory_consumption()
+        mem_consumption.stop_and_collect_data(f"{'P' + str(num) if num > 0 else 'warm-up'}_{proc_id}")
+        max_rss_mem_consumption, max_rss_mem_increase, max_sys_mem_consumption, max_sys_mem_increase = mem_consumption.get_data()
     generation_time = end - start
     tok_decode_start = time.perf_counter()
@@ -138,8 +138,9 @@ def run_visual_language_generation_optimum(
         latency=per_token_time,
         res_md5=result_md5_list,
         max_rss_mem=max_rss_mem_consumption,
-        max_shared_mem=max_shared_mem_consumption,
-        max_uss_mem=max_uss_mem_consumption,
+        max_rss_mem_increase=max_rss_mem_increase,
+        max_sys_mem=max_sys_mem_consumption,
+        max_sys_mem_increase=max_sys_mem_increase,
         prompt_idx=prompt_index,
         tokenization_time=(tok_encode_time, tok_decode_time),
         mm_embeddings_preparation_time=tm_mm_embeddings
@@ -151,9 +152,6 @@ def run_visual_language_generation_optimum(
         tm_list,
         tm_infer_list,
         warm_up=(num == 0),
-        max_rss_mem=max_rss_mem_consumption,
-        max_shared_mem=max_shared_mem_consumption,
-        max_uss_mem=max_uss_mem_consumption,
         tokenization_time=(tok_encode_time, tok_decode_time),
         batch_size=args['batch_size'],
         prompt_idx=prompt_index
@@ -203,10 +201,11 @@ def run_visual_language_generation_genai(
     for bs_index, in_text in enumerate(prompts):
         llm_bench_utils.output_file.output_input_text(in_text, args, model_precision, prompt_index, bs_index, proc_id)
     max_rss_mem_consumption = ''
-    max_uss_mem_consumption = ''
-    max_shared_mem_consumption = ''
+    max_sys_mem_consumption = ''
+    max_rss_mem_increase = ''
+    max_sys_mem_increase = ''
     if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
-        mem_consumption.start_collect_memory_consumption()
+        mem_consumption.start()
     max_gen_tokens = DEFAULT_OUTPUT_TOKEN_SIZE if args['infer_count'] is None else args['infer_count']
     gen_config = model.get_generation_config()
     gen_config.max_new_tokens = max_gen_tokens
@@ -224,9 +223,8 @@ def run_visual_language_generation_genai(
     generated_text = generation_result.texts
     perf_metrics = generation_result.perf_metrics
     if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
-        mem_consumption.end_collect_momory_consumption()
-        max_rss_mem_consumption, max_shared_mem_consumption, max_uss_mem_consumption = mem_consumption.get_max_memory_consumption()
-        mem_consumption.clear_max_memory_consumption()
+        mem_consumption.stop_and_collect_data(f"{'P' + str(num) if num > 0 else 'warm-up'}_{proc_id}")
+        max_rss_mem_consumption, max_rss_mem_increase, max_sys_mem_consumption, max_sys_mem_increase = mem_consumption.get_data()
     generation_time = end - start
     result_md5_list = []
@@ -268,8 +266,9 @@ def run_visual_language_generation_genai(
         latency=per_token_time,
         res_md5=result_md5_list,
         max_rss_mem=max_rss_mem_consumption,
-        max_shared_mem=max_shared_mem_consumption,
-        max_uss_mem=max_uss_mem_consumption,
+        max_rss_mem_increase=max_rss_mem_increase,
+        max_sys_mem=max_sys_mem_consumption,
+        max_sys_mem_increase=max_sys_mem_increase,
         prompt_idx=prompt_index,
         tokenization_time=tokenization_time,
         mm_embeddings_preparation_time=perf_metrics.get_prepare_embeddings_duration().mean
@@ -282,9 +281,6 @@ def run_visual_language_generation_genai(
         tm_list.tolist(),
         inference_durations.tolist(),
         warm_up=(num == 0),
-        max_rss_mem=max_rss_mem_consumption,
-        max_shared_mem=max_shared_mem_consumption,
-        max_uss_mem=max_uss_mem_consumption,
         tokenization_time=tokenization_time,
         batch_size=args['batch_size'],
         prompt_idx=prompt_index
@@ -300,7 +296,7 @@ def run_visual_language_generation_genai(
 
 
 def run_visual_language_generation_benchmark(model_path, framework, device, args, num_iters, mem_consumption):
-    model, processor, pretrain_time, bench_hook, use_genai = FW_UTILS[framework].create_image_text_gen_model(model_path, device, **args)
+    model, processor, pretrain_time, bench_hook, use_genai = FW_UTILS[framework].create_image_text_gen_model(model_path, device, mem_consumption, **args)
     model_precision = model_utils.get_model_precision(model_path.parts)
     iter_data_list = []
     md5_list = {num : {} for num in range(num_iters + 1)}