Merge branch 'master' into vlm_python_bindings

Wovchena · web-flow · commit 724b27f39a31 · 2024-10-09T08:33:40.000+04:00
diff --git a/llm_bench/python/benchmark.py b/llm_bench/python/benchmark.py
@@ -308,13 +308,14 @@ def run_text_generation_genai(input_text, num, model, tokenizer, args, iter_data
             log.warning(f"[{num}] Prompt[{prompt_index}]'s md5 {result_md5_list} "
                         f"is different from md5 of the {num - 1} iteration {prev_md5}")
             llm_bench_utils.metrics_print.print_generated(num, warm_up=(num == 0), generated=generated_text[0])
-            if num == 1:
-                # if the device is CPU, throw exception
-                if args['devices'].lower().startswith('cpu') is True:
+            if not args.get("use_cb", False):
+                if num == 1:
+                    # if the device is CPU, throw exception
+                    if args['devices'].lower().startswith('cpu') is True:
+                        assert (result_md5_list == prev_md5)
+                else:
+                    # throw exception
                     assert (result_md5_list == prev_md5)
-            else:
-                # throw exception
-                assert (result_md5_list == prev_md5)
     else:
         llm_bench_utils.metrics_print.print_generated(num, warm_up=(num == 0), generated=generated_text[0])
 
@@ -814,7 +815,7 @@ def get_argprser():
     llm_bench_utils.model_utils.add_stateful_model_arguments(parser)
     parser.add_argument("--genai", action="store_true", help="Use OpenVINO GenAI optimized pipelines for benchmarking")
     parser.add_argument("--use_cb", action="store_true", help="Use Continuous Batching inference mode")
-    parser.add_argument("--cb_config", required=False, default=None, help="Path to file with Continuous Batching Scheduler settings")
+    parser.add_argument("--cb_config", required=False, default=None, help="Path to file with Continuous Batching Scheduler settings or dict")
     parser.add_argument(
         '--end_token_stopping',
         action='store_true',
diff --git a/llm_bench/python/llm_bench_utils/model_utils.py b/llm_bench/python/llm_bench_utils/model_utils.py
@@ -204,11 +204,17 @@ def get_use_case(model_name_or_path):
 
 
 def get_config(config):
-    with open(config, 'r') as f:
+    if Path(config).is_file():
+        with open(config, 'r') as f:
+            try:
+                ov_config = json.load(f)
+            except Exception:
+                raise RuntimeError(f'==Parse file:{config} failiure, json format is incorrect ==')
+    else:
         try:
-            ov_config = json.load(f)
+            ov_config = json.loads(config)
         except Exception:
-            raise RuntimeError(f'==Parse file:{config} failiure, json format is incorrect ==')
+            raise RuntimeError(f'==Parse config:{config} failiure, json format is incorrect ==')
     return ov_config
 
 
diff --git a/llm_bench/python/llm_bench_utils/ov_utils.py b/llm_bench/python/llm_bench_utils/ov_utils.py
@@ -189,8 +189,11 @@ def create_genai_text_gen_model(model_path, device, ov_config, **kwargs):
     cb = kwargs.get("use_cb", False)
     if cb:
         log.info("Continuous Batching mode activated")
+        default_cb_config = {"cache_size": 1}
+        if "GPU" in device:
+            default_cb_config["block_size"] = 16
         scheduler_config = openvino_genai.SchedulerConfig()
-        scheduler_params = kwargs.get("cb_config") or {"cache_size": 1}
+        scheduler_params = kwargs.get("cb_config") or default_cb_config
         if scheduler_params:
             log.info(f"Scheduler parameters:\n{scheduler_params}")
 
diff --git a/samples/cpp/whisper_speech_recognition/README.md b/samples/cpp/whisper_speech_recognition/README.md
@@ -23,7 +23,7 @@ Prepare audio file in wav format with sampling rate 16k Hz.
 
 Output: text transcription of `sample.wav`
 
-Models can be downloaded from [OpenAI HiggingFace](https://huggingface.co/openai).
+Models can be downloaded from [OpenAI HuggingFace](https://huggingface.co/openai).
 
 Supported Models:
 [openai/whisper-tiny](https://huggingface.co/openai/whisper-tiny)
diff --git a/samples/python/whisper_speech_recognition/README.md b/samples/python/whisper_speech_recognition/README.md
@@ -23,7 +23,7 @@ Prepare audio file in wav format with sampling rate 16k Hz.
 
 Output: text transcription of `sample.wav`
 
-Models can be downloaded from [OpenAI HiggingFace](https://huggingface.co/openai).
+Models can be downloaded from [OpenAI HuggingFace](https://huggingface.co/openai).
 
 Supported Models:
 [openai/whisper-tiny](https://huggingface.co/openai/whisper-tiny)