Add --static_reshape option to llm_bench, to force static reshape + compilation at pipeline creation (#1851)

RyanMetcalfeInt8 · ilya-lavrenov · eaidova · web-flow · commit 1949366505d5 · 2025-03-20T12:39:34.000+04:00
This PR introduces a --static_reshape option to benchmark.py.

If specified, it causes the image generation pipeline to be reshaped
before compilation, fixing the width, height, etc.

This has a couple of advantages:
1. Running SD pipelines on NPU is now possible through benchmark.py
(as static reshape is a requirement for NPU)
2. Even for other devices, such as GPU, pipeline performance is
generally improved when pipelines are reshaped (fixed to particular
dimensions) before compilation.

---------

Co-authored-by: Ilya Lavrenov <ilya.lavrenov@intel.com>
Co-authored-by: Ekaterina Aidova <ekaterina.aidova@intel.com>
diff --git a/.github/workflows/genai-tools.yml b/.github/workflows/genai-tools.yml
@@ -112,6 +112,10 @@ jobs:
           python ./tools/llm_bench/benchmark.py -m ./ov_models/tiny-random-latent-consistency/ -pf ./image_generation.jsonl -d cpu -n 1 --optimum --num_steps 4
       - name: Test echarlaix/tiny-random-latent-consistency with GenAI Text to Image
         run: python ./tools/llm_bench/benchmark.py -m ./ov_models/tiny-random-latent-consistency/ -pf ./image_generation.jsonl -d cpu -n 1 --num_steps 4
+      - name: Test echarlaix/tiny-random-latent-consistency with Optimum Intel, static reshape
+        run: python ./tools/llm_bench/benchmark.py -m ./ov_models/tiny-random-latent-consistency/ -p "an astronaut riding a horse on mars" -d cpu -n 1 --num_steps 4 --static_reshape --optimum
+      - name: Test echarlaix/tiny-random-latent-consistency with GenAI Text to Image, static reshape
+        run: python ./tools/llm_bench/benchmark.py -m ./ov_models/tiny-random-latent-consistency/ -p "an astronaut riding a horse on mars" -d cpu -n 1 --num_steps 4 --static_reshape
       - name: Test echarlaix/tiny-random-latent-consistency with GenAI and LoRA
         run: |
           huggingface-cli download katuni4ka/tiny-random-latent-consistency-lora --local-dir ./lora
diff --git a/tools/llm_bench/benchmark.py b/tools/llm_bench/benchmark.py
@@ -154,6 +154,10 @@ def get_argprser():
     parser.add_argument("--num_steps", type=int, required=False, help="Number of inference steps for image generation")
     parser.add_argument("--height", type=int, required=False, help="Generated image height. Applicable only for Image Generation.")
     parser.add_argument("--width", type=int, required=False, help="Generated image width. Applicable only for Image Generation.")
+    parser.add_argument(
+        "--static_reshape",
+        action="store_true",
+        help="Reshape image generation pipeline to specific width & height at pipline creation time. Applicable for Image Generation.")
     parser.add_argument('-mi', '--mask_image', default=None,
                         help='Mask image for Inpainting pipelines. Can be directory or path to single image. Applicable for Image Generation.')
     parser.add_argument('-t', '--task', default=None,
diff --git a/tools/llm_bench/llm_bench_utils/model_utils.py b/tools/llm_bench/llm_bench_utils/model_utils.py
@@ -122,6 +122,7 @@ def analyze_args(args):
     model_args['torch_compile_input_module'] = args.torch_compile_input_module
     model_args['media'] = args.media
     model_args["disable_prompt_permutation"] = args.disable_prompt_permutation
+    model_args["static_reshape"] = args.static_reshape
     model_args['mask_image'] = args.mask_image
     model_args['task'] = args.task
     model_args['strength'] = args.strength
diff --git a/tools/llm_bench/llm_bench_utils/ov_utils.py b/tools/llm_bench/llm_bench_utils/ov_utils.py
@@ -282,7 +282,16 @@ def create_image_gen_model(model_path, device, **kwargs):
 
         log.info("Selected Optimum Intel for benchmarking")
         start = time.perf_counter()
-        ov_model = model_class.from_pretrained(model_path, device=device, ov_config=ov_config)
+        if kwargs.get("static_reshape", False):
+            ov_model = model_class.from_pretrained(model_path, device=device, ov_config=ov_config, compile=False)
+            num_images_per_prompt = kwargs.get("batch_size", 1)
+            height = kwargs.get("height", 512)
+            width = kwargs.get("width", 512)
+            log.info(f"Image Pipeline reshape(batch_size=1, height={height}, width={width}, num_images_per_prompt={num_images_per_prompt})")
+            ov_model.reshape(batch_size=1, height=height, width=width, num_images_per_prompt=num_images_per_prompt)
+            ov_model.compile()
+        else:
+            ov_model = model_class.from_pretrained(model_path, device=device, ov_config=ov_config)
         end = time.perf_counter()
     from_pretrained_time = end - start
     log.info(f'From pretrained time: {from_pretrained_time:.2f}s')
@@ -402,6 +411,11 @@ def raw_metrics(self):
     scheduler_type = model_index_data.get("scheduler", ["", ""])[1]
     if (scheduler_type not in ["LCMScheduler", "DDIMScheduler", "PNDMScheduler", "EulerDiscreteScheduler",
                                "FlowMatchEulerDiscreteScheduler", "EulerAncestralDiscreteScheduler"]):
+        # It's possible we could support --static_reshape here, but initially it seems too complicated to be worth it..
+        # (as we'd need to refactor each get_*_model calls below to perform explicit reshape + compile)
+        if kwargs.get("static_reshape", False):
+            raise RuntimeError(f'Type of scheduler {scheduler_type} is unsupported. Right now this is unsupported if --static_reshape is also specified. ')
+
         scheduler = openvino_genai.Scheduler.from_config(model_path / "scheduler/scheduler_config.json", openvino_genai.Scheduler.Type.DDIM)
         log.warning(f'Type of scheduler {scheduler_type} is unsupported. Please, be aware that it will be replaced to DDIMScheduler')
 
@@ -427,7 +441,17 @@ def raw_metrics(self):
         else:
             raise RuntimeError(f'==Failure ==: model by path:{model_path} has unsupported _class_name {model_class_name}')
     else:
-        image_gen_pipe = image_gen_pipeline_class(model_path, device.upper(), **ov_config)
+        if kwargs.get("static_reshape", False):
+            image_gen_pipe = image_gen_pipeline_class(model_path)
+            guidance_scale = kwargs.get("guidance_scale", image_gen_pipe.get_generation_config().guidance_scale)
+            num_images_per_prompt = kwargs.get("batch_size", 1)
+            height = kwargs.get("height", 512)
+            width = kwargs.get("width", 512)
+            log.info(f"Image Pipeline reshape(num_images_per_prompt={num_images_per_prompt}, height={height}, width={width}, guidance_scale={guidance_scale})")
+            image_gen_pipe.reshape(num_images_per_prompt=num_images_per_prompt, height=height, width=width, guidance_scale=guidance_scale)
+            image_gen_pipe.compile(device.upper(), **ov_config)
+        else:
+            image_gen_pipe = image_gen_pipeline_class(model_path, device.upper(), **ov_config)
 
     end = time.perf_counter()
     log.info(f'Pipeline initialization time: {end - start:.2f}s')
diff --git a/tools/llm_bench/task/image_generation.py b/tools/llm_bench/task/image_generation.py
@@ -105,7 +105,7 @@ def run_image_generation(image_param, num, image_id, pipe, args, iter_data_list,
         for bs_idx, in_text in enumerate(input_text_list):
             llm_bench_utils.output_file.output_image_input_text(in_text, args, image_id, bs_idx, proc_id)
     start = time.perf_counter()
-    res = pipe(input_text_list, **input_args, num_images_per_prompt=2).images
+    res = pipe(input_text_list, **input_args, num_images_per_prompt=args['batch_size']).images
     end = time.perf_counter()
     if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
         mem_consumption.end_collect_momory_consumption()
@@ -152,6 +152,12 @@ def run_image_generation_genai(image_param, num, image_id, pipe, args, iter_data
         out_str += f", guidance_scale={input_args['guidance_scale']}"
     log.info(f"[{'warm-up' if num == 0 else num}][P{image_id}] {out_str}")
 
+    if args.get("static_reshape", False) and 'guidance_scale' in input_args:
+        reshaped_gs = pipe.get_generation_config().guidance_scale
+        new_gs = input_args['guidance_scale']
+        if new_gs != reshaped_gs:
+            log.warning(f"image generation pipeline was reshaped with guidance_scale={reshaped_gs}, but is being passed into generate() as {new_gs}")
+
     result_md5_list = []
     max_rss_mem_consumption = ''
     max_uss_mem_consumption = ''
@@ -212,14 +218,8 @@ def run_image_generation_genai(image_param, num, image_id, pipe, args, iter_data
 
 
 def run_image_generation_benchmark(model_path, framework, device, args, num_iters, mem_consumption):
-    pipe, pretrain_time, use_genai, callback = FW_UTILS[framework].create_image_gen_model(model_path, device, **args)
-    iter_data_list = []
-    input_image_list = get_image_prompt(args)
-    if framework == "ov" and not use_genai:
-        stable_diffusion_hook.new_text_encoder(pipe)
-        stable_diffusion_hook.new_unet(pipe)
-        stable_diffusion_hook.new_vae_decoder(pipe)
 
+    input_image_list = get_image_prompt(args)
     if args['prompt_index'] is None:
         prompt_idx_list = [image_id for image_id, input_text in enumerate(input_image_list)]
         image_list = input_image_list
@@ -232,6 +232,25 @@ def run_image_generation_benchmark(model_path, framework, device, args, num_iter
                 prompt_idx_list.append(i)
     if len(image_list) == 0:
         raise RuntimeError('==Failure prompts is empty ==')
+
+    # If --static_reshape is specified, we need to get width, height, and guidance scale to drop into args
+    # as genai's create_image_gen_model implementation will need those to reshape the pipeline before compile().
+    if args.get("static_reshape", False):
+        static_input_args = collects_input_args(image_list[0], args['model_name'], args["num_steps"],
+                                                args.get("height"), args.get("width"), image_as_ov_tensor=False)
+        args["height"] = static_input_args["height"]
+        args["width"] = static_input_args["width"]
+        if "guidance_scale" in static_input_args:
+            args["guidance_scale"] = static_input_args["guidance_scale"]
+
+    pipe, pretrain_time, use_genai, callback = FW_UTILS[framework].create_image_gen_model(model_path, device, **args)
+    iter_data_list = []
+
+    if framework == "ov" and not use_genai:
+        stable_diffusion_hook.new_text_encoder(pipe)
+        stable_diffusion_hook.new_unet(pipe)
+        stable_diffusion_hook.new_vae_decoder(pipe)
+
     log.info(f'Benchmarking iter nums(exclude warm-up): {num_iters}, prompt nums: {len(image_list)}, prompt idx: {prompt_idx_list}')
 
     if use_genai:
@@ -268,7 +287,7 @@ def run_image_generation_benchmark(model_path, framework, device, args, num_iter
 def get_image_prompt(args):
     input_image_list = []
 
-    input_key = 'prompt'
+    input_key = ['prompt']
     if args.get("task") == TASK["inpainting"] or ((args.get("media") or args.get("images")) and args.get("mask_image")):
         input_key = ['media', "mask_image", "prompt"]
     elif args.get("task") == TASK["img2img"] or args.get("media") or args.get("images"):