Skip to content

Commit 1949366

Browse files
RyanMetcalfeInt8, ilya-lavrenov, eaidova
authored
Add --static_reshape option to llm_bench, to force static reshape + compilation at pipeline creation (#1851)
This PR introduces a --static_reshape option to benchmark.py. If specified, it triggers the image generation pipeline to be reshaped before compilation, fixing width, height, etc. This has a couple of advantages: 1. Running SD pipelines on NPU is now possible through benchmark.py (as static reshape is a requirement). 2. Even for other devices, such as GPU, pipeline performance is generally improved when pipelines are reshaped (fixed to particular dimensions) before compilation. --------- Co-authored-by: Ilya Lavrenov <ilya.lavrenov@intel.com> Co-authored-by: Ekaterina Aidova <ekaterina.aidova@intel.com>
1 parent 7000397 commit 1949366

File tree

5 files changed

+63
-11
lines changed

5 files changed

+63
-11
lines changed

.github/workflows/genai-tools.yml

+4
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,10 @@ jobs:
112112
python ./tools/llm_bench/benchmark.py -m ./ov_models/tiny-random-latent-consistency/ -pf ./image_generation.jsonl -d cpu -n 1 --optimum --num_steps 4
113113
- name: Test echarlaix/tiny-random-latent-consistency with GenAI Text to Image
114114
run: python ./tools/llm_bench/benchmark.py -m ./ov_models/tiny-random-latent-consistency/ -pf ./image_generation.jsonl -d cpu -n 1 --num_steps 4
115+
- name: Test echarlaix/tiny-random-latent-consistency with Optimum Intel, static reshape
116+
run: python ./tools/llm_bench/benchmark.py -m ./ov_models/tiny-random-latent-consistency/ -p "an astronaut riding a horse on mars" -d cpu -n 1 --num_steps 4 --static_reshape --optimum
117+
- name: Test echarlaix/tiny-random-latent-consistency with GenAI Text to Image, static reshape
118+
run: python ./tools/llm_bench/benchmark.py -m ./ov_models/tiny-random-latent-consistency/ -p "an astronaut riding a horse on mars" -d cpu -n 1 --num_steps 4 --static_reshape
115119
- name: Test echarlaix/tiny-random-latent-consistency with GenAI and LoRA
116120
run: |
117121
huggingface-cli download katuni4ka/tiny-random-latent-consistency-lora --local-dir ./lora

tools/llm_bench/benchmark.py

+4
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,10 @@ def get_argprser():
154154
parser.add_argument("--num_steps", type=int, required=False, help="Number of inference steps for image generation")
155155
parser.add_argument("--height", type=int, required=False, help="Generated image height. Applicable only for Image Generation.")
156156
parser.add_argument("--width", type=int, required=False, help="Generated image width. Applicable only for Image Generation.")
157+
parser.add_argument(
158+
"--static_reshape",
159+
action="store_true",
160+
help="Reshape image generation pipeline to specific width & height at pipline creation time. Applicable for Image Generation.")
157161
parser.add_argument('-mi', '--mask_image', default=None,
158162
help='Mask image for Inpainting pipelines. Can be directory or path to single image. Applicable for Image Generation.')
159163
parser.add_argument('-t', '--task', default=None,

tools/llm_bench/llm_bench_utils/model_utils.py

+1
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ def analyze_args(args):
122122
model_args['torch_compile_input_module'] = args.torch_compile_input_module
123123
model_args['media'] = args.media
124124
model_args["disable_prompt_permutation"] = args.disable_prompt_permutation
125+
model_args["static_reshape"] = args.static_reshape
125126
model_args['mask_image'] = args.mask_image
126127
model_args['task'] = args.task
127128
model_args['strength'] = args.strength

tools/llm_bench/llm_bench_utils/ov_utils.py

+26-2
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,16 @@ def create_image_gen_model(model_path, device, **kwargs):
282282

283283
log.info("Selected Optimum Intel for benchmarking")
284284
start = time.perf_counter()
285-
ov_model = model_class.from_pretrained(model_path, device=device, ov_config=ov_config)
285+
if kwargs.get("static_reshape", False):
286+
ov_model = model_class.from_pretrained(model_path, device=device, ov_config=ov_config, compile=False)
287+
num_images_per_prompt = kwargs.get("batch_size", 1)
288+
height = kwargs.get("height", 512)
289+
width = kwargs.get("width", 512)
290+
log.info(f"Image Pipeline reshape(batch_size=1, height={height}, width={width}, num_images_per_prompt={num_images_per_prompt})")
291+
ov_model.reshape(batch_size=1, height=height, width=width, num_images_per_prompt=num_images_per_prompt)
292+
ov_model.compile()
293+
else:
294+
ov_model = model_class.from_pretrained(model_path, device=device, ov_config=ov_config)
286295
end = time.perf_counter()
287296
from_pretrained_time = end - start
288297
log.info(f'From pretrained time: {from_pretrained_time:.2f}s')
@@ -402,6 +411,11 @@ def raw_metrics(self):
402411
scheduler_type = model_index_data.get("scheduler", ["", ""])[1]
403412
if (scheduler_type not in ["LCMScheduler", "DDIMScheduler", "PNDMScheduler", "EulerDiscreteScheduler",
404413
"FlowMatchEulerDiscreteScheduler", "EulerAncestralDiscreteScheduler"]):
414+
# It's possible we could support --static_reshape here, but initially it seems too complicated to be worth it
415+
# (as we'd need to refactor each get_*_model call below to perform an explicit reshape + compile).
416+
if kwargs.get("static_reshape", False):
417+
raise RuntimeError(f'Type of scheduler {scheduler_type} is unsupported. Right now this is unsupported if --static_reshape is also specified. ')
418+
405419
scheduler = openvino_genai.Scheduler.from_config(model_path / "scheduler/scheduler_config.json", openvino_genai.Scheduler.Type.DDIM)
406420
log.warning(f'Type of scheduler {scheduler_type} is unsupported. Please, be aware that it will be replaced to DDIMScheduler')
407421

@@ -427,7 +441,17 @@ def raw_metrics(self):
427441
else:
428442
raise RuntimeError(f'==Failure ==: model by path:{model_path} has unsupported _class_name {model_class_name}')
429443
else:
430-
image_gen_pipe = image_gen_pipeline_class(model_path, device.upper(), **ov_config)
444+
if kwargs.get("static_reshape", False):
445+
image_gen_pipe = image_gen_pipeline_class(model_path)
446+
guidance_scale = kwargs.get("guidance_scale", image_gen_pipe.get_generation_config().guidance_scale)
447+
num_images_per_prompt = kwargs.get("batch_size", 1)
448+
height = kwargs.get("height", 512)
449+
width = kwargs.get("width", 512)
450+
log.info(f"Image Pipeline reshape(num_images_per_prompt={num_images_per_prompt}, height={height}, width={width}, guidance_scale={guidance_scale})")
451+
image_gen_pipe.reshape(num_images_per_prompt=num_images_per_prompt, height=height, width=width, guidance_scale=guidance_scale)
452+
image_gen_pipe.compile(device.upper(), **ov_config)
453+
else:
454+
image_gen_pipe = image_gen_pipeline_class(model_path, device.upper(), **ov_config)
431455

432456
end = time.perf_counter()
433457
log.info(f'Pipeline initialization time: {end - start:.2f}s')

tools/llm_bench/task/image_generation.py

+28-9
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ def run_image_generation(image_param, num, image_id, pipe, args, iter_data_list,
105105
for bs_idx, in_text in enumerate(input_text_list):
106106
llm_bench_utils.output_file.output_image_input_text(in_text, args, image_id, bs_idx, proc_id)
107107
start = time.perf_counter()
108-
res = pipe(input_text_list, **input_args, num_images_per_prompt=2).images
108+
res = pipe(input_text_list, **input_args, num_images_per_prompt=args['batch_size']).images
109109
end = time.perf_counter()
110110
if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
111111
mem_consumption.end_collect_momory_consumption()
@@ -152,6 +152,12 @@ def run_image_generation_genai(image_param, num, image_id, pipe, args, iter_data
152152
out_str += f", guidance_scale={input_args['guidance_scale']}"
153153
log.info(f"[{'warm-up' if num == 0 else num}][P{image_id}] {out_str}")
154154

155+
if args.get("static_reshape", False) and 'guidance_scale' in input_args:
156+
reshaped_gs = pipe.get_generation_config().guidance_scale
157+
new_gs = input_args['guidance_scale']
158+
if new_gs != reshaped_gs:
159+
log.warning(f"image generation pipeline was reshaped with guidance_scale={reshaped_gs}, but is being passed into generate() as {new_gs}")
160+
155161
result_md5_list = []
156162
max_rss_mem_consumption = ''
157163
max_uss_mem_consumption = ''
@@ -212,14 +218,8 @@ def run_image_generation_genai(image_param, num, image_id, pipe, args, iter_data
212218

213219

214220
def run_image_generation_benchmark(model_path, framework, device, args, num_iters, mem_consumption):
215-
pipe, pretrain_time, use_genai, callback = FW_UTILS[framework].create_image_gen_model(model_path, device, **args)
216-
iter_data_list = []
217-
input_image_list = get_image_prompt(args)
218-
if framework == "ov" and not use_genai:
219-
stable_diffusion_hook.new_text_encoder(pipe)
220-
stable_diffusion_hook.new_unet(pipe)
221-
stable_diffusion_hook.new_vae_decoder(pipe)
222221

222+
input_image_list = get_image_prompt(args)
223223
if args['prompt_index'] is None:
224224
prompt_idx_list = [image_id for image_id, input_text in enumerate(input_image_list)]
225225
image_list = input_image_list
@@ -232,6 +232,25 @@ def run_image_generation_benchmark(model_path, framework, device, args, num_iter
232232
prompt_idx_list.append(i)
233233
if len(image_list) == 0:
234234
raise RuntimeError('==Failure prompts is empty ==')
235+
236+
# If --static_reshape is specified, we need to get width, height, and guidance scale to drop into args
237+
# as genai's create_image_gen_model implementation will need those to reshape the pipeline before compile().
238+
if args.get("static_reshape", False):
239+
static_input_args = collects_input_args(image_list[0], args['model_name'], args["num_steps"],
240+
args.get("height"), args.get("width"), image_as_ov_tensor=False)
241+
args["height"] = static_input_args["height"]
242+
args["width"] = static_input_args["width"]
243+
if "guidance_scale" in static_input_args:
244+
args["guidance_scale"] = static_input_args["guidance_scale"]
245+
246+
pipe, pretrain_time, use_genai, callback = FW_UTILS[framework].create_image_gen_model(model_path, device, **args)
247+
iter_data_list = []
248+
249+
if framework == "ov" and not use_genai:
250+
stable_diffusion_hook.new_text_encoder(pipe)
251+
stable_diffusion_hook.new_unet(pipe)
252+
stable_diffusion_hook.new_vae_decoder(pipe)
253+
235254
log.info(f'Benchmarking iter nums(exclude warm-up): {num_iters}, prompt nums: {len(image_list)}, prompt idx: {prompt_idx_list}')
236255

237256
if use_genai:
@@ -268,7 +287,7 @@ def run_image_generation_benchmark(model_path, framework, device, args, num_iter
268287
def get_image_prompt(args):
269288
input_image_list = []
270289

271-
input_key = 'prompt'
290+
input_key = ['prompt']
272291
if args.get("task") == TASK["inpainting"] or ((args.get("media") or args.get("images")) and args.get("mask_image")):
273292
input_key = ['media', "mask_image", "prompt"]
274293
elif args.get("task") == TASK["img2img"] or args.get("media") or args.get("images"):

0 commit comments

Comments
 (0)