@@ -220,7 +220,8 @@ class StableDiffusionPipeline : public DiffusionPipeline {
220
220
generation_config.generator = std::make_shared<CppStdGenerator>(seed);
221
221
}
222
222
223
- ov::Tensor encoder_hidden_states = m_clip_text_encoder->infer (positive_prompt, generation_config.negative_prompt ,
223
+ std::string negative_prompt = generation_config.negative_prompt != std::nullopt ? *generation_config.negative_prompt : std::string{};
224
+ ov::Tensor encoder_hidden_states = m_clip_text_encoder->infer (positive_prompt, negative_prompt,
224
225
batch_size_multiplier > 1 );
225
226
226
227
// replicate encoder hidden state to UNet model
@@ -261,13 +262,10 @@ class StableDiffusionPipeline : public DiffusionPipeline {
261
262
262
263
ov::Tensor denoised, noisy_residual_tensor (ov::element::f32, {});
263
264
for (size_t inference_step = 0 ; inference_step < generation_config.num_inference_steps ; inference_step++) {
265
+ batch_copy (latent, latent_cfg, 0 , 0 , generation_config.num_images_per_prompt );
264
266
// concat the same latent twice along a batch dimension in case of CFG
265
267
if (batch_size_multiplier > 1 ) {
266
- batch_copy (latent, latent_cfg, 0 , 0 , generation_config.num_images_per_prompt );
267
268
batch_copy (latent, latent_cfg, 0 , generation_config.num_images_per_prompt , generation_config.num_images_per_prompt );
268
- } else {
269
- // just assign to save memory copy
270
- latent_cfg = latent;
271
269
}
272
270
273
271
m_scheduler->scale_model_input (latent_cfg, inference_step);
@@ -277,9 +275,10 @@ class StableDiffusionPipeline : public DiffusionPipeline {
277
275
278
276
ov::Shape noise_pred_shape = noise_pred_tensor.get_shape ();
279
277
noise_pred_shape[0 ] /= batch_size_multiplier;
280
- noisy_residual_tensor.set_shape (noise_pred_shape);
281
278
282
279
if (batch_size_multiplier > 1 ) {
280
+ noisy_residual_tensor.set_shape (noise_pred_shape);
281
+
283
282
// perform guidance
284
283
float * noisy_residual = noisy_residual_tensor.data <float >();
285
284
const float * noise_pred_uncond = noise_pred_tensor.data <const float >();
@@ -349,9 +348,9 @@ class StableDiffusionPipeline : public DiffusionPipeline {
349
348
OPENVINO_ASSERT (generation_config.prompt_2 == std::nullopt, " Prompt 2 is not used by " , pipeline_name);
350
349
OPENVINO_ASSERT (generation_config.prompt_3 == std::nullopt, " Prompt 3 is not used by " , pipeline_name);
351
350
if (is_lcm) {
352
- OPENVINO_ASSERT (generation_config.negative_prompt . empty () , " Negative prompt is not used by " , pipeline_name);
351
+ OPENVINO_ASSERT (generation_config.negative_prompt == std::nullopt , " Negative prompt is not used by " , pipeline_name);
353
352
} else if (!is_classifier_free_guidance) {
354
- OPENVINO_ASSERT (generation_config.negative_prompt . empty () , " Negative prompt is not used when guidance scale <= 1.0" );
353
+ OPENVINO_ASSERT (generation_config.negative_prompt == std::nullopt , " Negative prompt is not used when guidance scale <= 1.0" );
355
354
}
356
355
OPENVINO_ASSERT (generation_config.negative_prompt_2 == std::nullopt, " Negative prompt 2 is not used by " , pipeline_name);
357
356
OPENVINO_ASSERT (generation_config.negative_prompt_3 == std::nullopt, " Negative prompt 3 is not used by " , pipeline_name);
0 commit comments