Skip to content

Commit 18d1bf8

Browse files
Image generation: rely on activation_scale_factor for GPU (#1548)
Waiting for the corresponding implementations on the optimum-intel and GPU plugin sides: huggingface/optimum-intel#1110 and openvinotoolkit/openvino#28449.
1 parent 36b88ad commit 18d1bf8

File tree

3 files changed

+33
-16
lines changed

3 files changed

+33
-16
lines changed

src/cpp/src/image_generation/models/autoencoder_kl.cpp

+27-2
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
namespace ov {
2323
namespace genai {
2424

25+
namespace {
26+
2527
class DiagonalGaussianDistribution {
2628
public:
2729
explicit DiagonalGaussianDistribution(ov::Tensor parameters)
@@ -64,6 +66,29 @@ class DiagonalGaussianDistribution {
6466
ov::Tensor m_mean, m_std;
6567
};
6668

69+
// for BW compatibility with 2024.6.0
70+
ov::AnyMap handle_scale_factor(std::shared_ptr<ov::Model> model, const std::string& device, ov::AnyMap properties) {
71+
std::cout << ov::Any(properties).as<std::string>() << std::endl;
72+
73+
auto it = properties.find("WA_INFERENCE_PRECISION_HINT");
74+
ov::element::Type wa_inference_precision = it != properties.end() ? it->second.as<ov::element::Type>() : ov::element::undefined;
75+
if (it != properties.end()) {
76+
properties.erase(it);
77+
}
78+
79+
const std::vector<std::string> activation_scale_factor_path = { "runtime_options", ov::hint::activations_scale_factor.name() };
80+
const bool activation_scale_factor_defined = model->has_rt_info(activation_scale_factor_path);
81+
82+
// convert WA inference precision to actual inference precision if activation_scale_factor is not defined in IR
83+
if (device.find("GPU") != std::string::npos && !activation_scale_factor_defined && wa_inference_precision != ov::element::undefined) {
84+
properties[ov::hint::inference_precision.name()] = wa_inference_precision;
85+
}
86+
87+
return properties;
88+
}
89+
90+
} // namespace
91+
6792
size_t get_vae_scale_factor(const std::filesystem::path& vae_config_path) {
6893
std::ifstream file(vae_config_path);
6994
OPENVINO_ASSERT(file.is_open(), "Failed to open ", vae_config_path);
@@ -207,14 +232,14 @@ AutoencoderKL& AutoencoderKL::compile(const std::string& device, const ov::AnyMa
207232
ov::Core core = utils::singleton_core();
208233

209234
if (m_encoder_model) {
210-
ov::CompiledModel encoder_compiled_model = core.compile_model(m_encoder_model, device, properties);
235+
ov::CompiledModel encoder_compiled_model = core.compile_model(m_encoder_model, device, handle_scale_factor(m_encoder_model, device, properties));
211236
ov::genai::utils::print_compiled_model_properties(encoder_compiled_model, "Auto encoder KL encoder model");
212237
m_encoder_request = encoder_compiled_model.create_infer_request();
213238
// release the original model
214239
m_encoder_model.reset();
215240
}
216241

217-
ov::CompiledModel decoder_compiled_model = core.compile_model(m_decoder_model, device, properties);
242+
ov::CompiledModel decoder_compiled_model = core.compile_model(m_decoder_model, device, handle_scale_factor(m_decoder_model, device, properties));
218243
ov::genai::utils::print_compiled_model_properties(decoder_compiled_model, "Auto encoder KL decoder model");
219244
m_decoder_request = decoder_compiled_model.create_infer_request();
220245
// release the original model

src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp

+5-13
Original file line numberDiff line numberDiff line change
@@ -137,25 +137,17 @@ class StableDiffusion3Pipeline : public DiffusionPipeline {
137137

138138
set_scheduler(Scheduler::from_config(root_dir / "scheduler/scheduler_config.json"));
139139

140-
// Temporary fix for GPU
141-
ov::AnyMap updated_properties = properties;
142-
if (device.find("GPU") != std::string::npos &&
143-
updated_properties.find("INFERENCE_PRECISION_HINT") == updated_properties.end()) {
144-
updated_properties["INFERENCE_PRECISION_HINT"] = ov::element::f32;
145-
}
146-
147140
const std::string text_encoder = data["text_encoder"][1].get<std::string>();
148141
if (text_encoder == "CLIPTextModelWithProjection") {
149142
m_clip_text_encoder_1 =
150-
std::make_shared<CLIPTextModelWithProjection>(root_dir / "text_encoder", device, updated_properties);
143+
std::make_shared<CLIPTextModelWithProjection>(root_dir / "text_encoder", device, properties);
151144
} else {
152145
OPENVINO_THROW("Unsupported '", text_encoder, "' text encoder type");
153146
}
154147

155148
const std::string text_encoder_2 = data["text_encoder_2"][1].get<std::string>();
156149
if (text_encoder_2 == "CLIPTextModelWithProjection") {
157-
m_clip_text_encoder_2 =
158-
std::make_shared<CLIPTextModelWithProjection>(root_dir / "text_encoder_2", device, updated_properties);
150+
m_clip_text_encoder_2 = std::make_shared<CLIPTextModelWithProjection>(root_dir / "text_encoder_2", device, properties);
159151
} else {
160152
OPENVINO_THROW("Unsupported '", text_encoder_2, "' text encoder type");
161153
}
@@ -164,7 +156,7 @@ class StableDiffusion3Pipeline : public DiffusionPipeline {
164156
if (!text_encoder_3_json.is_null()) {
165157
const std::string text_encoder_3 = text_encoder_3_json.get<std::string>();
166158
if (text_encoder_3 == "T5EncoderModel") {
167-
m_t5_text_encoder = std::make_shared<T5EncoderModel>(root_dir / "text_encoder_3", device, updated_properties);
159+
m_t5_text_encoder = std::make_shared<T5EncoderModel>(root_dir / "text_encoder_3", device, properties);
168160
} else {
169161
OPENVINO_THROW("Unsupported '", text_encoder_3, "' text encoder type");
170162
}
@@ -180,9 +172,9 @@ class StableDiffusion3Pipeline : public DiffusionPipeline {
180172
const std::string vae = data["vae"][1].get<std::string>();
181173
if (vae == "AutoencoderKL") {
182174
if (m_pipeline_type == PipelineType::TEXT_2_IMAGE)
183-
m_vae = std::make_shared<AutoencoderKL>(root_dir / "vae_decoder", device, updated_properties);
175+
m_vae = std::make_shared<AutoencoderKL>(root_dir / "vae_decoder", device, properties);
184176
else if (m_pipeline_type == PipelineType::IMAGE_2_IMAGE || m_pipeline_type == PipelineType::INPAINTING) {
185-
m_vae = std::make_shared<AutoencoderKL>(root_dir / "vae_encoder", root_dir / "vae_decoder", device, updated_properties);
177+
m_vae = std::make_shared<AutoencoderKL>(root_dir / "vae_encoder", root_dir / "vae_decoder", device, properties);
186178
} else {
187179
OPENVINO_ASSERT("Unsupported pipeline type");
188180
}

src/cpp/src/image_generation/stable_diffusion_xl_pipeline.hpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ class StableDiffusionXLPipeline : public StableDiffusionPipeline {
7777
ov::AnyMap updated_properties = properties;
7878
if (device.find("GPU") != std::string::npos &&
7979
updated_properties.find("INFERENCE_PRECISION_HINT") == updated_properties.end()) {
80-
updated_properties["INFERENCE_PRECISION_HINT"] = ov::element::f32;
80+
updated_properties["WA_INFERENCE_PRECISION_HINT"] = ov::element::f32;
8181
}
8282

8383
const std::string vae = data["vae"][1].get<std::string>();

0 commit comments

Comments
 (0)