Image generation: rely on activation_scale_factor for GPU (#1548)

ilya-lavrenov · web-flow · commit 18d1bf8a8048 · 2025-01-16T16:24:58.000+04:00
Waiting for implementation on the optimum-intel and GPU sides: huggingface/optimum-intel#1110, openvinotoolkit/openvino#28449
diff --git a/src/cpp/src/image_generation/models/autoencoder_kl.cpp b/src/cpp/src/image_generation/models/autoencoder_kl.cpp
@@ -22,6 +22,8 @@
 namespace ov {
 namespace genai {
 
+namespace {
+
 class DiagonalGaussianDistribution {
 public:
     explicit DiagonalGaussianDistribution(ov::Tensor parameters)
@@ -64,6 +66,29 @@ class DiagonalGaussianDistribution {
     ov::Tensor m_mean, m_std;
 };
 
+// For backward compatibility with 2024.6.0: consumes the internal "WA_INFERENCE_PRECISION_HINT" entry of
+// `properties` (it is always erased before returning) and returns the map to pass to compile_model for `model`.
+ov::AnyMap handle_scale_factor(std::shared_ptr<ov::Model> model, const std::string& device, ov::AnyMap properties) {
+    ov::element::Type wa_inference_precision = ov::element::undefined;
+    if (auto it = properties.find("WA_INFERENCE_PRECISION_HINT"); it != properties.end()) {
+        wa_inference_precision = it->second.as<ov::element::Type>();
+        properties.erase(it);
+    }
+
+    const std::vector<std::string> activation_scale_factor_path = { "runtime_options", ov::hint::activations_scale_factor.name() };
+    const bool activation_scale_factor_defined = model->has_rt_info(activation_scale_factor_path);
+
+    // convert WA inference precision to actual inference precision if activation_scale_factor is not defined in IR;
+    // applied only when `device` contains "GPU" and a workaround precision was actually supplied
+    if (device.find("GPU") != std::string::npos && !activation_scale_factor_defined && wa_inference_precision != ov::element::undefined) {
+        properties[ov::hint::inference_precision.name()] = wa_inference_precision;
+    }
+
+    return properties;
+}
+
+} // namespace
+
 size_t get_vae_scale_factor(const std::filesystem::path& vae_config_path) {
     std::ifstream file(vae_config_path);
     OPENVINO_ASSERT(file.is_open(), "Failed to open ", vae_config_path);
@@ -207,14 +232,14 @@ AutoencoderKL& AutoencoderKL::compile(const std::string& device, const ov::AnyMa
     ov::Core core = utils::singleton_core();
 
     if (m_encoder_model) {
-        ov::CompiledModel encoder_compiled_model = core.compile_model(m_encoder_model, device, properties);
+        ov::CompiledModel encoder_compiled_model = core.compile_model(m_encoder_model, device, handle_scale_factor(m_encoder_model, device, properties));
         ov::genai::utils::print_compiled_model_properties(encoder_compiled_model, "Auto encoder KL encoder model");
         m_encoder_request = encoder_compiled_model.create_infer_request();
         // release the original model
         m_encoder_model.reset();
     }
 
-    ov::CompiledModel decoder_compiled_model = core.compile_model(m_decoder_model, device, properties);
+    ov::CompiledModel decoder_compiled_model = core.compile_model(m_decoder_model, device, handle_scale_factor(m_decoder_model, device, properties));
     ov::genai::utils::print_compiled_model_properties(decoder_compiled_model, "Auto encoder KL decoder model");
     m_decoder_request = decoder_compiled_model.create_infer_request();
     // release the original model
diff --git a/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp b/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp
@@ -137,25 +137,17 @@ class StableDiffusion3Pipeline : public DiffusionPipeline {
 
         set_scheduler(Scheduler::from_config(root_dir / "scheduler/scheduler_config.json"));
 
-        // Temporary fix for GPU
-        ov::AnyMap updated_properties = properties;
-        if (device.find("GPU") != std::string::npos &&
-            updated_properties.find("INFERENCE_PRECISION_HINT") == updated_properties.end()) {
-            updated_properties["INFERENCE_PRECISION_HINT"] = ov::element::f32;
-        }
-
         const std::string text_encoder = data["text_encoder"][1].get<std::string>();
         if (text_encoder == "CLIPTextModelWithProjection") {
             m_clip_text_encoder_1 =
-                std::make_shared<CLIPTextModelWithProjection>(root_dir / "text_encoder", device, updated_properties);
+                std::make_shared<CLIPTextModelWithProjection>(root_dir / "text_encoder", device, properties);
         } else {
             OPENVINO_THROW("Unsupported '", text_encoder, "' text encoder type");
         }
 
         const std::string text_encoder_2 = data["text_encoder_2"][1].get<std::string>();
         if (text_encoder_2 == "CLIPTextModelWithProjection") {
-            m_clip_text_encoder_2 =
-                std::make_shared<CLIPTextModelWithProjection>(root_dir / "text_encoder_2", device, updated_properties);
+            m_clip_text_encoder_2 = std::make_shared<CLIPTextModelWithProjection>(root_dir / "text_encoder_2", device, properties);
         } else {
             OPENVINO_THROW("Unsupported '", text_encoder_2, "' text encoder type");
         }
@@ -164,7 +156,7 @@ class StableDiffusion3Pipeline : public DiffusionPipeline {
         if (!text_encoder_3_json.is_null()) {
             const std::string text_encoder_3 = text_encoder_3_json.get<std::string>();
             if (text_encoder_3 == "T5EncoderModel") {
-                m_t5_text_encoder = std::make_shared<T5EncoderModel>(root_dir / "text_encoder_3", device, updated_properties);
+                m_t5_text_encoder = std::make_shared<T5EncoderModel>(root_dir / "text_encoder_3", device, properties);
             } else {
                 OPENVINO_THROW("Unsupported '", text_encoder_3, "' text encoder type");
             }
@@ -180,9 +172,9 @@ class StableDiffusion3Pipeline : public DiffusionPipeline {
         const std::string vae = data["vae"][1].get<std::string>();
         if (vae == "AutoencoderKL") {
             if (m_pipeline_type == PipelineType::TEXT_2_IMAGE)
-                m_vae = std::make_shared<AutoencoderKL>(root_dir / "vae_decoder", device, updated_properties);
+                m_vae = std::make_shared<AutoencoderKL>(root_dir / "vae_decoder", device, properties);
             else if (m_pipeline_type == PipelineType::IMAGE_2_IMAGE || m_pipeline_type == PipelineType::INPAINTING) {
-                m_vae = std::make_shared<AutoencoderKL>(root_dir / "vae_encoder", root_dir / "vae_decoder", device, updated_properties);
+                m_vae = std::make_shared<AutoencoderKL>(root_dir / "vae_encoder", root_dir / "vae_decoder", device, properties);
             } else {
                 OPENVINO_ASSERT("Unsupported pipeline type");
             }
diff --git a/src/cpp/src/image_generation/stable_diffusion_xl_pipeline.hpp b/src/cpp/src/image_generation/stable_diffusion_xl_pipeline.hpp
@@ -77,7 +77,7 @@ class StableDiffusionXLPipeline : public StableDiffusionPipeline {
         ov::AnyMap updated_properties = properties;
         if (device.find("GPU") != std::string::npos &&
             updated_properties.find("INFERENCE_PRECISION_HINT") == updated_properties.end()) {
-            updated_properties["INFERENCE_PRECISION_HINT"] = ov::element::f32;
+            updated_properties["WA_INFERENCE_PRECISION_HINT"] = ov::element::f32;
         }
 
         const std::string vae = data["vae"][1].get<std::string>();

Original file line number	Diff line number	Diff line change
`@@ -77,7 +77,7 @@ class StableDiffusionXLPipeline : public StableDiffusionPipeline {`
`77`	`77`	`ov::AnyMap updated_properties = properties;`
`78`	`78`	`if (device.find("GPU") != std::string::npos &&`
`79`	`79`	`updated_properties.find("INFERENCE_PRECISION_HINT") == updated_properties.end()) {`
`80`		`- updated_properties["INFERENCE_PRECISION_HINT"] = ov::element::f32;`
	`80`	`+ updated_properties["WA_INFERENCE_PRECISION_HINT"] = ov::element::f32;`
`81`	`81`	`}`
`82`	`82`
`83`	`83`	`const std::string vae = data["vae"][1].get<std::string>();`