Skip to content

Commit 0ee968c

Browse files
Preparation for image2image pipeline (openvinotoolkit#1098)
- Extracted generation config from sub-class of `Text2ImagePipeline` to be a separate class `ImageGenerationConfig` - The same for scheduler - Added an ability to load encoder part inside `AutoencoderKL` - Renamed public folder from `text2image` to more generic `image_generation`
1 parent 5c31578 commit 0ee968c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+1297
-841
lines changed

.github/labeler.yml

+4-4
Original file line numberDiff line numberDiff line change
@@ -117,10 +117,10 @@
117117
- 'pyproject.toml'
118118

119119
'category: text to image':
120-
- 'src/include/openvino/genai/text2image/**/*'
121-
- 'src/cpp/src/text2image/**/*'
122-
- 'src/python/py_text2image_models.cpp'
123-
- 'src/python/py_text2image_pipeline.cpp'
120+
- 'src/include/openvino/genai/image_generation/**/*'
121+
- 'src/cpp/src/image_generation/**/*'
122+
- 'src/python/py_image_generation_models.cpp'
123+
- 'src/python/py_image_generation_pipelines.cpp'
124124

125125
'category: GenAI C++ API':
126126
- 'src/cpp/include/openvino/genai/**/*'

.github/workflows/stable_diffusion_1_5_cpp.yml

+4-4
Original file line numberDiff line numberDiff line change
@@ -77,15 +77,15 @@ jobs:
7777
run: |
7878
source ${{ env.OV_INSTALL_DIR }}/setupvars.sh
7979
${{ env.build_dir }}/samples/cpp/text2image/lora_stable_diffusion ./models/dreamlike-art-dreamlike-anime-1.0/FP16 "curly-haired unicorn in the forest, anime, line" ./models/soulcard.safetensors 0.7
80-
80+
8181
- name: Run Python main app
8282
run: |
8383
source openvino_sd_cpp/bin/activate
8484
source ./ov/setupvars.sh
8585
python ./samples/python/text2image/main.py ./models/dreamlike-art-dreamlike-anime-1.0/FP16 "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting"
8686
env:
8787
PYTHONPATH: ${{ env.build_dir }}
88-
88+
8989
- name: Run Python LoRA app
9090
run: |
9191
source openvino_sd_cpp/bin/activate
@@ -120,7 +120,7 @@ jobs:
120120
with:
121121
python-version: ${{ env.PYTHON_VERSION }}
122122
cache: 'pip'
123-
123+
124124
- name: Build app
125125
run: |
126126
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
@@ -148,7 +148,7 @@ jobs:
148148
"${{ env.build_dir }}/samples/cpp/text2image/Release/stable_diffusion.exe ./models/dreamlike-art-dreamlike-anime-1.0/FP16 'cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting'"
149149
env:
150150
PATH: ${{ env.build_dir }}\openvino_genai
151-
151+
152152
- name: Run LoRA app
153153
run: |
154154
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ def main():
192192
Code below requires installation of C++ compatible package (see [here](https://docs.openvino.ai/2024/get-started/install-openvino/install-openvino-genai.html#archive-installation) for additional setup details, or this blog for full instructions: [How to Build OpenVINO™ GenAI APP in C++](https://medium.com/openvino-toolkit/how-to-build-openvino-genai-app-in-c-32dcbe42fa67))
193193

194194
```cpp
195-
#include "openvino/genai/text2image/pipeline.hpp"
195+
#include "openvino/genai/image_generation/text2image_pipeline.hpp"
196196
#include "imwrite.hpp"
197197
int main(int argc, char* argv[]) {
198198

samples/cpp/text2image/lora.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// Copyright (C) 2023-2024 Intel Corporation
22
// SPDX-License-Identifier: Apache-2.0
33

4-
#include "openvino/genai/text2image/pipeline.hpp"
4+
#include "openvino/genai/image_generation/text2image_pipeline.hpp"
55

66
#include "imwrite.hpp"
77

samples/cpp/text2image/main.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// Copyright (C) 2023-2024 Intel Corporation
22
// SPDX-License-Identifier: Apache-2.0
33

4-
#include "openvino/genai/text2image/pipeline.hpp"
4+
#include "openvino/genai/image_generation/text2image_pipeline.hpp"
55

66
#include "imwrite.hpp"
77

samples/cpp/visual_language_chat/load_image.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ ov::Tensor utils::load_image(const std::filesystem::path& image_path) {
4545
if (channels * height * width != bytes) {
4646
throw std::runtime_error{"Unexpected number of bytes was requested to deallocate."};
4747
}
48-
std::free(image);
48+
stbi_image_free(image);
4949
image = nullptr;
5050
}
5151
bool is_equal(const SharedImageAllocator& other) const noexcept {return this == &other;}

src/cpp/include/openvino/genai/text2image/autoencoder_kl.hpp src/cpp/include/openvino/genai/image_generation/autoencoder_kl.hpp

+31-10
Original file line numberDiff line numberDiff line change
@@ -30,42 +30,63 @@ class OPENVINO_GENAI_EXPORTS AutoencoderKL {
3030
explicit Config(const std::filesystem::path& config_path);
3131
};
3232

33-
explicit AutoencoderKL(const std::filesystem::path& root_dir);
33+
explicit AutoencoderKL(const std::filesystem::path& vae_decoder_path);
3434

35-
AutoencoderKL(const std::filesystem::path& root_dir,
35+
AutoencoderKL(const std::filesystem::path& vae_encoder_path,
36+
const std::filesystem::path& vae_decoder_path);
37+
38+
AutoencoderKL(const std::filesystem::path& vae_decoder_path,
39+
const std::string& device,
40+
const ov::AnyMap& properties = {});
41+
42+
AutoencoderKL(const std::filesystem::path& vae_encoder_path,
43+
const std::filesystem::path& vae_decoder_path,
3644
const std::string& device,
3745
const ov::AnyMap& properties = {});
3846

3947
template <typename... Properties,
4048
typename std::enable_if<ov::util::StringAny<Properties...>::value, bool>::type = true>
41-
AutoencoderKL(const std::filesystem::path& root_dir,
49+
AutoencoderKL(const std::filesystem::path& vae_decoder_path,
50+
const std::string& device,
51+
Properties&&... properties)
52+
: AutoencoderKL(vae_decoder_path, device, ov::AnyMap{std::forward<Properties>(properties)...}) { }
53+
54+
template <typename... Properties,
55+
typename std::enable_if<ov::util::StringAny<Properties...>::value, bool>::type = true>
56+
AutoencoderKL(const std::filesystem::path& vae_encoder_path,
57+
const std::filesystem::path& vae_decoder_path,
4258
const std::string& device,
4359
Properties&&... properties)
44-
: AutoencoderKL(root_dir, device, ov::AnyMap{std::forward<Properties>(properties)...}) { }
60+
: AutoencoderKL(vae_encoder_path, vae_decoder_path, device, ov::AnyMap{std::forward<Properties>(properties)...}) { }
4561

4662
AutoencoderKL(const AutoencoderKL&);
4763

4864
AutoencoderKL& reshape(int batch_size, int height, int width);
4965

5066
AutoencoderKL& compile(const std::string& device, const ov::AnyMap& properties = {});
5167

52-
const Config& get_config() const;
53-
5468
template <typename... Properties>
5569
ov::util::EnableIfAllStringAny<AutoencoderKL&, Properties...> compile(
5670
const std::string& device,
5771
Properties&&... properties) {
5872
return compile(device, ov::AnyMap{std::forward<Properties>(properties)...});
5973
}
6074

61-
ov::Tensor infer(ov::Tensor latent);
75+
ov::Tensor decode(ov::Tensor latent);
76+
77+
ov::Tensor encode(ov::Tensor image);
78+
79+
const Config& get_config() const;
80+
81+
size_t get_vae_scale_factor() const;
6282

6383
private:
64-
void merge_vae_image_processor() const;
84+
void merge_vae_image_pre_processing() const;
85+
void merge_vae_image_post_processing() const;
6586

6687
Config m_config;
67-
ov::InferRequest m_request;
68-
std::shared_ptr<ov::Model> m_model;
88+
ov::InferRequest m_encoder_request, m_decoder_request;
89+
std::shared_ptr<ov::Model> m_encoder_model = nullptr, m_decoder_model = nullptr;
6990
};
7091

7192
} // namespace genai
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
// Copyright (C) 2023-2024 Intel Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
#pragma once
5+
6+
#include <string>
7+
#include <random>
8+
#include <optional>
9+
10+
#include "openvino/runtime/tensor.hpp"
11+
#include "openvino/runtime/properties.hpp"
12+
13+
#include "openvino/genai/lora_adapter.hpp"
14+
#include "openvino/genai/visibility.hpp"
15+
16+
namespace ov {
17+
namespace genai {
18+
19+
//
20+
// Random generators
21+
//
22+
23+
class OPENVINO_GENAI_EXPORTS Generator {
24+
public:
25+
virtual float next() = 0;
26+
virtual ~Generator();
27+
};
28+
29+
class OPENVINO_GENAI_EXPORTS CppStdGenerator : public Generator {
30+
public:
31+
// creates 'std::mt19937' with initial 'seed'; numbers are presumably drawn from the 'std::normal_distribution<float>' member, i.e. not limited to the range [0.0f, 1.0f]
32+
explicit CppStdGenerator(uint32_t seed);
33+
34+
virtual float next() override;
35+
private:
36+
std::mt19937 gen;
37+
std::normal_distribution<float> normal;
38+
};
39+
40+
struct OPENVINO_GENAI_EXPORTS ImageGenerationConfig {
41+
// LCM: prompt only w/o negative prompt
42+
// SD XL: prompt2 and negative_prompt2
43+
// FLUX: prompt2 (prompt if prompt2 is not defined explicitly)
44+
// SD 3: prompt2, prompt3 (with fallback to prompt) and negative_prompt2, negative_prompt3
45+
std::string negative_prompt;
46+
std::optional<std::string> prompt_2 = std::nullopt, prompt_3 = std::nullopt;
47+
std::optional<std::string> negative_prompt_2 = std::nullopt, negative_prompt_3 = std::nullopt;
48+
49+
size_t num_images_per_prompt = 1;
50+
51+
// random generator to have deterministic results
52+
std::shared_ptr<Generator> random_generator = std::make_shared<CppStdGenerator>(42);
53+
54+
// the following values depend on HF diffusers class used to perform generation
55+
float guidance_scale = 7.5f;
56+
int64_t height = -1;
57+
int64_t width = -1;
58+
size_t num_inference_steps = 50;
59+
60+
// used by some image to image pipelines to balance between noise and initial image
61+
// higher 'strength' value means more noise is added to initial latent image
62+
// for text to image pipeline it must be set to 1.0f
63+
float strength = 1.0f;
64+
65+
std::optional<AdapterConfig> adapters;
66+
67+
void update_generation_config(const ov::AnyMap& config_map);
68+
69+
// checks whether the config is valid
70+
void validate() const;
71+
72+
template <typename... Properties>
73+
ov::util::EnableIfAllStringAny<void, Properties...> update_generation_config(Properties&&... properties) {
74+
return update_generation_config(ov::AnyMap{std::forward<Properties>(properties)...});
75+
}
76+
};
77+
78+
//
79+
// Generation config properties
80+
//
81+
82+
static constexpr ov::Property<std::string> prompt_2{"prompt_2"};
83+
static constexpr ov::Property<std::string> prompt_3{"prompt_3"};
84+
85+
static constexpr ov::Property<std::string> negative_prompt{"negative_prompt"};
86+
static constexpr ov::Property<std::string> negative_prompt_2{"negative_prompt_2"};
87+
static constexpr ov::Property<std::string> negative_prompt_3{"negative_prompt_3"};
88+
89+
static constexpr ov::Property<size_t> num_images_per_prompt{"num_images_per_prompt"};
90+
static constexpr ov::Property<float> guidance_scale{"guidance_scale"};
91+
static constexpr ov::Property<int64_t> height{"height"};
92+
static constexpr ov::Property<int64_t> width{"width"};
93+
static constexpr ov::Property<size_t> num_inference_steps{"num_inference_steps"};
94+
95+
static constexpr ov::Property<float> strength{"strength"};
96+
97+
static constexpr ov::Property<std::shared_ptr<Generator>> random_generator{"random_generator"};
98+
99+
OPENVINO_GENAI_EXPORTS
100+
std::pair<std::string, ov::Any> generation_config(const ImageGenerationConfig& generation_config);
101+
102+
} // namespace genai
103+
} // namespace ov
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// Copyright (C) 2023-2024 Intel Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
#pragma once
5+
6+
#include <filesystem>
7+
8+
#include "openvino/genai/visibility.hpp"
9+
10+
namespace ov {
11+
namespace genai {
12+
13+
class OPENVINO_GENAI_EXPORTS Scheduler {
14+
public:
15+
enum Type {
16+
AUTO,
17+
LCM,
18+
LMS_DISCRETE,
19+
DDIM,
20+
EULER_DISCRETE,
21+
FLOW_MATCH_EULER_DISCRETE
22+
};
23+
24+
static std::shared_ptr<Scheduler> from_config(const std::filesystem::path& scheduler_config_path,
25+
Type scheduler_type = AUTO);
26+
27+
virtual ~Scheduler();
28+
};
29+
30+
} // namespace genai
31+
} // namespace ov

src/cpp/include/openvino/genai/text2image/sd3_transformer_2d_model.hpp src/cpp/include/openvino/genai/image_generation/sd3_transformer_2d_model.hpp

+1-10
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
#include <filesystem>
77
#include <string>
8+
#include <vector>
89

910
#include "openvino/core/any.hpp"
1011
#include "openvino/runtime/infer_request.hpp"
@@ -22,15 +23,7 @@ class OPENVINO_GENAI_EXPORTS SD3Transformer2DModel {
2223
size_t sample_size = 128;
2324
size_t patch_size = 2;
2425
size_t in_channels = 16;
25-
size_t num_layers = 18;
26-
size_t attention_head_dim = 64;
27-
size_t num_attention_heads = 18;
2826
size_t joint_attention_dim = 4096;
29-
size_t caption_projection_dim = 1152;
30-
size_t pooled_projection_dim = 2048;
31-
size_t out_channels = 16;
32-
size_t pos_embed_max_size = 96;
33-
std::vector<size_t> block_out_channels = { 128, 256, 512, 512 };
3427

3528
explicit Config(const std::filesystem::path& config_path);
3629
};
@@ -64,8 +57,6 @@ class OPENVINO_GENAI_EXPORTS SD3Transformer2DModel {
6457

6558
ov::Tensor infer(const ov::Tensor latent, const ov::Tensor timestep);
6659

67-
size_t get_vae_scale_factor() const;
68-
6960
private:
7061
Config m_config;
7162
ov::InferRequest m_request;

0 commit comments

Comments
 (0)