@@ -18,7 +18,6 @@ int32_t main(int32_t argc, char* argv[]) try {
 
     const int width = 512;
     const int height = 512;
-    const float guidance_scale = 7.5f;
     const int number_of_images_to_generate = 1;
     const int number_of_inference_steps_per_image = 20;
 
@@ -37,73 +36,36 @@ int32_t main(int32_t argc, char* argv[]) try {
     std::string ov_cache_dir = "./cache";
 
     //
-    // Step 1: Prepare each Text2Image subcomponent (scheduler, text encoder, unet, vae) separately.
+    // Step 1: Create the initial Text2ImagePipeline, given the model path
     //
+    ov::genai::Text2ImagePipeline pipe(models_path);
 
-    // Create the scheduler from the details listed in the json.
-    auto scheduler = ov::genai::Scheduler::from_config(root_dir / "scheduler/scheduler_config.json");
-
-    // Note that we could have created the scheduler by specifying specific type (for example EULER_DISCRETE), like
-    // this: auto scheduler = ov::genai::Scheduler::from_config(root_dir / "scheduler/scheduler_config.json",
-    //                                                          ov::genai::Scheduler::Type::EULER_DISCRETE);
-    // This can be useful when a particular type of Scheduler is not yet supported natively by OpenVINO GenAI.
-    // (even though we are actively working to support most commonly used ones)
-
-    // Create unet object
-    auto unet = ov::genai::UNet2DConditionModel(root_dir / "unet");
-
-    // Set batch size based on classifier free guidance condition.
-    int unet_batch_size = unet.do_classifier_free_guidance(guidance_scale) ? 2 : 1;
-
-    // Create the text encoder.
-    auto text_encoder = ov::genai::CLIPTextModel(root_dir / "text_encoder");
-
-    // In case of NPU, we need to reshape the model to have static shapes
-    if (text_encoder_device == "NPU") {
-        text_encoder.reshape(unet_batch_size);
-    }
-
-    // Compile text encoder for the specified device
-    text_encoder.compile(text_encoder_device, ov::cache_dir(ov_cache_dir));
-
-    // In case of NPU, we need to reshape the model to have static shapes
-    if (unet_device == "NPU") {
-        // The max_position_embeddings config from text encoder will be used as a parameter to unet reshape.
-        int max_position_embeddings = text_encoder.get_config().max_position_embeddings;
-
-        unet.reshape(unet_batch_size, height, width, max_position_embeddings);
-    }
-
-    // Compile unet for specified device
-    unet.compile(unet_device, ov::cache_dir(ov_cache_dir));
+    //
+    // Step 2: Reshape the pipeline given number of images, width, height, and guidance scale.
+    //
+    pipe.reshape(1, width, height, pipe.get_generation_config().guidance_scale);
 
-    // Create the vae decoder.
-    auto vae = ov::genai::AutoencoderKL(root_dir / "vae_decoder");
+    //
+    // Step 3: Compile the pipeline with the specified devices, and properties (like cache dir)
+    //
+    ov::AnyMap properties = {ov::cache_dir(ov_cache_dir)};
 
-    // In case of NPU, we need to reshape the model to have static shapes
-    if (vae_decoder_device == "NPU") {
-        // We set batch-size to '1' here, as we're configuring our pipeline to return 1 image per 'generate' call.
-        vae.reshape(1, height, width);
-    }
+    // Note that if there are device-specific properties that are needed, they can
+    // be added using ov::device::properties groups, like this:
+    // ov::AnyMap properties = {ov::device::properties("CPU", ov::cache_dir("cpu_cache")),
+    //                          ov::device::properties("GPU", ov::cache_dir("gpu_cache")),
+    //                          ov::device::properties("NPU", ov::cache_dir("npu_cache"))};
 
-    // Compile vae decoder for the specified device
-    vae.compile(vae_decoder_device, ov::cache_dir(ov_cache_dir));
+    pipe.compile(text_encoder_device, unet_device, vae_decoder_device, properties);
 
-    //
-    // Step 2: Create a Text2ImagePipeline from the individual subcomponents
-    //
-    auto pipe = ov::genai::Text2ImagePipeline::stable_diffusion(scheduler, text_encoder, unet, vae);
 
     //
-    // Step 3: Use the Text2ImagePipeline to generate 'number_of_images_to_generate' images.
+    // Step 4: Use the Text2ImagePipeline to generate 'number_of_images_to_generate' images.
     //
     for (int imagei = 0; imagei < number_of_images_to_generate; imagei++) {
         std::cout << "Generating image " << imagei << std::endl;
 
         ov::Tensor image = pipe.generate(prompt,
-                                         ov::genai::width(width),
-                                         ov::genai::height(height),
-                                         ov::genai::guidance_scale(guidance_scale),
                                          ov::genai::num_inference_steps(number_of_inference_steps_per_image),
                                          ov::genai::callback(progress_bar));
 
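Taken together, the patch replaces the per-component setup with four pipeline-level calls. As a reading aid only, here is a minimal sketch of how the affected body of main() reads once the hunks above are applied, assuming the definitions that live in the unchanged parts of the sample (models_path, prompt, the three *_device strings, ov_cache_dir, and the progress_bar callback):

    // Step 1: Create the pipeline directly from the exported models directory.
    ov::genai::Text2ImagePipeline pipe(models_path);

    // Step 2: Reshape to static shapes up front: one image per generate() call,
    // fixed width/height, and the guidance scale taken from the pipeline's own
    // generation config (replacing the removed guidance_scale constant).
    pipe.reshape(1, width, height, pipe.get_generation_config().guidance_scale);

    // Step 3: Compile the text encoder, unet, and vae decoder for their devices,
    // sharing one property set (here, just the model cache directory).
    ov::AnyMap properties = {ov::cache_dir(ov_cache_dir)};
    pipe.compile(text_encoder_device, unet_device, vae_decoder_device, properties);

    // Step 4: Generate. width, height, and guidance_scale no longer need to be
    // passed per call, since the reshape in Step 2 fixed them.
    for (int imagei = 0; imagei < number_of_images_to_generate; imagei++) {
        std::cout << "Generating image " << imagei << std::endl;

        ov::Tensor image = pipe.generate(prompt,
                                         ov::genai::num_inference_steps(number_of_inference_steps_per_image),
                                         ov::genai::callback(progress_bar));
    }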
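The comment in Step 3 also points at scoping properties per device rather than sharing one cache directory. A short sketch of that variant, using the hypothetical per-device cache paths from the comment (cpu_cache, gpu_cache, npu_cache) rather than anything the sample itself defines:

    // Each ov::device::properties group is applied only to the named device, so
    // components compiled on CPU, GPU, and NPU each get their own cache directory.
    ov::AnyMap properties = {ov::device::properties("CPU", ov::cache_dir("cpu_cache")),
                             ov::device::properties("GPU", ov::cache_dir("gpu_cache")),
                             ov::device::properties("NPU", ov::cache_dir("npu_cache"))};
    pipe.compile(text_encoder_device, unet_device, vae_decoder_device, properties);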