Skip to content

Commit ce253c0

Browse files
authored
Merge branch 'master' into gha/model_sporadics
2 parents d7b31da + 34400b7 commit ce253c0

25 files changed

+434
-99
lines changed

.github/workflows/genai-tools.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ jobs:
4444
with:
4545
platform: ubuntu22
4646
commit_packages_to_provide: wheels
47-
revision: latest_available_commit
47+
revision: a8aba4e104f027c2ba8a21fd6c4c861110c57ed9
4848

4949
llm_bench:
5050
name: 'LLM bench tests'

.github/workflows/lcm_dreamshaper_cpp.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ jobs:
108108
${{ env.build_dir }}/samples/cpp/image_generation/benchmark_image_gen -t inpainting -m ./models/lcm_dreamshaper_v7 -p "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting" -i ./image.png --mi ./mask_image.png
109109
110110
lcm_dreamshaper_v7_cpp-windows:
111-
runs-on: windows-2022
111+
runs-on: aks-win-4-cores-8gb-staging
112112
defaults:
113113
run:
114114
shell: pwsh

.github/workflows/linux.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ jobs:
5252
with:
5353
platform: ubuntu22
5454
commit_packages_to_provide: wheels,openvino_js_package.tar.gz
55-
revision: latest_available_commit
55+
revision: a8aba4e104f027c2ba8a21fd6c4c861110c57ed9
5656

5757
- name: Clone docker tag from OpenVINO repo
5858
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

.github/workflows/stable_diffusion_1_5_cpp.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ jobs:
153153

154154
stable_diffusion_1_5_cpp-windows:
155155
needs: [ openvino_download_windows ]
156-
runs-on: windows-2022
156+
runs-on: aks-win-4-cores-8gb-staging
157157
defaults:
158158
run:
159159
shell: pwsh

.github/workflows/windows.yml

+13-7
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ jobs:
4949
with:
5050
platform: 'windows'
5151
commit_packages_to_provide: wheels,openvino_js_package.zip
52-
revision: 'latest_available_commit'
52+
revision: 'a8aba4e104f027c2ba8a21fd6c4c861110c57ed9'
5353

5454
genai_build_cpack:
5555
name: genai cpack (${{ matrix.build_type }})
@@ -61,7 +61,7 @@ jobs:
6161
defaults:
6262
run:
6363
shell: pwsh
64-
runs-on: windows-2022
64+
runs-on: aks-win-8-cores-16gb-staging
6565
env:
6666
OV_INSTALL_DIR: ${{ github.workspace }}\install\ov
6767
GENAI_INSTALL_DIR: ${{ github.workspace }}\install\genai
@@ -162,7 +162,7 @@ jobs:
162162
defaults:
163163
run:
164164
shell: pwsh
165-
runs-on: windows-2022
165+
runs-on: aks-win-8-cores-16gb-staging
166166
env:
167167
OV_INSTALL_DIR: ${{ github.workspace }}\install\ov
168168
SRC_DIR: ${{ github.workspace }}\src\genai
@@ -216,7 +216,7 @@ jobs:
216216
- name: Configure Developer Command Prompt for Microsoft Visual C++
217217
uses: ilammy/msvc-dev-cmd@0b201ec74fa43914dc39ae48a89fd1d8cb592756 # v1.13.0
218218
with:
219-
toolset: 14.40 # v2022
219+
toolset: 14.40 # v2022
220220

221221
- name: Build Tokenizers Wheel
222222
run: |
@@ -271,7 +271,7 @@ jobs:
271271
defaults:
272272
run:
273273
shell: pwsh
274-
runs-on: windows-2022
274+
runs-on: aks-win-4-cores-8gb-staging
275275
env:
276276
OV_INSTALL_DIR: ${{ github.workspace }}/install/ov
277277
SRC_DIR: ${{ github.workspace }}/src
@@ -319,7 +319,7 @@ jobs:
319319
defaults:
320320
run:
321321
shell: pwsh
322-
runs-on: windows-2022
322+
runs-on: aks-win-4-cores-8gb-staging
323323

324324
env:
325325
OV_INSTALL_DIR: ${{ github.workspace }}/ov
@@ -340,6 +340,12 @@ jobs:
340340
name: ${{ needs.openvino_download.outputs.ov_artifact_name }}
341341
path: ${{ env.OV_INSTALL_DIR }}
342342
merge-multiple: true
343+
344+
- name: Setup Python ${{ env.PYTHON_VERSION }}
345+
uses: actions/setup-python@42375524e23c412d93fb67b49958b491fce71c38 # v5.4.0
346+
with:
347+
python-version: ${{ env.PYTHON_VERSION }}
348+
cache: 'pip'
343349

344350
- name: Build GenAI Node.js bindings
345351
run: |
@@ -483,7 +489,7 @@ jobs:
483489
defaults:
484490
run:
485491
shell: pwsh
486-
runs-on: windows-latest
492+
runs-on: windows-2022
487493

488494
env:
489495
SRC_DIR: ${{ github.workspace }}/openvino.genai

CMakeLists.txt

+8-6
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,14 @@ endif()
4141

4242
# Looking for OpenVINO in the python distribution. It doesn't work for cross-compiling build
4343
if(NOT CMAKE_CROSSCOMPILING)
44-
find_package(Python3 REQUIRED)
45-
execute_process(
46-
COMMAND ${Python3_EXECUTABLE} -c "from openvino.utils import get_cmake_path; print(get_cmake_path(), end='')"
47-
OUTPUT_VARIABLE OpenVINO_DIR_PY
48-
ERROR_QUIET
49-
)
44+
find_package(Python3 QUIET COMPONENTS Interpreter)
45+
if(Python3_Interpreter_FOUND)
46+
execute_process(
47+
COMMAND ${Python3_EXECUTABLE} -c "from openvino.utils import get_cmake_path; print(get_cmake_path(), end='')"
48+
OUTPUT_VARIABLE OpenVINO_DIR_PY
49+
ERROR_QUIET
50+
)
51+
endif()
5052
endif()
5153

5254
# Find OpenVINODeveloperPackage first to compile with SDL flags

src/cpp/include/openvino/genai/image_generation/sd3_transformer_2d_model.hpp

+6
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,16 @@ class OPENVINO_GENAI_EXPORTS SD3Transformer2DModel {
8080
ov::Tensor infer(const ov::Tensor latent, const ov::Tensor timestep);
8181

8282
private:
83+
class Inference;
84+
std::shared_ptr<Inference> m_impl;
85+
8386
Config m_config;
8487
ov::InferRequest m_request;
8588
std::shared_ptr<ov::Model> m_model;
8689
size_t m_vae_scale_factor;
90+
91+
class InferenceDynamic;
92+
class InferenceStaticBS1;
8793
};
8894

8995
} // namespace genai

src/cpp/src/debug_utils.hpp

+81
Original file line numberDiff line numberDiff line change
@@ -72,3 +72,84 @@ inline void read_tensor(const std::string& file_name, ov::Tensor tensor, bool as
7272

7373
std::cout << "Closing " << file_name << std::endl;
7474
}
75+
76+
/// @brief Read an npy file created in Python:
77+
/// with open('ndarray.npy', 'wb') as file:
78+
/// np.save(file, ndarray)
79+
inline ov::Tensor from_npy(const std::filesystem::path& npy) {
80+
std::ifstream fstream{npy, std::ios::binary};
81+
fstream.seekg(0, std::ios_base::end);
82+
OPENVINO_ASSERT(fstream.good());
83+
auto full_file_size = static_cast<std::size_t>(fstream.tellg());
84+
fstream.seekg(0, std::ios_base::beg);
85+
86+
std::string magic_string(6, ' ');
87+
fstream.read(&magic_string[0], magic_string.size());
88+
OPENVINO_ASSERT(magic_string == "\x93NUMPY");
89+
90+
fstream.ignore(2);
91+
unsigned short header_size;
92+
fstream.read((char*)&header_size, sizeof(header_size));
93+
94+
std::string header(header_size, ' ');
95+
fstream.read(&header[0], header.size());
96+
97+
int idx, from, to;
98+
99+
// Verify fortran order is false
100+
const std::string fortran_key = "'fortran_order':";
101+
idx = header.find(fortran_key);
102+
OPENVINO_ASSERT(idx != -1);
103+
104+
from = header.find_last_of(' ', idx + fortran_key.size()) + 1;
105+
to = header.find(',', from);
106+
auto fortran_value = header.substr(from, to - from);
107+
OPENVINO_ASSERT(fortran_value == "False");
108+
109+
// Verify array shape matches the input's
110+
const std::string shape_key = "'shape':";
111+
idx = header.find(shape_key);
112+
OPENVINO_ASSERT(idx != -1);
113+
114+
from = header.find('(', idx + shape_key.size()) + 1;
115+
to = header.find(')', from);
116+
117+
std::string shape_data = header.substr(from, to - from);
118+
ov::Shape _shape;
119+
120+
if (!shape_data.empty()) {
121+
shape_data.erase(std::remove(shape_data.begin(), shape_data.end(), ','), shape_data.end());
122+
123+
std::istringstream shape_data_stream(shape_data);
124+
size_t value;
125+
while (shape_data_stream >> value) {
126+
_shape.push_back(value);
127+
}
128+
}
129+
130+
// Verify array data type matches input's
131+
std::string dataTypeKey = "'descr':";
132+
idx = header.find(dataTypeKey);
133+
OPENVINO_ASSERT(-1 != idx);
134+
135+
from = header.find('\'', idx + dataTypeKey.size()) + 1;
136+
to = header.find('\'', from);
137+
std::string type;
138+
type = header.substr(from, to - from);
139+
140+
size_t _size = 0;
141+
_size = full_file_size - static_cast<std::size_t>(fstream.tellg());
142+
ov::element::Type tensor_type;
143+
if ("<f4" == type) {
144+
tensor_type = ov::element::f32;
145+
} else if ("|u1" == type) {
146+
tensor_type = ov::element::u8;
147+
} else {
148+
OPENVINO_THROW("Not implemented dtype");
149+
}
150+
OPENVINO_ASSERT(_size == ov::shape_size(_shape) * tensor_type.size());
151+
ov::Tensor tensor{tensor_type, _shape};
152+
fstream.read((char*)tensor.data(), _size);
153+
OPENVINO_ASSERT(fstream.gcount() == _size);
154+
return tensor;
155+
}

src/cpp/src/image_generation/flux_pipeline.hpp

+7-11
Original file line numberDiff line numberDiff line change
@@ -247,20 +247,16 @@ class FluxPipeline : public DiffusionPipeline {
247247
m_vae->reshape(num_images_per_prompt, height, width);
248248
}
249249

250-
void compile(const std::string& device, const ov::AnyMap& properties) override {
251-
update_adapters_from_properties(properties, m_generation_config.adapters);
252-
auto updated_properties = update_adapters_in_properties(properties, &FluxPipeline::derived_adapters);
253-
m_clip_text_encoder->compile(device, *updated_properties);
254-
m_t5_text_encoder->compile(device, *updated_properties);
255-
m_vae->compile(device, *updated_properties);
256-
m_transformer->compile(device, *updated_properties);
257-
}
258-
259250
void compile(const std::string& text_encode_device,
260251
const std::string& denoise_device,
261252
const std::string& vae_device,
262253
const ov::AnyMap& properties) override {
263-
OPENVINO_THROW("not supported yet.");
254+
update_adapters_from_properties(properties, m_generation_config.adapters);
255+
auto updated_properties = update_adapters_in_properties(properties, &FluxPipeline::derived_adapters);
256+
m_clip_text_encoder->compile(text_encode_device, *updated_properties);
257+
m_t5_text_encoder->compile(text_encode_device, *updated_properties);
258+
m_vae->compile(vae_device, *updated_properties);
259+
m_transformer->compile(denoise_device, *updated_properties);
264260
}
265261

266262
void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config) override {
@@ -618,7 +614,7 @@ class FluxPipeline : public DiffusionPipeline {
618614
size_t inference_step) override {
619615
OPENVINO_ASSERT(m_pipeline_type == PipelineType::INPAINTING, "'blend_latents' can be called for inpainting pipeline only");
620616
OPENVINO_ASSERT(image_latent.get_shape() == latents.get_shape(),
621-
"Shapes for current", latents.get_shape(), "and initial image latents ", image_latent.get_shape(), " must match");
617+
"Shapes for current ", latents.get_shape(), " and initial image latents ", image_latent.get_shape(), " must match");
622618

623619
ov::Tensor init_latents_proper(image_latent.get_element_type(), image_latent.get_shape());
624620
image_latent.copy_to(init_latents_proper);

src/cpp/src/image_generation/models/sd3_transformer_2d_model.cpp

+17-32
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
// SPDX-License-Identifier: Apache-2.0
33

44
#include "openvino/genai/image_generation/sd3_transformer_2d_model.hpp"
5+
#include "image_generation/models/sd3transformer_2d_inference_dynamic.hpp"
6+
#include "image_generation/models/sd3transformer_2d_inference_static_bs1.hpp"
57

68
#include <fstream>
79

@@ -77,54 +79,37 @@ SD3Transformer2DModel& SD3Transformer2DModel::reshape(int batch_size,
7779
height /= m_vae_scale_factor;
7880
width /= m_vae_scale_factor;
7981

80-
std::map<std::string, ov::PartialShape> name_to_shape;
81-
82-
for (auto&& input : m_model->inputs()) {
83-
std::string input_name = input.get_any_name();
84-
name_to_shape[input_name] = input.get_partial_shape();
85-
if (input_name == "timestep") {
86-
name_to_shape[input_name][0] = 1;
87-
} else if (input_name == "hidden_states") {
88-
name_to_shape[input_name] = {batch_size, name_to_shape[input_name][1], height, width};
89-
} else if (input_name == "encoder_hidden_states") {
90-
name_to_shape[input_name][0] = batch_size;
91-
name_to_shape[input_name][1] =
92-
tokenizer_model_max_length *
93-
2; // x2 is necessary because of the concatenation of prompt_embeds and t5_prompt_embeds
94-
} else if (input_name == "pooled_projections") {
95-
name_to_shape[input_name][0] = batch_size;
96-
}
97-
}
98-
99-
m_model->reshape(name_to_shape);
82+
SD3Transformer2DModel::Inference::reshape(m_model, batch_size, height, width, tokenizer_model_max_length);
10083

10184
return *this;
10285
}
10386

10487
SD3Transformer2DModel& SD3Transformer2DModel::compile(const std::string& device, const ov::AnyMap& properties) {
10588
OPENVINO_ASSERT(m_model, "Model has been already compiled. Cannot re-compile already compiled model");
106-
ov::CompiledModel compiled_model = utils::singleton_core().compile_model(m_model, device, properties);
107-
ov::genai::utils::print_compiled_model_properties(compiled_model, "SD3 Transformer 2D model");
108-
m_request = compiled_model.create_infer_request();
89+
90+
if (device.find("NPU") != std::string::npos) {
91+
m_impl = std::make_shared<SD3Transformer2DModel::InferenceStaticBS1>();
92+
}
93+
else {
94+
m_impl = std::make_shared<SD3Transformer2DModel::InferenceDynamic>();
95+
}
96+
97+
m_impl->compile(m_model, device, properties);
98+
10999
// release the original model
110100
m_model.reset();
111101

112102
return *this;
113103
}
114104

115105
void SD3Transformer2DModel::set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states) {
116-
OPENVINO_ASSERT(m_request, "Transformer model must be compiled first");
117-
m_request.set_tensor(tensor_name, encoder_hidden_states);
106+
OPENVINO_ASSERT(m_impl, "Transformer model must be compiled first");
107+
m_impl->set_hidden_states(tensor_name, encoder_hidden_states);
118108
}
119109

120110
ov::Tensor SD3Transformer2DModel::infer(const ov::Tensor latent_model_input, const ov::Tensor timestep) {
121-
OPENVINO_ASSERT(m_request, "Transformer model must be compiled first. Cannot infer non-compiled model");
122-
123-
m_request.set_tensor("hidden_states", latent_model_input);
124-
m_request.set_tensor("timestep", timestep);
125-
m_request.infer();
126-
127-
return m_request.get_output_tensor();
111+
OPENVINO_ASSERT(m_impl, "Transformer model must be compiled first. Cannot infer non-compiled model");
112+
return m_impl->infer(latent_model_input, timestep);
128113
}
129114

130115
} // namespace genai

0 commit comments

Comments
 (0)