
Commit 8fcabe3

Add python bindings to VLMPipeline for encrypted models
1 parent 7fce092 commit 8fcabe3

7 files changed: +325 −4 lines changed

samples/cpp/visual_language_chat/CMakeLists.txt

+16-1
@@ -28,6 +28,21 @@ install(TARGETS visual_language_chat
     COMPONENT samples_bin
     EXCLUDE_FROM_ALL)
 
+# create encrypted model sample executable
+
+add_executable(encrypted_model_vlm encrypted_model_vlm.cpp load_image.cpp)
+target_include_directories(encrypted_model_vlm PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_BINARY_DIR}")
+target_link_libraries(encrypted_model_vlm PRIVATE openvino::genai)
+
+set_target_properties(encrypted_model_vlm PROPERTIES
+    # Ensure out of box LC_RPATH on macOS with SIP
+    INSTALL_RPATH_USE_LINK_PATH ON)
+
+install(TARGETS encrypted_model_vlm
+        RUNTIME DESTINATION samples_bin/
+        COMPONENT samples_bin
+        EXCLUDE_FROM_ALL)
+
 # create benchmark executable
 
 add_executable(benchmark_vlm benchmark_vlm.cpp load_image.cpp)

@@ -40,4 +55,4 @@ set_target_properties(benchmark_vlm PROPERTIES
 install(TARGETS benchmark_vlm
         RUNTIME DESTINATION samples_bin/
         COMPONENT samples_bin
-        EXCLUDE_FROM_ALL)
+        EXCLUDE_FROM_ALL)
samples/cpp/visual_language_chat/encrypted_model_vlm.cpp

+101 −0
@@ -0,0 +1,101 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include <filesystem>
#include <fstream>

#include "load_image.hpp"
#include "openvino/genai/visual_language/pipeline.hpp"

std::pair<std::string, ov::Tensor> decrypt_model(const std::string& model_path, const std::string& weights_path) {
    std::ifstream model_file(model_path);
    std::ifstream weights_file(weights_path, std::ios::binary);
    if (!model_file.is_open() || !weights_file.is_open()) {
        throw std::runtime_error("Cannot open model or weights file");
    }

    // User can add file decryption of model_file and weights_file in memory here.

    std::string model_str((std::istreambuf_iterator<char>(model_file)), std::istreambuf_iterator<char>());

    weights_file.seekg(0, std::ios::end);
    auto weight_size = static_cast<unsigned>(weights_file.tellg());
    weights_file.seekg(0, std::ios::beg);
    auto weights_tensor = ov::Tensor(ov::element::u8, {weight_size});
    if (!weights_file.read(static_cast<char*>(weights_tensor.data()), weight_size)) {
        throw std::runtime_error("Cannot read weights file");
    }

    return {model_str, weights_tensor};
}

ov::genai::Tokenizer decrypt_tokenizer(const std::string& models_path) {
    std::string tok_model_path = models_path + "/openvino_tokenizer.xml";
    std::string tok_weights_path = models_path + "/openvino_tokenizer.bin";
    auto [tok_model_str, tok_weights_tensor] = decrypt_model(tok_model_path, tok_weights_path);

    std::string detok_model_path = models_path + "/openvino_detokenizer.xml";
    std::string detok_weights_path = models_path + "/openvino_detokenizer.bin";
    auto [detok_model_str, detok_weights_tensor] = decrypt_model(detok_model_path, detok_weights_path);

    return ov::genai::Tokenizer(tok_model_str, tok_weights_tensor, detok_model_str, detok_weights_tensor);
}

bool print_subword(std::string&& subword) {
    return !(std::cout << subword << std::flush);
}

int main(int argc, char* argv[]) try {
    if (4 != argc) {
        throw std::runtime_error(std::string{"Usage "} + argv[0] + " <MODEL_DIR> <IMAGE_FILE OR DIR_WITH_IMAGES> <PROMPT>");
    }

    // read and decrypt models
    std::string models_path = argv[1];
    auto language_model = decrypt_model(models_path + "/openvino_language_model.xml", models_path + "/openvino_language_model.bin");
    auto resampler_model = decrypt_model(models_path + "/openvino_resampler_model.xml", models_path + "/openvino_resampler_model.bin");
    auto text_embeddings_model = decrypt_model(models_path + "/openvino_text_embeddings_model.xml", models_path + "/openvino_text_embeddings_model.bin");
    auto vision_embeddings_model = decrypt_model(models_path + "/openvino_vision_embeddings_model.xml", models_path + "/openvino_vision_embeddings_model.bin");

    ov::genai::ModelsMap models_map;
    models_map.emplace("language", std::move(language_model));
    models_map.emplace("resampler", std::move(resampler_model));
    models_map.emplace("text_embeddings", std::move(text_embeddings_model));
    models_map.emplace("vision_embeddings", std::move(vision_embeddings_model));
    ov::genai::Tokenizer tokenizer = decrypt_tokenizer(models_path);

    std::vector<ov::Tensor> rgbs = utils::load_images(argv[2]);

    // GPU and NPU can be used as well.
    // Note: if NPU is selected, only the language model will run on NPU.
    std::string device = "CPU";
    ov::AnyMap enable_compile_cache;
    if (device == "GPU") {
        // Cache compiled models on disk for GPU to save time on the
        // next run. It's not beneficial for CPU.
        enable_compile_cache.insert({ov::cache_dir("vlm_cache")});
    }
    ov::genai::VLMPipeline pipe(models_map, tokenizer, models_path, device, enable_compile_cache);

    ov::genai::GenerationConfig generation_config;
    generation_config.max_new_tokens = 100;

    std::string prompt = argv[3];
    pipe.generate(prompt,
                  ov::genai::images(rgbs),
                  ov::genai::generation_config(generation_config),
                  ov::genai::streamer(print_subword));

} catch (const std::exception& error) {
    try {
        std::cerr << error.what() << '\n';
    } catch (const std::ios_base::failure&) {}
    return EXIT_FAILURE;
} catch (...) {
    try {
        std::cerr << "Non-exception object thrown\n";
    } catch (const std::ios_base::failure&) {}
    return EXIT_FAILURE;
}
samples/python/visual_language_chat/encrypted_model_vlm.py

+112 −0
@@ -0,0 +1,112 @@
#!/usr/bin/env python3
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import argparse

import numpy as np
import openvino_genai
import openvino
from PIL import Image
from openvino import Tensor
from pathlib import Path
import typing

def decrypt_model(model_dir, model_file_name, weights_file_name):
    with open(model_dir + '/' + model_file_name, "r") as file:
        model = file.read()
    # decrypt model

    with open(model_dir + '/' + weights_file_name, "rb") as file:
        binary_data = file.read()
    # decrypt weights
    weights = np.frombuffer(binary_data, dtype=np.uint8).astype(np.uint8)

    return model, Tensor(weights)

def read_tokenizer(model_dir):
    tokenizer_model_name = 'openvino_tokenizer.xml'
    tokenizer_weights_name = 'openvino_tokenizer.bin'
    tokenizer_model, tokenizer_weights = decrypt_model(model_dir, tokenizer_model_name, tokenizer_weights_name)

    detokenizer_model_name = 'openvino_detokenizer.xml'
    detokenizer_weights_name = 'openvino_detokenizer.bin'
    detokenizer_model, detokenizer_weights = decrypt_model(model_dir, detokenizer_model_name, detokenizer_weights_name)

    return openvino_genai.Tokenizer(tokenizer_model, tokenizer_weights, detokenizer_model, detokenizer_weights)

def streamer(subword: str) -> bool:
    '''
    Args:
        subword: sub-word of the generated text.

    Returns: Flag indicating whether generation should be stopped.
    '''
    print(subword, end='', flush=True)

    # No value is returned, as in this example we don't want to stop generation in this method.
    # "return None" is treated the same as "return openvino_genai.StreamingStatus.RUNNING".


def read_image(path: str) -> Tensor:
    '''
    Args:
        path: The path to the image.

    Returns: the ov.Tensor containing the image.
    '''
    pic = Image.open(path).convert("RGB")
    image_data = np.array(pic)
    return Tensor(image_data)


def read_images(path: str) -> list[Tensor]:
    entry = Path(path)
    if entry.is_dir():
        return [read_image(str(file)) for file in sorted(entry.iterdir())]
    return [read_image(path)]

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('model_dir')
    parser.add_argument('image_dir', help="Image file or dir with images")
    parser.add_argument('prompt', help="Prompt for generation")
    args = parser.parse_args()

    model_name_to_file_map = {
        ('language', 'openvino_language_model'),
        ('resampler', 'openvino_resampler_model'),
        ('text_embeddings', 'openvino_text_embeddings_model'),
        ('vision_embeddings', 'openvino_vision_embeddings_model')}

    models_map = dict()
    for model_name, file_name in model_name_to_file_map:
        model, weights = decrypt_model(args.model_dir, file_name + '.xml', file_name + '.bin')
        models_map[model_name] = (model, weights)

    tokenizer = read_tokenizer(args.model_dir)

    # GPU and NPU can be used as well.
    # Note: if NPU is selected, only the language model will run on NPU.
    device = 'CPU'
    enable_compile_cache = dict()
    if "GPU" == device:
        # Cache compiled models on disk for GPU to save time on the
        # next run. It's not beneficial for CPU.
        enable_compile_cache["CACHE_DIR"] = "vlm_cache"

    pipe = openvino_genai.VLMPipeline(models_map, tokenizer, args.model_dir, device, **enable_compile_cache)

    config = openvino_genai.GenerationConfig()
    config.max_new_tokens = 100

    rgbs = read_images(args.image_dir)

    pipe.generate(args.prompt, images=rgbs, generation_config=config, streamer=streamer)

if '__main__' == __name__:
    main()

src/cpp/src/visual_language/pipeline.cpp

+1-1
@@ -122,7 +122,7 @@ class VLMPipeline::VLMPipelineImpl : public VLMPipelineBase{
     ) :
         m_generation_config{generation_config} {
         m_is_npu = device.find("NPU") != std::string::npos;
-        OPENVINO_ASSERT(m_is_npu,
+        OPENVINO_ASSERT(!m_is_npu,
             "VLMPipeline initialization from string isn't supported for NPU device");
 
         m_inputs_embedder = std::make_shared<InputsEmbedder>(models_map, tokenizer, config_dir_path, device, properties);

src/python/openvino_genai/py_openvino_genai.pyi

+12-1
@@ -2092,14 +2092,25 @@ class VLMPipeline:
     """
     This class is used for generation with VLMs
     """
+    @typing.overload
     def __init__(self, models_path: os.PathLike, device: str, **kwargs) -> None:
         """
-        device on which inference will be done
         VLMPipeline class constructor.
         models_path (os.PathLike): Path to the folder with exported model files.
         device (str): Device to run the model on (e.g., CPU, GPU). Default is 'CPU'.
         kwargs: Device properties
         """
+    @typing.overload
+    def __init__(self, models: typing.Dict[str, typing.Tuple[str, openvino.Tensor]], tokenizer: Tokenizer, config_dir_path: os.PathLike, device: str, generation_config: GenerationConfig | None = None, **kwargs) -> None:
+        """
+        VLMPipeline class constructor.
+        models (typing.Dict[str, typing.Tuple[str, openvino.Tensor]]): Map with decrypted models. It should contain the following models: language, resampler, text_embeddings, vision_embeddings.
+        tokenizer (Tokenizer): GenAI Tokenizer.
+        config_dir_path (os.PathLike): Path to folder with model configs.
+        device (str): Device to run the model on (e.g., CPU, GPU). Default is 'CPU'.
+        generation_config (GenerationConfig | None): Generation config.
+        kwargs: Device properties
+        """
     def finish_chat(self) -> None:
         ...
     @typing.overload
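
For reference, a minimal sketch (not part of this commit) of how the new overload could be called, assuming the `decrypt_model` and `read_tokenizer` helpers from the encrypted_model_vlm.py sample above; `model_dir` is a hypothetical path to the exported model folder:

import openvino_genai

# Build the map of decrypted (xml_string, weights_tensor) pairs expected by the new overload.
model_dir = "path/to/exported/model"
models_map = {
    name: decrypt_model(model_dir, file_name + ".xml", file_name + ".bin")
    for name, file_name in [
        ("language", "openvino_language_model"),
        ("resampler", "openvino_resampler_model"),
        ("text_embeddings", "openvino_text_embeddings_model"),
        ("vision_embeddings", "openvino_vision_embeddings_model"),
    ]
}
# The overload also takes a Tokenizer built from decrypted tokenizer models and the
# directory that still holds the model config files.
pipe = openvino_genai.VLMPipeline(models_map, read_tokenizer(model_dir), model_dir, "CPU")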

src/python/py_vlm_pipeline.cpp

+27-1
@@ -163,14 +163,40 @@ void init_vlm_pipeline(py::module_& m) {
             return std::make_unique<ov::genai::VLMPipeline>(models_path, device, pyutils::kwargs_to_any_map(kwargs));
         }),
         py::arg("models_path"), "folder with exported model files",
-        py::arg("device"), "device on which inference will be done"
+        py::arg("device"), "device on which inference will be done",
         R"(
             VLMPipeline class constructor.
             models_path (os.PathLike): Path to the folder with exported model files.
             device (str): Device to run the model on (e.g., CPU, GPU). Default is 'CPU'.
             kwargs: Device properties
         )")
 
+        .def(py::init([](
+            const ov::genai::ModelsMap& models,
+            const ov::genai::Tokenizer& tokenizer,
+            const std::filesystem::path& config_dir_path,
+            const std::string& device,
+            const ov::genai::OptionalGenerationConfig& generation_config,
+            const py::kwargs& kwargs
+        ) {
+            return std::make_unique<ov::genai::VLMPipeline>(models, tokenizer, config_dir_path, device, pyutils::kwargs_to_any_map(kwargs), generation_config.value_or(ov::genai::GenerationConfig()));
+        }),
+        py::arg("models"), "map with decrypted models",
+        py::arg("tokenizer"), "GenAI Tokenizer",
+        py::arg("config_dir_path"), "Path to folder with model configs",
+        py::arg("device"), "device on which inference will be done",
+        py::arg("generation_config") = std::nullopt, "generation config",
+        R"(
+            VLMPipeline class constructor.
+            models (typing.Dict[str, typing.Tuple[str, openvino.Tensor]]): Map with decrypted models. It should contain the following models: language, resampler, text_embeddings, vision_embeddings.
+            tokenizer (Tokenizer): GenAI Tokenizer.
+            config_dir_path (os.PathLike): Path to folder with model configs.
+            device (str): Device to run the model on (e.g., CPU, GPU). Default is 'CPU'.
+            generation_config (GenerationConfig | None): Generation config.
+            kwargs: Device properties
+        )")
+
         .def("start_chat", &ov::genai::VLMPipeline::start_chat, py::arg("system_message") = "")
         .def("finish_chat", &ov::genai::VLMPipeline::finish_chat)
         .def("set_chat_template", &ov::genai::VLMPipeline::set_chat_template, py::arg("chat_template"))
@@ -0,0 +1,56 @@
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import os
import pytest
import sys

from conftest import SAMPLES_PY_DIR, SAMPLES_CPP_DIR
from test_utils import run_sample

def generate_images(path):
    from PIL import Image
    import numpy as np
    import requests
    res = 28, 28
    lines = np.arange(res[0] * res[1] * 3, dtype=np.uint8) % 255
    lines = lines.reshape([*res, 3])
    lines_image = Image.fromarray(lines)
    cat = Image.open(requests.get("https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11", stream=True).raw).convert('RGB')

    lines_image_path = path + "/lines.png"
    cat_path = path + "/cat.png"
    lines_image.save(lines_image_path)
    cat.save(cat_path)
    yield lines_image_path, cat_path

    os.remove(lines_image_path)
    os.remove(cat_path)

class TestEncryptedVLM:
    @pytest.mark.llm
    @pytest.mark.samples
    @pytest.mark.parametrize("convert_model", ["tiny-random-minicpmv-2_6"], indirect=True)
    @pytest.mark.parametrize("sample_args", ["Describe the images."])
    def test_sample_encrypted_lm(self, convert_model, sample_args, tmp_path):
        generate_images(tmp_path)

        # Test Python sample
        py_script = os.path.join(SAMPLES_PY_DIR, "visual_language_chat/encrypted_model_vlm.py")
        py_command = [sys.executable, py_script, convert_model, tmp_path, sample_args]
        py_result = run_sample(py_command)

        # Test CPP sample
        cpp_sample = os.path.join(SAMPLES_CPP_DIR, 'encrypted_model_vlm')
        cpp_command = [cpp_sample, convert_model, tmp_path, sample_args]
        cpp_result = run_sample(cpp_command)

        # Test common sample
        py_common_script = os.path.join(SAMPLES_PY_DIR, "visual_language_chat/visual_language_chat.py")
        py_common_command = [sys.executable, py_common_script, convert_model, tmp_path]
        py_common_result = run_sample(py_common_command, sample_args)

        # Compare results
        assert py_result.stdout == cpp_result.stdout, "Results should match"
        assert py_result.stdout == py_common_result.stdout, "Results should match"
