Skip to content

Commit 35b6d89

Browse files
committed
Clean up VLMPipeline
Address comments in openvinotoolkit#912
1 parent 871f334 commit 35b6d89

File tree

12 files changed

+61
-8473
lines changed

12 files changed

+61
-8473
lines changed

.github/workflows/causal_lm_cpp.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -711,10 +711,10 @@ jobs:
711711
- run: >
712712
source ./ov/setupvars.sh
713713
&& python ./samples/cpp/visual_language_chat/export_MiniCPM-V-2_6.py ./miniCPM-V-2_6/
714-
- run: wget https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11
714+
- run: wget https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11 --output-document cat.jpg
715715
- run: >
716716
source ./ov/setupvars.sh
717-
&& ./build/samples/cpp/visual_language_chat/visual_language_chat ./miniCPM-V-2_6/ d5fbbd1a-d484-415c-88cb-9986625b7b11
717+
&& ./build/samples/cpp/visual_language_chat/visual_language_chat ./miniCPM-V-2_6/ cat.jpg
718718
<<< $'What is on the image?\nWhat is special on the image?'
719719
timeout-minutes: 110
720720

samples/cpp/visual_language_chat/README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ export_MiniCPM-V-2_6.py miniCPM-V-2_6
1515

1616
## Run
1717

18-
https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11 can be used as a sample image.
18+
[This image](https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11) can be used as a sample image.
1919

2020
`visual_language_chat miniCPM-V-2_6 319483352-d5fbbd1a-d484-415c-88cb-9986625b7b11.jpg`
2121

samples/cpp/visual_language_chat/visual_language_chat.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ int main(int argc, char* argv[]) try {
3131
}
3232
pipe.generate(
3333
prompt,
34-
ov::genai::image(std::move(image)),
34+
ov::genai::image(image),
3535
ov::genai::streamer(print_subword)
3636
);
3737
std::cout << "\n----------\n"

src/cpp/CMakeLists.txt

+3-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,9 @@ file(GLOB_RECURSE SOURCE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/src/*.cpp" "${CMAKE_
5151

5252
set(TARGET_NAME openvino_genai)
5353
add_library(${TARGET_NAME} SHARED ${SOURCE_FILES})
54-
add_dependencies(${TARGET_NAME} openvino_tokenizers)
54+
if(TARGET openvino_tokenizers)
55+
add_dependencies(${TARGET_NAME} openvino_tokenizers)
56+
endif()
5557
add_library(openvino::genai ALIAS ${TARGET_NAME})
5658

5759
target_include_directories(${TARGET_NAME}

src/cpp/include/openvino/genai/vision_encoder.hpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
namespace ov::genai {
1010
/// @brief A pair describing image size.
11-
struct HeightWidth {
11+
struct ImageSize {
1212
/// @brief Height of a corresponding image.
1313
size_t height;
1414
/// @brief Width of a corresponding image.
@@ -25,16 +25,16 @@ struct EncodedImage {
2525
ov::Tensor resized_source;
2626
/// @brief A size of an image used to compute embeddings for
2727
/// divided by ProcessorConfig's patch_size.
28-
HeightWidth resized_source_size;
28+
ImageSize resized_source_size;
2929
/// @brief Embeddings of images obtained from a source image by
3030
/// slicing at no more than max_slice_nums pieces and resizing.
3131
/// The tensor's shape is
3232
/// [slice_y, slice_x, number_of_embeddings, embedding_size].
3333
/// slices_sizes.size() == slice_y * slice_x.
3434
ov::Tensor slices;
35-
/// @brief Flattened sizes of images used to compute embeddings
35+
/// @brief A size of images used to compute embeddings
3636
/// stored in slices member divided by ProcessorConfig's patch_size.
37-
std::vector<HeightWidth> slices_sizes;
37+
ImageSize slices_size;
3838
};
3939

4040
/// @brief A class used to infer embeddings of an image using

src/cpp/include/openvino/genai/vlm_pipeline.hpp

+4-27
Original file line numberDiff line numberDiff line change
@@ -65,37 +65,14 @@ class OPENVINO_GENAI_EXPORTS VLMPipeline {
6565
explicit VLMPipeline(
6666
const std::filesystem::path& model_dir,
6767
const std::string& device="CPU",
68-
const ov::AnyMap device_config={},
69-
ov::Core core=ov::Core{}
70-
) : VLMPipeline{
71-
model_dir,
72-
Tokenizer(model_dir.string(), device_config),
73-
device,
74-
device_config,
75-
core
76-
} {}
77-
78-
/// @brief Construct a pipeline from a folder containing model IRs
79-
/// and from a Tokenizer instance.
80-
/// @param model_dir A folder to read model IRs.
81-
/// @param tokenizer An instance of Tokenizer to use.
82-
/// @param device Inference device.
83-
/// @param device_config A config to pass to ov::Core.set_property()
84-
/// and ov::Core::compile_model().
85-
/// @param core ov::Core instance to use.
86-
VLMPipeline(
87-
const std::filesystem::path& model_dir,
88-
const ov::genai::Tokenizer& tokenizer,
89-
const std::string& device="CPU",
90-
const ov::AnyMap device_config={},
91-
ov::Core core=ov::Core{}
68+
const ov::AnyMap device_config={}
9269
);
9370

9471
/// @brief Default destructor.
9572
~VLMPipeline();
9673

9774
/// @brief Generate a response given a prompt and any number of
98-
/// uint8 RGB images.
75+
/// uint8 RGB images with [HWC] layout.
9976
/// @param prompt A prompt to respond to.
10077
/// @param images Images to be prepended to a prompt.
10178
/// @param generation_config A config to follow for text generation.
@@ -120,7 +97,7 @@ class OPENVINO_GENAI_EXPORTS VLMPipeline {
12097
/// @brief Generate a response given a prompt and arbitrary number
12198
/// of ov::Property instances.
12299
/// Example:
123-
/// generate("text", image(std::move(rgb)), do_sample(true));
100+
/// generate("text", image(rgb), do_sample(true));
124101
/// @param prompt A prompt to respond to.
125102
/// @param ...properties ov::Property instances to be combined into
126103
/// ov::AnyMap.
@@ -166,7 +143,7 @@ class OPENVINO_GENAI_EXPORTS VLMPipeline {
166143

167144
/*
168145
* utils that allow to use generate() in the following way:
169-
* pipe.generate(prompt, ov::genai::image(std::move(image_tensor))).
146+
* pipe.generate(prompt, ov::genai::image(image_tensor)).
170147
*/
171148
static constexpr ov::Property<ov::Tensor> image{"image"};
172149
static constexpr ov::Property<std::vector<ov::Tensor>> images{"images"};

src/cpp/src/clip.cpp

-3
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,6 @@
66
// I'll gradually clean and extend it
77
// Note: Even when using identical normalized image inputs (see normalize_image_u8_to_f32()) we have a significant difference in resulting embeddings compared to pytorch
88

9-
#define STB_IMAGE_IMPLEMENTATION
10-
#include "stb_image.hpp"
11-
129
#include <cassert>
1310
#include <cmath>
1411
#include <cstdlib>

src/cpp/src/clip.hpp

+1-3
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
// Copyright (C) 2023-2024 Intel Corporation
22
// SPDX-License-Identifier: Apache-2.0
33

4-
#ifndef CLIP_H
5-
#define CLIP_H
4+
#pragma once
65

76
#include <vector>
87
#include <numeric>
@@ -53,4 +52,3 @@ bool bicubic_resize(const clip_image_u8& img, clip_image_u8& dst, int target_wid
5352

5453
/** preprocess img and store the result in res_imgs, pad_to_square may be overridden to false depending on model configuration */
5554
clip_image_f32 clip_image_preprocess(struct clip_ctx& ctx, const clip_image_u8& img);
56-
#endif // CLIP_H

0 commit comments

Comments
 (0)