
Commit f327e8c

fix jump

1 parent 02c928e commit f327e8c

File tree

4 files changed: +30 -16 lines changed


.github/workflows/causal_lm_cpp.yml (+7 -2)

@@ -701,8 +701,13 @@ jobs:
         run: >
           curl ${{ env.m_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz
           && brew install coreutils scons
-      - run: OpenVINO_DIR=./ov/runtime/cmake/ cmake -DCMAKE_BUILD_TYPE=Release -B ./build/ ./
-      - run: LD_LIBRARY_PATH=${{ github.workspace }}/ov/runtime/3rdparty/tbb/lib/:$LD_LIBRARY_PATH cmake --build ./build/ --config Release --target visual_language_chat -j
+      - run: >
+          OpenVINO_DIR=./ov/runtime/cmake/
+          TBB_DIR=./ov/runtime/3rdparty/tbb/lib/cmake
+          cmake -DCMAKE_BUILD_TYPE=Release -B ./build/ ./
+      - run: >
+          LD_LIBRARY_PATH=${{ github.workspace }}/ov/runtime/3rdparty/tbb/lib/:$LD_LIBRARY_PATH
+          cmake --build ./build/ --config Release --target visual_language_chat -j
       - run: >
           source ./ov/setupvars.sh
           && python -m pip install --upgrade-strategy eager ./thirdparty/openvino_tokenizers/[transformers] -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
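The configure step now passes TBB_DIR alongside OpenVINO_DIR, presumably so that OpenVINO's CMake package can resolve its TBB dependency via find_package from the TBBConfig.cmake bundled in the extracted archive rather than requiring a system TBB install; the LD_LIBRARY_PATH prefix on the build step keeps the bundled TBB libraries loadable while the target builds.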

samples/cpp/visual_language_chat/visual_language_chat.cpp (+11 -11)

@@ -9,7 +9,7 @@ bool print_subword(std::string&& subword) {
     return !(std::cout << subword << std::flush);
 }
 
-int main(int argc, char* argv[]) {
+int main(int argc, char* argv[]) try {
     if (3 != argc) {
         throw std::runtime_error(std::string{"Usage "} + argv[0] + " <MODEL_DIR> <IMAGE_FILE>");
     }
@@ -42,14 +42,14 @@ int main(int argc, char* argv[]) {
         "question:\n";
     }
     pipe.finish_chat();
-// } catch (const std::exception& error) {
-//     try {
-//         std::cerr << error.what() << '\n';
-//     } catch (const std::ios_base::failure&) {}
-//     return EXIT_FAILURE;
-// } catch (...) {
-//     try {
-//         std::cerr << "Non-exception object thrown\n";
-//     } catch (const std::ios_base::failure&) {}
-//     return EXIT_FAILURE;
+} catch (const std::exception& error) {
+    try {
+        std::cerr << error.what() << '\n';
+    } catch (const std::ios_base::failure&) {}
+    return EXIT_FAILURE;
+} catch (...) {
+    try {
+        std::cerr << "Non-exception object thrown\n";
+    } catch (const std::ios_base::failure&) {}
+    return EXIT_FAILURE;
 }
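With the previously commented-out handlers restored, main becomes a function-try-block: the catch clauses attach to the whole function body, so any exception escaping the sample is reported instead of terminating the process abruptly. A minimal standalone sketch of the idiom, independent of this repository:

#include <cstdlib>
#include <iostream>
#include <stdexcept>

// The handlers attach to the entire function body. The explicit return is
// needed so the process exits with a failure code rather than 0.
int main() try {
    throw std::runtime_error{"demo failure"};
} catch (const std::exception& error) {
    // std::cerr can itself throw if failure exceptions were enabled on it,
    // hence the nested try around the print.
    try {
        std::cerr << error.what() << '\n';
    } catch (const std::ios_base::failure&) {}
    return EXIT_FAILURE;
} catch (...) {
    try {
        std::cerr << "Non-exception object thrown\n";
    } catch (const std::ios_base::failure&) {}
    return EXIT_FAILURE;
}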

src/cpp/src/vision_encoder.cpp (+1 -1)

@@ -285,7 +285,7 @@ EncodedImage llava_image_embed_make_with_bytes_slice(clip_ctx& ctx_clip, const o
         }
     }
     // Override prev output tensor that doesn't own memory.
-    encoder.set_output_tensor(resized_source);
+    encoder.set_output_tensor(ov::Tensor{ov::element::f32, {0, 0, old_hidden_size}});
     return {resized_source, resized_source_size, encoded_slices, sliced_sizes};
 }
 }
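Context for the one-line change: resized_source is returned to the caller but aliases memory owned by the encoder request, so leaving it installed as the request's output would let the next inference overwrite it; installing an empty tensor of the right element type detaches the request from that buffer. A hedged fragment of the pattern, assuming an existing ov::InferRequest named encoder and an old_hidden_size queried earlier; not a complete program:

// Sketch only: `encoder` is an already-created ov::InferRequest whose output
// tensor is a view over request-owned memory.
ov::Tensor resized_source = encoder.get_output_tensor();

// Detach the request from that buffer: the next infer() writes into a newly
// allocated output instead of clobbering `resized_source`.
encoder.set_output_tensor(ov::Tensor{ov::element::f32, {0, 0, old_hidden_size}});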

src/cpp/src/vlm_pipeline.cpp (+11 -2)

@@ -380,6 +380,7 @@ DecodedResults VLMPipeline::generate(
         m_history.push_back({{"role", "user"}, {"content", images_prompt}});
         constexpr bool add_generation_prompt = true;
         std::string new_templated_chat_history = m_tokenizer.apply_chat_template(m_history, add_generation_prompt);
+        std::cout << new_templated_chat_history << '\n';
         ov::Tensor new_chat_tokens = m_tokenizer.encode(new_templated_chat_history).input_ids;
         if (0 == m_language.get_tensor("attention_mask").get_shape().at(1)) {
             encoded_input = new_chat_tokens;
@@ -418,6 +419,10 @@ DecodedResults VLMPipeline::generate(
         int64_t slice_end_id = special_tokens.data<int64_t>()[3];
         int64_t im_start_pos = 0, slice_start_pos = 0;
         int64_t* begin = encoded_input.data<int64_t>();
+        for (size_t cont = 0; cont < encoded_input.get_size(); ++cont) {
+            std::cout << begin[cont] << ", ";
+        }
+        std::cout << '\n';
         int64_t* ids = begin;
         size_t encoded_input_size = encoded_input.get_size();
         int64_t* end = ids + encoded_input_size;
@@ -426,9 +431,11 @@ DecodedResults VLMPipeline::generate(
             const ov::Tensor& resampled_source = resample(*this, encoded_image.resized_source, {encoded_image.resized_source_size});
             float* emb = resampled_source.data<float>();
             ids = std::find(ids, end, im_start_id);
+            std::cout << std::distance(begin, ids) << '\n';
             OPENVINO_ASSERT(end != ids);
             std::copy_n(emb, resampled_source.get_size(), inputs_embeds_data + std::distance(begin, ids) * m_vlm_config.hidden_size);
-            ids += m_vlm_config.hidden_size;
+            ids += m_vlm_config.query_num;
+            std::cout << std::distance(begin, ids) << '\n';
             if (encoded_image.slices) {
                 size_t token_idx = 0;
                 const ov::Shape& slices_shape = encoded_image.slices.get_shape();
@@ -440,9 +447,11 @@ DecodedResults VLMPipeline::generate(
                     ov::Tensor encoded_view{ov::element::f32, {1, d2, d3}, encoded_image.slices.data<float>() + (i * slices_shape.at(1) + ja) * d2 * d3};
                     const ov::Tensor& vision_embed_tensor_i_j = resample(*this, encoded_view, {sliced_sizes.at(i * slices_shape.at(1) + ja)});
                     ids = std::find(ids, end, slice_start_id);
+                    std::cout << std::distance(begin, ids) << '\n';
                     OPENVINO_ASSERT(end != ids);
                     std::copy_n(vision_embed_tensor_i_j.data<float>(), vision_embed_tensor_i_j.get_size(), inputs_embeds_data + std::distance(begin, ids) * m_vlm_config.hidden_size);
-                    ids += m_vlm_config.hidden_size;
+                    ids += m_vlm_config.query_num;
+                    std::cout << std::distance(begin, ids) << '\n';
                 }
             }
         }
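The substantive fix (the "jump" of the commit title) is the cursor arithmetic: after std::copy_n writes the resampled image embeddings over the placeholder positions, ids must advance by the number of embedding tokens (m_vlm_config.query_num), not by the embedding width (m_vlm_config.hidden_size); over-advancing pushed the cursor past later slice-start markers, so the following std::find could miss them. A self-contained sketch of the corrected bookkeeping, with hypothetical sizes and token ids:

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <iterator>
#include <vector>

int main() {
    // Hypothetical values; the real ones come from m_vlm_config and the tokenizer.
    constexpr int64_t im_start_id = 101;    // marks where an image's embeddings go
    constexpr std::size_t query_num = 4;    // embedding tokens produced per image
    constexpr std::size_t hidden_size = 8;  // floats per embedding token

    // Two images: each marker is followed by query_num - 1 placeholder tokens.
    std::vector<int64_t> token_ids{7, im_start_id, 0, 0, 0, 9, im_start_id, 0, 0, 0, 3};
    std::vector<float> inputs_embeds(token_ids.size() * hidden_size, 0.0f);
    std::vector<float> image_embeds(query_num * hidden_size, 1.0f);  // stand-in features

    const int64_t* begin = token_ids.data();
    const int64_t* end = begin + token_ids.size();
    const int64_t* ids = begin;
    for (int image = 0; image < 2; ++image) {
        ids = std::find(ids, end, im_start_id);  // next insertion point
        const std::size_t pos = std::distance(begin, ids);
        std::copy_n(image_embeds.data(), image_embeds.size(),
                    inputs_embeds.data() + pos * hidden_size);
        std::cout << "image " << image << " written at token " << pos << '\n';
        // The fix: skip exactly the query_num tokens just overwritten.
        // Advancing by hidden_size (8 here) would leave ids past the second
        // marker at index 6, and the next std::find would miss it.
        ids += query_num;
    }
}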
