
Commit 439a255

debug prints
1 parent 12b2389 commit 439a255

File tree

src/cpp/src/debug_utils.hpp
src/cpp/src/utils.cpp
src/cpp/src/visual_language/inputs_embedder.cpp
src/cpp/src/visual_language/pipeline.cpp

4 files changed: +42 -8 lines changed

src/cpp/src/debug_utils.hpp

+1 -1

@@ -12,7 +12,7 @@
 template <typename T>
 void print_array(T * array, size_t size) {
     std::cout << " => [ ";
-    for (size_t i = 0; i < std::min(size, size_t(10)); ++i) {
+    for (size_t i = 0; i < size; ++i) {
         std::cout << array[i] << " ";
     }
     std::cout << " ] " << std::endl;
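
For reference, a minimal standalone sketch (not part of the commit) of how the helper behaves after this change: with the std::min(size, size_t(10)) cap removed, print_array writes every element instead of only the first ten.

// Sketch only: the updated helper exercised on a small made-up buffer.
#include <cstddef>
#include <cstdint>
#include <iostream>

template <typename T>
void print_array(T* array, size_t size) {
    std::cout << " => [ ";
    for (size_t i = 0; i < size; ++i) {
        std::cout << array[i] << " ";
    }
    std::cout << " ] " << std::endl;
}

int main() {
    int64_t tokens[] = {101, 2023, 2003, 1037, 3231, 102};   // made-up token ids
    print_array(tokens, sizeof(tokens) / sizeof(tokens[0]));  // prints all 6 values
}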

src/cpp/src/utils.cpp

+2

@@ -349,6 +349,8 @@ void trim_kv_cache(ov::InferRequest request, uint64_t remove_from_end, size_t se
     auto shape = old_tensor.get_shape();
     shape[seq_length_axis] -= remove_from_end;

+    std::cout << " trim_kv_cache " << shape[seq_length_axis] << std::endl;
+
     ov::Coordinate new_shape_begin{0, 0, 0, 0};
     ov::Coordinate new_shape_end{shape};
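
The new print reports the sequence length that remains after trimming. As context, a hypothetical free function (name and structure assumed, not the actual utils.cpp implementation) showing the Coordinate-based region-of-interest pattern the surrounding lines use to shrink a KV-cache tensor along the sequence axis:

// Assumed sketch: shrink a 4-D cache tensor along seq_length_axis by taking an
// ROI view from the origin up to the reduced shape.
#include <openvino/openvino.hpp>
#include <iostream>

ov::Tensor trim_along_axis(const ov::Tensor& old_tensor, size_t seq_length_axis, uint64_t remove_from_end) {
    auto shape = old_tensor.get_shape();
    shape[seq_length_axis] -= remove_from_end;
    std::cout << " trim_kv_cache " << shape[seq_length_axis] << std::endl;  // remaining sequence length

    ov::Coordinate new_shape_begin{0, 0, 0, 0};
    ov::Coordinate new_shape_end{shape};
    return ov::Tensor(old_tensor, new_shape_begin, new_shape_end);  // view over the kept region
}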

src/cpp/src/visual_language/inputs_embedder.cpp

+34 -7

@@ -10,6 +10,7 @@
 #include "openvino/opsets/opset13.hpp"

 #include "utils.hpp"
+#include "debug_utils.hpp"
 #include <regex>

 namespace ov::genai {
@@ -82,25 +83,36 @@ class InputsEmbedder::IInputsEmbedder {
         m_stop_token_ids = stop_token_ids;
     }

-    void update_tokenized_history(const ov::genai::utils::GenerationFinishInfo generation_finish_info, bool is_beam_search, size_t last_answer_len, size_t inputs_embeds_size) {
+    void update_tokenized_history(const ov::genai::utils::GenerationFinishInfo generation_finish_info, bool is_beam_search, size_t last_answer_len, size_t full_len) {
         if (is_beam_search) {
             m_kv_history_manager.trusted_history_length = m_tokenized_history.size();
             m_kv_history_manager.num_tokens_to_remove_from_kv_cache = last_answer_len;
+            std::cout << " 1 1" << std::endl;
         } else {
             m_kv_history_manager.reset();
+            std::cout << " 1 2" << std::endl;
         }

         m_last_disappeared_token = generation_finish_info.probably_disappeared_token;

         if (generation_finish_info.streaming_finish_status == ov::genai::GenerationStatus::CANCEL) {
             // let's remove last answer and prompt
-            m_kv_history_manager.num_tokens_to_remove_from_kv_cache = inputs_embeds_size + last_answer_len;
+            m_kv_history_manager.num_tokens_to_remove_from_kv_cache = full_len + last_answer_len;
             m_tokenized_history = std::move(m_prev_tokenized_history);
             m_kv_history_manager.reset_kv_cache = m_tokenized_history.empty();
+            // std::cout << " 2 1" << std::endl;
         } else {
             auto encoded_result = generation_finish_info.results.tokens[0];
             std::copy(encoded_result.begin(), encoded_result.end(), std::back_inserter(m_tokenized_history));
+            std::cout << " 2 2" << std::endl;
         }
+        std::cout << " \nCANCEL " << m_tokenized_history.size() << std::endl;
+        print_array(m_tokenized_history.data(), m_tokenized_history.size());
+
+        std::cout << " last_answer_len " << last_answer_len << std::endl;
+        std::cout << " full_len " << full_len << std::endl;
+        std::cout << " m_kv_history_manager.num_tokens_to_remove_from_kv_cache " << m_kv_history_manager.num_tokens_to_remove_from_kv_cache << std::endl;
+        std::cout << " m_kv_history_manager.trusted_history_length " << m_kv_history_manager.trusted_history_length << std::endl;
     }

     void set_apply_chat_template_status(bool apply_chat_template) {
@@ -114,7 +126,6 @@ class InputsEmbedder::IInputsEmbedder {
             m_history.clear();
             m_templated_chat_history.clear();
             m_tokenized_history.clear();
-            m_prev_tokenized_history.clear();
         }
         if (system_message.empty()) {
             return;
@@ -143,7 +154,6 @@ class InputsEmbedder::IInputsEmbedder {
         m_history.clear();
         m_templated_chat_history.clear();
         m_tokenized_history.clear();
-        m_prev_tokenized_history.clear();
     }

 protected:
@@ -194,6 +204,9 @@ class InputsEmbedder::IInputsEmbedder {
             auto end_tokenizer_time = std::chrono::steady_clock::now();
             metrics.raw_metrics.tokenization_durations.emplace_back(PerfMetrics::get_microsec(end_tokenizer_time - start_tokenizer_time));
             m_templated_chat_history = std::move(new_templated_chat_history);
+
+            print_tensor("prev_chat_tokens", prev_chat_tokens);
+            print_tensor("new_chat_tokens", new_chat_tokens);
             return {new_chat_tokens, prev_chat_tokens};
         } else {
             ov::Tensor encoded_input_ids;
@@ -235,24 +248,36 @@ class InputsEmbedder::IInputsEmbedder {
             }

             m_prev_tokenized_history.clear();
+
             if (m_tokenized_history.empty()) {
                 encoded_input_ids = new_chat_tokens;
-
             } else if (trusted_history_length != SIZE_MAX || m_kv_history_manager.does_history_cache_need_to_update()) {
+                std::cout << "trusted_history_length " << trusted_history_length << std::endl;
+                std::cout << "m_kv_history_manager " << m_kv_history_manager.trusted_history_length << std::endl;
+                std::cout << "m_kv_history_manager " << m_kv_history_manager.num_tokens_to_remove_from_kv_cache << std::endl;
+
+                std::cout << "prev_chat_tokens " << prev_chat_tokens.get_size() << std::endl;
+                std::cout << "m_tokenized_history " << m_tokenized_history.size() << std::endl;
+
                 // does_history_cache_need_to_update will be true here if beam search is activated
                 // in beam search mode we want to remove all history about last model answer from kv cache and add the best answer directly
                 // if we have difference in model answer and decoded answer it anyway will be less then entire history, so let's use data from m_kv_history_manager
                 if (m_kv_history_manager.does_history_cache_need_to_update()) {
                     trusted_history_length = m_kv_history_manager.trusted_history_length;
                 } else {
-                    auto num_tokens_to_remove_from_kv_cache = m_tokenized_history.size() - trusted_history_length;
+                    int64_t num_tokens_to_remove_from_kv_cache = m_tokenized_history.size() - trusted_history_length;
+                    std::cout << "num_tokens_to_remove_from_kv_cache " << num_tokens_to_remove_from_kv_cache << std::endl;
                     // last generated token is present in tokenized_history, but not included to attention mask, let's keep it in history
-                    num_tokens_to_remove_from_kv_cache -= 1;
+                    if (num_tokens_to_remove_from_kv_cache > 0)
+                        num_tokens_to_remove_from_kv_cache -= 1;
+
+                    std::cout << "num_tokens_to_remove_from_kv_cache " << num_tokens_to_remove_from_kv_cache << std::endl;

                     // if streaming was used and cancelled on prev step, m_kv_history_manager.num_tokens_to_remove_from_kv_cache could be already set
                     // and it would be bigger as it includes answer + prompt
                     m_kv_history_manager.num_tokens_to_remove_from_kv_cache = m_kv_history_manager.num_tokens_to_remove_from_kv_cache > num_tokens_to_remove_from_kv_cache ?
                         m_kv_history_manager.num_tokens_to_remove_from_kv_cache : num_tokens_to_remove_from_kv_cache;
+                    std::cout << "m_kv_history_manager.num_tokens_to_remove_from_kv_cache " << m_kv_history_manager.num_tokens_to_remove_from_kv_cache << std::endl;
                 }

                 std::copy_n(m_tokenized_history.data(), trusted_history_length, std::back_inserter(m_prev_tokenized_history));
@@ -277,6 +302,8 @@ class InputsEmbedder::IInputsEmbedder {
             }
             m_tokenized_history.clear();
             std::copy_n(new_chat_tokens.data<int64_t>(), new_chat_tokens.get_size(), std::back_inserter(m_tokenized_history));
+
+            print_tensor("encoded_input_ids", encoded_input_ids);
             return encoded_input_ids;
         } else {
             m_tokenized_history.clear();
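
Aside from the prints and the parameter rename (inputs_embeds_size becomes full_len), the one behavioral change in this file is the token-removal arithmetic: the count becomes int64_t and the decrement is guarded by a positivity check, presumably to avoid unsigned wrap-around when the difference is zero. A standalone sketch (hypothetical sizes, not from the commit) of the failure mode that guard avoids:

// Sketch: with size_t, a zero difference followed by an unconditional "-= 1"
// wraps to SIZE_MAX; with int64_t plus the "> 0" guard the count stays at 0.
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
    std::vector<int64_t> tokenized_history(16);  // pretend 16 tokens are cached
    size_t trusted_history_length = 16;          // the whole history is still trusted

    // Old pattern: unsigned subtraction, unconditional decrement.
    size_t unsigned_count = tokenized_history.size() - trusted_history_length;
    unsigned_count -= 1;  // wraps around to SIZE_MAX

    // New pattern: signed count, decrement only when positive.
    int64_t signed_count = static_cast<int64_t>(tokenized_history.size()) - static_cast<int64_t>(trusted_history_length);
    if (signed_count > 0)
        signed_count -= 1;

    std::cout << "unsigned: " << unsigned_count << "\nsigned:   " << signed_count << std::endl;
}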

src/cpp/src/visual_language/pipeline.cpp

+5

@@ -193,10 +193,15 @@ class ov::genai::VLMPipeline::VLMPipelineImpl {
         size_t request_id = 0;
         size_t block_size = 1; // not used

+
         size_t history_size = m_language.get_tensor("attention_mask").get_shape().at(1) - to_remove_from_hist;
         size_t inputs_embeds_size = inputs_embeds.get_shape().at(1);

+        std::cout << "history_size " << history_size << std::endl;
+        std::cout << "history_size + inputs_embeds_size " << inputs_embeds_size + history_size << std::endl;
+
         auto tokenized_history = m_inputs_embedder->get_tokenized_history();
+        std::cout << "tokenized_history " << tokenized_history.size() << std::endl;
         ov::Tensor prompt_ids(ov::element::i64, { history_size + inputs_embeds_size });
         OPENVINO_ASSERT(prompt_ids.get_size() >= tokenized_history.size(), "Prompt ids size is less than tokenized history size");
         std::fill_n(prompt_ids.data<int64_t>(), prompt_ids.get_size(), m_tokenizer.get_pad_token_id());
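
The prints here trace the sizes feeding the OPENVINO_ASSERT: prompt_ids is allocated with history_size + inputs_embeds_size slots and must be able to hold the whole tokenized history. A simplified sketch of that pad-and-fill pattern with plain vectors (all sizes made up; the final copy step is an assumption, not shown in this hunk):

// Hypothetical, simplified version of the prompt_ids preparation being traced.
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
    size_t history_size = 12;        // attention_mask length minus trimmed tokens (made up)
    size_t inputs_embeds_size = 5;   // length of the new input embeddings (made up)
    int64_t pad_token_id = 0;

    std::vector<int64_t> tokenized_history(10, 7);  // 10 previously generated tokens (made up)

    // Allocate the full prompt buffer and fill it with the pad id.
    std::vector<int64_t> prompt_ids(history_size + inputs_embeds_size, pad_token_id);
    assert(prompt_ids.size() >= tokenized_history.size() && "Prompt ids size is less than tokenized history size");

    // Assumed follow-up step: copy the known history into the front of the buffer.
    std::copy(tokenized_history.begin(), tokenized_history.end(), prompt_ids.begin());

    std::cout << "prompt_ids slots: " << prompt_ids.size()
              << ", filled from history: " << tokenized_history.size() << std::endl;
}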
