@@ -44,7 +44,10 @@ std::vector<int64_t> kmp_search(const std::vector<int64_t>& haystack, const std:
44
44
return res;
45
45
}
46
46
47
- struct Token {float log_prob; int64_t idx;};
47
+ struct Token {  // Element type of the std::vector<Token> that log_softmax() returns.
48
+ float log_prob;  // presumably the log-softmax value log_softmax() computes for this entry -- TODO confirm against its loop body
49
+ int64_t idx;  // presumably the entry's position within the vocabulary dimension of the logits -- TODO confirm
50
+ };
48
51
49
52
std::vector<Token> log_softmax (const ov::Tensor& logits, size_t batch_idx) {
50
53
if (logits.get_shape ().at (0 ) <= batch_idx) {
@@ -55,10 +58,10 @@ std::vector<Token> log_softmax(const ov::Tensor& logits, size_t batch_idx) {
55
58
size_t sequence_offset = (logits.get_shape ().at (1 ) - 1 ) * vocab_size;
56
59
const float * beam_logits = logits.data <const float >() + batch_offset + sequence_offset;
57
60
float max_logit = *std::max_element (beam_logits, beam_logits + vocab_size);
58
- float log_sum = std::log (std::accumulate (
59
- beam_logits, beam_logits + vocab_size, 0 .0f , [max_logit](float accumulated, float to_add) {
61
+ float log_sum = std::log (
62
+ std::accumulate ( beam_logits, beam_logits + vocab_size, 0 .0f , [max_logit](float accumulated, float to_add) {
60
63
return accumulated + std::exp (to_add - max_logit);
61
- }));
64
+ }));
62
65
std::vector<Token> tokens;
63
66
tokens.reserve (vocab_size);
64
67
for (size_t idx = 0 ; idx < vocab_size; ++idx) {
@@ -77,7 +80,7 @@ bool greater(const Beam& left, const Beam& right) {
77
80
return left.score > right.score ;
78
81
}
79
82
80
- enum class StopCriteria {early, heuristic, never};
83
+ enum class StopCriteria { early, heuristic, never };  // Switched on via parameters.stop_criteria: early sets done = true outright; heuristic and never set done by comparing the worst kept score with a length-penalised best ongoing score (never uses max_new_tokens as the length when length_penalty > 0).
81
84
82
85
struct Parameters {
83
86
std::vector<int64_t > prompt;
@@ -90,11 +93,13 @@ struct Parameters {
90
93
size_t no_repeat_ngram_size = std::numeric_limits<size_t >::max();
91
94
// There's no way to extract special token values from the tokenizer for now
92
95
int64_t eos_token = 2 ;
93
- std::function<bool (const Beam&)> early_finish = [](const Beam&){return false ;};
96
+ std::function<bool (const Beam&)> early_finish = [](const Beam&) {
97
+ return false ;
98
+ };
94
99
};
95
100
96
101
struct Group {
97
- std::vector<Beam> ongoing; // Best beams in front
102
+ std::vector<Beam> ongoing; // Best beams in front
98
103
std::vector<Beam> min_heap; // The worst of the best completed beams is the first
99
104
bool done = false ;
100
105
@@ -121,26 +126,30 @@ struct Group {
121
126
float best_sum_logprobs = ongoing.front ().score ;
122
127
float worst_score = min_heap.front ().score ;
123
128
switch (parameters.stop_criteria ) {
124
- case StopCriteria::early:
125
- done = true ;
126
- return ;
127
- case StopCriteria::heuristic: {
128
- float highest_attainable_score = best_sum_logprobs / std::pow (float (cur_len), parameters.length_penalty );
129
- done = worst_score >= highest_attainable_score;
130
- return ;
131
- }
132
- case StopCriteria::never: {
133
- size_t length = parameters.length_penalty > 0.0 ? parameters.max_new_tokens : cur_len;
134
- float highest_attainable_score = best_sum_logprobs / std::pow (float (length), parameters.length_penalty );
135
- done = worst_score >= highest_attainable_score;
136
- return ;
137
- }
138
- default : throw std::runtime_error (" Never reached" );
129
+ case StopCriteria::early:
130
+ done = true ;
131
+ return ;
132
+ case StopCriteria::heuristic: {
133
+ float highest_attainable_score = best_sum_logprobs / std::pow (float (cur_len), parameters.length_penalty );
134
+ done = worst_score >= highest_attainable_score;
135
+ return ;
136
+ }
137
+ case StopCriteria::never: {
138
+ size_t length = parameters.length_penalty > 0.0 ? parameters.max_new_tokens : cur_len;
139
+ float highest_attainable_score = best_sum_logprobs / std::pow (float (length), parameters.length_penalty );
140
+ done = worst_score >= highest_attainable_score;
141
+ return ;
142
+ }
143
+ default :
144
+ throw std::runtime_error (" Never reached" );
139
145
}
140
146
}
141
147
};
142
148
143
- struct TokenToBeam {int64_t token_idx; int32_t beam_idx;};
149
+ struct TokenToBeam {  // Pairs a token index with a beam index; presumably the (token id, beam id) result element of select_next_tokens() -- TODO confirm
150
+ int64_t token_idx;  // index of the token (64-bit)
151
+ int32_t beam_idx;  // index of the beam paired with token_idx (32-bit)
152
+ };
144
153
145
154
// GroupBeamSearcher processes logits produced by a language model and accumulates beams using group beam search
146
155
// algorithm. select_next_tokens() returns token ids selected by the algorithm and corresponding beam ids. These values
0 commit comments