@@ -44,10 +44,7 @@ std::vector<int64_t> kmp_search(const std::vector<int64_t>& haystack, const std:
44
44
return res;
45
45
}
46
46
47
- struct Token {
48
- float log_prob;
49
- int64_t idx;
50
- };
47
+ struct Token {float log_prob; int64_t idx;};
51
48
52
49
std::vector<Token> log_softmax (const ov::Tensor& logits, size_t batch_idx) {
53
50
if (logits.get_shape ().at (0 ) <= batch_idx) {
@@ -58,10 +55,10 @@ std::vector<Token> log_softmax(const ov::Tensor& logits, size_t batch_idx) {
58
55
size_t sequence_offset = (logits.get_shape ().at (1 ) - 1 ) * vocab_size;
59
56
const float * beam_logits = logits.data <const float >() + batch_offset + sequence_offset;
60
57
float max_logit = *std::max_element (beam_logits, beam_logits + vocab_size);
61
- float log_sum = std::log (
62
- std::accumulate ( beam_logits, beam_logits + vocab_size, 0 .0f , [max_logit](float accumulated, float to_add) {
58
+ float log_sum = std::log (std::accumulate (
59
+ beam_logits, beam_logits + vocab_size, 0 .0f , [max_logit](float accumulated, float to_add) {
63
60
return accumulated + std::exp (to_add - max_logit);
64
- }));
61
+ }));
65
62
std::vector<Token> tokens;
66
63
tokens.reserve (vocab_size);
67
64
for (size_t idx = 0 ; idx < vocab_size; ++idx) {
@@ -80,26 +77,24 @@ bool greater(const Beam& left, const Beam& right) {
80
77
return left.score > right.score ;
81
78
}
82
79
83
- enum class StopCriteria { early, heuristic, never };
80
+ enum class StopCriteria {early, heuristic, never};
84
81
85
82
struct Parameters {
86
83
std::vector<int64_t > prompt;
84
+ int64_t eos_token;
87
85
size_t n_groups = 3 ;
88
86
size_t group_size = 5 ;
89
87
float diversity_penalty = 1.0 ;
90
88
size_t max_new_tokens = 20 ;
91
89
StopCriteria stop_criteria = StopCriteria::heuristic;
92
90
float length_penalty = 1.0 ;
93
91
size_t no_repeat_ngram_size = std::numeric_limits<size_t >::max();
94
- // There's no way to extract special token values from the tokenizer for now
95
- int64_t eos_token = 2 ;
96
- std::function<bool (const Beam&)> early_finish = [](const Beam&) {
97
- return false ;
98
- };
92
+
93
+ std::function<bool (const Beam&)> early_finish = [](const Beam&){return false ;};
99
94
};
100
95
101
96
struct Group {
102
- std::vector<Beam> ongoing; // Best beams in front
97
+ std::vector<Beam> ongoing; // Best beams in front
103
98
std::vector<Beam> min_heap; // The worst of the best completed beams is the first
104
99
bool done = false ;
105
100
@@ -126,30 +121,26 @@ struct Group {
126
121
float best_sum_logprobs = ongoing.front ().score ;
127
122
float worst_score = min_heap.front ().score ;
128
123
switch (parameters.stop_criteria ) {
129
- case StopCriteria::early:
130
- done = true ;
131
- return ;
132
- case StopCriteria::heuristic: {
133
- float highest_attainable_score = best_sum_logprobs / std::pow (float (cur_len), parameters.length_penalty );
134
- done = worst_score >= highest_attainable_score;
135
- return ;
136
- }
137
- case StopCriteria::never: {
138
- size_t length = parameters.length_penalty > 0.0 ? parameters.max_new_tokens : cur_len;
139
- float highest_attainable_score = best_sum_logprobs / std::pow (float (length), parameters.length_penalty );
140
- done = worst_score >= highest_attainable_score;
141
- return ;
142
- }
143
- default :
144
- throw std::runtime_error (" Never reached" );
124
+ case StopCriteria::early:
125
+ done = true ;
126
+ return ;
127
+ case StopCriteria::heuristic: {
128
+ float highest_attainable_score = best_sum_logprobs / std::pow (float (cur_len), parameters.length_penalty );
129
+ done = worst_score >= highest_attainable_score;
130
+ return ;
131
+ }
132
+ case StopCriteria::never: {
133
+ size_t length = parameters.length_penalty > 0.0 ? parameters.max_new_tokens : cur_len;
134
+ float highest_attainable_score = best_sum_logprobs / std::pow (float (length), parameters.length_penalty );
135
+ done = worst_score >= highest_attainable_score;
136
+ return ;
137
+ }
138
+ default : throw std::runtime_error (" Never reached" );
145
139
}
146
140
}
147
141
};
148
142
149
- struct TokenToBeam {
150
- int64_t token_idx;
151
- int32_t beam_idx;
152
- };
143
+ struct TokenToBeam {int64_t token_idx; int32_t beam_idx;};
153
144
154
145
// GroupBeamSearcher processes logits produced by a language model and accumulates beams using group beam search
155
146
// algorithm. select_next_tokens() returns token ids selected by the algorithm and corresponding beam ids. These values
0 commit comments