3
3
4
4
#pragma once
5
5
6
- #include < filesystem>
7
6
#include < optional>
7
+ #include < variant>
8
8
9
- #include < openvino/runtime/infer_request.hpp>
10
9
#include < openvino/core/any.hpp>
11
-
12
10
#include " openvino/genai/generation_config.hpp"
13
11
#include " openvino/genai/tokenizer.hpp"
14
12
#include " openvino/genai/streamer_base.hpp"
@@ -84,18 +82,24 @@ class OPENVINO_GENAI_EXPORTS LLMPipeline {
84
82
* @param streamer optional streamer
85
83
* @return std::string decoded resulting text
86
84
*/
87
- std::string generate (std::string text, OptionalGenerationConfig generation_config, OptionalStreamerVariant streamer);
85
+ std::string generate (std::string text, OptionalGenerationConfig generation_config=nullopt , OptionalStreamerVariant streamer=nullopt );
88
86
89
-
90
87
template <typename ... Properties>
91
88
util::EnableIfAllStringAny<std::string, Properties...> generate (
92
89
std::string text,
93
90
Properties&&... properties) {
94
91
return generate (text, AnyMap{std::forward<Properties>(properties)...});
95
92
}
96
-
97
93
std::string generate (std::string text, const ov::AnyMap& config);
98
94
95
+ template <typename ... Properties>
96
+ util::EnableIfAllStringAny<EncodedResults, Properties...> generate (
97
+ ov::Tensor input_ids,
98
+ Properties&&... properties) {
99
+ return generate (input_ids, AnyMap{std::forward<Properties>(properties)...});
100
+ }
101
+ EncodedResults generate (ov::Tensor input_ids, const ov::AnyMap& config);
102
+
99
103
/* *
100
104
* @brief High level generate for batched prompts which encodes inputs and returns decoded outputs.
101
105
* Streamer cannot be used for multibatch inputs.
@@ -119,16 +123,22 @@ class OPENVINO_GENAI_EXPORTS LLMPipeline {
119
123
*/
120
124
EncodedResults generate (ov::Tensor input_ids,
121
125
std::optional<ov::Tensor> attention_mask,
122
- OptionalGenerationConfig generation_config,
123
- OptionalStreamerVariant streamer);
124
-
125
- std::string operator ()(std::string text, OptionalGenerationConfig generation_config);
126
- DecodedResults operator ()(std::vector<std::string> text, OptionalGenerationConfig generation_config);
127
- DecodedResults operator ()(std::initializer_list<std::string> text, OptionalGenerationConfig generation_config);
126
+ OptionalGenerationConfig generation_config=nullopt,
127
+ OptionalStreamerVariant streamer=nullopt);
128
+
129
+ template <typename InputsType, typename ... Properties>
130
+ util::EnableIfAllStringAny<std::string, Properties...> operator ()(
131
+ InputsType text,
132
+ Properties&&... properties) {
133
+ return generate (text, AnyMap{std::forward<Properties>(properties)...});
134
+ }
135
+
136
+ DecodedResults operator ()(std::vector<std::string> text, OptionalGenerationConfig generation_config=nullopt);
137
+ DecodedResults operator ()(std::initializer_list<std::string> text, OptionalGenerationConfig generation_config=nullopt);
128
138
129
139
// generate with streamers
130
- std::string operator ()(std::string text, OptionalGenerationConfig generation_config, StreamerVariant streamer);
131
- std::string operator ()(std::string text, StreamerVariant streamer);
140
+ std::string operator ()(std::string text, OptionalGenerationConfig generation_config=nullopt, OptionalStreamerVariant streamer=nullopt );
141
+ std::string operator ()(std::string text, OptionalStreamerVariant streamer);
132
142
133
143
ov::Tokenizer get_tokenizer ();
134
144
GenerationConfig get_generation_config () const ;
@@ -143,9 +153,40 @@ class OPENVINO_GENAI_EXPORTS LLMPipeline {
143
153
std::unique_ptr<LLMPipelineImpl> m_pimpl;
144
154
};
145
155
156
+ /*
157
+ * Utils that allow using generate() and operator() in the following way:
158
+ * pipe.generate(input_ids, ov::max_new_tokens(200), ov::temperature(1.0f),...)
159
+ * pipe(text, ov::max_new_tokens(200), ov::temperature(1.0f),...)
160
+ * All names match the names in the config, except streamer.
161
+ */
146
162
static constexpr ov::Property<size_t > max_new_tokens{" max_new_tokens" };
163
+ static constexpr ov::Property<size_t > max_length{" max_length" };
164
+ static constexpr ov::Property<bool > ignore_eos{" ignore_eos" };
165
+
166
+ static constexpr ov::Property<size_t > num_beam_groups{" num_beam_groups" };
167
+ static constexpr ov::Property<size_t > num_beams{" num_beams" };
168
+ static constexpr ov::Property<float > diversity_penalty{" diversity_penalty" };
169
+ static constexpr ov::Property<float > length_penalty{" length_penalty" };
170
+ static constexpr ov::Property<size_t > num_return_sequences{" num_return_sequences" };
171
+ static constexpr ov::Property<size_t > no_repeat_ngram_size{" no_repeat_ngram_size" };
172
+ static constexpr ov::Property<StopCriteria> stop_criteria{" stop_criteria" };
173
+
147
174
static constexpr ov::Property<float > temperature{" temperature" };
148
- static constexpr ov::Property<std::function<void (std::string)>> streamer_lambda{" streamer_lambda" };
149
- static constexpr ov::Property<std::shared_ptr<StreamerBase>> streamer{" streamer" };
175
+ static constexpr ov::Property<float > top_p{" top_p" };
176
+ static constexpr ov::Property<int > top_k{" top_k" };
177
+ static constexpr ov::Property<bool > do_sample{" do_sample" };
178
+ static constexpr ov::Property<float > repetition_penalty{" repetition_penalty" };
179
+
180
+
181
+ static constexpr ov::Property<int64_t > pad_token_id{" pad_token_id" };
182
+ static constexpr ov::Property<int64_t > bos_token_id{" bos_token_id" };
183
+ static constexpr ov::Property<int64_t > eos_token_id{" eos_token_id" };
184
+
185
+ static constexpr ov::Property<std::string> bos_token{" bos_token" };
186
+ static constexpr ov::Property<std::string> eos_token{" eos_token" };
187
+
188
+ // only a lambda streamer can be set via the ov::streamer(),... syntactic sugar,
189
+ // because std::variant<StreamerBase, std::function<>> cannot be stored in AnyMap
190
+ static constexpr ov::Property<std::function<void (std::string)>> streamer_lambda{" streamer" };
150
191
151
192
} // namespace ov
0 commit comments