You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardexpand all lines: src/cpp/include/openvino/genai/generation_handle.hpp
+19-5
Original file line number
Diff line number
Diff line change
@@ -11,14 +11,17 @@
11
11
#include"openvino/genai/perf_metrics.hpp"
12
12
13
13
namespaceov::genai {
14
+
14
15
enumclassGenerationStatus {
15
16
RUNNING = 0, // Default status for ongoing generation
16
17
FINISHED = 1, // Status set when generation has been finished
17
18
IGNORED = 2, // Status set when generation ran into an out-of-memory condition and could not be continued
18
-
DROPPED_BY_PIPELINE = 3, // Currently not used, TODO: implement abort functionality
19
-
DROPPED_BY_HANDLE = 4// Status set when generation handle is dropped
19
+
CANCEL = 3, // Status set when generation handle is cancelled. The last prompt and all generated tokens will be dropped from history; the KV cache will include the history except for the last step.
20
+
STOP = 4, // Status set when generation handle is stopped. History will be kept, KV cache will include the last prompt and generated tokens.
21
+
DROPPED_BY_HANDLE OPENVINO_ENUM_DEPRECATED("Please, use `STOP` instead of `DROPPED_BY_HANDLE`.") = GenerationStatus::STOP // Status set when generation handle is dropped.
20
22
};
21
23
24
+
22
25
structEncodedGenerationResult {
23
26
// request ID - obsolete when handle API is approved as handle will connect results with prompts.
24
27
uint64_t m_request_id;
@@ -70,10 +73,10 @@ using GenerationOutputs = std::unordered_map<uint64_t, GenerationOutput>;
Copy file name to clipboardexpand all lines: src/cpp/include/openvino/genai/llm_pipeline.hpp
+4-2
Original file line number
Diff line number
Diff line change
@@ -18,8 +18,10 @@
18
18
namespaceov {
19
19
namespacegenai {
20
20
21
-
// Return flag corresponds whether generation should be stopped: false means continue generation, true means stop.
22
-
using StreamerVariant = std::variant<std::function<bool(std::string)>, std::shared_ptr<StreamerBase>, std::monostate>;
21
+
// The return flag indicates whether generation should be stopped. It can be:
22
+
// ov::genai::StreamingStatus flag, RUNNING means continue generation, STOP means stop generation, CANCEL means stop generation and remove the last prompt and answer from history
23
+
// *DEPRECATED* bool flag, false means continue generation, true means stop. Please, use `ov::genai::StreamingStatus` instead.
24
+
using StreamerVariant = std::variant<std::function<bool(std::string)>, std::function<StreamingStatus(std::string)>, std::shared_ptr<StreamerBase>, std::monostate>;
23
25
using OptionalGenerationConfig = std::optional<GenerationConfig>;
24
26
using EncodedInputs = std::variant<ov::Tensor, TokenizedInputs>;
25
27
using StringInputs = std::variant<std::string, std::vector<std::string>>;
0 commit comments