beam_search_causal_lm.cpp (from openvinotoolkit/openvino.genai)

// Copyright (C) 2023-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
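//
// This sample runs grouped beam search generation with ov::LLMPipeline:
// all prompts passed on the command line are generated in one batch, and
// every resulting beam is printed together with its score.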

#include <cstdlib>
#include <iostream>

#include <openvino/genai/llm_pipeline.hpp>

namespace {
    // Hard-coded pad token id, used as long as the tokenizer cannot read it from the model IR.
    enum SPECIAL_TOKEN { PAD_TOKEN = 2 };
}

int main(int argc, char* argv[]) try {
    if (argc < 3) {
        throw std::runtime_error(std::string{"Usage: "} + argv[0] + " <MODEL_DIR> '<PROMPT 1>' ['<PROMPT 2>' ...]");
    }
    auto prompts = std::vector<std::string>(argv + 2, argv + argc);
    std::string model_path = argv[1];

    std::string device = "CPU";  // GPU can be used as well
    ov::LLMPipeline pipe(model_path, device);

    ov::GenerationConfig config = pipe.get_generation_config();
    config.max_new_tokens = 20;
    config.num_beam_groups = 3;  // beams are split into groups to encourage diverse results
    config.num_beams = 15;       // total number of beams (5 per group here)
    config.num_return_sequences = config.num_beams * prompts.size();
    // Workaround: set pad_token_id manually until it is written into the IR.
    pipe.get_tokenizer().set_pad_token_id(PAD_TOKEN);

    auto beams = pipe.generate(prompts, config);
    for (size_t i = 0; i < beams.scores.size(); ++i) {
        std::cout << beams.scores[i] << ": " << beams.texts[i] << '\n';
    }
    return 0;
} catch (const std::exception& error) {
    std::cerr << error.what() << '\n';
    return EXIT_FAILURE;
} catch (...) {
    std::cerr << "Non-exception object thrown\n";
    return EXIT_FAILURE;
}
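
// Example run (the model directory name below is illustrative; any causal LM
// exported to OpenVINO IR should work):
//   ./beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0 "Why is the Sun yellow?"
// Each returned beam is printed as "<score>: <text>".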