
Commit 7c9ccbe

Improve with comments

1 parent c3da5c5 commit 7c9ccbe

8 files changed (+30 −46 lines)


.github/workflows/llama_cpp_plugin_build_and_test.yml

+1
@@ -29,6 +29,7 @@ jobs:
       with:
         submodules: recursive
         repository: vshampor/openvino
+        branch: llama_cpp_mod
         path: openvino
 
     - name: CMake - configure

modules/llama_cpp_plugin/CMakeLists.txt

+7-5
@@ -6,13 +6,15 @@ find_package(OpenVINODeveloperPackage REQUIRED)
 
 ov_option(ENABLE_LLAMA_CPP_PLUGIN_REGISTRATION "Enables registration of LLAMA_CPP plugin" ON)
 
-if(CMAKE_COMPILER_IS_GNUCXX)
-    ov_add_compiler_flags(-Wall)
-endif()
-
 add_subdirectory(src)
 
-add_subdirectory(third_party/llama.cpp)
+FetchContent_Declare(
+    llama_cpp
+    GIT_REPOSITORY https://github.com/ggerganov/llama.cpp
+    GIT_TAG b2417
+)
+
+FetchContent_MakeAvailable(llama_cpp)
 
 if(ENABLE_TESTS)
     include(CTest)
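
A note on the FetchContent switch above: FetchContent_Declare and FetchContent_MakeAvailable are only defined once the FetchContent module has been included. A minimal sketch of the full pattern, assuming the include is not already provided by OpenVINODeveloperPackage or a parent scope:

    # Sketch only: the include(FetchContent) line is an assumption,
    # not shown in this diff.
    include(FetchContent)

    FetchContent_Declare(
        llama_cpp
        GIT_REPOSITORY https://github.com/ggerganov/llama.cpp
        GIT_TAG b2417  # pinning a fixed tag keeps the dependency reproducible
    )

    # Downloads the sources at configure time and adds llama.cpp's
    # CMake targets (e.g. the `llama` library) to this build.
    FetchContent_MakeAvailable(llama_cpp)

Pinning GIT_TAG to a release tag rather than a branch means re-running CMake will not silently pick up upstream changes, unlike the deleted third_party submodule, which tracked whatever commit was checked out.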

modules/llama_cpp_plugin/build.sh

-19
This file was deleted.

modules/llama_cpp_plugin/include/compiled_model.hpp

+1-1
@@ -70,7 +70,7 @@ namespace ov {
 
     llama_model* m_llama_model_ptr = nullptr;
     llama_context* m_llama_ctx = nullptr;
-    std::shared_ptr<ov::Model> m_model;
+    std::shared_ptr<ov::Model> m_fake_model;
     size_t* num_tokens_processed_ptr = nullptr;  // TODO: (vshampor) find a better place for this kind of storage
 
     std::vector<ov::Output<const ov::Node>> m_fake_inputs;

modules/llama_cpp_plugin/src/CMakeLists.txt

-5
@@ -35,11 +35,6 @@ target_include_directories(${TARGET_NAME} PRIVATE
     "${CMAKE_CURRENT_SOURCE_DIR}"
     "${LlamaCppPlugin_SOURCE_DIR}/include")
 
-# link common OpenVINO Runtime libraries
-target_link_libraries(${TARGET_NAME} PRIVATE
-    openvino::interpreter_backend
-    openvino::reference)
-
 set( LLAMA_TARGET_NAME CACHE STRING "Exact target exposed by llama.cpp to link against as the main llama.cpp library")
 if(NOT LLAMA_TARGET_NAME)
     set( LLAMA_TARGET_NAME "llama" )
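
With the hardcoded OpenVINO backend libraries dropped, the plugin presumably links against whichever llama.cpp target LLAMA_TARGET_NAME selects. A hypothetical sketch of the consuming line (not shown in this hunk):

    # Assumption: this is how ${LLAMA_TARGET_NAME} is consumed further down
    # in src/CMakeLists.txt; `llama` is the default target exposed by llama.cpp.
    target_link_libraries(${TARGET_NAME} PRIVATE ${LLAMA_TARGET_NAME})

The cache variable lets a packager point the plugin at a differently named llama.cpp target without editing the build files.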

modules/llama_cpp_plugin/src/compiled_model.cpp

+14-13
@@ -54,28 +54,29 @@ LlamaCppModel::LlamaCppModel(const std::string& gguf_fname, const std::shared_pt
 
     ov::ParameterVector inputs{input_ids};
 
-    std::vector<std::pair<std::string, ov::element::Type_t>> unused_names_in_order = {
-        {"attention_mask", ov::element::Type_t::i64},
-        {"position_ids", ov::element::Type_t::i64},
-        {"beam_idx", ov::element::Type_t::i32}};
-    for (const auto& descr : unused_names_in_order) {
-        auto unused_inp = std::make_shared<ov::opset13::Parameter>(descr.second, ov::PartialShape({-1, -1}));
+    std::vector<std::tuple<std::string, ov::element::Type_t, ov::PartialShape>> additional_inputs_in_order = {
+        {"attention_mask", ov::element::Type_t::i64, {-1, -1}},
+        {"position_ids", ov::element::Type_t::i64, {-1, -1}},
+        {"beam_idx", ov::element::Type_t::i32, {-1, -1}}};
+
+    for (const auto& descr : additional_inputs_in_order) {
+        auto unused_inp = std::make_shared<ov::opset13::Parameter>(std::get<1>(descr), std::get<2>(descr));
         inputs.push_back(unused_inp);
     }
 
-    m_model = std::make_shared<ov::Model>(logits, inputs, "fake_ov_model_for_io_specification");
+    m_fake_model = std::make_shared<ov::Model>(logits, inputs, "fake_ov_model_for_io_specification");
 
-    m_model->inputs()[0].set_names({"input_ids"});
-    for (size_t i = 0; i < unused_names_in_order.size(); i++) {
-        m_model->inputs()[i + 1].set_names({unused_names_in_order[i].first});
+    m_fake_model->inputs()[0].set_names({"input_ids"});
+    for (size_t i = 0; i < additional_inputs_in_order.size(); i++) {
+        m_fake_model->inputs()[i + 1].set_names({std::get<0>(additional_inputs_in_order[i])});
     }
 
-    m_model->outputs()[0].set_names({"logits"});
+    m_fake_model->outputs()[0].set_names({"logits"});
 
-    for (auto input : m_model->inputs()) {
+    for (auto input : m_fake_model->inputs()) {
         m_fake_inputs.emplace_back(input);
     }
-    for (auto output : m_model->outputs()) {
+    for (auto output : m_fake_model->outputs()) {
         m_fake_outputs.emplace_back(output);
     }
 }
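
The pair-to-tuple change above lets each dummy input carry its own partial shape instead of a hardcoded {-1, -1}. A self-contained sketch of the same descriptor pattern (helper name hypothetical), using C++17 structured bindings in place of the std::get calls from the diff:

    #include <memory>
    #include <string>
    #include <tuple>
    #include <vector>

    #include "openvino/opsets/opset13.hpp"

    using InputDescr = std::tuple<std::string, ov::element::Type_t, ov::PartialShape>;

    // Hypothetical helper: build placeholder Parameters from
    // (name, element type, partial shape) descriptors.
    ov::ParameterVector make_placeholder_inputs(const std::vector<InputDescr>& descrs) {
        ov::ParameterVector params;
        for (const auto& [name, type, shape] : descrs) {
            auto param = std::make_shared<ov::opset13::Parameter>(type, shape);
            param->set_friendly_name(name);  // the diff attaches names via set_names() instead
            params.push_back(param);
        }
        return params;
    }

The resulting "fake" ov::Model computes nothing; it exists so the plugin can report OpenVINO-conformant input/output metadata (input_ids, attention_mask, position_ids, beam_idx, logits) while inference itself is delegated to llama.cpp.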

modules/llama_cpp_plugin/src/infer_request.cpp

+7-2
@@ -62,6 +62,9 @@ void llama_batch_add_reimpl(struct llama_batch& batch,
 void LlamaCppSyncInferRequest::infer() {
     auto input_ids_tensor_ptr = get_tensor(get_inputs()[0]);  // TODO (vshampor) correctly identify input_ids among
                                                               // all inputs without hardcode
+
+    auto position_ids_tensor_ptr = get_tensor(get_inputs()[2]);  // TODO (vshampor) correctly identify position_ids
+                                                                 // among all inputs without hardcode
     OPENVINO_ASSERT(input_ids_tensor_ptr->get_element_type() == ov::element::Type_t::i64);
     OPENVINO_ASSERT(input_ids_tensor_ptr->get_shape().size() == 2);
     size_t sequence_length = input_ids_tensor_ptr->get_shape()[1];
@@ -72,15 +75,17 @@ void LlamaCppSyncInferRequest::infer() {
 
     const int64_t* sequence_start_ptr = data_ptr /* + seq_idx */;
 
+    const int64_t* position_idx_ptr = position_ids_tensor_ptr->data<int64_t>();
+
     for (size_t tok_idx = 0; tok_idx < sequence_length; ++tok_idx) {
         const int64_t token_id = sequence_start_ptr[tok_idx];
+        const int64_t position_id = position_idx_ptr[tok_idx];
         llama_batch_add_reimpl(batch,
                                token_id,
-                               *(m_compiled_model_ptr->num_tokens_processed_ptr),
+                               position_id,
                                {0},
                                true);  // the last `true` here is a marker that the logits for this
                                        // token should be computed and returned
-        *(m_compiled_model_ptr->num_tokens_processed_ptr) += 1;
     }
 
     llama_context* ctx = m_compiled_model_ptr->m_llama_ctx;
Submodule llama.cpp deleted from c8b02d3
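
Taken together, the infer_request.cpp changes switch the llama.cpp batch from an internally tracked token counter to the positions supplied in the position_ids input tensor. A sketch of what the (not shown) llama_batch_add_reimpl presumably does with an explicit position, based on the llama_batch layout around tag b2417; treat the field accesses as an assumption:

    #include <vector>
    #include "llama.h"

    // Sketch: append one token with an explicit position to a llama.cpp batch.
    // Mirrors common.cpp's llama_batch_add, which the plugin reimplements.
    static void batch_add_with_pos(llama_batch& batch,
                                   llama_token token_id,
                                   llama_pos position_id,
                                   const std::vector<llama_seq_id>& seq_ids,
                                   bool compute_logits) {
        const int i = batch.n_tokens;
        batch.token[i] = token_id;
        batch.pos[i] = position_id;  // taken from position_ids, not a running counter
        batch.n_seq_id[i] = static_cast<int32_t>(seq_ids.size());
        for (size_t s = 0; s < seq_ids.size(); ++s) {
            batch.seq_id[i][s] = seq_ids[s];
        }
        batch.logits[i] = compute_logits;
        batch.n_tokens++;
    }

Sourcing positions from the tensor also removes the shared num_tokens_processed_ptr state from the decoding loop, which is what the two deleted lines accomplish.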
