1 parent abc2364 commit afbdf0e
modules/llama_cpp_plugin/src/compiled_model.cpp
@@ -50,6 +50,7 @@ LlamaCppModel::LlamaCppModel(const std::string& gguf_fname, const std::shared_pt
     llama_context_params cparams = llama_context_default_params();
     cparams.n_threads =
         std::thread::hardware_concurrency();  // TODO (vshampor): reuse equivalent setting defined by OV API
+    cparams.n_ctx = 0;  // this means that the actual n_ctx will be taken equal to the model's train-time value
     m_llama_ctx = llama_new_context_with_model(m_llama_model_ptr, cparams);
     OPENVINO_DEBUG << "llama_cpp_plugin: llama model loaded successfully from GGUF..." << std::endl;
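
For context, a minimal standalone sketch of the behavior the added line relies on: in the llama.cpp C API, requesting n_ctx = 0 makes the context size fall back to the model's train-time value recorded in the GGUF metadata, which can be checked by comparing llama_n_ctx() against llama_n_ctx_train(). This is not the plugin code; "model.gguf" is a placeholder path, and llama_backend_init()'s signature has changed across llama.cpp revisions, so adjust for the version you build against.

// n_ctx_zero_check.cpp -- sketch, not part of the plugin
#include <cstdio>

#include "llama.h"

int main() {
    llama_backend_init();  // note: older llama.cpp revisions took a bool numa argument here

    llama_model_params mparams = llama_model_default_params();
    llama_model* model = llama_load_model_from_file("model.gguf", mparams);  // placeholder path
    if (model == nullptr) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    llama_context_params cparams = llama_context_default_params();
    cparams.n_ctx = 0;  // 0 => take the context length from the model's training metadata

    llama_context* ctx = llama_new_context_with_model(model, cparams);

    // With n_ctx requested as 0, these two values are expected to match.
    printf("n_ctx = %u, n_ctx_train = %d\n", llama_n_ctx(ctx), llama_n_ctx_train(model));

    llama_free(ctx);
    llama_free_model(model);
    llama_backend_free();
    return 0;
}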