
Commit f82ee68

Remove state
1 parent 2924089 commit f82ee68

File tree

2 files changed (+0 additions, -4 deletions)


modules/llama_cpp_plugin/include/compiled_model.hpp (-1)
@@ -74,7 +74,6 @@ namespace ov {
     llama_model* m_llama_model_ptr = nullptr;
     llama_context* m_llama_ctx = nullptr;
     std::shared_ptr<ov::Model> m_fake_model;
-    size_t* num_tokens_processed_ptr = nullptr;  // TODO: (vshampor) find a better place for this kind of storage
 
     std::vector<ov::Output<const ov::Node>> m_fake_inputs;
     std::vector<ov::Output<const ov::Node>> m_fake_outputs;
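Note: the removed member was a raw owning pointer, which is exactly what the inline TODO flagged. As a minimal sketch (an assumption, not code from this repo or commit), the same shared counter could be declared without any manual delete by using std::shared_ptr:

#include <cstddef>
#include <memory>

// Hypothetical illustration; the class and accessor names are invented.
class LlamaCppModelSketch {
public:
    // Infer requests could share ownership of the counter safely.
    std::shared_ptr<std::size_t> token_counter() const { return m_num_tokens_processed; }

private:
    // Stands in for `size_t* num_tokens_processed_ptr`; freed automatically.
    std::shared_ptr<std::size_t> m_num_tokens_processed = std::make_shared<std::size_t>(0);
};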

modules/llama_cpp_plugin/src/compiled_model.cpp (-3)
@@ -37,14 +37,11 @@ LlamaCppModel::~LlamaCppModel() {
     llama_free(m_llama_ctx);
     llama_free_model(m_llama_model_ptr);
     llama_backend_free();
-    delete num_tokens_processed_ptr;
 }
 
 LlamaCppModel::LlamaCppModel(const std::string& gguf_fname, const std::shared_ptr<const IPlugin>& plugin)
     : ICompiledModel(nullptr, plugin),
       m_gguf_fname(gguf_fname) {
-    num_tokens_processed_ptr = new size_t;  // TODO (vshampor): hack, remove
-    *num_tokens_processed_ptr = 0;
     OPENVINO_DEBUG << "llama_cpp_plugin: loading llama model directly from GGUF... " << std::endl;
     llama_model_params mparams = llama_model_default_params();
     mparams.n_gpu_layers = 99;

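For context on why the constructor/destructor pair above was fragile: the counter was allocated with new in the constructor and released with delete in the destructor, so anything that cached the raw pointer (the TODO suggests it was meant as shared storage) could outlive the owner and dangle. A self-contained sketch of that hazard, using invented names (Owner, cached) rather than repo code:

#include <cstddef>
#include <iostream>

// Mirrors the removed pattern: allocate in the constructor, delete in the
// destructor, hand out the raw pointer in between.
struct Owner {
    std::size_t* counter = new std::size_t{0};
    ~Owner() { delete counter; }  // also makes default copying a double-free risk
};

int main() {
    std::size_t* cached = nullptr;
    {
        Owner o;
        cached = o.counter;  // e.g. an infer request stashing the pointer
        ++*cached;           // fine while the owner is alive
    }
    // Dereferencing `cached` here would be use-after-free: ~Owner() already
    // ran `delete`. Removing the state, as this commit does, avoids that.
    std::cout << "owner destroyed; the cached pointer now dangles" << std::endl;
}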