|
4 | 4 | #ifndef LLAMA_CPP_COMPILED_MODEL_HPP
|
5 | 5 | #define LLAMA_CPP_COMPILED_MODEL_HPP
|
6 | 6 |
|
| 7 | +#include "llama.h" |
7 | 8 | #include "openvino/runtime/icompiled_model.hpp"
|
8 | 9 | #include "openvino/runtime/isync_infer_request.hpp"
|
9 |
| -#include "llama.h" |
10 | 10 |
|
11 | 11 | namespace ov {
|
12 |
| - namespace llama_cpp_plugin { |
13 |
| - class LlamaCppSyncInferRequest; |
14 |
| - class LlamaCppPlugin; |
15 |
| - class LlamaCppState; |
16 |
| - class LlamaCppModel: public ICompiledModel { |
17 |
| - public: |
18 |
| - LlamaCppModel(const std::shared_ptr<ov::Model>& model, |
19 |
| - const std::shared_ptr<const ov::IPlugin>& plugin, |
20 |
| - const ov::SoPtr<ov::IRemoteContext>& context, |
21 |
| - const std::shared_ptr<ov::threading::ITaskExecutor>& task_executor |
22 |
| - ); |
23 |
| - |
24 |
| - LlamaCppModel(const std::shared_ptr<ov::Model>& ov_model, |
25 |
| - std::istream& input_file, |
26 |
| - const std::shared_ptr<const IPlugin>& plugin); |
| 12 | +namespace llama_cpp_plugin { |
| 13 | +class LlamaCppSyncInferRequest; |
| 14 | +class LlamaCppPlugin; |
| 15 | +class LlamaCppState; |
| 16 | +class LlamaCppModel : public ICompiledModel { |
| 17 | +public: |
| 18 | + LlamaCppModel(const std::string& gguf_fname, const std::shared_ptr<const IPlugin>& plugin); |
| 19 | + /** |
| 20 | + * @brief Export compiled model to stream |
| 21 | + * |
| 22 | + * @param model output stream |
| 23 | + */ |
| 24 | + virtual void export_model(std::ostream& model) const override; |
27 | 25 |
|
28 |
| - LlamaCppModel(const std::string& gguf_fname, |
29 |
| - const std::shared_ptr<const IPlugin>& plugin); |
30 |
| - /** |
31 |
| - * @brief Export compiled model to stream |
32 |
| - * |
33 |
| - * @param model output stream |
34 |
| - */ |
35 |
| - virtual void export_model(std::ostream& model) const override; |
| 26 | + /** |
| 27 | + * @brief Returns runtime model |
| 28 | + * |
| 29 | + * @return OpenVINO Model which represents runtime graph |
| 30 | + */ |
| 31 | + virtual std::shared_ptr<const ov::Model> get_runtime_model() const override; |
36 | 32 |
|
37 |
| - /** |
38 |
| - * @brief Returns runtime model |
39 |
| - * |
40 |
| - * @return OpenVINO Model which represents runtime graph |
41 |
| - */ |
42 |
| - virtual std::shared_ptr<const ov::Model> get_runtime_model() const override; |
| 33 | + /** |
| 34 | + * @brief Allows to set property |
| 35 | + * |
| 36 | + * @param properties new plugin properties |
| 37 | + */ |
| 38 | + virtual void set_property(const ov::AnyMap& properties) override; |
43 | 39 |
|
44 |
| - /** |
45 |
| - * @brief Allows to set property |
46 |
| - * |
47 |
| - * @param properties new plugin properties |
48 |
| - */ |
49 |
| - virtual void set_property(const ov::AnyMap& properties) override; |
| 40 | + /** |
| 41 | + * @brief Returns property |
| 42 | + * |
| 43 | + * @param name Property name |
| 44 | + * |
| 45 | + * @return Property value |
| 46 | + * virtual std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const override; |
| 47 | + **/ |
| 48 | + virtual ov::Any get_property(const std::string& name) const override; |
| 49 | + virtual const std::vector<ov::Output<const ov::Node>>& inputs() const override; |
| 50 | + virtual const std::vector<ov::Output<const ov::Node>>& outputs() const override; |
| 51 | + virtual ~LlamaCppModel(); |
50 | 52 |
|
51 |
| - /** |
52 |
| - * @brief Returns property |
53 |
| - * |
54 |
| - * @param name Property name |
55 |
| - * |
56 |
| - * @return Property value |
57 |
| - * virtual std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const override; |
58 |
| - **/ |
59 |
| - virtual ov::Any get_property(const std::string& name) const override; |
60 |
| - virtual const std::vector<ov::Output<const ov::Node>>& inputs() const override; |
61 |
| - virtual const std::vector<ov::Output<const ov::Node>>& outputs() const override; |
62 |
| - virtual ~LlamaCppModel(); |
63 |
| - protected: |
64 |
| - /** |
65 |
| - * @brief Method creates infer request implementation |
66 |
| - * |
67 |
| - * @return Sync infer request |
68 |
| - */ |
69 |
| - virtual std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const override; |
| 53 | +protected: |
| 54 | + /** |
| 55 | + * @brief Method creates infer request implementation |
| 56 | + * |
| 57 | + * @return Sync infer request |
| 58 | + */ |
| 59 | + virtual std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const override; |
70 | 60 |
|
71 |
| - private: |
72 |
| - gguf_context* m_gguf_ctx = nullptr; |
73 |
| - std::string m_gguf_fname; |
| 61 | +private: |
| 62 | + gguf_context* m_gguf_ctx = nullptr; |
| 63 | + std::string m_gguf_fname; |
74 | 64 |
|
75 |
| - llama_model* m_llama_model_ptr = nullptr; |
76 |
| - llama_context* m_llama_ctx = nullptr; |
77 |
| - std::shared_ptr<ov::Model> m_fake_model; |
| 65 | + llama_model* m_llama_model_ptr = nullptr; |
| 66 | + llama_context* m_llama_ctx = nullptr; |
| 67 | + std::shared_ptr<ov::Model> m_fake_model; |
78 | 68 |
|
79 |
| - std::vector<ov::Output<const ov::Node>> m_fake_inputs; |
80 |
| - std::vector<ov::Output<const ov::Node>> m_fake_outputs; |
| 69 | + std::vector<ov::Output<const ov::Node>> m_fake_inputs; |
| 70 | + std::vector<ov::Output<const ov::Node>> m_fake_outputs; |
81 | 71 |
|
82 |
| - friend class ov::llama_cpp_plugin::LlamaCppSyncInferRequest; |
83 |
| - friend class ov::llama_cpp_plugin::LlamaCppState; |
84 |
| - }; |
85 |
| - } |
| 72 | + friend class ov::llama_cpp_plugin::LlamaCppSyncInferRequest; |
| 73 | + friend class ov::llama_cpp_plugin::LlamaCppState; |
| 74 | +}; |
| 75 | +} // namespace llama_cpp_plugin |
86 | 76 | } // namespace ov
|
87 | 77 |
|
88 | 78 | #endif // LLAMA_CPP_COMPILED_MODEL_HPP
|
0 commit comments