openvinotoolkit · AlexKoff88 · Feb 3, 2025 · Mar 3, 2025 · Mar 5, 2025 · Mar 6, 2025
diff --git a/cmake/features.cmake b/cmake/features.cmake
@@ -5,6 +5,7 @@
 option(ENABLE_PYTHON "Enable Python API build" ON)
 option(ENABLE_JS "Enable JS API build" OFF)
 option(ENABLE_SAMPLES "Enable samples build" ON)
+option(ENABLE_GGUF "Enable support for GGUF format" ON)
 
 # Disable building samples for NPM package
 if(CPACK_GENERATOR STREQUAL "NPM")

diff --git a/gguf-tools b/gguf-tools
diff --git a/samples/cpp/text_generation/CMakeLists.txt b/samples/cpp/text_generation/CMakeLists.txt
@@ -23,6 +23,7 @@ endfunction()
 
 set (SAMPLE_LIST
     greedy_causal_lm
+    gguf_example
     encrypted_model_causal_lm
     beam_search_causal_lm
     chat_sample

diff --git a/samples/cpp/text_generation/gguf_example.cpp b/samples/cpp/text_generation/gguf_example.cpp
@@ -0,0 +1,44 @@
+// Copyright (C) 2023-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <stdexcept>
+#include <string>
+
+#include "openvino/genai/llm_pipeline.hpp"
+
+#include "gguf_modeling.hpp"
+
+#include "openvino/openvino.hpp"
+
+// Sample: passes a GGUF path to create_from_gguf() and saves the result with
+// ov::save_model() as <OUTPUT_DIR>/openvino_model.xml.
+//
+// Usage: gguf_example <GGUF_MODEL_PATH> <OUTPUT_DIR>
+//
+// Returns EXIT_SUCCESS on success, EXIT_FAILURE when arguments are missing or
+// when conversion/saving throws.
+int main(int argc, char* argv[]) try {
+    // argv[1] and argv[2] may only be read once both are known to be present.
+    if (argc < 3) {
+        const std::string program = argc > 0 ? argv[0] : "gguf_example";
+        throw std::runtime_error("Usage: " + program + " <GGUF_MODEL_PATH> <OUTPUT_DIR>");
+    }
+    const std::string models_path = argv[1];
+    const std::string output_path = argv[2];
+
+    auto model = create_from_gguf(models_path);
+
+    // The third argument is ov::save_model's compress_to_fp16 flag (see the
+    // OpenVINO API reference); it is explicitly disabled here.
+    ov::save_model(model, output_path + "/openvino_model.xml", false);
+    return EXIT_SUCCESS;
+} catch (const std::exception& error) {
+    std::cerr << error.what() << '\n';
+    return EXIT_FAILURE;
+} catch (...) {
+    std::cerr << "Non-exception object thrown\n";
+    return EXIT_FAILURE;
+}
diff --git a/src/cpp/CMakeLists.txt b/src/cpp/CMakeLists.txt
@@ -75,13 +75,35 @@ if(TARGET openvino_tokenizers)
     add_dependencies(${TARGET_NAME_OBJ} openvino_tokenizers)
 endif()
 
+if(ENABLE_GGUF)
+  # Fetch gguf-tools at a pinned commit; only the two C sources below are compiled.
+  message(STATUS "Downloading gguflib")
+  FetchContent_Declare(gguflib
+    GIT_REPOSITORY https://github.com/antirez/gguf-tools/
+    GIT_TAG af7d88d808a7608a33723fba067036202910acb3)
+  FetchContent_MakeAvailable(gguflib)
+  add_library(gguflib STATIC "${gguflib_SOURCE_DIR}/fp16.c" "${gguflib_SOURCE_DIR}/gguflib.c")
+  # Per-target PIC; CMAKE_POSITION_INDEPENDENT_CODE would also affect every later target here.
+  set_target_properties(gguflib PROPERTIES POSITION_INDEPENDENT_CODE ON)
+  # SYSTEM include, added only inside this block so gguflib_SOURCE_DIR is never used while unset.
+  target_include_directories(${TARGET_NAME_OBJ} SYSTEM PRIVATE "${gguflib_SOURCE_DIR}")
+  target_include_directories(${TARGET_NAME_OBJ}
+    PRIVATE "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src/gguf_utils>")
+  target_link_libraries(${TARGET_NAME_OBJ} PRIVATE $<BUILD_INTERFACE:gguflib>)
+  target_sources(${TARGET_NAME_OBJ} PRIVATE
+    "${CMAKE_CURRENT_SOURCE_DIR}/src/gguf_utils/gguf.cpp"
+    "${CMAKE_CURRENT_SOURCE_DIR}/src/gguf_utils/gguf_quants.cpp"
+    "${CMAKE_CURRENT_SOURCE_DIR}/src/gguf_utils/gguf_modeling.cpp"
+    "${CMAKE_CURRENT_SOURCE_DIR}/src/gguf_utils/building_blocks.cpp")
+endif()
+
 target_include_directories(${TARGET_NAME_OBJ}
     PUBLIC "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
            "$<BUILD_INTERFACE:${OpenVINOGenAI_SOURCE_DIR}/src/c/include>"
            "$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>"
     PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/src")
 
 target_include_directories(${TARGET_NAME_OBJ} SYSTEM PRIVATE "${safetensors.h_SOURCE_DIR}")
 
 target_link_libraries(${TARGET_NAME_OBJ} PRIVATE openvino::runtime openvino::runtime::c openvino::threading nlohmann_json::nlohmann_json jinja2cpp)
 
@@ -99,10 +121,19 @@ add_library(openvino::genai ALIAS ${TARGET_NAME})
+# NOTE(review): the gguf_utils entry below exposes a private source directory to
+# build-tree consumers regardless of ENABLE_GGUF; presumably it exists so samples
+# can include gguf_modeling.hpp - confirm this is intended.
 target_include_directories(${TARGET_NAME} INTERFACE "$<INSTALL_INTERFACE:runtime/include>"
                                                     "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
                                                     "$<BUILD_INTERFACE:${OpenVINOGenAI_SOURCE_DIR}/src/c/include>"
+                                                    "$<BUILD_INTERFACE:${OpenVINOGenAI_SOURCE_DIR}/src/cpp/src/gguf_utils>"
                                                     "$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>")
 
 target_link_libraries(${TARGET_NAME} PUBLIC openvino::runtime openvino::runtime::c PRIVATE openvino::threading nlohmann_json::nlohmann_json jinja2cpp ${CMAKE_DL_LIBS})
 
+if(ENABLE_GGUF)
+  # ${CMAKE_DL_LIBS} is already linked PRIVATE by the call above, so only gguflib is added.
+  target_link_libraries(${TARGET_NAME} PRIVATE gguflib)
+endif()
+
 target_compile_features(${TARGET_NAME} INTERFACE cxx_std_17)
 
 set_target_properties(${TARGET_NAME} PROPERTIES

diff --git a/src/cpp/src/gguf_utils/building_blocks.cpp b/src/cpp/src/gguf_utils/building_blocks.cpp
diff --git a/src/cpp/src/gguf_utils/building_blocks.hpp b/src/cpp/src/gguf_utils/building_blocks.hpp
@@ -0,0 +1,86 @@
+// Copyright (C) 2023-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <algorithm>
+#include <cstdarg>
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <stdexcept>
+#include <string>
+#include <tuple>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include <openvino/openvino.hpp>
+
+#include "gguf.hpp"
+
+// Declarations of the graph-building helpers implemented in building_blocks.cpp.
+// The comments below state only what the signatures show; anything marked
+// "presumably" is inferred from names and should be confirmed in the .cpp.
+
+// Presumably builds the LM head for `input`; returns a single node output.
+// `key` presumably selects entries of `consts` (tensors keyed by name).
+// QType is not declared in this header (presumably provided by gguf.hpp).
+ov::Output<ov::Node> make_lm_head(
+    const std::string& key,
+    const ov::Output<ov::Node>& input,
+    const std::unordered_map<std::string, ov::Tensor>& consts,
+    const ov::Output<ov::Node>& embeddings_node,
+    QType qtype);
+
+// Presumably builds an RMS normalization of `input` parameterized by `epsilon`.
+// Returns a single node output.
+ov::Output<ov::Node> make_rms_norm(
+    const std::string& key,
+    const ov::Output<ov::Node>& input,
+    const std::unordered_map<std::string, ov::Tensor>& consts,
+    float epsilon);
+
+// Presumably builds an embedding lookup for `input`. Returns two node outputs.
+// NOTE(review): the meaning of each tuple element is not visible here - document it.
+std::tuple<ov::Output<ov::Node>, ov::Output<ov::Node>> make_embedding(
+    const std::string& key,
+    const ov::Output<ov::Node>& input,
+    const std::unordered_map<std::string, ov::Tensor>& consts,
+    QType qtype);
+
+// Presumably builds the layer with index `layer_idx`. Returns a 5-tuple; its
+// last two elements have the same types as `cos_sin_cached` and `output_shape`.
+// NOTE(review): `consts` is a non-const reference here, unlike the other
+// helpers in this header - confirm whether layer() modifies it.
+// GGUFMetaData is not declared in this header (presumably provided by gguf.hpp).
+std::tuple<ov::Output<ov::Node>,
+           ov::SinkVector,
+           ov::Output<ov::Node>,
+           std::pair<ov::Output<ov::Node>, ov::Output<ov::Node>>,
+           std::shared_ptr<ov::Node>>
+    layer(const std::map<std::string, GGUFMetaData>& configs,
+        std::unordered_map<std::string, ov::Tensor>& consts,
+        int layer_idx,
+        const ov::Output<ov::Node>& hidden_states,
+        const ov::Output<ov::Node>& attn_mask,
+        const ov::Output<ov::Node>& causal_mask,
+        const ov::Output<ov::Node>& position_ids,
+        const ov::Output<ov::Node>& rope_const,
+        const ov::Output<ov::Node>& beam_idx,
+        const ov::Output<ov::Node>& batch_dim,
+        const ov::Output<ov::Node>& hidden_dim,
+        const std::pair<ov::Output<ov::Node>, ov::Output<ov::Node>>& cos_sin_cached,
+        const std::shared_ptr<ov::Node>& output_shape);
+
+// Presumably builds the rotary-embedding constant for the given `head_dim`.
+// Defaults: max_position_embeddings = 2048, base = 10000.0f, scaling_factor = 1.0f.
+// Returns a single node output.
+ov::Output<ov::Node> init_rope(
+    int64_t head_dim,
+    int64_t max_position_embeddings = 2048,
+    float base = 10000.0f,
+    float scaling_factor = 1.0f);