Skip to content

Commit b73b250

Browse files
authored
Introduce NF4 data type (openvinotoolkit#19900)
* NF4 reference initial commit * Compilable version. * Executable NF4. * Fixed nf4 unpacking. * 1) Fixed warnings with nf4. 2) Removed unused functions. * Added one test for nf4. * Fixed code-style errors. * Fixed code-style errors. * Fixed NamingConventionCheck errors. * Fixed test with nf4. * Fixed windows compilation. * Fixed casting warning. * Fixed incorrect changes. * Changed order of elements in nf4 pack/unpack. * 1) Made Convert work only in the nf4->other type direction. 2) Applied reviewers suggestions. * Fixed code style. * Fixed code style. * 1) Added array header. 2) Added Bitsandbytes to third-party-programs.txt. * 1) Removed unused code. 2) Fixed style typos. 3) Revert submodule version. * Added test for nf4 compression. * NF4 test refactoring. * Added cpp tests for NF4. * Removed model compilation from NF4 tests. * Reverted submodule version.
1 parent 3de1332 commit b73b250

File tree

29 files changed

+431
-17
lines changed

29 files changed

+431
-17
lines changed

cmake/developer_package/ncc_naming_style/openvino.style

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ VariableReference: '^\w+$'
1818

1919
EnumName: '^[A-Z][\w]+$'
2020
# excepts element_type
21-
EnumConstantName: '^([A-Z\d_]+|undefined|dynamic|boolean|bf16|f16|f32|f64|i4|i8|i16|i32|i64|u1|u4|u8|u16|u32|u64|asymmetric|align_corners|round_prefer_floor|round_prefer_ceil|floor|ceil|simple|nearest|linear|linear_onnx|cubic|area|scales|sizes|half_pixel|tf_half_pixel_for_nn|pytorch_half_pixel|asymetric)$'
21+
EnumConstantName: '^([A-Z\d_]+|undefined|dynamic|boolean|bf16|f16|f32|f64|i4|i8|i16|i32|i64|u1|u4|u8|u16|u32|u64|nf4|asymmetric|align_corners|round_prefer_floor|round_prefer_ceil|floor|ceil|simple|nearest|linear|linear_onnx|cubic|area|scales|sizes|half_pixel|tf_half_pixel_for_nn|pytorch_half_pixel|asymetric)$'
2222
# TODO: align
2323
UsingDeclaration: '^.*$'
2424
TypedefName: '^.*$'

licensing/third-party-programs.txt

+26
Original file line numberDiff line numberDiff line change
@@ -1640,3 +1640,29 @@ INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
16401640
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
16411641
OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
16421642
PERFORMANCE OF THIS SOFTWARE.
1643+
1644+
-------------------------------------------------------------
1645+
1646+
30. Bitsandbytes (https://github.com/TimDettmers/bitsandbytes)
1647+
1648+
MIT License
1649+
1650+
Copyright (c) Facebook, Inc. and its affiliates.
1651+
1652+
Permission is hereby granted, free of charge, to any person obtaining a copy
1653+
of this software and associated documentation files (the "Software"), to deal
1654+
in the Software without restriction, including without limitation the rights
1655+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
1656+
copies of the Software, and to permit persons to whom the Software is
1657+
furnished to do so, subject to the following conditions:
1658+
1659+
The above copyright notice and this permission notice shall be included in all
1660+
copies or substantial portions of the Software.
1661+
1662+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1663+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1664+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1665+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1666+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
1667+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
1668+
SOFTWARE.

src/bindings/c/include/openvino/c/ov_common.h

+1
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ typedef enum {
186186
U16, //!< u16 element type
187187
U32, //!< u32 element type
188188
U64, //!< u64 element type
189+
NF4, //!< nf4 element type
189190
} ov_element_type_e;
190191

191192
/**

src/bindings/c/src/ov_tensor.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@ const std::map<ov_element_type_e, ov::element::Type> element_type_map = {
2323
{ov_element_type_e::U8, ov::element::u8},
2424
{ov_element_type_e::U16, ov::element::u16},
2525
{ov_element_type_e::U32, ov::element::u32},
26-
{ov_element_type_e::U64, ov::element::u64}};
26+
{ov_element_type_e::U64, ov::element::u64},
27+
{ov_element_type_e::NF4, ov::element::nf4}};
2728

2829
inline ov_element_type_e find_ov_element_type_e(ov::element::Type type) {
2930
for (auto iter = element_type_map.begin(); iter != element_type_map.end(); iter++) {

src/bindings/python/src/openvino/helpers/packing.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def pack_data(array: np.ndarray, type: Type) -> np.ndarray:
2323
:param type: Type to interpret the array values. Type must be u1, u4, i4 or nf4.
2424
:type type: openvino.runtime.Type
2525
"""
26-
assert type in [Type.u1, Type.u4, Type.i4], "Packing algorithm for the" "data types stored in 1, 2 or 4 bits"
26+
assert type in [Type.u1, Type.u4, Type.i4, Type.nf4], "Packing algorithm for the" "data types stored in 1, 2 or 4 bits"
2727

2828
minimum_regular_dtype = np.int8 if type == Type.i4 else np.uint8
2929
casted_to_regular_type = array.astype(dtype=minimum_regular_dtype, casting="unsafe")
@@ -62,7 +62,7 @@ def unpack_data(array: np.ndarray, type: Type, shape: Union[list, Shape]) -> np.
6262
:param shape: the new shape for the unpacked array.
6363
:type shape: Union[list, openvino.runtime.Shape]
6464
"""
65-
assert type in [Type.u1, Type.u4, Type.i4], "Unpacking algorithm for the" "data types stored in 1, 2 or 4 bits"
65+
assert type in [Type.u1, Type.u4, Type.i4, Type.nf4], "Unpacking algorithm for the" "data types stored in 1, 2 or 4 bits"
6666
unpacked = np.unpackbits(array.view(np.uint8))
6767
shape = list(shape)
6868
if type.bitwidth == 1:

src/bindings/python/src/pyopenvino/core/common.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ const std::map<ov::element::Type, py::dtype>& ov_type_to_dtype() {
3030
{ov::element::boolean, py::dtype("bool")},
3131
{ov::element::u1, py::dtype("uint8")},
3232
{ov::element::u4, py::dtype("uint8")},
33+
{ov::element::nf4, py::dtype("uint8")},
3334
{ov::element::i4, py::dtype("int8")},
3435
};
3536
return ov_type_to_dtype_mapping;

src/bindings/python/src/pyopenvino/graph/types/element_type.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ void regclass_graph_Type(py::module m) {
4848
type.attr("u32") = ov::element::u32;
4949
type.attr("u64") = ov::element::u64;
5050
type.attr("bf16") = ov::element::bf16;
51+
type.attr("nf4") = ov::element::nf4;
5152

5253
type.def("__hash__", &ov::element::Type::hash);
5354
type.def("__repr__", [](const ov::element::Type& self) {

src/bindings/python/tests/test_runtime/test_tensor.py

+1
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,7 @@ def test_init_with_packed_buffer(dtype, ov_type):
377377
(0, 2, ov.Type.u1, np.uint8),
378378
(0, 16, ov.Type.u4, np.uint8),
379379
(-8, 7, ov.Type.i4, np.int8),
380+
(0, 16, ov.Type.nf4, np.uint8),
380381
])
381382
def test_packing(shape, low, high, ov_type, dtype):
382383
ov_tensor = Tensor(ov_type, shape)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# -*- coding: utf-8 -*-
2+
# Copyright (C) 2018-2023 Intel Corporation
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
6+
import numpy as np
7+
from openvino.runtime import opset9 as opset
8+
9+
import openvino as ov
10+
import pytest
11+
12+
13+
@pytest.mark.parametrize(
    ("ov_type", "numpy_dtype"),
    [
        (ov.Type.f32, np.float32),
        (ov.Type.f64, np.float64),
        (ov.Type.f16, np.float16),
    ],
)
def test_float_to_nf4_convert(ov_type, numpy_dtype):
    """Compress float data into an nf4 constant and check the values read back.

    41 evenly spaced values in [-1.5, 1.5] are stored as an nf4 constant,
    converted back to the floating-point type inside a small model, and the
    distinct values produced are compared with the 16 expected levels.
    """
    source_values = np.linspace(-1.5, 1.5, num=41, dtype=numpy_dtype)

    nf4_const = opset.constant(source_values, dtype=ov.Type.nf4, name="nf4_constant")
    decompressed = opset.convert(nf4_const, source_values.dtype)
    model_input = opset.parameter(ov.PartialShape([-1]), ov_type)
    summed = opset.add(model_input, decompressed)
    model = ov.Model([summed], [model_input])

    compiled = ov.compile_model(model)
    # The input is all zeros, so the sum equals the decompressed constant.
    zero_input = np.zeros(source_values.shape, dtype=numpy_dtype)
    output = compiled(zero_input)[0]

    # Distinct output values, kept in order of first appearance.
    distinct = np.array(list(dict.fromkeys(output)))

    assert len(distinct) == 16

    expected_levels = np.array([
        -1.0, -0.6961928009986877, -0.5250730514526367,
        -0.39491748809814453, -0.28444138169288635,
        -0.18477343022823334, -0.09105003625154495,
        0.0, 0.07958029955625534, 0.16093020141124725,
        0.24611230194568634, 0.33791524171829224,
        0.44070982933044434, 0.5626170039176941,
        0.7229568362236023, 1.0,
    ])

    max_abs_error = np.abs(expected_levels - distinct).max()

    assert max_abs_error < 0.001

src/common/low_precision_transformations/include/low_precision/layer_transformation.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ class LP_TRANSFORMATIONS_API DataPrecision {
8888
switch (precision) {
8989
case element::i4:
9090
case element::u4:
91+
case element::nf4:
9192
return (levels == low_precision::levels::int4) || (levels == low_precision::levels::int4_narrow_range);
9293
case element::i8:
9394
case element::u8:

src/core/builder/include/ngraph/builder/make_constant.hpp

+3
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,9 @@ std::shared_ptr<Node> make_constant(const element::Type& type, const Shape& shap
9999
case element::Type_t::u4:
100100
unsupported_data_type = "u4";
101101
break;
102+
case element::Type_t::nf4:
103+
unsupported_data_type = "nf4";
104+
break;
102105
case element::Type_t::undefined:
103106
unsupported_data_type = "undefined";
104107
break;

src/core/include/ngraph/type/element_type.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ using ov::element::i32;
4040
using ov::element::i4;
4141
using ov::element::i64;
4242
using ov::element::i8;
43+
using ov::element::nf4;
4344
using ov::element::u1;
4445
using ov::element::u16;
4546
using ov::element::u32;

src/core/include/openvino/core/type/element_type.hpp

+6-1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "openvino/core/rtti.hpp"
2121
#include "openvino/core/type/bfloat16.hpp"
2222
#include "openvino/core/type/float16.hpp"
23+
#include "openvino/core/type/nf4.hpp"
2324

2425
/**
2526
* @defgroup ov_element_cpp_api Element types
@@ -50,7 +51,8 @@ enum class Type_t {
5051
u8, //!< u8 element type
5152
u16, //!< u16 element type
5253
u32, //!< u32 element type
53-
u64 //!< u64 element type
54+
u64, //!< u64 element type
55+
nf4 //!< nf4 element type
5456
};
5557

5658
/// \brief Base class to define element type
@@ -177,6 +179,9 @@ constexpr Type u32(Type_t::u32);
177179
/// \brief u64 element type
178180
/// \ingroup ov_element_cpp_api
179181
constexpr Type u64(Type_t::u64);
182+
/// \brief nf4 element type
183+
/// \ingroup ov_element_cpp_api
184+
constexpr Type nf4(Type_t::nf4);
180185

181186
template <typename T>
182187
Type from() {

src/core/include/openvino/core/type/element_type_traits.hpp

+5
Original file line numberDiff line numberDiff line change
@@ -92,4 +92,9 @@ template <>
9292
struct element_type_traits<element::Type_t::u64> {
9393
using value_type = uint64_t;
9494
};
95+
96+
/// \brief Traits for the nf4 element type: its value_type is int8_t.
template <>
97+
struct element_type_traits<element::Type_t::nf4> {
98+
using value_type = int8_t;
99+
};
95100
} // namespace ov
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
// Copyright (C) 2018-2023 Intel Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
//
4+
5+
#pragma once
6+
7+
#include <array>
8+
#include <cmath>
9+
#include <iostream>
10+
#include <limits>
11+
#include <memory>
12+
#include <string>
13+
#include <vector>
14+
15+
#include "openvino/core/core_visibility.hpp"
16+
17+
namespace ov {
18+
class OPENVINO_API ConvertNF4 {
19+
public:
20+
constexpr ConvertNF4() = default;
21+
22+
template <typename T, typename std::enable_if<!std::is_integral<T>::value, bool>::type = true>
23+
static void unpack(T* dst, const uint8_t* src, std::size_t idx) {
24+
uint8_t nf4_idx = get_u4(src, idx);
25+
float val = dequantize(nf4_idx);
26+
dst[idx] = static_cast<T>(val);
27+
}
28+
29+
template <typename T, typename std::enable_if<std::is_integral<T>::value, bool>::type = true>
30+
static void unpack(T* dst, const uint8_t* src, std::size_t idx) {
31+
uint8_t nf4_idx = get_u4(src, idx);
32+
dst[idx] = static_cast<T>(nf4_idx);
33+
}
34+
35+
static float dequantize(uint8_t val);
36+
37+
static uint8_t quantize(float x);
38+
39+
private:
40+
static inline uint8_t get_u4(const uint8_t* buf, size_t idx) {
41+
const size_t byte_idx = idx / 2;
42+
const uint8_t bit_shift = 4 * (idx % 2);
43+
return (buf[byte_idx] >> bit_shift) & 0xF;
44+
}
45+
};
46+
47+
}; // namespace ov

src/core/include/openvino/op/constant.hpp

+67-6
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,9 @@ class OPENVINO_API Constant : public Op {
143143
case Type_t::u64:
144144
fill_data<Type_t::u64>(value);
145145
break;
146+
case Type_t::nf4:
147+
fill_data<Type_t::nf4>(value);
148+
break;
146149
case Type_t::undefined:
147150
case Type_t::dynamic:
148151
OPENVINO_THROW("unsupported type");
@@ -408,7 +411,7 @@ class OPENVINO_API Constant : public Op {
408411
template <element::Type_t Type,
409412
typename StorageDataType = fundamental_type_for<Type>,
410413
typename std::enable_if<Type != element::Type_t::u1 && Type != element::Type_t::u4 &&
411-
Type != element::Type_t::i4,
414+
Type != element::Type_t::i4 && Type != element::Type_t::nf4,
412415
bool>::type = true>
413416
StorageDataType get_element_value(size_t index) const {
414417
return get_data_ptr<Type>()[index];
@@ -428,6 +431,13 @@ class OPENVINO_API Constant : public Op {
428431
return (get_data_ptr<uint8_t>()[index / 2] >> (index % 2 ? 0 : 4)) & 0x0F;
429432
}
430433

434+
template <element::Type_t Type,
435+
typename StorageDataType = fundamental_type_for<Type>,
436+
typename std::enable_if<Type == element::Type_t::nf4, bool>::type = true>
437+
StorageDataType get_element_value(size_t index) const {
438+
return (get_data_ptr<uint8_t>()[index / 2] >> (index % 2 ? 4 : 0)) & 0x0F;
439+
}
440+
431441
template <element::Type_t Type,
432442
typename StorageDataType = fundamental_type_for<Type>,
433443
typename std::enable_if<Type == element::Type_t::i4, bool>::type = true>
@@ -554,7 +564,7 @@ class OPENVINO_API Constant : public Op {
554564
typename T,
555565
typename StorageDataType = fundamental_type_for<Type>,
556566
typename std::enable_if<Type != element::Type_t::u1 && Type != element::Type_t::u4 &&
557-
Type != element::Type_t::i4,
567+
Type != element::Type_t::i4 && Type != element::Type_t::nf4,
558568
bool>::type = true>
559569
void fill_data(const T& value) {
560570
#ifdef __clang__
@@ -607,7 +617,9 @@ class OPENVINO_API Constant : public Op {
607617
template <element::Type_t Type,
608618
typename T,
609619
typename StorageDataType = fundamental_type_for<Type>,
610-
typename std::enable_if<Type == element::Type_t::u4 || Type == element::Type_t::i4, bool>::type = true>
620+
typename std::enable_if<Type == element::Type_t::u4 || Type == element::Type_t::i4 ||
621+
Type == element::Type_t::nf4,
622+
bool>::type = true>
611623
void fill_data(const T& value) {
612624
uint8_t v = value_in_range<Type>(value);
613625
v &= 0x0F;
@@ -640,8 +652,8 @@ class OPENVINO_API Constant : public Op {
640652
template <element::Type_t Type,
641653
typename T,
642654
typename StorageDataType = fundamental_type_for<Type>,
643-
typename std::enable_if<Type != element::Type_t::u1 && Type != element::Type_t::u4 &&
644-
Type != element::Type_t::i4,
655+
typename std::enable_if<Type != element::Type_t::nf4 && Type != element::Type_t::u1 &&
656+
Type != element::Type_t::u4 && Type != element::Type_t::i4,
645657
bool>::type = true>
646658
void write_buffer(const std::vector<T>& source) {
647659
auto p = get_data_ptr_nc<Type>();
@@ -670,6 +682,50 @@ class OPENVINO_API Constant : public Op {
670682
}
671683
}
672684

685+
template <element::Type_t Type,
686+
typename T,
687+
typename StorageDataType = fundamental_type_for<Type>,
688+
typename std::enable_if<Type == element::Type_t::nf4 && std::is_integral<T>::value, bool>::type = true>
689+
void write_buffer(const std::vector<T>& source) {
690+
auto p = get_data_ptr_nc<Type>();
691+
size_t i = 0;
692+
for (; i < source.size() / 2; i++) {
693+
const auto v1 = value_in_range<Type>(source[i * 2]) & 0x0F;
694+
const auto v2 = value_in_range<Type>(source[i * 2 + 1]) & 0x0F;
695+
const auto v = (v2 << 4) | v1;
696+
p[i] = static_cast<StorageDataType>(v);
697+
}
698+
if (source.size() % 2) {
699+
const auto v = value_in_range<Type>(source[i * 2]) & 0x0F;
700+
p[i] = static_cast<StorageDataType>(v);
701+
}
702+
}
703+
704+
template <element::Type_t Type,
705+
typename T,
706+
typename StorageDataType = fundamental_type_for<Type>,
707+
typename std::enable_if<Type == element::Type_t::nf4 &&
708+
(std::is_floating_point<T>::value || std::is_same<T, bfloat16>::value ||
709+
std::is_same<T, float16>::value),
710+
bool>::type = true>
711+
void write_buffer(const std::vector<T>& source) {
712+
auto p = get_data_ptr_nc<Type>();
713+
size_t i = 0;
714+
for (; i < source.size() / 2; i++) {
715+
const auto idx1 = ConvertNF4::quantize(static_cast<float>(source[i * 2]));
716+
const auto idx2 = ConvertNF4::quantize(static_cast<float>(source[i * 2 + 1]));
717+
const auto v1 = value_in_range<Type>(idx1) & 0x0F;
718+
const auto v2 = value_in_range<Type>(idx2) & 0x0F;
719+
const auto v = (v2 << 4) | v1;
720+
p[i] = static_cast<StorageDataType>(v);
721+
}
722+
if (source.size() % 2) {
723+
const auto idx1 = ConvertNF4::quantize(static_cast<float>(source[i * 2]));
724+
const auto v = value_in_range<Type>(idx1) & 0x0F;
725+
p[i] = static_cast<StorageDataType>(v);
726+
}
727+
}
728+
673729
template <element::Type_t Type,
674730
typename T,
675731
typename StorageDataType = fundamental_type_for<Type>,
@@ -755,6 +811,9 @@ class OPENVINO_API Constant : public Op {
755811
case Type_t::u64:
756812
write_buffer<Type_t::u64>(source);
757813
break;
814+
case Type_t::nf4:
815+
write_buffer<Type_t::nf4>(source);
816+
break;
758817
case element::Type_t::undefined:
759818
case element::Type_t::dynamic:
760819
OPENVINO_THROW("unsupported type");
@@ -765,7 +824,9 @@ class OPENVINO_API Constant : public Op {
765824
}
766825
template <ov::element::Type_t Type,
767826
typename ValueT,
768-
typename std::enable_if<Type == ov::element::Type_t::u4, bool>::type = true>
827+
typename std::enable_if<Type == ov::element::Type_t::u4 || Type == ov::element::Type_t::u4 ||
828+
Type == ov::element::Type_t::nf4,
829+
bool>::type = true>
769830
static ov::fundamental_type_for<Type> value_in_range(const ValueT& value) {
770831
const auto result = ov::fundamental_type_for<Type>(value);
771832
OPENVINO_ASSERT(0 <= result && result <= 15, "assigned value out of range u4 values");

0 commit comments

Comments
 (0)