FP8 types support in NNCF graph building #3344

Merged: 4 commits, Mar 18, 2025
4 changes: 3 additions & 1 deletion nncf/openvino/graph/nncf_graph_builder.py
@@ -44,11 +44,13 @@ def convert_to_nncf_dtype(ov_type: ov.Type) -> Dtype:
"""
type_name = ov_type.get_type_name()
conversion_map = {
"nf4": "float",
"f8e4m3": "float",
"f8e5m2": "float",
"f16": "float",
"bf16": "float",
"f32": "float",
"f64": "float",
"nf4": "float",
"i4": "int",
"i8": "int",
"i16": "int",
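
For reference, a minimal sketch (not part of this diff) of how the extended mapping behaves. It assumes this NNCF branch is installed and that `import openvino as ov` exposes `ov.Type`, as in recent OpenVINO releases:

import openvino as ov

from nncf.openvino.graph.nncf_graph_builder import convert_to_nncf_dtype

# nf4, f8e4m3 and f8e5m2 constants are now reported as float dtypes in the NNCF graph.
for ov_type in (ov.Type.nf4, ov.Type.f8e4m3, ov.Type.f8e5m2):
    print(ov_type.get_type_name(), "->", convert_to_nncf_dtype(ov_type))
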
11 changes: 9 additions & 2 deletions nncf/quantization/algorithms/weight_compression/algorithm.py
@@ -45,6 +45,7 @@
from nncf.quantization.algorithms.weight_compression.weight_lowering import WeightCompressionConfig
from nncf.scopes import IgnoredScope
from nncf.scopes import get_ignored_node_names_from_ignored_scope
from nncf.tensor.definitions import TensorDataType

TModel = TypeVar("TModel")
TTensor = TypeVar("TTensor")
@@ -56,6 +57,12 @@
CompressWeightsMode.NF4,
CompressWeightsMode.E2M1,
]
SUPPORTED_DATA_TYPES = [
TensorDataType.float16,
TensorDataType.bfloat16,
TensorDataType.float32,
TensorDataType.float64,
]


def get_weight_compression_configuration(
@@ -489,7 +496,7 @@ def _get_ignored_scope_weight_statistics(self, model: TModel, graph: NNCFGraph)
continue
for _, weight_port_id in self._backend_entity.get_weight_names_and_port_ids(node, graph):
weight_dtype = self._backend_entity.get_weight_dtype(node, weight_port_id, model, graph)
if weight_dtype.is_float():
if weight_dtype in SUPPORTED_DATA_TYPES:
continue
weight_shape = self._backend_entity.get_weight_shape(node, weight_port_id, graph)
weight_size = reduce(operator.mul, weight_shape, 1)
@@ -535,7 +542,7 @@ def apply(
continue

weight_dtype = self._backend_entity.get_weight_dtype(node, weight_port_id, model, graph)
if not weight_dtype.is_float():
if weight_dtype not in SUPPORTED_DATA_TYPES:
continue
weight_shape = self._backend_entity.get_weight_shape(node, weight_port_id, graph)
weight_size = reduce(operator.mul, weight_shape, 1)
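
The guard change matters because is_float() now also covers nf4, f8e4m3 and f8e5m2 (see nncf/tensor/definitions.py below), while weight compression should only pick up regular float weights. A minimal sketch of the intent, not part of this diff; the helper name is hypothetical:

from nncf.tensor.definitions import TensorDataType

SUPPORTED_DATA_TYPES = [
    TensorDataType.float16,
    TensorDataType.bfloat16,
    TensorDataType.float32,
    TensorDataType.float64,
]

def is_compressible(weight_dtype: TensorDataType) -> bool:
    # Mirrors the `weight_dtype not in SUPPORTED_DATA_TYPES: continue` guard in apply():
    # weights that are already low precision (nf4, f8e4m3, f8e5m2) are skipped.
    return weight_dtype in SUPPORTED_DATA_TYPES

assert is_compressible(TensorDataType.float32)
assert not is_compressible(TensorDataType.f8e4m3)
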
13 changes: 12 additions & 1 deletion nncf/tensor/definitions.py
@@ -39,6 +39,9 @@ class TensorDataType(Enum):
bfloat16 = auto()
float32 = auto()
float64 = auto()
f8e4m3 = auto()
f8e5m2 = auto()
nf4 = auto()
int8 = auto()
int32 = auto()
int64 = auto()
@@ -50,7 +53,15 @@ def is_float(self) -> bool:
"""
:return: True if the tensor data type is a floating-point type, else False.
"""
return self in [TensorDataType.float16, TensorDataType.bfloat16, TensorDataType.float32, TensorDataType.float64]
return self in [
TensorDataType.float16,
TensorDataType.bfloat16,
TensorDataType.float32,
TensorDataType.float64,
TensorDataType.f8e4m3,
TensorDataType.f8e5m2,
TensorDataType.nf4,
]


class TensorDeviceType(Enum):
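
A quick sketch (not part of this diff) of the new behavior, assuming this branch is installed:

from nncf.tensor.definitions import TensorDataType

# The low-precision float formats now count as floating-point...
assert TensorDataType.f8e4m3.is_float()
assert TensorDataType.f8e5m2.is_float()
assert TensorDataType.nf4.is_float()
# ...while integer types still do not.
assert not TensorDataType.int8.is_float()
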
25 changes: 20 additions & 5 deletions nncf/tensor/functions/openvino_numeric.py
@@ -21,6 +21,9 @@
from nncf.tensor.functions import numeric

DTYPE_MAP: Dict[TensorDataType, ov.Type] = {
TensorDataType.nf4: ov.Type.nf4,
TensorDataType.f8e4m3: ov.Type.f8e4m3,
TensorDataType.f8e5m2: ov.Type.f8e5m2,
TensorDataType.float16: ov.Type.f16,
TensorDataType.bfloat16: ov.Type.bf16,
TensorDataType.float32: ov.Type.f32,
@@ -48,12 +51,17 @@ def _(a: ov.Tensor) -> TensorBackend:

@numeric.astype.register
def _(a: ov.Tensor, dtype: TensorDataType) -> ov.Tensor:
if a.get_element_type() in [ov.Type.bf16, ov.Type.i4, ov.Type.u4] or dtype in [
ov_cast_types = [
TensorDataType.bfloat16,
TensorDataType.int4,
TensorDataType.uint4,
]:
# Cannot cast to/from bfloat16, uint4, int4 directly
TensorDataType.nf4,
TensorDataType.f8e4m3,
TensorDataType.f8e5m2,
]
a_dtype = DTYPE_MAP_REV[a.get_element_type()]
if a_dtype in ov_cast_types or dtype in ov_cast_types:
# Cast using OpenVINO because the target or source dtype requires special handling
return _astype_ov(a, dtype)
return ov.Tensor(numeric.astype(a.data, dtype).data)

@@ -75,9 +83,16 @@ def _(a: ov.Tensor, shape: Union[int, Tuple[int, ...]]) -> ov.Tensor:

@numeric.as_numpy_tensor.register
def _(a: ov.Tensor) -> NDArray[Any]:
# Cannot convert bfloat16, uint4, int4 to numpy directly
# Cannot convert bfloat16, uint4, int4, nf4, f8e4m3, f8e5m2 to numpy directly
a_dtype = DTYPE_MAP_REV[a.get_element_type()]
if a_dtype in [TensorDataType.bfloat16, TensorDataType.uint4, TensorDataType.int4]:
if a_dtype in [
TensorDataType.bfloat16,
TensorDataType.uint4,
TensorDataType.int4,
TensorDataType.nf4,
TensorDataType.f8e4m3,
TensorDataType.f8e5m2,
]:
dtype = TensorDataType.float32
if a_dtype == TensorDataType.uint4:
dtype = TensorDataType.uint8
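
To summarize the dispatch rule: if either the source or the target dtype has no NumPy counterpart, the cast is routed through OpenVINO via _astype_ov. A standalone sketch of that rule (the helper name below is hypothetical, not part of this diff):

from nncf.tensor.definitions import TensorDataType

ov_cast_types = [
    TensorDataType.bfloat16,
    TensorDataType.int4,
    TensorDataType.uint4,
    TensorDataType.nf4,
    TensorDataType.f8e4m3,
    TensorDataType.f8e5m2,
]

def needs_ov_cast(src: TensorDataType, dst: TensorDataType) -> bool:
    # Mirrors the guard before _astype_ov(): either endpoint being a dtype that
    # NumPy does not support forces the conversion through OpenVINO.
    return src in ov_cast_types or dst in ov_cast_types

assert needs_ov_cast(TensorDataType.f8e4m3, TensorDataType.float32)
assert not needs_ov_cast(TensorDataType.float16, TensorDataType.float32)
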
6 changes: 6 additions & 0 deletions tests/cross_fw/test_templates/template_test_nncf_tensor.py
@@ -1512,6 +1512,9 @@ def test_fn_zeros(self):
in [
TensorDataType.int4,
TensorDataType.uint4,
TensorDataType.nf4,
TensorDataType.f8e4m3,
TensorDataType.f8e5m2,
]
):
continue
@@ -1541,6 +1544,9 @@ def test_fn_eye(self, n, m, ref):
in [
TensorDataType.int4,
TensorDataType.uint4,
TensorDataType.nf4,
TensorDataType.f8e4m3,
TensorDataType.f8e5m2,
]
):
continue
@@ -0,0 +1,17 @@
strict digraph {
"0 Input" [id=0, type=Parameter];
"1 MatMul" [id=1, type=MatMul];
"2 Add" [id=2, type=Add];
"3 Result_Add" [id=3, type=Result];
"4 Convert_6" [id=4, type=Convert];
"5 MatMul_bias" [id=5, type=Constant];
"6 Convert_3" [id=6, type=Convert];
"7 MatMul_const" [id=7, type=Constant];
"0 Input" -> "1 MatMul" [label="[1, 3, 4, 2]", style=solid];
"1 MatMul" -> "2 Add" [label="[1, 3, 2, 5]", style=solid];
"2 Add" -> "3 Result_Add" [label="[1, 3, 2, 5]", style=solid];
"4 Convert_6" -> "2 Add" [label="[1, 3, 1, 1]", style=solid];
"5 MatMul_bias" -> "4 Convert_6" [label="[1, 3, 1, 1]", style=solid];
"6 Convert_3" -> "1 MatMul" [label="[1, 3, 4, 5]", style=solid];
"7 MatMul_const" -> "6 Convert_3" [label="[1, 3, 4, 5]", style=solid];
}
@@ -0,0 +1,17 @@
strict digraph {
"0 Input" [id=0, type=Parameter];
"1 MatMul" [id=1, type=MatMul];
"2 Add" [id=2, type=Add];
"3 Result_Add" [id=3, type=Result];
"4 Convert_6" [id=4, type=Convert];
"5 MatMul_bias" [id=5, type=Constant];
"6 Convert_3" [id=6, type=Convert];
"7 MatMul_const" [id=7, type=Constant];
"0 Input" -> "1 MatMul" [label="[1, 3, 4, 2]", style=solid];
"1 MatMul" -> "2 Add" [label="[1, 3, 2, 5]", style=solid];
"2 Add" -> "3 Result_Add" [label="[1, 3, 2, 5]", style=solid];
"4 Convert_6" -> "2 Add" [label="[1, 3, 1, 1]", style=solid];
"5 MatMul_bias" -> "4 Convert_6" [label="[1, 3, 1, 1]", style=solid];
"6 Convert_3" -> "1 MatMul" [label="[1, 3, 4, 5]", style=solid];
"7 MatMul_const" -> "6 Convert_3" [label="[1, 3, 4, 5]", style=solid];
}
8 changes: 5 additions & 3 deletions tests/openvino/native/test_nncf_graph_builder.py
@@ -39,6 +39,8 @@ def test_compare_nncf_graph_synthetic_models(model_cls_to_test):
"model,precision",
[
(FPModel(const_dtype=ov.Type.nf4), "nf4"),
(FPModel(const_dtype=ov.Type.f8e4m3), "f8e4m3"),
(FPModel(const_dtype=ov.Type.f8e5m2), "f8e5m2"),
],
)
def test_compare_nncf_graph_precision_synthetic_models(model, precision):
@@ -112,10 +114,12 @@ def _get_default_nncf_graph_edge(from_node, to_node, input_port_id, output_port_
@pytest.mark.parametrize(
"ov_type,expected_nncf_dtype",
[
(ov.Type.nf4, Dtype.FLOAT),
(ov.Type.f8e4m3, Dtype.FLOAT),
(ov.Type.f8e5m2, Dtype.FLOAT),
(ov.Type.f16, Dtype.FLOAT),
(ov.Type.f32, Dtype.FLOAT),
(ov.Type.f64, Dtype.FLOAT),
(ov.Type.nf4, Dtype.FLOAT),
(ov.Type.i4, Dtype.INTEGER),
(ov.Type.i8, Dtype.INTEGER),
(ov.Type.i16, Dtype.INTEGER),
@@ -140,8 +144,6 @@ def test_convert_to_nncf_dtype_supported_types(ov_type: ov.Type, expected_nncf_d
"ov_type",
[
ov.Type.undefined,
ov.Type.f8e4m3,
ov.Type.f8e5m2,
],
)
def test_convert_to_nncf_dtype_unsupported_types(ov_type: ov.Type):
13 changes: 11 additions & 2 deletions tests/openvino/native/test_tensor.py
@@ -26,14 +26,23 @@
class TestOVNNCFTensorOperators:
@staticmethod
def to_tensor(x, backend=TensorBackend.ov, dtype=TensorDataType.float32):
no_numpy_support_dtypes = [
TensorDataType.bfloat16,
TensorDataType.uint4,
TensorDataType.int4,
TensorDataType.nf4,
TensorDataType.f8e5m2,
TensorDataType.f8e4m3,
]

if backend == TensorBackend.ov:
if dtype in [TensorDataType.bfloat16, TensorDataType.uint4, TensorDataType.int4]:
if dtype in no_numpy_support_dtypes:
ov_const = opset.constant(x, dtype=DTYPE_MAP_OV[dtype])
return ov.Tensor(ov_const.data, ov_const.data.shape, DTYPE_MAP_OV[dtype])
else:
return ov.Tensor(np.array(x, dtype=DTYPE_MAP_NP[dtype]))
elif backend == TensorBackend.numpy:
if dtype in [TensorDataType.bfloat16, TensorDataType.uint4, TensorDataType.int4]:
if dtype in no_numpy_support_dtypes:
msg = f"Can't create NumPY tensor in dtype {dtype}"
raise ValueError(msg)
return np.array(x, dtype=DTYPE_MAP_NP[dtype])