diff --git a/nncf/openvino/graph/nncf_graph_builder.py b/nncf/openvino/graph/nncf_graph_builder.py
index a4df03ccd7e..850b98b9f9a 100644
--- a/nncf/openvino/graph/nncf_graph_builder.py
+++ b/nncf/openvino/graph/nncf_graph_builder.py
@@ -44,11 +44,13 @@ def convert_to_nncf_dtype(ov_type: ov.Type) -> Dtype:
         """
         type_name = ov_type.get_type_name()
         conversion_map = {
+            "nf4": "float",
+            "f8e4m3": "float",
+            "f8e5m2": "float",
             "f16": "float",
             "bf16": "float",
             "f32": "float",
             "f64": "float",
-            "nf4": "float",
             "i4": "int",
             "i8": "int",
             "i16": "int",
diff --git a/nncf/quantization/algorithms/weight_compression/algorithm.py b/nncf/quantization/algorithms/weight_compression/algorithm.py
index 0da1416a40b..6852f7463b8 100644
--- a/nncf/quantization/algorithms/weight_compression/algorithm.py
+++ b/nncf/quantization/algorithms/weight_compression/algorithm.py
@@ -45,6 +45,7 @@
 from nncf.quantization.algorithms.weight_compression.weight_lowering import WeightCompressionConfig
 from nncf.scopes import IgnoredScope
 from nncf.scopes import get_ignored_node_names_from_ignored_scope
+from nncf.tensor.definitions import TensorDataType
 
 TModel = TypeVar("TModel")
 TTensor = TypeVar("TTensor")
@@ -56,6 +57,12 @@
     CompressWeightsMode.NF4,
     CompressWeightsMode.E2M1,
 ]
+SUPPORTED_DATA_TYPES = [
+    TensorDataType.float16,
+    TensorDataType.bfloat16,
+    TensorDataType.float32,
+    TensorDataType.float64,
+]
 
 
 def get_weight_compression_configuration(
@@ -489,7 +496,7 @@ def _get_ignored_scope_weight_statistics(self, model: TModel, graph: NNCFGraph)
                 continue
             for _, weight_port_id in self._backend_entity.get_weight_names_and_port_ids(node, graph):
                 weight_dtype = self._backend_entity.get_weight_dtype(node, weight_port_id, model, graph)
-                if weight_dtype.is_float():
+                if weight_dtype in SUPPORTED_DATA_TYPES:
                     continue
                 weight_shape = self._backend_entity.get_weight_shape(node, weight_port_id, graph)
                 weight_size = reduce(operator.mul, weight_shape, 1)
@@ -535,7 +542,7 @@ def apply(
                     continue
 
                 weight_dtype = self._backend_entity.get_weight_dtype(node, weight_port_id, model, graph)
-                if not weight_dtype.is_float():
+                if weight_dtype not in SUPPORTED_DATA_TYPES:
                     continue
                 weight_shape = self._backend_entity.get_weight_shape(node, weight_port_id, graph)
                 weight_size = reduce(operator.mul, weight_shape, 1)
diff --git a/nncf/tensor/definitions.py b/nncf/tensor/definitions.py
index 576a812ec7b..584c95d726b 100644
--- a/nncf/tensor/definitions.py
+++ b/nncf/tensor/definitions.py
@@ -39,6 +39,9 @@ class TensorDataType(Enum):
     bfloat16 = auto()
     float32 = auto()
     float64 = auto()
+    f8e4m3 = auto()
+    f8e5m2 = auto()
+    nf4 = auto()
     int8 = auto()
     int32 = auto()
     int64 = auto()
@@ -50,7 +53,15 @@ def is_float(self) -> bool:
         """
         :return: True if the tensor data type is a floating-point type, else False.
         """
-        return self in [TensorDataType.float16, TensorDataType.bfloat16, TensorDataType.float32, TensorDataType.float64]
+        return self in [
+            TensorDataType.float16,
+            TensorDataType.bfloat16,
+            TensorDataType.float32,
+            TensorDataType.float64,
+            TensorDataType.f8e4m3,
+            TensorDataType.f8e5m2,
+            TensorDataType.nf4,
+        ]
 
 
 class TensorDeviceType(Enum):
diff --git a/nncf/tensor/functions/openvino_numeric.py b/nncf/tensor/functions/openvino_numeric.py
index 52a043ceb8f..e6eedc3f13a 100644
--- a/nncf/tensor/functions/openvino_numeric.py
+++ b/nncf/tensor/functions/openvino_numeric.py
@@ -21,6 +21,9 @@
 from nncf.tensor.functions import numeric
 
 DTYPE_MAP: Dict[TensorDataType, ov.Type] = {
+    TensorDataType.nf4: ov.Type.nf4,
+    TensorDataType.f8e4m3: ov.Type.f8e4m3,
+    TensorDataType.f8e5m2: ov.Type.f8e5m2,
     TensorDataType.float16: ov.Type.f16,
     TensorDataType.bfloat16: ov.Type.bf16,
     TensorDataType.float32: ov.Type.f32,
@@ -48,12 +51,17 @@ def _(a: ov.Tensor) -> TensorBackend:
 
 @numeric.astype.register
 def _(a: ov.Tensor, dtype: TensorDataType) -> ov.Tensor:
-    if a.get_element_type() in [ov.Type.bf16, ov.Type.i4, ov.Type.u4] or dtype in [
+    ov_cast_types = [
         TensorDataType.bfloat16,
         TensorDataType.int4,
         TensorDataType.uint4,
-    ]:
-        # Cannot cast to/from bfloat16, uint4, int4 directly
+        TensorDataType.nf4,
+        TensorDataType.f8e4m3,
+        TensorDataType.f8e5m2,
+    ]
+    a_dtype = DTYPE_MAP_REV[a.get_element_type()]
+    if a_dtype in ov_cast_types or dtype in ov_cast_types:
+        # Cast using OpenVINO because the target or source dtype requires special handling
         return _astype_ov(a, dtype)
     return ov.Tensor(numeric.astype(a.data, dtype).data)
 
@@ -75,9 +83,16 @@ def _(a: ov.Tensor, shape: Union[int, Tuple[int, ...]]) -> ov.Tensor:
 
 @numeric.as_numpy_tensor.register
 def _(a: ov.Tensor) -> NDArray[Any]:
-    # Cannot convert bfloat16, uint4, int4 to numpy directly
+    # Cannot convert bfloat16, uint4, int4, nf4, f8e4m3, f8e5m2 to numpy directly
     a_dtype = DTYPE_MAP_REV[a.get_element_type()]
-    if a_dtype in [TensorDataType.bfloat16, TensorDataType.uint4, TensorDataType.int4]:
+    if a_dtype in [
+        TensorDataType.bfloat16,
+        TensorDataType.uint4,
+        TensorDataType.int4,
+        TensorDataType.nf4,
+        TensorDataType.f8e4m3,
+        TensorDataType.f8e5m2,
+    ]:
         dtype = TensorDataType.float32
         if a_dtype == TensorDataType.uint4:
             dtype = TensorDataType.uint8
diff --git a/tests/cross_fw/test_templates/template_test_nncf_tensor.py b/tests/cross_fw/test_templates/template_test_nncf_tensor.py
index 8a2f54a03af..a65406b4c02 100644
--- a/tests/cross_fw/test_templates/template_test_nncf_tensor.py
+++ b/tests/cross_fw/test_templates/template_test_nncf_tensor.py
@@ -1512,6 +1512,9 @@ def test_fn_zeros(self):
                 in [
                     TensorDataType.int4,
                     TensorDataType.uint4,
+                    TensorDataType.nf4,
+                    TensorDataType.f8e4m3,
+                    TensorDataType.f8e5m2,
                 ]
             ):
                 continue
@@ -1541,6 +1544,9 @@ def test_fn_eye(self, n, m, ref):
                 in [
                     TensorDataType.int4,
                     TensorDataType.uint4,
+                    TensorDataType.nf4,
+                    TensorDataType.f8e4m3,
+                    TensorDataType.f8e5m2,
                 ]
             ):
                 continue
diff --git a/tests/openvino/native/data/2025.0/reference_graphs/original_nncf_graph/f8e4m3_FPModel.dot b/tests/openvino/native/data/2025.0/reference_graphs/original_nncf_graph/f8e4m3_FPModel.dot
new file mode 100644
index 00000000000..6c0045a641c
--- /dev/null
+++ b/tests/openvino/native/data/2025.0/reference_graphs/original_nncf_graph/f8e4m3_FPModel.dot
@@ -0,0 +1,17 @@
+strict digraph {
+"0 Input" [id=0, type=Parameter];
+"1 MatMul" [id=1, type=MatMul];
+"2 Add" [id=2, type=Add];
+"3 Result_Add" [id=3, type=Result];
+"4 Convert_6" [id=4, type=Convert];
+"5 MatMul_bias" [id=5, type=Constant];
+"6 Convert_3" [id=6, type=Convert];
+"7 MatMul_const" [id=7, type=Constant];
+"0 Input" -> "1 MatMul" [label="[1, 3, 4, 2]", style=solid];
+"1 MatMul" -> "2 Add" [label="[1, 3, 2, 5]", style=solid];
+"2 Add" -> "3 Result_Add" [label="[1, 3, 2, 5]", style=solid];
+"4 Convert_6" -> "2 Add" [label="[1, 3, 1, 1]", style=solid];
+"5 MatMul_bias" -> "4 Convert_6" [label="[1, 3, 1, 1]", style=solid];
+"6 Convert_3" -> "1 MatMul" [label="[1, 3, 4, 5]", style=solid];
+"7 MatMul_const" -> "6 Convert_3" [label="[1, 3, 4, 5]", style=solid];
+}
diff --git a/tests/openvino/native/data/2025.0/reference_graphs/original_nncf_graph/f8e5m2_FPModel.dot b/tests/openvino/native/data/2025.0/reference_graphs/original_nncf_graph/f8e5m2_FPModel.dot
new file mode 100644
index 00000000000..6c0045a641c
--- /dev/null
+++ b/tests/openvino/native/data/2025.0/reference_graphs/original_nncf_graph/f8e5m2_FPModel.dot
@@ -0,0 +1,17 @@
+strict digraph {
+"0 Input" [id=0, type=Parameter];
+"1 MatMul" [id=1, type=MatMul];
+"2 Add" [id=2, type=Add];
+"3 Result_Add" [id=3, type=Result];
+"4 Convert_6" [id=4, type=Convert];
+"5 MatMul_bias" [id=5, type=Constant];
+"6 Convert_3" [id=6, type=Convert];
+"7 MatMul_const" [id=7, type=Constant];
+"0 Input" -> "1 MatMul" [label="[1, 3, 4, 2]", style=solid];
+"1 MatMul" -> "2 Add" [label="[1, 3, 2, 5]", style=solid];
+"2 Add" -> "3 Result_Add" [label="[1, 3, 2, 5]", style=solid];
+"4 Convert_6" -> "2 Add" [label="[1, 3, 1, 1]", style=solid];
+"5 MatMul_bias" -> "4 Convert_6" [label="[1, 3, 1, 1]", style=solid];
+"6 Convert_3" -> "1 MatMul" [label="[1, 3, 4, 5]", style=solid];
+"7 MatMul_const" -> "6 Convert_3" [label="[1, 3, 4, 5]", style=solid];
+}
diff --git a/tests/openvino/native/test_nncf_graph_builder.py b/tests/openvino/native/test_nncf_graph_builder.py
index b07c7590216..6b8670d7514 100644
--- a/tests/openvino/native/test_nncf_graph_builder.py
+++ b/tests/openvino/native/test_nncf_graph_builder.py
@@ -39,6 +39,8 @@ def test_compare_nncf_graph_synthetic_models(model_cls_to_test):
     "model,precision",
     [
         (FPModel(const_dtype=ov.Type.nf4), "nf4"),
+        (FPModel(const_dtype=ov.Type.f8e4m3), "f8e4m3"),
+        (FPModel(const_dtype=ov.Type.f8e5m2), "f8e5m2"),
     ],
 )
 def test_compare_nncf_graph_precision_synthetic_models(model, precision):
@@ -112,10 +114,12 @@ def _get_default_nncf_graph_edge(from_node, to_node, input_port_id, output_port_
 @pytest.mark.parametrize(
     "ov_type,expected_nncf_dtype",
     [
+        (ov.Type.nf4, Dtype.FLOAT),
+        (ov.Type.f8e4m3, Dtype.FLOAT),
+        (ov.Type.f8e5m2, Dtype.FLOAT),
         (ov.Type.f16, Dtype.FLOAT),
         (ov.Type.f32, Dtype.FLOAT),
         (ov.Type.f64, Dtype.FLOAT),
-        (ov.Type.nf4, Dtype.FLOAT),
         (ov.Type.i4, Dtype.INTEGER),
         (ov.Type.i8, Dtype.INTEGER),
         (ov.Type.i16, Dtype.INTEGER),
@@ -140,8 +144,6 @@ def test_convert_to_nncf_dtype_supported_types(ov_type: ov.Type, expected_nncf_d
     "ov_type",
     [
         ov.Type.undefined,
-        ov.Type.f8e4m3,
-        ov.Type.f8e5m2,
     ],
 )
 def test_convert_to_nncf_dtype_unsupported_types(ov_type: ov.Type):
diff --git a/tests/openvino/native/test_tensor.py b/tests/openvino/native/test_tensor.py
index 58df3d19204..94fab62317c 100644
--- a/tests/openvino/native/test_tensor.py
+++ b/tests/openvino/native/test_tensor.py
@@ -26,14 +26,23 @@
 class TestOVNNCFTensorOperators:
     @staticmethod
     def to_tensor(x, backend=TensorBackend.ov, dtype=TensorDataType.float32):
+        no_numpy_support_dtypes = [
+            TensorDataType.bfloat16,
+            TensorDataType.uint4,
+            TensorDataType.int4,
+            TensorDataType.nf4,
+            TensorDataType.f8e5m2,
+            TensorDataType.f8e4m3,
+        ]
+
         if backend == TensorBackend.ov:
-            if dtype in [TensorDataType.bfloat16, TensorDataType.uint4, TensorDataType.int4]:
+            if dtype in no_numpy_support_dtypes:
                 ov_const = opset.constant(x, dtype=DTYPE_MAP_OV[dtype])
                 return ov.Tensor(ov_const.data, ov_const.data.shape, DTYPE_MAP_OV[dtype])
             else:
                 return ov.Tensor(np.array(x, dtype=DTYPE_MAP_NP[dtype]))
         elif backend == TensorBackend.numpy:
-            if dtype in [TensorDataType.bfloat16, TensorDataType.uint4, TensorDataType.int4]:
+            if dtype in no_numpy_support_dtypes:
                 msg = f"Can't create NumPY tensor in dtype {dtype}"
                 raise ValueError(msg)
             return np.array(x, dtype=DTYPE_MAP_NP[dtype])