Skip to content

Commit bd7580e

Browse files
authored
Added conversion to new mxfp4 type: e2m1 weights with e8m0 scale. (openvinotoolkit#588)
1 parent 5d222f9 commit bd7580e

File tree

2 files changed

+8 -2 lines changed

llm_bench/python/convert.py

+3 -2
Original file line numberDiff line numberDiff line change
@@ -1408,13 +1408,14 @@ def main():
14081408
"-c",
14091409
"--compress_weights",
14101410
type=str,
1411-
choices=["INT8", "INT8_ASYM", "INT8_SYM", "4BIT_DEFAULT", "4BIT_MAXIMUM", "INT4_SYM", "INT4_ASYM"],
1411+
choices=["INT8", "INT8_ASYM", "INT8_SYM", "4BIT_DEFAULT", "4BIT_MAXIMUM", "INT4_SYM", "INT4_ASYM", "E2M1"],
14121412
nargs="+",
14131413
help=(
14141414
"The weight compression option, e.g. INT8 - INT8 weights (deprecated, please use INT8_ASYM instead), "
14151415
"4BIT_DEFAULT - for 4-bit compression with predefined configs with performance-accuracy trade-off, "
14161416
"4BIT_MAXIMUM - for 4-bit compression with predefined configs for the best performance, "
1417-
"INT4_* - for INT4 compressed weights."
1417+
"INT4_* - for INT4 compressed weights, "
1418+
"E2M1 - for fp4 compression with fp8 (e8m0) scales."
14181419
),
14191420
)
14201421
compression_group.add_argument(

llm_bench/python/utils/nncf_utils.py

+5
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@
1919
"ratio": 1,
2020
"all_layers": True,
2121
},
22+
"E2M1": {
23+
"mode": nncf.CompressWeightsMode.E2M1,
24+
"group_size": 32,
25+
"all_layers": True,
26+
},
2227
}
2328

2429
if "INT8_ASYM" in nncf.CompressWeightsMode.__members__:

0 commit comments

Comments (0)