Skip to content

Commit 483793b

Browse files
Disabled dynamic quantization by default
1 parent: 406c032 · commit: 483793b

File tree

4 files changed

+9
-5
lines changed

4 files changed

+9
-5
lines changed

src/plugins/intel_cpu/src/config.h

+1 -1
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@ struct Config {
5555
std::string dumpToDot = {};
5656
std::string device_id = {};
5757
float fcSparseWeiDecompressionRate = 1.0f;
58-
uint64_t fcDynamicQuantizationGroupSize = 32;
58+
uint64_t fcDynamicQuantizationGroupSize = 0;
5959
#if defined(OPENVINO_ARCH_X86_64)
6060
size_t rtCacheCapacity = 5000ul;
6161
#else

src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.cpp

+5 -2
Original file line number | Diff line number | Diff line change
@@ -116,7 +116,7 @@ bool DnnlFCPrimitive::useWeightsDecompressionImpl(const ov::element::Type inputT
116116
one_of(weightsType, u8, nf4, u4, i4);
117117
}
118118

119-
bool DnnlFCPrimitive::useDynamicQuantizationImpl(size_t dqGroupSize, const MemoryDescPtr weightsDesc,
119+
bool DnnlFCPrimitive::useDynamicQuantizationImpl(size_t dqGroupSize, const MemoryDescPtr srcDesc, const MemoryDescPtr weightsDesc,
120120
MemoryCPtr scalesPtr, MemoryCPtr zpPtr, bool needTranspose) {
121121
if (dqGroupSize == 0)
122122
return false;
@@ -125,6 +125,9 @@ bool DnnlFCPrimitive::useDynamicQuantizationImpl(size_t dqGroupSize, const Memor
125125
!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_vnni))
126126
return false;
127127

128+
if (srcDesc->getPrecision() != ov::element::f32)
129+
return false;
130+
128131
if (!one_of(weightsDesc->getPrecision(), ov::element::u8, ov::element::u4))
129132
return false;
130133

@@ -315,7 +318,7 @@ DnnlShapeAgnosticDataPtr DnnlFCPrimitive::createShapeAgnosticData(const FCAttrs&
315318

316319
const auto useWeightsDecompression = useWeightsDecompressionImpl(srcDesc->getPrecision(), weiDesc->getPrecision());
317320
const auto useDynamicQuantization = useWeightsDecompression &&
318-
useDynamicQuantizationImpl(attrs.dynamicQuantizationGroupSize, weiDesc,
321+
useDynamicQuantizationImpl(attrs.dynamicQuantizationGroupSize, srcDesc, weiDesc,
319322
attrs.decompressionMultiplyPtr, attrs.decompressionSubtractPtr, !attrs.weightsNonTransposed);
320323

321324
const auto postOpData = createPrimitiveAttrs(attrs, postOps, memory, context, useDynamicQuantization);

src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_fullyconnected_primitive.hpp

+1 -1
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,7 @@ class DnnlFCPrimitive {
7070
const DnnlShapeAgnosticDataPtr& shapeAgnosticData);
7171

7272
private:
73-
static bool useDynamicQuantizationImpl(size_t dqGroupSize, const MemoryDescPtr weightsDesc,
73+
static bool useDynamicQuantizationImpl(size_t dqGroupSize, const MemoryDescPtr srcDesc, const MemoryDescPtr weightsDesc,
7474
MemoryCPtr scalesPtr, MemoryCPtr zpPtr, bool needTranspose);
7575

7676
dnnl::stream m_stream;

src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp

+2 -1
Original file line number | Diff line number | Diff line change
@@ -288,7 +288,8 @@ void Transformations::PreLpt(const std::vector<ov::element::Type>& defaultPrecis
288288
ov::element::u4,
289289
ov::element::i4,
290290
ov::element::nf4};
291-
CPU_REGISTER_PASS_X64(decompression_handling_manager, ov::pass::MarkDequantizationSubgraph, decompression_precisions, false);
291+
bool fold_subtract_const = config.fcDynamicQuantizationGroupSize != 0;
292+
CPU_REGISTER_PASS_X64(decompression_handling_manager, ov::pass::MarkDequantizationSubgraph, decompression_precisions, fold_subtract_const);
292293
CPU_SET_CALLBACK_X64(decompression_handling_manager, [&](const_node_ptr &node) -> bool {
293294
return !is_decompression_multiply(node);
294295
}, ov::pass::MarkDequantizationSubgraph);

0 commit comments

Comments
 (0)