@@ -116,7 +116,7 @@ bool DnnlFCPrimitive::useWeightsDecompressionImpl(const ov::element::Type inputT
116
116
one_of (weightsType, u8, nf4, u4, i4);
117
117
}
118
118
119
- bool DnnlFCPrimitive::useDynamicQuantizationImpl (size_t dqGroupSize, const MemoryDescPtr weightsDesc,
119
+ bool DnnlFCPrimitive::useDynamicQuantizationImpl (size_t dqGroupSize, const MemoryDescPtr srcDesc, const MemoryDescPtr weightsDesc,
120
120
MemoryCPtr scalesPtr, MemoryCPtr zpPtr, bool needTranspose) {
121
121
if (dqGroupSize == 0 )
122
122
return false ;
@@ -125,6 +125,9 @@ bool DnnlFCPrimitive::useDynamicQuantizationImpl(size_t dqGroupSize, const Memor
125
125
!dnnl::impl::cpu::x64::mayiuse (dnnl::impl::cpu::x64::avx512_core_vnni))
126
126
return false ;
127
127
128
+ if (srcDesc->getPrecision () != ov::element::f32)
129
+ return false ;
130
+
128
131
if (!one_of (weightsDesc->getPrecision (), ov::element::u8, ov::element::u4))
129
132
return false ;
130
133
@@ -315,7 +318,7 @@ DnnlShapeAgnosticDataPtr DnnlFCPrimitive::createShapeAgnosticData(const FCAttrs&
315
318
316
319
const auto useWeightsDecompression = useWeightsDecompressionImpl (srcDesc->getPrecision (), weiDesc->getPrecision ());
317
320
const auto useDynamicQuantization = useWeightsDecompression &&
318
- useDynamicQuantizationImpl (attrs.dynamicQuantizationGroupSize , weiDesc,
321
+ useDynamicQuantizationImpl (attrs.dynamicQuantizationGroupSize , srcDesc, weiDesc,
319
322
attrs.decompressionMultiplyPtr , attrs.decompressionSubtractPtr , !attrs.weightsNonTransposed );
320
323
321
324
const auto postOpData = createPrimitiveAttrs (attrs, postOps, memory, context, useDynamicQuantization);
0 commit comments