
Commit 3daccec

Deprecate _get_compression_options (#563)
1 parent 6c8fa79 commit 3daccec

File tree

1 file changed: +29 -91 lines

optimum/intel/openvino/quantization.py (+29 -91)
@@ -54,15 +54,6 @@
 )
 
 
-# TODO : remove as unused
-_COMPRESSION_OPTIONS = {
-    "int8": {"mode": nncf.CompressWeightsMode.INT8},
-    "int4_sym_g128": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 128},
-    "int4_asym_g128": {"mode": nncf.CompressWeightsMode.INT4_ASYM, "group_size": 128},
-    "int4_sym_g64": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 64},
-    "int4_asym_g64": {"mode": nncf.CompressWeightsMode.INT4_ASYM, "group_size": 64},
-}
-
 register_module(ignored_algorithms=[])(Conv1D)
 
 core = Core()
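
For context: the deleted _COMPRESSION_OPTIONS table mapped string presets to nncf.compress_weights keyword arguments. A minimal sketch of how such a preset table is consumed, assuming a hypothetical helper name (apply_compression_preset is not part of the codebase; nncf.compress_weights and its mode/group_size/ratio parameters are NNCF's real API):

import nncf

# Two entries copied from the deleted table, enough to make the sketch run.
_COMPRESSION_OPTIONS = {
    "int8": {"mode": nncf.CompressWeightsMode.INT8},
    "int4_sym_g128": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 128},
}

# Hypothetical helper (not in the codebase) showing how the presets were meant
# to be forwarded to NNCF's weight-compression entry point.
def apply_compression_preset(ov_model, preset="int8", ratio=None):
    options = dict(_COMPRESSION_OPTIONS[preset])
    if ratio is not None:
        options["ratio"] = ratio  # fraction of weights compressed to 4-bit
    return nncf.compress_weights(ov_model, **options)
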
@@ -234,27 +225,16 @@ def quantize(
         ov_config = ov_config or quantization_config
 
         if isinstance(self.model, OVBaseModel):
-            if self.model.export_feature == "text-generation" and self.model.use_cache:
-                self._quantize_ovcausallm(
-                    calibration_dataset,
-                    save_directory,
-                    batch_size,
-                    data_collator,
-                    remove_unused_columns,
-                    weights_only,
-                    ov_config,
-                    **kwargs,
-                )
-            else:
-                self._quantize_ovbasemodel(
-                    calibration_dataset,
-                    save_directory,
-                    batch_size,
-                    data_collator,
-                    remove_unused_columns,
-                    weights_only,
-                    **kwargs,
-                )
+            self._quantize_ovbasemodel(
+                calibration_dataset,
+                save_directory,
+                batch_size,
+                data_collator,
+                remove_unused_columns,
+                weights_only,
+                ov_config,
+                **kwargs,
+            )
 
         elif isinstance(self.model, torch.nn.Module):
             self._quantize_torchmodel(
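
With this hunk, every OVBaseModel takes the same _quantize_ovbasemodel path and ov_config is forwarded unconditionally; the causal-LM special case moves into the method body (last hunk below). A usage sketch of the public entry point, assuming a placeholder checkpoint and example dataset choices (OVModelForCausalLM, OVQuantizer, get_calibration_dataset, and quantize are the library's real API):

from optimum.intel import OVModelForCausalLM, OVQuantizer
from transformers import AutoTokenizer

model_id = "gpt2"  # placeholder checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = OVModelForCausalLM.from_pretrained(model_id, export=True)
quantizer = OVQuantizer.from_pretrained(model)

# Build a small calibration set; the dataset choice is an example only.
calibration_dataset = quantizer.get_calibration_dataset(
    "wikitext",
    dataset_config_name="wikitext-2-raw-v1",
    preprocess_function=lambda ex: tokenizer(ex["text"]),
    num_samples=50,
    dataset_split="train",
)

# Causal LM or not, this now dispatches to the single _quantize_ovbasemodel.
quantizer.quantize(calibration_dataset=calibration_dataset, save_directory="quantized_model")
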
@@ -270,51 +250,7 @@ def quantize(
         else:
             raise TypeError(f"Unsupported model type: {type(self.model)}")
 
-    def _get_compression_options(self, config: OVConfig):
-        options = {}
-        if config is not None and "type" in config.compression:
-            options = _COMPRESSION_OPTIONS[config.compression["type"]]
-            if "ratio" in config.compression:
-                options["ratio"] = config.compression["ratio"]
-        return options
-
     def _quantize_ovbasemodel(
-        self,
-        calibration_dataset: Dataset,
-        save_directory: Union[str, Path],
-        batch_size: int = 1,
-        data_collator: Optional[DataCollator] = None,
-        remove_unused_columns: bool = True,
-        weights_only: bool = False,
-        **kwargs,
-    ):
-        save_directory = Path(save_directory)
-        save_directory.mkdir(parents=True, exist_ok=True)
-
-        if weights_only:
-            self.model.model = nncf.compress_weights(self.model.model)
-            self.model.save_pretrained(save_directory)
-            return
-
-        calibration_dataloader = self._get_calibration_dataloader(
-            calibration_dataset=calibration_dataset,
-            batch_size=batch_size,
-            remove_unused_columns=remove_unused_columns,
-            data_collator=data_collator,
-        )
-
-        quantization_dataset = nncf.Dataset(calibration_dataloader, lambda x: x)
-        quantized_model = nncf.quantize(
-            self.model.model,
-            quantization_dataset,
-            model_type=nncf.ModelType.TRANSFORMER if not kwargs.get("model_type") else kwargs.get("model_type"),
-            fast_bias_correction=kwargs.get("fast_bias_correction", True),
-            **kwargs,
-        )
-        self.model.model = quantized_model
-        self.model.save_pretrained(save_directory)
-
-    def _quantize_ovcausallm(
+    def _quantize_ovbasemodel(
         self,
         calibration_dataset: Dataset,
         save_directory: Union[str, Path],
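
Since the old _quantize_ovbasemodel body is deleted and only the def line of _quantize_ovcausallm is removed, the surviving method keeps the causal-LM parameter list, including the ov_config argument that quantize() now always passes. Reconstructed from the context lines, the merged signature reads roughly as below; the ov_config annotation and default are assumptions, since the diff only shows the parameter being passed positionally:

from pathlib import Path
from typing import Optional, Union

from datasets import Dataset
from transformers import DataCollator

def _quantize_ovbasemodel(
    self,
    calibration_dataset: Dataset,
    save_directory: Union[str, Path],
    batch_size: int = 1,
    data_collator: Optional[DataCollator] = None,
    remove_unused_columns: bool = True,
    weights_only: bool = False,
    ov_config=None,  # assumed name/default; the call site passes it eighth
    **kwargs,
):
    ...
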
@@ -329,11 +265,11 @@ def _quantize_ovcausallm(
         save_directory.mkdir(parents=True, exist_ok=True)
 
         if weights_only:
-            quantization_config = None if ov_config is None else ov_config.quantization_config
-            if quantization_config is None:
-                # Use default 8-bit compression
-                quantization_config = OVWeightQuantizationConfig(bits=8, sym=True)
-            _weight_only_quantization(self.model, quantization_config)
+            # Use default 8-bit compression if not provided
+            q_config = (
+                OVWeightQuantizationConfig(bits=8, sym=True) if ov_config is None else ov_config.quantization_config
+            )
+            _weight_only_quantization(self.model, q_config)
 
             self.model.save_pretrained(save_directory)
             return
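
The rewrite collapses the two-step None check into one conditional expression: an explicit ov_config.quantization_config wins, otherwise 8-bit symmetric weight compression is the default. A sketch of overriding that default with illustrative values (OVWeightQuantizationConfig and OVConfig are real classes; the import path and accepted fields can vary across optimum-intel versions):

from optimum.intel import OVConfig, OVWeightQuantizationConfig

# Illustrative 4-bit setup; the bits/sym/group_size/ratio values are examples only.
q_config = OVWeightQuantizationConfig(bits=4, sym=True, group_size=128, ratio=0.8)
ov_config = OVConfig(quantization_config=q_config)
# Passed as quantize(..., weights_only=True, ov_config=ov_config), this overrides
# the OVWeightQuantizationConfig(bits=8, sym=True) default used when ov_config is None.
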
@@ -345,21 +281,23 @@
             data_collator=data_collator,
         )
 
-        # Prefetch past_key_values
-        self.model.update_pkv_precision(True)
-        self.model.compile()
-        subset_size = kwargs.get("subset_size", 300)
-        data_cache = []
+        if self.model.export_feature == "text-generation" and self.model.use_cache:
+            # Prefetch past_key_values
+            self.model.update_pkv_precision(True)
+            self.model.compile()
+            subset_size = kwargs.get("subset_size", 300)
+            data_cache = []
 
-        self.model.request = InferRequestWrapper(self.model.request, data_cache)
-        for _, data in enumerate(calibration_dataloader):
-            self.model.generate(**data, max_new_tokens=1)
-            if len(data_cache) >= subset_size:
-                break
-        self.model.request = self.model.request.request
+            self.model.request = InferRequestWrapper(self.model.request, data_cache)
+            for _, data in enumerate(calibration_dataloader):
+                self.model.generate(**data, max_new_tokens=1)
+                if len(data_cache) >= subset_size:
+                    break
+            self.model.request = self.model.request.request
+            calibration_dataloader = data_cache
 
         # Actual model quantization
-        quantization_dataset = nncf.Dataset(data_cache, lambda x: x)
+        quantization_dataset = nncf.Dataset(calibration_dataloader, lambda x: x)
         quantized_model = nncf.quantize(
             self.model.model,
             quantization_dataset,
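
The KV-cache calibration trick is now gated: only text-generation models with use_cache get their inference request wrapped so inputs can be recorded, and the recorded cache then replaces calibration_dataloader, letting the nncf.Dataset line serve both branches. A minimal sketch of what a recording wrapper in the spirit of InferRequestWrapper does (a simplified stand-in, not the real class, which handles more call paths):

class RecordingRequestWrapper:
    """Simplified stand-in for InferRequestWrapper: record each set of
    inference inputs for calibration, then delegate to the real request."""

    def __init__(self, request, data_cache):
        self.request = request        # original OpenVINO infer request
        self.data_cache = data_cache  # shared list read back as calibration data

    def __call__(self, inputs, share_inputs=False):
        self.data_cache.append(inputs)  # record one calibration sample
        return self.request(inputs, share_inputs=share_inputs)

The final line, self.model.request = self.model.request.request, unwraps back to the original request once enough samples are cached.
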
