Skip to content

Commit 70c9305

Browse files
committed
Relax dependency on accelerate and datasets in OVQuantizer
1 parent 552de65 commit 70c9305

File tree

3 files changed: +41 −12 lines changed

optimum/intel/openvino/quantization.py

+21-11
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,6 @@
2222
import openvino
2323
import torch
2424
import transformers
25-
from accelerate.data_loader import DataLoaderStateMixin
26-
from datasets import Dataset, load_dataset
2725
from nncf import NNCFConfig, compress_weights
2826
from nncf.torch import create_compressed_model, register_default_init_args, register_module
2927
from nncf.torch.dynamic_graph.io_handling import wrap_nncf_model_inputs_with_objwalk
@@ -33,13 +31,15 @@
3331
from torch.utils.data import DataLoader, RandomSampler
3432
from transformers import DataCollator, PreTrainedModel, default_data_collator
3533
from transformers.pytorch_utils import Conv1D
34+
from transformers.utils import is_accelerate_available
3635

3736
from optimum.exporters.tasks import TasksManager
3837
from optimum.quantization_base import OptimumQuantizer
3938

4039
from ...exporters.openvino import export, export_pytorch_via_onnx
4140
from ...exporters.openvino.stateful import ensure_export_task_support_stateful
4241
from ..utils.constant import _TASK_ALIASES
42+
from ..utils.import_utils import DATASETS_IMPORT_ERROR, is_datasets_available
4343
from .configuration import OVConfig
4444
from .modeling_base import OVBaseModel
4545
from .modeling_decoder import OVBaseDecoderModel
@@ -51,6 +51,9 @@
5151
)
5252

5353

54+
if is_datasets_available():
55+
from datasets import Dataset
56+
5457
COMPRESSION_OPTIONS = {
5558
"int8": {"mode": nncf.CompressWeightsMode.INT8},
5659
"int4_sym_g128": {"mode": nncf.CompressWeightsMode.INT4_SYM, "group_size": 128},
@@ -72,8 +75,11 @@ def get_inputs(self, dataloader_output) -> Tuple[Tuple, Dict]:
7275
@property
7376
def batch_size(self):
7477
batch_size = self._data_loader.batch_size
75-
if batch_size is None and isinstance(self._data_loader, DataLoaderStateMixin):
76-
batch_size = self._data_loader.total_batch_size
78+
if is_accelerate_available():
79+
from accelerate.data_loader import DataLoaderStateMixin
80+
81+
if batch_size is None and isinstance(self._data_loader, DataLoaderStateMixin):
82+
batch_size = self._data_loader.total_batch_size
7783
return batch_size
7884

7985

@@ -155,7 +161,7 @@ def from_pretrained(cls, model: PreTrainedModel, **kwargs):
155161

156162
def quantize(
157163
self,
158-
calibration_dataset: Dataset = None,
164+
calibration_dataset: "Dataset" = None,
159165
save_directory: Union[str, Path] = None,
160166
quantization_config: OVConfig = None,
161167
file_name: Optional[str] = None,
@@ -268,7 +274,7 @@ def _get_compression_options(self, config: OVConfig):
268274

269275
def _quantize_ovbasemodel(
270276
self,
271-
calibration_dataset: Dataset,
277+
calibration_dataset: "Dataset",
272278
save_directory: Union[str, Path],
273279
batch_size: int = 1,
274280
data_collator: Optional[DataCollator] = None,
@@ -304,7 +310,7 @@ def _quantize_ovbasemodel(
304310

305311
def _quantize_ovcausallm(
306312
self,
307-
calibration_dataset: Dataset,
313+
calibration_dataset: "Dataset",
308314
save_directory: Union[str, Path],
309315
batch_size: int = 1,
310316
data_collator: Optional[DataCollator] = None,
@@ -358,7 +364,7 @@ def _quantize_ovcausallm(
358364

359365
def _quantize_torchmodel(
360366
self,
361-
calibration_dataset: Dataset,
367+
calibration_dataset: "Dataset",
362368
save_directory: Union[str, Path],
363369
quantization_config: OVConfig = None,
364370
file_name: Optional[str] = None,
@@ -482,7 +488,7 @@ def get_calibration_dataset(
482488
preprocess_batch: bool = True,
483489
use_auth_token: bool = False,
484490
cache_dir: Optional[str] = None,
485-
) -> Dataset:
491+
) -> "Dataset":
486492
"""
487493
Create the calibration `datasets.Dataset` to use for the post-training static quantization calibration step.
488494
@@ -507,6 +513,10 @@ def get_calibration_dataset(
507513
Returns:
508514
The calibration `datasets.Dataset` to use for the post-training static quantization calibration step.
509515
"""
516+
if not is_datasets_available():
517+
raise ValueError(DATASETS_IMPORT_ERROR.format("OVQuantizer.get_calibration_dataset"))
518+
from datasets import load_dataset
519+
510520
calibration_dataset = load_dataset(
511521
dataset_name,
512522
name=dataset_config_name,
@@ -526,7 +536,7 @@ def get_calibration_dataset(
526536

527537
def _get_calibration_dataloader(
528538
self,
529-
calibration_dataset: Dataset,
539+
calibration_dataset: "Dataset",
530540
batch_size: int,
531541
remove_unused_columns: bool,
532542
data_collator: Optional[DataCollator] = None,
@@ -543,6 +553,6 @@ def _get_calibration_dataloader(
543553
)
544554
return OVDataLoader(calibration_dataloader)
545555

546-
def _remove_unused_columns(self, dataset: Dataset):
556+
def _remove_unused_columns(self, dataset: "Dataset"):
547557
ignored_columns = list(set(dataset.column_names) - set(self._signature_columns))
548558
return dataset.remove_columns(ignored_columns)

optimum/intel/utils/import_utils.py

+19
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,16 @@
119119
_timm_available = False
120120

121121

122+
_datasets_available = importlib.util.find_spec("datasets") is not None
123+
_datasets_version = "N/A"
124+
125+
if _datasets_available:
126+
try:
127+
_datasets_version = importlib_metadata.version("datasets")
128+
except importlib_metadata.PackageNotFoundError:
129+
_datasets_available = False
130+
131+
122132
def is_transformers_available():
123133
return _transformers_available
124134

@@ -151,6 +161,10 @@ def is_timm_available():
151161
return _timm_available
152162

153163

164+
def is_datasets_available():
165+
return _datasets_available
166+
167+
154168
# This function was copied from: https://github.com/huggingface/accelerate/blob/874c4967d94badd24f893064cc3bef45f57cadf7/src/accelerate/utils/versions.py#L319
155169
def compare_versions(library_or_version: Union[str, Version], operation: str, requirement_version: str):
156170
"""
@@ -267,6 +281,11 @@ def is_timm_version(operation: str, version: str):
267281
`pip install neural-compressor`. Please note that you may need to restart your runtime after installation.
268282
"""
269283

284+
DATASETS_IMPORT_ERROR = """
285+
{0} requires the datasets library but it was not found in your environment. You can install it with pip:
286+
`pip install datasets`. Please note that you may need to restart your runtime after installation.
287+
"""
288+
270289
BACKENDS_MAPPING = OrderedDict(
271290
[
272291
("diffusers", (is_diffusers_available, DIFFUSERS_IMPORT_ERROR)),

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
"transformers>=4.34.0",
4646
],
4747
"openvino": ["openvino>=2023.2", "onnx", "onnxruntime", "transformers>=4.36.0", "optimum>=1.16.1"],
48-
"nncf": ["nncf>=2.7.0"],
48+
"nncf": ["nncf>=2.7.0", "datasets", "accelerate"],
4949
"ipex": ["intel-extension-for-pytorch", "onnx"],
5050
"diffusers": ["diffusers"],
5151
"quality": QUALITY_REQUIRE,

Comments (0)