Commit 2a397e3

Fix torch and ITREX dependencies (#640)
1 parent 382d00f commit 2a397e3

3 files changed: +49 −50 lines changed

optimum/intel/neural_compressor/modeling_base.py

+23-20
@@ -47,7 +47,6 @@
     _torch_version,
     is_intel_extension_for_transformers_available,
     is_torch_version,
-    requires_backends,
 )
 from .configuration import INCConfig
 from .utils import WEIGHTS_NAME
@@ -141,25 +140,29 @@ def _from_pretrained(
         model_save_dir = Path(model_cache_path).parent
         inc_config = None
         msg = None
-        try:
-            requires_backends(cls, ["intel_extension_for_transformers"])
-            quantization_config = WeightOnlyQuantConfig.from_pretrained(model_id)
-            if getattr(
-                quantization_config, "algorithm", None
-            ) is not None and quantization_config.algorithm.lower() in ["rtn", "gptq", "awq", "autoaround"]:
-                return ITREX_WOQ_MODEL.from_pretrained(
-                    pretrained_model_name_or_path=model_id,
-                    use_auth_token=use_auth_token,
-                    revision=revision,
-                    force_download=force_download,
-                    cache_dir=cache_dir,
-                    local_files_only=local_files_only,
-                    subfolder=subfolder,
-                    trust_remote_code=trust_remote_code,
-                    **kwargs,
-                )
-        except EnvironmentError:
-            msg = "The model is not quantized with weight-only quantization."
+        if is_intel_extension_for_transformers_available():
+            try:
+                quantization_config = WeightOnlyQuantConfig.from_pretrained(model_id)
+                algorithm = getattr(quantization_config, "algorithm", None)
+                if algorithm is not None and quantization_config.algorithm.lower() in {
+                    "rtn",
+                    "gptq",
+                    "awq",
+                    "autoaround",
+                }:
+                    return ITREX_WOQ_MODEL.from_pretrained(
+                        pretrained_model_name_or_path=model_id,
+                        use_auth_token=use_auth_token,
+                        revision=revision,
+                        force_download=force_download,
+                        cache_dir=cache_dir,
+                        local_files_only=local_files_only,
+                        subfolder=subfolder,
+                        trust_remote_code=trust_remote_code,
+                        **kwargs,
+                    )
+            except EnvironmentError:
+                msg = "The model is not quantized with weight-only quantization."
         try:
             inc_config = INCConfig.from_pretrained(model_id)
             if not is_torch_version("==", inc_config.torch_version):
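
Net effect: `requires_backends` no longer forces ITREX at load time; the weight-only branch is simply skipped when `is_intel_extension_for_transformers_available()` is false. A minimal sketch of the loading path this enables (the `./woq-model` directory is a hypothetical save location, not from this commit):

from optimum.intel import INCModelForCausalLM

# With intel-extension-for-transformers installed and a WeightOnlyQuantConfig
# saved alongside the weights (algorithm one of "rtn", "gptq", "awq",
# "autoaround"), this dispatches to ITREX_WOQ_MODEL.from_pretrained;
# otherwise it falls through to the regular INC loading logic.
model = INCModelForCausalLM.from_pretrained("./woq-model")  # hypothetical path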

optimum/intel/neural_compressor/quantization.py

+8-13
@@ -47,6 +47,7 @@
 
 from ..utils.constant import _TASK_ALIASES, MIN_QDQ_ONNX_OPSET, ONNX_WEIGHTS_NAME, WEIGHTS_NAME
 from ..utils.import_utils import (
+    INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR,
     _intel_extension_for_transformers_version,
     _ipex_version,
     _neural_compressor_version,
@@ -78,26 +79,17 @@
             f"Found an incompatible version of `intel-extension-for-transformers`. Found version {_intel_extension_for_transformers_version}, "
             f"but only version {INTEL_EXTENSION_FOR_TRANSFORMERS_MINIMUM_VERSION} is supported."
         )
-    TORCH_VERSION = "2.1.0"
-    if is_torch_version("!=", TORCH_VERSION):
-        raise ImportError(
-            f"Found an incompatible version of `torch`. Found version {_torch_version}, "
-            f"but only version {TORCH_VERSION} is supported."
-        )
-
     from intel_extension_for_transformers.llm.quantization.utils import convert_to_quantized_model
     from intel_extension_for_transformers.transformers.modeling.modeling_auto import save_low_bit
     from intel_extension_for_transformers.transformers.utils.config import WeightOnlyQuantConfig
 
-    Config = Union[PostTrainingQuantConfig, WeightOnlyQuantConfig]
-else:
-    Config = PostTrainingQuantConfig
 
 logger = logging.getLogger(__name__)
 
 NEURAL_COMPRESSOR_MINIMUM_VERSION = "2.1.0"
 NEURAL_COMPRESSOR_WEIGHT_ONLY_MINIMUM_VERSION = "2.3.0"
 IPEX_MINIMUM_VERSION = "2.1.0"
+_ITREX_TORCH_VERSION = "2.1.0"
 
 if is_neural_compressor_version("<", NEURAL_COMPRESSOR_MINIMUM_VERSION):
     raise ImportError(
@@ -160,7 +152,7 @@ def from_pretrained(cls, model: PreTrainedModel, **kwargs):
 
     def quantize(
         self,
-        quantization_config: Config,
+        quantization_config: Union["PostTrainingQuantConfig", "WeightOnlyQuantConfig"],
         save_directory: Union[str, Path],
         calibration_dataset: Dataset = None,
         batch_size: int = 8,
@@ -213,9 +205,12 @@ def quantize(
                     f"but only version {NEURAL_COMPRESSOR_WEIGHT_ONLY_MINIMUM_VERSION} or higher supports weight-only quantization."
                 )
             if not is_intel_extension_for_transformers_available():
+                raise ImportError(INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR.format("Weight only quantization"))
+
+            if is_torch_version("!=", _ITREX_TORCH_VERSION):
                 raise ImportError(
-                    "Didn't find out intel-etension-for-transformers package. "
-                    "Please install packages: pip install intel-etension-for-transformers and pip install peft."
+                    f"Found an incompatible version of `torch`. Found version {_torch_version}, "
+                    f"but only version {_ITREX_TORCH_VERSION} is supported."
                 )
 
             if quantization_config is None:

tests/neural_compressor/test_optimization.py

+18-17
@@ -88,10 +88,10 @@ class OptimizationTest(INCTestMixin):
     )
 
     WEIGHT_ONLY_CONFIG = (
-        (False, "RTN", "int4_clip"),
-        (False, "GPTQ", "int4_clip"),
-        (False, "RTN", "int8"),
-        (True, "", ""),
+        ("RTN", "int4_clip"),
+        ("GPTQ", "int4_clip"),
+        ("RTN", "int8"),
+        ("", ""),
     )
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES_DYNAMIC)
@@ -212,7 +212,7 @@ def test_ipex_static_quantization_with_smoothquant(self, task, model_name, expec
     @unittest.skipIf(
         not is_intel_extension_for_transformers_available(), reason="Intel-extension-for-transformers not available!"
    )
-    def test_weight_only_quantization(self, no_config, algo, weight_dtype):
+    def test_weight_only_quantization(self, methodology, weight_dtype):
         model_name = "hf-internal-testing/tiny-random-GPTNeoForCausalLM"
         model = AutoModelForCausalLM.from_pretrained(model_name)
         tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -221,29 +221,30 @@ def test_weight_only_quantization(self, no_config, algo, weight_dtype):
         calibration_dataset = _generate_dataset(quantizer, tokenizer, num_samples=2)
 
         with tempfile.TemporaryDirectory() as tmp_dir:
-            if not no_config:
-                if algo == "GPTQ":
-                    algorithm_args = {
-                        "percdamp": 0.01,
-                        "act_order": False,
-                        "scheme": "sym",
-                    }
+            if methodology:
+                gptq_args = {
+                    "percdamp": 0.01,
+                    "act_order": False,
+                    "scheme": "sym",
+                }
+
                 quantization_config = WeightOnlyQuantConfig(
-                    algorithm=algo,
-                    algorithm_args=algorithm_args if algo == "GPTQ" else None,
+                    algorithm=methodology,
+                    algorithm_args=gptq_args if methodology == "GPTQ" else None,
                     weight_dtype=weight_dtype,
                 )
-                q_model = quantizer.quantize(
+                quantizer.quantize(
                     quantization_config=quantization_config,
-                    calibration_dataset=calibration_dataset if algo == "GPTQ" else None,
+                    calibration_dataset=calibration_dataset,
                     save_directory=tmp_dir,
                 )
             else:
-                q_model = quantizer.quantize(
+                quantizer.quantize(
                     quantization_config=None,
                     save_directory=tmp_dir,
                     weight_only=True, # use RTN quantization method and NF4 weight data type is default.
                 )
+
             q_model = INCModelForCausalLM.from_pretrained(tmp_dir)
             inp = torch.tensor([calibration_dataset[0]["input_ids"]])
             out = model(inp)[0]
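
For reference, `parameterized.expand` unpacks each tuple into the test's positional arguments, which is why shrinking WEIGHT_ONLY_CONFIG to 2-tuples requires the `(self, methodology, weight_dtype)` signature, with an empty `methodology` standing in for the old `no_config=True` case. A standalone sketch of that mechanism (illustration only, not code from this repository):

import unittest

from parameterized import parameterized


class TupleArityExample(unittest.TestCase):
    CONFIGS = (
        ("RTN", "int4_clip"),
        ("GPTQ", "int4_clip"),
        ("RTN", "int8"),
        ("", ""),
    )

    # Each 2-tuple becomes (methodology, weight_dtype); the empty-string
    # entry exercises the default weight-only path.
    @parameterized.expand(CONFIGS)
    def test_tuple_arity(self, methodology, weight_dtype):
        self.assertIsInstance(methodology, str)
        self.assertIsInstance(weight_dtype, str)


if __name__ == "__main__":
    unittest.main()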
