
Commit a0dc06c

Update minimum itrex version (#675)
* Update minimum itrex version
* Trigger test
* trigger test
* trigger test
* fix
* remove tests itrex version constraint
1 parent 00581ab commit a0dc06c

8 files changed: +47 −162 lines

.github/workflows/test_inc.yml (+1 −1)

@@ -33,7 +33,7 @@ jobs:
         pip install cmake
         pip install py-cpuinfo
         pip install .[neural-compressor,diffusers,tests]
-        pip install intel-extension-for-transformers==1.4.0
+        pip install intel-extension-for-transformers
         pip install peft

     - name: Test with Pytest

examples/neural_compressor/language-modeling/run_clm.py (+4 −7)

@@ -57,13 +57,10 @@
 from transformers.utils.versions import require_version

 from optimum.intel.neural_compressor import INCModelForCausalLM, INCQuantizer, INCTrainer
-from optimum.intel.utils.import_utils import (
-    INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR,
-    is_intel_extension_for_transformers_available,
-)
+from optimum.intel.utils.import_utils import ITREX_IMPORT_ERROR, is_itrex_available


-if is_intel_extension_for_transformers_available():
+if is_itrex_available():
     from intel_extension_for_transformers.transformers.utils.config import GPTQConfig, RtnConfig

 os.environ["CUDA_VISIBLE_DEVICES"] = ""

@@ -658,8 +655,8 @@ def compute_metrics(eval_preds):
     else:
         recipes = {}
     if optim_args.quantization_approach == "weight_only":
-        if not is_intel_extension_for_transformers_available():
-            raise ImportError(INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR.format("WeightOnly quantization"))
+        if not is_itrex_available():
+            raise ImportError(ITREX_IMPORT_ERROR.format("WeightOnly quantization"))
     if optim_args.apply_pruning or optim_args.apply_distillation:
         raise ValueError("Weight only quantization and pruning or distillation cannot be combined.")
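The guard pattern the script keeps here is worth spelling out: the availability check keeps the example importable when ITREX is absent, and the ImportError is raised only on the code path that actually needs the package. A minimal sketch of the same idiom under assumed names (`is_foo_available` and `FOO_IMPORT_ERROR` are illustrative stand-ins, not optimum-intel APIs):

    import importlib.util

    def is_foo_available() -> bool:
        # Probe the import machinery instead of importing the package outright.
        return importlib.util.find_spec("foo") is not None

    FOO_IMPORT_ERROR = "{0} requires the foo library but it was not found in your environment."

    if is_foo_available():
        from foo import BarConfig  # only imported when the package is present

    def run_weight_only_quantization():
        if not is_foo_available():
            raise ImportError(FOO_IMPORT_ERROR.format("WeightOnly quantization"))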

optimum/intel/neural_compressor/__init__.py (+1 −1)

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from ..utils.import_utils import is_diffusers_available, is_intel_extension_for_transformers_available
+from ..utils.import_utils import is_diffusers_available
 from .configuration import INCConfig
 from .modeling_base import (
     INCModel,

optimum/intel/neural_compressor/modeling_base.py (+2 −6)

@@ -44,11 +44,7 @@
 from optimum.intel.generation import BaseModelForCausalLM

 from ...modeling_base import OptimizedModel
-from ..utils.import_utils import (
-    _torch_version,
-    is_intel_extension_for_transformers_available,
-    is_torch_version,
-)
+from ..utils.import_utils import _torch_version, is_itrex_available, is_torch_version
 from .configuration import INCConfig
 from .utils import WEIGHTS_NAME

@@ -136,7 +132,7 @@ def _from_pretrained(
         model_save_dir = Path(model_cache_path).parent
         inc_config = None
         msg = None
-        if is_intel_extension_for_transformers_available():
+        if is_itrex_available():
             try:
                 quantization_config = PretrainedConfig.from_pretrained(model_save_dir / "quantize_config.json")
                 algorithm = getattr(quantization_config, "quant_method", None)
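The second hunk is a pure rename inside the checkpoint-detection logic: `_from_pretrained` probes the checkpoint directory for an ITREX `quantize_config.json` and dispatches on its `quant_method` field. A hedged sketch of that probe, with the wrapping function invented for illustration (the two calls inside mirror the diff):

    from pathlib import Path
    from transformers import PretrainedConfig

    def detect_quant_method(model_save_dir: Path):
        # Returns e.g. "gptq" or "rtn" for an ITREX weight-only checkpoint, else None.
        try:
            quantization_config = PretrainedConfig.from_pretrained(model_save_dir / "quantize_config.json")
        except Exception:
            return None  # no quantize_config.json: not a weight-only checkpoint
        return getattr(quantization_config, "quant_method", None)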

optimum/intel/neural_compressor/quantization.py (+19 −86)

@@ -19,11 +19,10 @@
 from enum import Enum
 from itertools import chain
 from pathlib import Path
-from typing import Callable, Dict, Optional, Union
+from typing import Callable, Optional, Union

 import torch
 from datasets import Dataset, load_dataset
-from neural_compressor.adaptor.pytorch import PyTorch_FXAdaptor, _cfg_to_qconfig, _propagate_qconfig
 from neural_compressor.config import PostTrainingQuantConfig
 from neural_compressor.experimental.export import torch_to_int8_onnx
 from neural_compressor.model.onnx_model import ONNXModel

@@ -47,14 +46,14 @@

 from ..utils.constant import _TASK_ALIASES, MIN_QDQ_ONNX_OPSET, ONNX_WEIGHTS_NAME, WEIGHTS_NAME
 from ..utils.import_utils import (
-    INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR,
-    _intel_extension_for_transformers_version,
+    ITREX_IMPORT_ERROR,
     _ipex_version,
+    _itrex_version,
     _neural_compressor_version,
     _torch_version,
-    is_intel_extension_for_transformers_available,
-    is_intel_extension_for_transformers_version,
     is_ipex_version,
+    is_itrex_available,
+    is_itrex_version,
     is_neural_compressor_version,
     is_torch_version,
 )

@@ -69,16 +68,21 @@
     INCModelForTokenClassification,
     INCModelForVision2Seq,
 )
-from .utils import INCDataLoader, _cfgs_to_fx_cfgs
-
+from .utils import (
+    IPEX_MINIMUM_VERSION,
+    ITREX_MINIMUM_TORCH_VERSION,
+    ITREX_MINIMUM_VERSION,
+    NEURAL_COMPRESSOR_MINIMUM_VERSION,
+    NEURAL_COMPRESSOR_WEIGHT_ONLY_MINIMUM_VERSION,
+    INCDataLoader,
+)

-INTEL_EXTENSION_FOR_TRANSFORMERS_MINIMUM_VERSION = "1.4.0"

-if is_intel_extension_for_transformers_available():
-    if is_intel_extension_for_transformers_version("!=", INTEL_EXTENSION_FOR_TRANSFORMERS_MINIMUM_VERSION):
+if is_itrex_available():
+    if is_itrex_version("<", ITREX_MINIMUM_VERSION):
         raise ImportError(
-            f"Found an incompatible version of `intel-extension-for-transformers`. Found version {_intel_extension_for_transformers_version}, "
-            f"but only version {INTEL_EXTENSION_FOR_TRANSFORMERS_MINIMUM_VERSION} is supported."
+            f"Found an incompatible version of `intel-extension-for-transformers`. Found version {_itrex_version}, "
+            f"but only version {ITREX_MINIMUM_VERSION} or higher is supported."
         )
     from intel_extension_for_transformers.transformers.llm.quantization.utils import convert_to_quantized_model
     from intel_extension_for_transformers.transformers.modeling.modeling_auto import save_low_bit

@@ -92,10 +96,6 @@

 logger = logging.getLogger(__name__)

-NEURAL_COMPRESSOR_MINIMUM_VERSION = "2.1.0"
-NEURAL_COMPRESSOR_WEIGHT_ONLY_MINIMUM_VERSION = "2.3.0"
-IPEX_MINIMUM_VERSION = "2.1.0"
-ITREX_MINIMUM_TORCH_VERSION = "2.2.0"

 if is_neural_compressor_version("<", NEURAL_COMPRESSOR_MINIMUM_VERSION):
     raise ImportError(

@@ -231,8 +231,8 @@ def quantize(
                     f"Found an incompatible version of neural-compressor. Found version {_neural_compressor_version}, "
                     f"but only version {NEURAL_COMPRESSOR_WEIGHT_ONLY_MINIMUM_VERSION} or higher supports weight-only quantization."
                 )
-            if not is_intel_extension_for_transformers_available():
-                raise ImportError(INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR.format("Weight only quantization"))
+            if not is_itrex_available():
+                raise ImportError(ITREX_IMPORT_ERROR.format("Weight only quantization"))

             if is_torch_version("<", ITREX_MINIMUM_TORCH_VERSION):
                 raise ImportError(

@@ -514,70 +514,3 @@ def _get_calibration_dataloader(
     def _remove_unused_columns(self, dataset: Dataset):
         ignored_columns = list(set(dataset.column_names) - set(self._signature_columns))
         return dataset.remove_columns(ignored_columns)
-
-
-# Adapted from https://github.com/intel/neural-compressor/blob/master/neural_compressor/utils/pytorch.py#L96
-def _apply_quantization_from_config(q_config: Dict, model: torch.nn.Module) -> torch.nn.Module:
-    """
-    Apply Intel Neural Compressor quantization steps on the given model.
-
-    Arguments:
-        q_config (`Dict`):
-            Dictionary containing all quantization information such as approach, dtype, scheme and granularity.
-        model (`torch.nn.Module`):
-            Model to quantize.
-    Returns:
-        q_model (`torch.nn.Module`):
-            Quantized model.
-    """
-    from torch.quantization import add_observer_, convert
-    from torch.quantization.quantize_fx import convert_fx, prepare_fx, prepare_qat_fx
-
-    approach = q_config.get("approach")
-    framework = q_config.get("framework")
-
-    if approach not in SUPPORTED_QUANT_MODE:
-        raise ValueError(
-            "Unknown quantization approach. Supported approach are " + ", ".join(SUPPORTED_QUANT_MODE.keys())
-        )
-
-    quant_mode = INCQuantizationMode(approach)
-    q_model = copy.deepcopy(model)
-    q_model.eval()
-
-    if framework == "pytorch_fx":
-        op_cfgs = _cfg_to_qconfig(q_config, approach)
-        fx_op_cfgs = _cfgs_to_fx_cfgs(op_cfgs, approach)
-
-        if not q_config["fx_sub_module_list"]:
-            if quant_mode == INCQuantizationMode.AWARE_TRAINING:
-                q_model.train()
-                q_model = prepare_qat_fx(q_model, fx_op_cfgs)
-            else:
-                q_model = prepare_fx(q_model, fx_op_cfgs)
-            q_model = convert_fx(q_model)
-
-        else:
-            sub_module_list = q_config["fx_sub_module_list"]
-            if q_config["approach"] == "quant_aware_training":
-                q_model.train()
-                PyTorch_FXAdaptor.prepare_sub_graph(sub_module_list, fx_op_cfgs, q_model, prefix="", is_qat=True)
-            else:
-                PyTorch_FXAdaptor.prepare_sub_graph(sub_module_list, fx_op_cfgs, q_model, prefix="")
-            PyTorch_FXAdaptor.convert_sub_graph(sub_module_list, q_model, prefix="")
-
-    else:
-        if quant_mode == INCQuantizationMode.DYNAMIC:
-            q_mapping = torch.quantization.quantization_mappings.get_default_dynamic_quant_module_mappings()
-            op_cfgs = _cfg_to_qconfig(q_config, approach)
-        else:
-            q_mapping = torch.quantization.quantization_mappings.get_default_static_quant_module_mappings()
-            op_cfgs = _cfg_to_qconfig(q_config)
-
-        _propagate_qconfig(q_model, op_cfgs, approach=approach)
-
-        if quant_mode != INCQuantizationMode.DYNAMIC:
-            add_observer_(q_model)
-        q_model = convert(q_model, mapping=q_mapping, inplace=True)
-
-    return q_model
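The substantive change in this file is the relaxed gate: the old check used `!=` against a hard-coded 1.4.0, so any newer ITREX release raised ImportError; the new check treats 1.4.0 as a floor. A self-contained comparison using `packaging` (the installed-version string is illustrative):

    from packaging.version import parse

    ITREX_MINIMUM_VERSION = "1.4.0"
    installed = "1.4.2"  # stand-in for the detected _itrex_version

    old_check_fails = parse(installed) != parse(ITREX_MINIMUM_VERSION)  # True: 1.4.2 was rejected
    new_check_fails = parse(installed) < parse(ITREX_MINIMUM_VERSION)   # False: 1.4.2 now passes

    print(old_check_fails, new_check_fails)  # True False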

optimum/intel/neural_compressor/utils.py (+6 −44)

@@ -16,11 +16,9 @@
 import os
 import warnings
 from collections import UserDict
-from typing import Dict

 import torch
 from neural_compressor.utils.pytorch import load
-from packaging import version
 from torch.utils.data import DataLoader

 from ..utils.constant import WEIGHTS_NAME

@@ -31,6 +29,12 @@

 CONFIG_NAME = "best_configure.yaml"

+NEURAL_COMPRESSOR_MINIMUM_VERSION = "2.1.0"
+NEURAL_COMPRESSOR_WEIGHT_ONLY_MINIMUM_VERSION = "2.3.0"
+IPEX_MINIMUM_VERSION = "2.1.0"
+ITREX_MINIMUM_VERSION = "1.4.0"
+ITREX_MINIMUM_TORCH_VERSION = "2.2.0"
+

 _HEAD_TO_AUTOMODELS = {
     "fill-mask": "INCModelForMaskedLM",

@@ -45,10 +49,6 @@
 }


-parsed_torch_version_base = version.parse(version.parse(torch.__version__).base_version)
-is_torch_less_than_1_13 = parsed_torch_version_base < version.parse("1.13.0")
-
-
 class INCDataLoader(DataLoader):
     use_label = True

@@ -73,44 +73,6 @@ def __iter__(self):
             yield input


-def _cfgs_to_fx_cfgs(op_cfgs: Dict, observer_type: str = "post_training_static_quant") -> Dict:
-    """Inc function which convert a quantization config to a format that meets the requirements of torch.fx.
-
-    Arguments:
-        op_cfgs (`dict`):
-            Dictionary of quantization configure for each op.
-        observer_type (`str`):
-            Specify observer type.
-    Returns:
-        fx_op_cfgs (`dict`):
-            Dictionary of quantization configure that meets the requirements of torch.fx.
-    """
-    if not is_torch_less_than_1_13:
-        from torch.ao.quantization import QConfigMapping
-
-        fx_op_cfgs = QConfigMapping()
-    else:
-        fx_op_cfgs = {}
-        op_tuple_cfg_list = []
-    for key, value in op_cfgs.items():
-        if key == "default_qconfig":
-            if not is_torch_less_than_1_13:
-                fx_op_cfgs.set_global(value)
-            else:
-                fx_op_cfgs[""] = value
-            continue
-        if not is_torch_less_than_1_13:
-            fx_op_cfgs.set_module_name(key, value)
-        else:
-            op_tuple = (key, value)
-            op_tuple_cfg_list.append(op_tuple)
-
-    if is_torch_less_than_1_13:
-        fx_op_cfgs["module_name"] = op_tuple_cfg_list
-
-    return fx_op_cfgs
-
-
 def load_quantized_model(checkpoint_dir_or_file: str, model: torch.nn.Module, **kwargs) -> torch.nn.Module:
     """
     Returns the quantized model, which was quantized through neural_compressor.
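With the version floors hoisted into this module, `quantization.py` imports them instead of defining its own set, and the torch-1.13 compatibility shim disappears along with its only caller. For orientation, a hedged usage sketch of the retained `load_quantized_model` helper; the signature comes from the context lines above, while the checkpoint path and FP32 model are placeholders:

    import torch
    from optimum.intel.neural_compressor.utils import load_quantized_model

    fp32_model = torch.nn.Linear(4, 4)  # stand-in for the original FP32 model
    q_model = load_quantized_model("path/to/inc_checkpoint", fp32_model)  # placeholder path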

optimum/intel/utils/import_utils.py (+12 −15)

@@ -61,14 +61,14 @@
     _neural_compressor_available = False


-_intel_extension_for_transformers_available = importlib.util.find_spec("intel_extension_for_transformers") is not None
-_intel_extension_for_transformers_version = "N/A"
-if _intel_extension_for_transformers_available:
+_itrex_available = importlib.util.find_spec("intel_extension_for_transformers") is not None
+_itrex_version = "N/A"
+if _itrex_available:
     try:
-        _intel_extension_for_transformers_version = importlib_metadata.version("intel_extension_for_transformers")
+        _itrex_version = importlib_metadata.version("intel_extension_for_transformers")
         logging.warn("`transformers` version >= 4.31 is requirements by intel-extension-for-transformers.")
     except importlib_metadata.PackageNotFoundError:
-        _intel_extension_for_transformers_available = False
+        _itrex_available = False


 _ipex_available = importlib.util.find_spec("intel_extension_for_pytorch") is not None

@@ -158,8 +158,8 @@ def is_neural_compressor_available():
     return _neural_compressor_available


-def is_intel_extension_for_transformers_available():
-    return _intel_extension_for_transformers_available
+def is_itrex_available():
+    return _itrex_available


 def is_ipex_available():

@@ -314,13 +314,13 @@ def is_neural_compressor_version(operation: str, version: str):
     return compare_versions(parse(_neural_compressor_version), operation, version)


-def is_intel_extension_for_transformers_version(operation: str, version: str):
+def is_itrex_version(operation: str, version: str):
     """
     Compare the current intel_extension_for_transformers version to a given reference with an operation.
     """
-    if not _intel_extension_for_transformers_available:
+    if not _itrex_available:
         return False
-    return compare_versions(parse(_intel_extension_for_transformers_version), operation, version)
+    return compare_versions(parse(_itrex_version), operation, version)


 def is_openvino_version(operation: str, version: str):

@@ -396,7 +396,7 @@ def is_timm_version(operation: str, version: str):
 `pip install neural-compressor`. Please note that you may need to restart your runtime after installation.
 """

-INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR = """
+ITREX_IMPORT_ERROR = """
 {0} requires the intel-extension-for-transformers library but it was not found in your environment. You can install it with pip:
 `pip install intel-extension-for-transformers` and `pip install peft`. Please note that you may need to restart your runtime after installation.
 """

@@ -418,10 +418,7 @@ def is_timm_version(operation: str, version: str):
         ("nncf", (is_nncf_available, NNCF_IMPORT_ERROR)),
         ("openvino", (is_openvino_available, OPENVINO_IMPORT_ERROR)),
         ("neural_compressor", (is_neural_compressor_available, NEURAL_COMPRESSOR_IMPORT_ERROR)),
-        (
-            "intel_extension_for_transformers",
-            (is_intel_extension_for_transformers_available, INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR),
-        ),
+        ("itrex", (is_itrex_available, ITREX_IMPORT_ERROR)),
         ("accelerate", (is_accelerate_available, ACCELERATE_IMPORT_ERROR)),
     ]
 )
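The renamed helpers keep the same two-step detection: `find_spec` establishes that the module is importable, and the distribution metadata confirms it is actually installed (and yields the version). The idiom shown self-contained with the stdlib ("some_package" is an illustrative name):

    import importlib.util
    from importlib import metadata

    _pkg_available = importlib.util.find_spec("some_package") is not None
    _pkg_version = "N/A"
    if _pkg_available:
        try:
            _pkg_version = metadata.version("some_package")
        except metadata.PackageNotFoundError:
            # find_spec can succeed on a stray namespace package, so fall back.
            _pkg_available = False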

tests/neural_compressor/test_optimization.py (+2 −2)

@@ -45,7 +45,7 @@
     set_seed,
 )
 from utils_tests import MODEL_NAMES, SEED, INCTestMixin, _generate_dataset
-from optimum.intel.utils.import_utils import is_torch_version, is_intel_extension_for_transformers_available
+from optimum.intel.utils.import_utils import is_torch_version, is_itrex_available


 from optimum.intel import (

@@ -511,7 +511,7 @@ class WeightOnlyQuantizationTest(INCTestMixin):
     )

     @parameterized.expand(WEIGHT_ONLY_CONFIG)
-    @unittest.skipIf(not is_intel_extension_for_transformers_available(), reason="ITREX not available")
+    @unittest.skipIf(not is_itrex_available(), reason="ITREX not available")
     def test_weight_only_quantization(self, methodology, weight_dtype):
         model_name = "hf-internal-testing/tiny-random-GPTNeoForCausalLM"
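The decorator swap keeps the weight-only tests self-skipping rather than failing on machines without ITREX. The same pattern in a self-contained form; `is_itrex_available` is the real helper, the test body is illustrative:

    import unittest
    from optimum.intel.utils.import_utils import is_itrex_available

    class WeightOnlySmokeTest(unittest.TestCase):
        @unittest.skipIf(not is_itrex_available(), reason="ITREX not available")
        def test_runs_only_with_itrex(self):
            # Safe to import here: the decorator skipped this test if ITREX is missing.
            from intel_extension_for_transformers.transformers.utils.config import RtnConfig
            self.assertIsNotNone(RtnConfig)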
