|
19 | 19 | from enum import Enum
|
20 | 20 | from itertools import chain
|
21 | 21 | from pathlib import Path
|
22 |
| -from typing import Callable, Dict, Optional, Union |
| 22 | +from typing import Callable, Optional, Union |
23 | 23 |
|
24 | 24 | import torch
|
25 | 25 | from datasets import Dataset, load_dataset
|
26 |
| -from neural_compressor.adaptor.pytorch import PyTorch_FXAdaptor, _cfg_to_qconfig, _propagate_qconfig |
27 | 26 | from neural_compressor.config import PostTrainingQuantConfig
|
28 | 27 | from neural_compressor.experimental.export import torch_to_int8_onnx
|
29 | 28 | from neural_compressor.model.onnx_model import ONNXModel
|
|
47 | 46 |
|
48 | 47 | from ..utils.constant import _TASK_ALIASES, MIN_QDQ_ONNX_OPSET, ONNX_WEIGHTS_NAME, WEIGHTS_NAME
|
49 | 48 | from ..utils.import_utils import (
|
50 |
| - INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR, |
51 |
| - _intel_extension_for_transformers_version, |
| 49 | + ITREX_IMPORT_ERROR, |
52 | 50 | _ipex_version,
|
| 51 | + _itrex_version, |
53 | 52 | _neural_compressor_version,
|
54 | 53 | _torch_version,
|
55 |
| - is_intel_extension_for_transformers_available, |
56 |
| - is_intel_extension_for_transformers_version, |
57 | 54 | is_ipex_version,
|
| 55 | + is_itrex_available, |
| 56 | + is_itrex_version, |
58 | 57 | is_neural_compressor_version,
|
59 | 58 | is_torch_version,
|
60 | 59 | )
|
|
69 | 68 | INCModelForTokenClassification,
|
70 | 69 | INCModelForVision2Seq,
|
71 | 70 | )
|
72 |
| -from .utils import INCDataLoader, _cfgs_to_fx_cfgs |
73 |
| - |
| 71 | +from .utils import ( |
| 72 | + IPEX_MINIMUM_VERSION, |
| 73 | + ITREX_MINIMUM_TORCH_VERSION, |
| 74 | + ITREX_MINIMUM_VERSION, |
| 75 | + NEURAL_COMPRESSOR_MINIMUM_VERSION, |
| 76 | + NEURAL_COMPRESSOR_WEIGHT_ONLY_MINIMUM_VERSION, |
| 77 | + INCDataLoader, |
| 78 | +) |
74 | 79 |
|
75 |
| -INTEL_EXTENSION_FOR_TRANSFORMERS_MINIMUM_VERSION = "1.4.0" |
76 | 80 |
|
77 |
| -if is_intel_extension_for_transformers_available(): |
78 |
| - if is_intel_extension_for_transformers_version("!=", INTEL_EXTENSION_FOR_TRANSFORMERS_MINIMUM_VERSION): |
| 81 | +if is_itrex_available(): |
| 82 | + if is_itrex_version("<", ITREX_MINIMUM_VERSION): |
79 | 83 | raise ImportError(
|
80 |
| - f"Found an incompatible version of `intel-extension-for-transformers`. Found version {_intel_extension_for_transformers_version}, " |
81 |
| - f"but only version {INTEL_EXTENSION_FOR_TRANSFORMERS_MINIMUM_VERSION} is supported." |
| 84 | + f"Found an incompatible version of `intel-extension-for-transformers`. Found version {_itrex_version}, " |
| 85 | + f"but only version {ITREX_MINIMUM_VERSION} or higher is supported." |
82 | 86 | )
|
83 | 87 | from intel_extension_for_transformers.transformers.llm.quantization.utils import convert_to_quantized_model
|
84 | 88 | from intel_extension_for_transformers.transformers.modeling.modeling_auto import save_low_bit
|
|
92 | 96 |
|
93 | 97 | logger = logging.getLogger(__name__)
|
94 | 98 |
|
95 |
| -NEURAL_COMPRESSOR_MINIMUM_VERSION = "2.1.0" |
96 |
| -NEURAL_COMPRESSOR_WEIGHT_ONLY_MINIMUM_VERSION = "2.3.0" |
97 |
| -IPEX_MINIMUM_VERSION = "2.1.0" |
98 |
| -ITREX_MINIMUM_TORCH_VERSION = "2.2.0" |
99 | 99 |
|
100 | 100 | if is_neural_compressor_version("<", NEURAL_COMPRESSOR_MINIMUM_VERSION):
|
101 | 101 | raise ImportError(
|
@@ -231,8 +231,8 @@ def quantize(
|
231 | 231 | f"Found an incompatible version of neural-compressor. Found version {_neural_compressor_version}, "
|
232 | 232 | f"but only version {NEURAL_COMPRESSOR_WEIGHT_ONLY_MINIMUM_VERSION} or higher supports weight-only quantization."
|
233 | 233 | )
|
234 |
| - if not is_intel_extension_for_transformers_available(): |
235 |
| - raise ImportError(INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR.format("Weight only quantization")) |
| 234 | + if not is_itrex_available(): |
| 235 | + raise ImportError(ITREX_IMPORT_ERROR.format("Weight only quantization")) |
236 | 236 |
|
237 | 237 | if is_torch_version("<", ITREX_MINIMUM_TORCH_VERSION):
|
238 | 238 | raise ImportError(
|
@@ -514,70 +514,3 @@ def _get_calibration_dataloader(
|
514 | 514 | def _remove_unused_columns(self, dataset: Dataset):
|
515 | 515 | ignored_columns = list(set(dataset.column_names) - set(self._signature_columns))
|
516 | 516 | return dataset.remove_columns(ignored_columns)
|
517 |
| - |
518 |
| - |
519 |
| -# Adapted from https://github.com/intel/neural-compressor/blob/master/neural_compressor/utils/pytorch.py#L96 |
520 |
| -def _apply_quantization_from_config(q_config: Dict, model: torch.nn.Module) -> torch.nn.Module: |
521 |
| - """ |
522 |
| - Apply Intel Neural Compressor quantization steps on the given model. |
523 |
| -
|
524 |
| - Arguments: |
525 |
| - q_config (`Dict`): |
526 |
| - Dictionary containing all quantization information such as approach, dtype, scheme and granularity. |
527 |
| - model (`torch.nn.Module`): |
528 |
| - Model to quantize. |
529 |
| - Returns: |
530 |
| - q_model (`torch.nn.Module`): |
531 |
| - Quantized model. |
532 |
| - """ |
533 |
| - from torch.quantization import add_observer_, convert |
534 |
| - from torch.quantization.quantize_fx import convert_fx, prepare_fx, prepare_qat_fx |
535 |
| - |
536 |
| - approach = q_config.get("approach") |
537 |
| - framework = q_config.get("framework") |
538 |
| - |
539 |
| - if approach not in SUPPORTED_QUANT_MODE: |
540 |
| - raise ValueError( |
541 |
| - "Unknown quantization approach. Supported approach are " + ", ".join(SUPPORTED_QUANT_MODE.keys()) |
542 |
| - ) |
543 |
| - |
544 |
| - quant_mode = INCQuantizationMode(approach) |
545 |
| - q_model = copy.deepcopy(model) |
546 |
| - q_model.eval() |
547 |
| - |
548 |
| - if framework == "pytorch_fx": |
549 |
| - op_cfgs = _cfg_to_qconfig(q_config, approach) |
550 |
| - fx_op_cfgs = _cfgs_to_fx_cfgs(op_cfgs, approach) |
551 |
| - |
552 |
| - if not q_config["fx_sub_module_list"]: |
553 |
| - if quant_mode == INCQuantizationMode.AWARE_TRAINING: |
554 |
| - q_model.train() |
555 |
| - q_model = prepare_qat_fx(q_model, fx_op_cfgs) |
556 |
| - else: |
557 |
| - q_model = prepare_fx(q_model, fx_op_cfgs) |
558 |
| - q_model = convert_fx(q_model) |
559 |
| - |
560 |
| - else: |
561 |
| - sub_module_list = q_config["fx_sub_module_list"] |
562 |
| - if q_config["approach"] == "quant_aware_training": |
563 |
| - q_model.train() |
564 |
| - PyTorch_FXAdaptor.prepare_sub_graph(sub_module_list, fx_op_cfgs, q_model, prefix="", is_qat=True) |
565 |
| - else: |
566 |
| - PyTorch_FXAdaptor.prepare_sub_graph(sub_module_list, fx_op_cfgs, q_model, prefix="") |
567 |
| - PyTorch_FXAdaptor.convert_sub_graph(sub_module_list, q_model, prefix="") |
568 |
| - |
569 |
| - else: |
570 |
| - if quant_mode == INCQuantizationMode.DYNAMIC: |
571 |
| - q_mapping = torch.quantization.quantization_mappings.get_default_dynamic_quant_module_mappings() |
572 |
| - op_cfgs = _cfg_to_qconfig(q_config, approach) |
573 |
| - else: |
574 |
| - q_mapping = torch.quantization.quantization_mappings.get_default_static_quant_module_mappings() |
575 |
| - op_cfgs = _cfg_to_qconfig(q_config) |
576 |
| - |
577 |
| - _propagate_qconfig(q_model, op_cfgs, approach=approach) |
578 |
| - |
579 |
| - if quant_mode != INCQuantizationMode.DYNAMIC: |
580 |
| - add_observer_(q_model) |
581 |
| - q_model = convert(q_model, mapping=q_mapping, inplace=True) |
582 |
| - |
583 |
| - return q_model |
0 commit comments