|
19 | 19 | from enum import Enum
|
20 | 20 | from itertools import chain
|
21 | 21 | from pathlib import Path
|
22 |
| -from typing import Callable, Dict, Optional, Union |
| 22 | +from typing import Callable, Optional, Union |
23 | 23 |
|
24 | 24 | import torch
|
25 | 25 | from datasets import Dataset, load_dataset
|
26 |
| -from neural_compressor.adaptor.pytorch import PyTorch_FXAdaptor, _cfg_to_qconfig, _propagate_qconfig |
27 | 26 | from neural_compressor.config import PostTrainingQuantConfig
|
28 | 27 | from neural_compressor.experimental.export import torch_to_int8_onnx
|
29 | 28 | from neural_compressor.model.onnx_model import ONNXModel
|
|
47 | 46 |
|
48 | 47 | from ..utils.constant import _TASK_ALIASES, MIN_QDQ_ONNX_OPSET, ONNX_WEIGHTS_NAME, WEIGHTS_NAME
|
49 | 48 | from ..utils.import_utils import (
|
50 |
| - INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR, |
51 |
| - _intel_extension_for_transformers_version, |
| 49 | + ITREX_IMPORT_ERROR, |
52 | 50 | _ipex_version,
|
| 51 | + _itrex_version, |
53 | 52 | _neural_compressor_version,
|
54 | 53 | _torch_version,
|
55 |
| - is_intel_extension_for_transformers_available, |
56 |
| - is_intel_extension_for_transformers_version, |
57 | 54 | is_ipex_version,
|
| 55 | + is_itrex_available, |
| 56 | + is_itrex_version, |
58 | 57 | is_neural_compressor_version,
|
59 | 58 | is_torch_version,
|
60 | 59 | )
|
|
69 | 68 | INCModelForTokenClassification,
|
70 | 69 | INCModelForVision2Seq,
|
71 | 70 | )
|
72 |
| -from .utils import INCDataLoader, _cfgs_to_fx_cfgs |
73 |
| - |
| 71 | +from .utils import ( |
| 72 | + IPEX_MINIMUM_VERSION, |
| 73 | + ITREX_MINIMUM_TORCH_VERSION, |
| 74 | + ITREX_MINIMUM_VERSION, |
| 75 | + NEURAL_COMPRESSOR_MINIMUM_VERSION, |
| 76 | + NEURAL_COMPRESSOR_WEIGHT_ONLY_MINIMUM_VERSION, |
| 77 | + INCDataLoader, |
| 78 | +) |
74 | 79 |
|
75 |
| -INTEL_EXTENSION_FOR_TRANSFORMERS_MINIMUM_VERSION = "1.4.0" |
76 | 80 |
|
77 |
| -if is_intel_extension_for_transformers_available(): |
78 |
| - if is_intel_extension_for_transformers_version("!=", INTEL_EXTENSION_FOR_TRANSFORMERS_MINIMUM_VERSION): |
| 81 | +if is_itrex_available(): |
| 82 | + if is_itrex_version("<", ITREX_MINIMUM_VERSION): |
79 | 83 | raise ImportError(
|
80 |
| - f"Found an incompatible version of `intel-extension-for-transformers`. Found version {_intel_extension_for_transformers_version}, " |
81 |
| - f"but only version {INTEL_EXTENSION_FOR_TRANSFORMERS_MINIMUM_VERSION} is supported." |
| 84 | + f"Found an incompatible version of `intel-extension-for-transformers`. Found version {_itrex_version}, " |
| 85 | + f"but only version {ITREX_MINIMUM_VERSION} or higher is supported." |
82 | 86 | )
|
83 | 87 | from intel_extension_for_transformers.transformers.llm.quantization.utils import convert_to_quantized_model
|
84 | 88 | from intel_extension_for_transformers.transformers.modeling.modeling_auto import save_low_bit
|
|
92 | 96 |
|
93 | 97 | logger = logging.getLogger(__name__)
|
94 | 98 |
|
95 |
| -NEURAL_COMPRESSOR_MINIMUM_VERSION = "2.1.0" |
96 |
| -NEURAL_COMPRESSOR_WEIGHT_ONLY_MINIMUM_VERSION = "2.3.0" |
97 |
| -IPEX_MINIMUM_VERSION = "2.1.0" |
98 |
| -ITREX_MINIMUM_TORCH_VERSION = "2.2.0" |
99 | 99 |
|
100 | 100 | if is_neural_compressor_version("<", NEURAL_COMPRESSOR_MINIMUM_VERSION):
|
101 | 101 | raise ImportError(
|
@@ -231,8 +231,8 @@ def quantize(
|
231 | 231 | f"Found an incompatible version of neural-compressor. Found version {_neural_compressor_version}, "
|
232 | 232 | f"but only version {NEURAL_COMPRESSOR_WEIGHT_ONLY_MINIMUM_VERSION} or higher supports weight-only quantization."
|
233 | 233 | )
|
234 |
| - if not is_intel_extension_for_transformers_available(): |
235 |
| - raise ImportError(INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR.format("Weight only quantization")) |
| 234 | + if not is_itrex_available(): |
| 235 | + raise ImportError(ITREX_IMPORT_ERROR.format("Weight only quantization")) |
236 | 236 |
|
237 | 237 | if is_torch_version("<", ITREX_MINIMUM_TORCH_VERSION):
|
238 | 238 | raise ImportError(
|
@@ -516,70 +516,3 @@ def _get_calibration_dataloader(
|
516 | 516 | def _remove_unused_columns(self, dataset: Dataset):
|
517 | 517 | ignored_columns = list(set(dataset.column_names) - set(self._signature_columns))
|
518 | 518 | return dataset.remove_columns(ignored_columns)
|
519 |
| - |
520 |
| - |
521 |
| -# Adapted from https://github.com/intel/neural-compressor/blob/master/neural_compressor/utils/pytorch.py#L96 |
522 |
| -def _apply_quantization_from_config(q_config: Dict, model: torch.nn.Module) -> torch.nn.Module: |
523 |
| - """ |
524 |
| - Apply Intel Neural Compressor quantization steps on the given model. |
525 |
| - |
526 |
| - Arguments: |
527 |
| - q_config (`Dict`): |
528 |
| - Dictionary containing all quantization information such as approach, dtype, scheme and granularity. |
529 |
| - model (`torch.nn.Module`): |
530 |
| - Model to quantize. |
531 |
| - Returns: |
532 |
| - q_model (`torch.nn.Module`): |
533 |
| - Quantized model. |
534 |
| - """ |
535 |
| - from torch.quantization import add_observer_, convert |
536 |
| - from torch.quantization.quantize_fx import convert_fx, prepare_fx, prepare_qat_fx |
537 |
| - |
538 |
| - approach = q_config.get("approach") |
539 |
| - framework = q_config.get("framework") |
540 |
| - |
541 |
| - if approach not in SUPPORTED_QUANT_MODE: |
542 |
| - raise ValueError( |
543 |
| - "Unknown quantization approach. Supported approach are " + ", ".join(SUPPORTED_QUANT_MODE.keys()) |
544 |
| - ) |
545 |
| - |
546 |
| - quant_mode = INCQuantizationMode(approach) |
547 |
| - q_model = copy.deepcopy(model) |
548 |
| - q_model.eval() |
549 |
| - |
550 |
| - if framework == "pytorch_fx": |
551 |
| - op_cfgs = _cfg_to_qconfig(q_config, approach) |
552 |
| - fx_op_cfgs = _cfgs_to_fx_cfgs(op_cfgs, approach) |
553 |
| - |
554 |
| - if not q_config["fx_sub_module_list"]: |
555 |
| - if quant_mode == INCQuantizationMode.AWARE_TRAINING: |
556 |
| - q_model.train() |
557 |
| - q_model = prepare_qat_fx(q_model, fx_op_cfgs) |
558 |
| - else: |
559 |
| - q_model = prepare_fx(q_model, fx_op_cfgs) |
560 |
| - q_model = convert_fx(q_model) |
561 |
| - |
562 |
| - else: |
563 |
| - sub_module_list = q_config["fx_sub_module_list"] |
564 |
| - if q_config["approach"] == "quant_aware_training": |
565 |
| - q_model.train() |
566 |
| - PyTorch_FXAdaptor.prepare_sub_graph(sub_module_list, fx_op_cfgs, q_model, prefix="", is_qat=True) |
567 |
| - else: |
568 |
| - PyTorch_FXAdaptor.prepare_sub_graph(sub_module_list, fx_op_cfgs, q_model, prefix="") |
569 |
| - PyTorch_FXAdaptor.convert_sub_graph(sub_module_list, q_model, prefix="") |
570 |
| - |
571 |
| - else: |
572 |
| - if quant_mode == INCQuantizationMode.DYNAMIC: |
573 |
| - q_mapping = torch.quantization.quantization_mappings.get_default_dynamic_quant_module_mappings() |
574 |
| - op_cfgs = _cfg_to_qconfig(q_config, approach) |
575 |
| - else: |
576 |
| - q_mapping = torch.quantization.quantization_mappings.get_default_static_quant_module_mappings() |
577 |
| - op_cfgs = _cfg_to_qconfig(q_config) |
578 |
| - |
579 |
| - _propagate_qconfig(q_model, op_cfgs, approach=approach) |
580 |
| - |
581 |
| - if quant_mode != INCQuantizationMode.DYNAMIC: |
582 |
| - add_observer_(q_model) |
583 |
| - q_model = convert(q_model, mapping=q_mapping, inplace=True) |
584 |
| - |
585 |
| - return q_model |
0 commit comments