Commit 3afa9d0

Merge branch 'main' into deprecate-use_auth_token
2 parents 8fca1d6 + a0dc06c commit 3afa9d0

File tree

21 files changed: +124 −184 lines changed

.github/workflows/test_inc.yml

+1-1
@@ -33,7 +33,7 @@ jobs:
           pip install cmake
           pip install py-cpuinfo
           pip install .[neural-compressor,diffusers,tests]
-          pip install intel-extension-for-transformers==1.4.0
+          pip install intel-extension-for-transformers
           pip install peft
 
       - name: Test with Pytest

.github/workflows/test_offline.yaml

+40
@@ -0,0 +1,40 @@
+name: Offline usage / Python - Test
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  build:
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: [3.9]
+        os: [ubuntu-latest]
+
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v3
+      - name: Setup Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v3
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          pip install .[tests,openvino]
+      - name: Test
+        run: |
+          HF_HOME=/tmp/ huggingface-cli download hf-internal-testing/tiny-random-gpt2
+          HF_HOME=/tmp/ HF_HUB_OFFLINE=1 optimum-cli export openvino --model hf-internal-testing/tiny-random-gpt2 gpt2_openvino --task text-generation
+
+          huggingface-cli download hf-internal-testing/tiny-random-gpt2
+          HF_HUB_OFFLINE=1 optimum-cli export openvino --model hf-internal-testing/tiny-random-gpt2 gpt2_openvino --task text-generation
+
+          pytest tests/openvino/test_modeling.py -k "test_load_from_hub" -s -vvvvv
+          HF_HUB_OFFLINE=1 pytest tests/openvino/test_modeling.py -k "test_load_from_hub" -s -vvvvv
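Note: the workflow above exercises the Hub's offline mode end to end — warm the cache while online, then re-run the same export with HF_HUB_OFFLINE=1. A minimal Python sketch of the same pattern (illustrative, not part of this commit), using huggingface_hub:

```python
from huggingface_hub import snapshot_download

repo_id = "hf-internal-testing/tiny-random-gpt2"

# Online pass: populate the local cache (HF_HOME controls its location).
snapshot_download(repo_id)

# Offline pass: local_files_only=True is the in-process analogue of the
# HF_HUB_OFFLINE=1 environment variable -- resolve everything from the cache
# and raise instead of downloading if a file is missing.
local_path = snapshot_download(repo_id, local_files_only=True)
print(local_path)
```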

examples/neural_compressor/language-modeling/run_clm.py

+4-7
@@ -57,13 +57,10 @@
 from transformers.utils.versions import require_version
 
 from optimum.intel.neural_compressor import INCModelForCausalLM, INCQuantizer, INCTrainer
-from optimum.intel.utils.import_utils import (
-    INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR,
-    is_intel_extension_for_transformers_available,
-)
+from optimum.intel.utils.import_utils import ITREX_IMPORT_ERROR, is_itrex_available
 
 
-if is_intel_extension_for_transformers_available():
+if is_itrex_available():
     from intel_extension_for_transformers.transformers.utils.config import GPTQConfig, RtnConfig
 
 os.environ["CUDA_VISIBLE_DEVICES"] = ""
@@ -658,8 +655,8 @@ def compute_metrics(eval_preds):
     else:
         recipes = {}
     if optim_args.quantization_approach == "weight_only":
-        if not is_intel_extension_for_transformers_available():
-            raise ImportError(INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR.format("WeightOnly quantization"))
+        if not is_itrex_available():
+            raise ImportError(ITREX_IMPORT_ERROR.format("WeightOnly quantization"))
         if optim_args.apply_pruning or optim_args.apply_distillation:
             raise ValueError("Weight only quantization and pruning or distillation cannot be combined.")
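Note: this change is a pure rename (is_intel_extension_for_transformers_available → is_itrex_available, INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR → ITREX_IMPORT_ERROR). A hedged compatibility sketch for scripts that must run against both pre- and post-rename optimum-intel releases (the aliasing shim is an assumption, not part of this commit):

```python
try:
    # Post-rename releases (this commit and later).
    from optimum.intel.utils.import_utils import ITREX_IMPORT_ERROR, is_itrex_available
except ImportError:
    # Pre-rename releases: alias the old names onto the new ones.
    from optimum.intel.utils.import_utils import (
        INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR as ITREX_IMPORT_ERROR,
        is_intel_extension_for_transformers_available as is_itrex_available,
    )

if not is_itrex_available():
    raise ImportError(ITREX_IMPORT_ERROR.format("WeightOnly quantization"))
```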
665662

optimum/commands/export/openvino.py

+5-1
@@ -18,6 +18,8 @@
 from pathlib import Path
 from typing import TYPE_CHECKING, Optional
 
+from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
+
 from ...exporters import TasksManager
 from ...intel.utils.import_utils import DIFFUSERS_IMPORT_ERROR, is_diffusers_available
 from ..base import BaseOptimumCLICommand, CommandInfo
@@ -47,7 +49,9 @@ def parse_args_openvino(parser: "ArgumentParser"):
             f" {str(TasksManager.get_all_tasks())}. For decoder models, use `xxx-with-past` to export the model using past key values in the decoder."
         ),
     )
-    optional_group.add_argument("--cache_dir", type=str, default=None, help="Path indicating where to store cache.")
+    optional_group.add_argument(
+        "--cache_dir", type=str, default=HUGGINGFACE_HUB_CACHE, help="Path indicating where to store cache."
+    )
     optional_group.add_argument(
         "--framework",
         type=str,
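Note: with default=None the parsed namespace carried no cache location and callers had to resolve the Hub cache themselves; defaulting to HUGGINGFACE_HUB_CACHE makes the path concrete at parse time. A minimal sketch of the effect (the parser wiring here is illustrative; the argument definition matches the diff above):

```python
from argparse import ArgumentParser

from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE

parser = ArgumentParser()
parser.add_argument(
    "--cache_dir", type=str, default=HUGGINGFACE_HUB_CACHE, help="Path indicating where to store cache."
)

args = parser.parse_args([])  # --cache_dir not given on the command line
# Previously args.cache_dir was None; now it is already the hub cache path,
# typically ~/.cache/huggingface/hub.
print(args.cache_dir)
```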

optimum/exporters/openvino/__main__.py

+2-1
@@ -16,6 +16,7 @@
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Union
 
+from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
 from requests.exceptions import ConnectionError as RequestsConnectionError
 from transformers import AutoConfig, AutoTokenizer, PreTrainedTokenizerBase
 
@@ -48,7 +49,7 @@ def main_export(
     task: str = "auto",
     device: str = "cpu",
     framework: Optional[str] = None,
-    cache_dir: Optional[str] = None,
+    cache_dir: str = HUGGINGFACE_HUB_CACHE,
     trust_remote_code: bool = False,
     pad_token_id: Optional[int] = None,
     subfolder: str = "",

optimum/intel/generation/modeling.py

+3-2
@@ -21,6 +21,7 @@
 
 import torch
 from huggingface_hub import hf_hub_download
+from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
 from transformers import AutoConfig, AutoModelForCausalLM, GenerationConfig, PretrainedConfig, PreTrainedModel
 from transformers.generation import GenerationMixin
 from transformers.modeling_outputs import CausalLMOutputWithPast
@@ -357,7 +358,7 @@ def _from_pretrained(
         token: Optional[Union[bool, str]] = None,
         revision: Optional[Union[str, None]] = None,
         force_download: bool = False,
-        cache_dir: Optional[str] = None,
+        cache_dir: str = HUGGINGFACE_HUB_CACHE,
         file_name: Optional[str] = WEIGHTS_NAME,
         local_files_only: bool = False,
         use_cache: bool = True,
@@ -403,7 +404,7 @@ def _from_transformers(
         token: Optional[Union[bool, str]] = None,
         revision: Optional[str] = None,
         force_download: bool = False,
-        cache_dir: Optional[str] = None,
+        cache_dir: str = HUGGINGFACE_HUB_CACHE,
         subfolder: str = "",
         local_files_only: bool = False,
         use_cache: bool = True,
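Note: the same default propagates into the hf_hub_download calls these _from_pretrained/_from_transformers loaders make. An illustrative sketch (fetch_weights is a hypothetical helper, not from the diff):

```python
from huggingface_hub import hf_hub_download
from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE


def fetch_weights(model_id: str, file_name: str, cache_dir: str = HUGGINGFACE_HUB_CACHE) -> str:
    # hf_hub_download stores the file under cache_dir and returns the local
    # path; with the new default it always receives a real path, never None.
    return hf_hub_download(repo_id=model_id, filename=file_name, cache_dir=cache_dir)


print(fetch_weights("hf-internal-testing/tiny-random-gpt2", "config.json"))
```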

optimum/intel/ipex/modeling_base.py

+3-2
@@ -22,6 +22,7 @@
 import intel_extension_for_pytorch as ipex
 import torch
 from huggingface_hub import hf_hub_download
+from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
 from intel_extension_for_pytorch.cpu._auto_kernel_selection import _enable_tpp
 from intel_extension_for_pytorch.transformers.optimize import get_dummy_input
 from transformers import (
@@ -154,7 +155,7 @@ def _from_transformers(
         token: Optional[Union[bool, str]] = None,
         revision: Optional[str] = None,
         force_download: bool = False,
-        cache_dir: Optional[str] = None,
+        cache_dir: str = HUGGINGFACE_HUB_CACHE,
         subfolder: str = "",
         local_files_only: bool = False,
         torch_dtype: Optional[Union[str, "torch.dtype"]] = None,
@@ -193,7 +194,7 @@ def _from_pretrained(
         token: Optional[Union[bool, str]] = None,
         revision: Optional[Union[str, None]] = None,
         force_download: bool = False,
-        cache_dir: Optional[str] = None,
+        cache_dir: str = HUGGINGFACE_HUB_CACHE,
         file_name: Optional[str] = WEIGHTS_NAME,
         local_files_only: bool = False,
         subfolder: str = "",

optimum/intel/neural_compressor/__init__.py

+1-1
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from ..utils.import_utils import is_diffusers_available, is_intel_extension_for_transformers_available
+from ..utils.import_utils import is_diffusers_available
 from .configuration import INCConfig
 from .modeling_base import (
     INCModel,

optimum/intel/neural_compressor/modeling_base.py

+4-7
@@ -20,6 +20,7 @@
 
 import torch
 from huggingface_hub import hf_hub_download
+from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
 from neural_compressor.utils.pytorch import load
 from transformers import (
     AutoConfig,
@@ -43,11 +44,7 @@
 from optimum.intel.generation import BaseModelForCausalLM
 
 from ...modeling_base import OptimizedModel
-from ..utils.import_utils import (
-    _torch_version,
-    is_intel_extension_for_transformers_available,
-    is_torch_version,
-)
+from ..utils.import_utils import _torch_version, is_itrex_available, is_torch_version
 from .configuration import INCConfig
 from .utils import WEIGHTS_NAME
 
@@ -105,7 +102,7 @@ def _from_pretrained(
         token: Optional[Union[bool, str]] = None,
         revision: Optional[Union[str, None]] = None,
         force_download: bool = False,
-        cache_dir: Optional[str] = None,
+        cache_dir: str = HUGGINGFACE_HUB_CACHE,
         file_name: str = WEIGHTS_NAME,
         local_files_only: bool = False,
         subfolder: str = "",
@@ -137,7 +134,7 @@ def _from_pretrained(
         model_save_dir = Path(model_cache_path).parent
         inc_config = None
         msg = None
-        if is_intel_extension_for_transformers_available():
+        if is_itrex_available():
            try:
                quantization_config = PretrainedConfig.from_pretrained(model_save_dir / "quantize_config.json")
                algorithm = getattr(quantization_config, "quant_method", None)

optimum/intel/neural_compressor/quantization.py

+19-86
@@ -19,11 +19,10 @@
 from enum import Enum
 from itertools import chain
 from pathlib import Path
-from typing import Callable, Dict, Optional, Union
+from typing import Callable, Optional, Union
 
 import torch
 from datasets import Dataset, load_dataset
-from neural_compressor.adaptor.pytorch import PyTorch_FXAdaptor, _cfg_to_qconfig, _propagate_qconfig
 from neural_compressor.config import PostTrainingQuantConfig
 from neural_compressor.experimental.export import torch_to_int8_onnx
 from neural_compressor.model.onnx_model import ONNXModel
@@ -47,14 +46,14 @@
 
 from ..utils.constant import _TASK_ALIASES, MIN_QDQ_ONNX_OPSET, ONNX_WEIGHTS_NAME, WEIGHTS_NAME
 from ..utils.import_utils import (
-    INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR,
-    _intel_extension_for_transformers_version,
+    ITREX_IMPORT_ERROR,
     _ipex_version,
+    _itrex_version,
     _neural_compressor_version,
     _torch_version,
-    is_intel_extension_for_transformers_available,
-    is_intel_extension_for_transformers_version,
     is_ipex_version,
+    is_itrex_available,
+    is_itrex_version,
     is_neural_compressor_version,
     is_torch_version,
 )
@@ -69,16 +68,21 @@
     INCModelForTokenClassification,
     INCModelForVision2Seq,
 )
-from .utils import INCDataLoader, _cfgs_to_fx_cfgs
-
+from .utils import (
+    IPEX_MINIMUM_VERSION,
+    ITREX_MINIMUM_TORCH_VERSION,
+    ITREX_MINIMUM_VERSION,
+    NEURAL_COMPRESSOR_MINIMUM_VERSION,
+    NEURAL_COMPRESSOR_WEIGHT_ONLY_MINIMUM_VERSION,
+    INCDataLoader,
+)
 
-INTEL_EXTENSION_FOR_TRANSFORMERS_MINIMUM_VERSION = "1.4.0"
 
-if is_intel_extension_for_transformers_available():
-    if is_intel_extension_for_transformers_version("!=", INTEL_EXTENSION_FOR_TRANSFORMERS_MINIMUM_VERSION):
+if is_itrex_available():
+    if is_itrex_version("<", ITREX_MINIMUM_VERSION):
         raise ImportError(
-            f"Found an incompatible version of `intel-extension-for-transformers`. Found version {_intel_extension_for_transformers_version}, "
-            f"but only version {INTEL_EXTENSION_FOR_TRANSFORMERS_MINIMUM_VERSION} is supported."
+            f"Found an incompatible version of `intel-extension-for-transformers`. Found version {_itrex_version}, "
+            f"but only version {ITREX_MINIMUM_VERSION} or higher is supported."
         )
     from intel_extension_for_transformers.transformers.llm.quantization.utils import convert_to_quantized_model
     from intel_extension_for_transformers.transformers.modeling.modeling_auto import save_low_bit
@@ -92,10 +96,6 @@
 
 logger = logging.getLogger(__name__)
 
-NEURAL_COMPRESSOR_MINIMUM_VERSION = "2.1.0"
-NEURAL_COMPRESSOR_WEIGHT_ONLY_MINIMUM_VERSION = "2.3.0"
-IPEX_MINIMUM_VERSION = "2.1.0"
-ITREX_MINIMUM_TORCH_VERSION = "2.2.0"
 
 if is_neural_compressor_version("<", NEURAL_COMPRESSOR_MINIMUM_VERSION):
     raise ImportError(
@@ -231,8 +231,8 @@ def quantize(
                     f"Found an incompatible version of neural-compressor. Found version {_neural_compressor_version}, "
                     f"but only version {NEURAL_COMPRESSOR_WEIGHT_ONLY_MINIMUM_VERSION} or higher supports weight-only quantization."
                 )
-            if not is_intel_extension_for_transformers_available():
-                raise ImportError(INTEL_EXTENSION_FOR_TRANSFORMERS_IMPORT_ERROR.format("Weight only quantization"))
+            if not is_itrex_available():
+                raise ImportError(ITREX_IMPORT_ERROR.format("Weight only quantization"))
 
             if is_torch_version("<", ITREX_MINIMUM_TORCH_VERSION):
                 raise ImportError(
@@ -516,70 +516,3 @@ def _get_calibration_dataloader(
     def _remove_unused_columns(self, dataset: Dataset):
         ignored_columns = list(set(dataset.column_names) - set(self._signature_columns))
         return dataset.remove_columns(ignored_columns)
-
-
-# Adapted from https://github.com/intel/neural-compressor/blob/master/neural_compressor/utils/pytorch.py#L96
-def _apply_quantization_from_config(q_config: Dict, model: torch.nn.Module) -> torch.nn.Module:
-    """
-    Apply Intel Neural Compressor quantization steps on the given model.
-
-    Arguments:
-        q_config (`Dict`):
-            Dictionary containing all quantization information such as approach, dtype, scheme and granularity.
-        model (`torch.nn.Module`):
-            Model to quantize.
-    Returns:
-        q_model (`torch.nn.Module`):
-            Quantized model.
-    """
-    from torch.quantization import add_observer_, convert
-    from torch.quantization.quantize_fx import convert_fx, prepare_fx, prepare_qat_fx
-
-    approach = q_config.get("approach")
-    framework = q_config.get("framework")
-
-    if approach not in SUPPORTED_QUANT_MODE:
-        raise ValueError(
-            "Unknown quantization approach. Supported approach are " + ", ".join(SUPPORTED_QUANT_MODE.keys())
-        )
-
-    quant_mode = INCQuantizationMode(approach)
-    q_model = copy.deepcopy(model)
-    q_model.eval()
-
-    if framework == "pytorch_fx":
-        op_cfgs = _cfg_to_qconfig(q_config, approach)
-        fx_op_cfgs = _cfgs_to_fx_cfgs(op_cfgs, approach)
-
-        if not q_config["fx_sub_module_list"]:
-            if quant_mode == INCQuantizationMode.AWARE_TRAINING:
-                q_model.train()
-                q_model = prepare_qat_fx(q_model, fx_op_cfgs)
-            else:
-                q_model = prepare_fx(q_model, fx_op_cfgs)
-            q_model = convert_fx(q_model)
-
-        else:
-            sub_module_list = q_config["fx_sub_module_list"]
-            if q_config["approach"] == "quant_aware_training":
-                q_model.train()
-                PyTorch_FXAdaptor.prepare_sub_graph(sub_module_list, fx_op_cfgs, q_model, prefix="", is_qat=True)
-            else:
-                PyTorch_FXAdaptor.prepare_sub_graph(sub_module_list, fx_op_cfgs, q_model, prefix="")
-            PyTorch_FXAdaptor.convert_sub_graph(sub_module_list, q_model, prefix="")
-
-    else:
-        if quant_mode == INCQuantizationMode.DYNAMIC:
-            q_mapping = torch.quantization.quantization_mappings.get_default_dynamic_quant_module_mappings()
-            op_cfgs = _cfg_to_qconfig(q_config, approach)
-        else:
-            q_mapping = torch.quantization.quantization_mappings.get_default_static_quant_module_mappings()
-            op_cfgs = _cfg_to_qconfig(q_config)
-
-        _propagate_qconfig(q_model, op_cfgs, approach=approach)
-
-        if quant_mode != INCQuantizationMode.DYNAMIC:
-            add_observer_(q_model)
-        q_model = convert(q_model, mapping=q_mapping, inplace=True)
-
-    return q_model
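Note: the guard changes from rejecting every ITREX version except one pinned release (`!=`) to rejecting only versions below a floor (`<`), matching the unpinned install in the CI workflow above. A hedged re-implementation sketch (the real helpers live in optimum.intel.utils.import_utils and the constants in the package's utils module; the "1.4.0" floor is an assumption carried over from the removed pin):

```python
import importlib.metadata

from packaging import version

ITREX_MINIMUM_VERSION = "1.4.0"  # assumption: floor taken from the removed exact pin


def check_itrex() -> None:
    try:
        installed = importlib.metadata.version("intel-extension-for-transformers")
    except importlib.metadata.PackageNotFoundError:
        return  # not installed: nothing to validate
    # Old behavior: `!=` rejected everything except one exact version.
    # New behavior: `<` rejects only versions below the minimum.
    if version.parse(installed) < version.parse(ITREX_MINIMUM_VERSION):
        raise ImportError(
            f"Found an incompatible version of `intel-extension-for-transformers`. "
            f"Found version {installed}, but only version {ITREX_MINIMUM_VERSION} or higher is supported."
        )


check_itrex()
```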
