Commit 94f1ac5

Merge remote-tracking branch 'upstream/main' into penghuic/weight_only_with_itrex

2 parents: dd981df + a3bf172
File tree: 7 files changed, 35 additions, 60 deletions

optimum/exporters/openvino/__main__.py (+1)

@@ -323,6 +323,7 @@ class StoreAttr(object):
         fn_get_submodels=fn_get_submodels,
         preprocessors=preprocessors,
         device=device,
+        trust_remote_code=trust_remote_code,
         **kwargs_shapes,
     )

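This hunk forwards the user-facing `trust_remote_code` flag into `export_from_model` (see the next file). A minimal usage sketch, assuming a hypothetical Hub repository that ships custom modeling code:

    from optimum.exporters.openvino import main_export

    # trust_remote_code is now threaded through the whole export pipeline
    main_export(
        "org/model-with-custom-code",  # hypothetical repository id
        output="ov_model",
        trust_remote_code=True,
    )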

optimum/exporters/openvino/convert.py (+2, -1)

@@ -493,6 +493,7 @@ def export_from_model(
     fn_get_submodels: Optional[Callable] = None,
     preprocessors: List = None,
     device: str = "cpu",
+    trust_remote_code: bool = False,
     **kwargs_shapes,
 ):
     if ov_config is not None and ov_config.quantization_config and not is_nncf_available():

@@ -605,7 +606,7 @@ def export_from_model(
         generation_config.save_pretrained(output)

     model_name_or_path = model.config._name_or_path
-    maybe_save_preprocessors(model_name_or_path, output)
+    maybe_save_preprocessors(model_name_or_path, output, trust_remote_code=trust_remote_code)

     files_subpaths = ["openvino_" + model_name + ".xml" for model_name in models_and_export_configs.keys()]
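These two hunks accept the flag in `export_from_model` and pass it on to `maybe_save_preprocessors`, so preprocessors defined by a repository's custom code can be loaded and re-saved next to the exported model. A sketch of what happens underneath in that case (repository id hypothetical; `AutoTokenizer` is one of the preprocessor classes tried):

    from transformers import AutoTokenizer

    # Without trust_remote_code=True, this load would refuse to execute the
    # repository's custom tokenizer code and raise instead.
    tokenizer = AutoTokenizer.from_pretrained("org/model-with-custom-code", trust_remote_code=True)
    tokenizer.save_pretrained("ov_model")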

optimum/intel/ipex/modeling_base.py (+8, -14)

@@ -90,6 +90,8 @@ def ipex_jit_trace(model, task, use_cache):
     model.config.return_dict = False

     model = ipex.optimize(model.eval(), dtype=model.dtype, inplace=True)
+    # Disable linear repack while jit tracing to reduce memory usage
+    ipex._C.disable_jit_linear_repack()
     with torch.no_grad():
         trace_model = torch.jit.trace(
             model,

@@ -171,23 +173,10 @@ def _from_transformers(
         model = TasksManager.get_model_from_task(task, model_id, **model_kwargs)
         traced_model = ipex_jit_trace(model, task, use_cache)

-        save_dir = TemporaryDirectory()
-        save_dir_path = Path(save_dir.name)
-        torch.jit.save(traced_model, save_dir_path / WEIGHTS_NAME)
         config.torchscript = True
         config.torch_dtype = torch_dtype

-        return cls._from_pretrained(
-            model_id=save_dir_path,
-            config=config,
-            use_auth_token=use_auth_token,
-            revision=revision,
-            force_download=force_download,
-            cache_dir=cache_dir,
-            local_files_only=local_files_only,
-            use_cache=use_cache,
-            model_dtype=torch_dtype,
-        )
+        return cls(traced_model, config=config, model_save_dir=model_id, use_cache=use_cache, warmup=False)

     @classmethod
     def _from_pretrained(

@@ -203,6 +192,11 @@ def _from_pretrained(
         subfolder: str = "",
         **kwargs,
     ):
+        if not getattr(config, "torchscript", False):
+            raise ValueError(
+                "`config.torchscript` should be set to `True`. If your model is not a TorchScript model and needs to be traced, please set `export=True` when loading it with `.from_pretrained()`."
+            )
+
         # Load the model from local directory
         if os.path.isdir(model_id):
             model_cache_path = os.path.join(model_id, file_name)
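Net effect of the three hunks: `_from_transformers` builds the wrapper directly from the in-memory traced module instead of round-tripping it through a `TemporaryDirectory` with `torch.jit.save` and `_from_pretrained`, and `_from_pretrained` now fails fast on configs not marked as TorchScript. The user-facing entry point is unchanged; a minimal sketch (model id illustrative):

    from transformers import AutoTokenizer
    from optimum.intel import IPEXModelForCausalLM

    # export=True goes through _from_transformers: the eager model is jit-traced
    # once and wrapped directly, with warmup deferred (warmup=False per the diff).
    model = IPEXModelForCausalLM.from_pretrained("gpt2", export=True)

    tokenizer = AutoTokenizer.from_pretrained("gpt2")
    inputs = tokenizer("Hello,", return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=8)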

optimum/intel/openvino/quantization.py (+22, -34)

@@ -24,10 +24,9 @@
 import openvino
 import torch
 import transformers
-from nncf import CompressWeightsMode, IgnoredScope, NNCFConfig, SensitivityMetric
+from nncf import CompressWeightsMode, IgnoredScope, SensitivityMetric
 from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters
-from nncf.torch import create_compressed_model, register_default_init_args, register_module
-from nncf.torch.dynamic_graph.io_handling import wrap_nncf_model_inputs_with_objwalk
+from nncf.torch import register_module
 from nncf.torch.initialization import PTInitializingDataLoader
 from openvino._offline_transformations import compress_quantize_weights_transformation
 from openvino.runtime import Core, Tensor

@@ -47,7 +46,7 @@
 from ..utils.constant import _TASK_ALIASES
 from ..utils.import_utils import DATASETS_IMPORT_ERROR, is_datasets_available
 from ..utils.modeling_utils import get_model_device
-from .configuration import DEFAULT_QUANTIZATION_CONFIG, OVConfig, OVWeightQuantizationConfig
+from .configuration import OVConfig, OVWeightQuantizationConfig
 from .modeling_base import OVBaseModel
 from .utils import (
     MAX_ONNX_OPSET,

@@ -240,8 +239,6 @@ def quantize(
         if ov_config is not None:
             if not isinstance(ov_config, OVConfig):
                 raise TypeError(f"`ov_config` should be an `OVConfig`, but got: {type(ov_config)} instead.")
-            elif ov_config.compression is None:
-                ov_config.compression = DEFAULT_QUANTIZATION_CONFIG

         if isinstance(self.model, OVBaseModel):
             self._quantize_ovbasemodel(

@@ -263,7 +260,6 @@
             self._quantize_torchmodel(
                 calibration_dataset,
                 save_directory,
-                ov_config,
                 file_name,
                 batch_size,
                 data_collator,

@@ -319,7 +315,7 @@ def _quantize_ovbasemodel(
             calibration_dataloader = data_cache

         # Actual model quantization
-        quantization_dataset = nncf.Dataset(calibration_dataloader, lambda x: x)
+        quantization_dataset = nncf.Dataset(calibration_dataloader)
         quantized_model = nncf.quantize(
             self.model.model,
             quantization_dataset,

@@ -334,12 +330,13 @@ def _quantize_torchmodel(
         self,
         calibration_dataset: "Dataset",
         save_directory: Union[str, Path],
-        ov_config: OVConfig = None,
         file_name: Optional[str] = None,
         batch_size: int = 1,
         data_collator: Optional[DataCollator] = None,
         remove_unused_columns: bool = True,
         weights_only: bool = False,
+        save_onnx_model: bool = False,
+        **kwargs,
     ):
         self._set_task()
         save_directory = Path(save_directory)

@@ -356,15 +353,8 @@
             model_type=model_type,
         )

-        if ov_config is None:
-            logger.info(
-                "No configuration describing the quantization process was provided, a default OVConfig will be generated."
-            )
-            ov_config = OVConfig(compression=DEFAULT_QUANTIZATION_CONFIG)
         onnx_file_name = (
-            ONNX_WEIGHTS_NAME
-            if file_name is None and ov_config.save_onnx_model
-            else Path(ov_file_name).with_suffix(".onnx")
+            ONNX_WEIGHTS_NAME if file_name is None and save_onnx_model else Path(ov_file_name).with_suffix(".onnx")
         )

         task = self.task

@@ -398,7 +388,7 @@
         if stateful:
             logger.warn(
                 "Quantization algorithm does not support optimized stateful models. "
-                "The original model without optimization will be quantized and export."
+                "The original model without optimization will be quantized and exported."
             )
             stateful = False

@@ -409,40 +399,38 @@
             data_collator=data_collator,
         )

-        model_inputs = next(iter(calibration_dataloader))
-        ov_config.add_input_info(model_inputs)
-        nncf_config = NNCFConfig.from_dict(ov_config.__dict__)
-        nncf_config = register_default_init_args(nncf_config, calibration_dataloader)
-        controller, model = create_compressed_model(
-            model, nncf_config, wrap_inputs_fn=wrap_nncf_model_inputs_with_objwalk
+        quantization_dataset = nncf.Dataset(calibration_dataloader)
+        model = nncf.quantize(
+            model,
+            quantization_dataset,
+            model_type=nncf.ModelType.TRANSFORMER if not kwargs.get("model_type") else kwargs.get("model_type"),
+            fast_bias_correction=kwargs.get("fast_bias_correction", True),
+            **kwargs,
         )
-        model = controller.strip(do_copy=False)

-        model_path = save_directory / (onnx_file_name if ov_config.save_onnx_model else ov_file_name)
+        model_path = save_directory / (onnx_file_name if save_onnx_model else ov_file_name)
         onnx_path = save_directory / onnx_file_name
-        export_fn = export if not ov_config.save_onnx_model else export_pytorch_via_onnx
+        export_fn = export if not save_onnx_model else export_pytorch_via_onnx
         opset = min(onnx_config.DEFAULT_ONNX_OPSET, MAX_ONNX_OPSET)
         opset = max(opset, MIN_ONNX_QDQ_OPSET)
-        kwargs = {}
-        if not ov_config.save_onnx_model:
-            kwargs = {"stateful": stateful}
+        export_kwargs = {}
+        if not save_onnx_model:
+            export_kwargs = {"stateful": stateful}

-        _, _, is_onnx = export_fn(model=model, config=onnx_config, output=model_path, opset=opset, **kwargs)
+        _, _, is_onnx = export_fn(model=model, config=onnx_config, output=model_path, opset=opset, **export_kwargs)
         if is_onnx:
             # Load and save the compressed model
             model = core.read_model(onnx_path)
             # The model requires a second save for applying weight compression transformations
             self._save_pretrained(model, output_path)
             # if onnx conversion happens as fallback for pytorch conversion, remove onnx model
-            if not ov_config.save_onnx_model:
+            if not save_onnx_model:
                 os.remove(onnx_path)
                 try:
                     os.remove(f"{onnx_path}_data")
                 except FileNotFoundError:
                     pass

-        ov_config.save_pretrained(save_directory)
-
     @staticmethod
     def _save_pretrained(model: openvino.runtime.Model, output_path: str):
         compress_quantize_weights_transformation(model)
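Taken together, these hunks retire the legacy NNCF flow (`NNCFConfig`, `register_default_init_args`, `create_compressed_model`, `controller.strip`) in favor of the post-training `nncf.quantize()` API, with `save_onnx_model` and extra NNCF arguments passed explicitly instead of through an `OVConfig`. A self-contained sketch of the new call pattern on a toy module (stand-in model and data, not the optimum-intel code itself):

    import nncf
    import torch

    # Stand-ins: any torch.nn.Module plus an iterable of example inputs.
    model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.ReLU())
    calibration_data = [torch.randn(1, 8) for _ in range(10)]

    # Without a transform function, nncf.Dataset passes items through unchanged,
    # which is why the earlier `lambda x: x` argument could be dropped.
    quantization_dataset = nncf.Dataset(calibration_data)

    quantized = nncf.quantize(
        model,
        quantization_dataset,
        fast_bias_correction=True,  # the default _quantize_torchmodel now uses
    )
    # _quantize_torchmodel additionally defaults model_type=nncf.ModelType.TRANSFORMER
    # for transformer architectures.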

optimum/intel/version.py (+1, -1)

@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-__version__ = "1.16.0.dev0"
+__version__ = "1.17.0.dev0"

setup.py (+1, -1)

@@ -29,7 +29,7 @@
 INSTALL_REQUIRE = [
     "torch>=1.11",
     "transformers>=4.36.0,<4.40.0",
-    "optimum @ git+https://github.com/huggingface/optimum.git#egg=optimum",
+    "optimum~=1.18",
     "datasets>=1.4.0",
     "sentencepiece",
     "scipy",

tests/openvino/test_quantization.py (-9)

@@ -57,7 +57,6 @@
     OVWeightQuantizationConfig,
 )

-from optimum.intel.openvino.configuration import INT8_WEIGHT_COMPRESSION_CONFIG, DEFAULT_QUANTIZATION_CONFIG
 from optimum.intel.openvino.quantization import InferRequestWrapper
 from optimum.intel.utils.import_utils import is_openvino_version
 from utils_tests import MODEL_NAMES, get_num_quantized_nodes, _ARCHITECTURES_TO_EXPECTED_INT8

@@ -110,10 +109,6 @@ def preprocess_function(examples, tokenizer):
         outputs = model(**tokens)
         self.assertTrue("logits" in outputs)

-        # Verify that that the configuration is correctly saved and loaded
-        loaded_config = OVConfig.from_pretrained(tmp_dir)
-        self.assertEqual(DEFAULT_QUANTIZATION_CONFIG, loaded_config.to_dict()["compression"])
-
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_QUANTIZED_MATMULS)
     def test_ovmodel_static_quantization(self, model_cls, model_name, expected_fake_quantize, expected_int8):
         task = model_cls.export_feature

@@ -265,10 +260,6 @@ def test_automodel_weight_compression(self, model_cls, model_name, expected_pt_int8, expected_ov_int8):
         outputs = model(**tokens)
         self.assertTrue("logits" in outputs)

-        # Verify that that the configuration is correctly saved and loaded
-        loaded_config = OVConfig.from_pretrained(tmp_dir)
-        self.assertIsNotNone(loaded_config)
-
     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_8BIT_COMPRESSED_MATMULS)
     def test_ovmodel_8bit_weight_compression(self, model_cls, model_name, expected_pt_int8, expected_ov_int8):
         task = model_cls.export_feature