Skip to content

Commit db22a52

Browse files
committed
save openvino config to include quantization configuration
1 parent dfc4893 commit db22a52

File tree

4 files changed: +53 additions, −21 deletions

optimum/intel/openvino/modeling_base.py

+24-4
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ def __init__(
5757
dynamic_shapes: bool = True,
5858
ov_config: Optional[Dict[str, str]] = None,
5959
model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
60+
quantization_config: Optional[Union[OVWeightQuantizationConfig, Dict]] = None,
6061
**kwargs,
6162
):
6263
self.config = config
@@ -91,6 +92,10 @@ def __init__(
9192

9293
self.generation_config = GenerationConfig.from_model_config(config) if self.can_generate() else None
9394

95+
self._openvino_config = None
96+
if quantization_config:
97+
self._openvino_config = OVConfig(quantization_config=quantization_config)
98+
9499
@staticmethod
95100
def load_model(file_name: Union[str, Path], quantization_config: Union[OVWeightQuantizationConfig, Dict] = None):
96101
"""
@@ -143,6 +148,14 @@ def _save_pretrained(self, save_directory: Union[str, Path]):
143148
dst_path = os.path.join(save_directory, OV_XML_FILE_NAME)
144149
openvino.save_model(self.model, dst_path, compress_to_fp16=False)
145150

151+
self._save_openvino_config(save_directory)
152+
153+
154+
def _save_openvino_config(self, save_directory: Union[str, Path]):
155+
if self._openvino_config is not None:
156+
self._openvino_config.save_pretrained(save_directory)
157+
158+
146159
@classmethod
147160
def _from_pretrained(
148161
cls,
@@ -203,12 +216,19 @@ def _from_pretrained(
203216
local_files_only=local_files_only,
204217
)
205218

206-
# Give default quantization config if not provided and load_in_8bit=True
207-
if load_in_8bit:
208-
quantization_config = quantization_config or {"bits": 8}
219+
quantization_config = self._prepare_quantization_config(quantization_config, load_in_8bit)
209220

210221
model = cls.load_model(model_cache_path, quantization_config=quantization_config)
211-
return cls(model, config=config, model_save_dir=model_cache_path.parent, **kwargs)
222+
return cls(model, config=config, model_save_dir=model_cache_path.parent, quantization_config=quantization_config, **kwargs)
223+
224+
def _prepare_quantization_config(quantization_config : Optional[Union[OVWeightQuantizationConfig, Dict]] = None, load_in_8bit:bool= False):
225+
# Give default quantization config if not provided and load_in_8bit=True
226+
if not quantization_config and load_in_8bit:
227+
quantization_config = OVWeightQuantizationConfig(bits=8)
228+
elif isinstance(quantization_config, dict):
229+
quantization_config = OVWeightQuantizationConfig.from_dict(quantization_config)
230+
231+
return quantization_config
212232

213233
@staticmethod
214234
def _cached_file(

optimum/intel/openvino/modeling_base_seq2seq.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ def __init__(
5858
dynamic_shapes: bool = True,
5959
ov_config: Optional[Dict[str, str]] = None,
6060
model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
61+
quantization_config: Union[OVWeightQuantizationConfig, Dict] = None,
6162
**kwargs,
6263
):
6364
self.config = config
@@ -76,6 +77,12 @@ def __init__(
7677
self.decoder_model = decoder
7778
self.decoder_with_past_model = decoder_with_past
7879
self.generation_config = GenerationConfig.from_model_config(config) if self.can_generate() else None
80+
if quantization_config:
81+
self.ov_config["quantization_config"] = quantization_config
82+
83+
self._openvino_config = None
84+
if quantization_config:
85+
self._openvino_config = OVConfig(quantization_config=quantization_config)
7986

8087
def _save_pretrained(self, save_directory: Union[str, Path]):
8188
"""
@@ -96,6 +103,8 @@ def _save_pretrained(self, save_directory: Union[str, Path]):
96103
dst_path = os.path.join(save_directory, dst_file_name)
97104
openvino.save_model(src_file, dst_path, compress_to_fp16=False)
98105

106+
self._save_openvino_config(save_directory)
107+
99108
@classmethod
100109
def _from_pretrained(
101110
cls,
@@ -155,9 +164,7 @@ def _from_pretrained(
155164
decoder_with_past_file_name = decoder_with_past_file_name or default_decoder_with_past_file_name
156165
decoder_with_past = None
157166

158-
# Give default quantization config if not provided and load_in_8bit=True
159-
if load_in_8bit:
160-
quantization_config = quantization_config or {"bits": 8}
167+
quantization_config = self._prepare_quantization_config(quantization_config, load_in_8bit)
161168

162169
# Load model from a local directory
163170
if os.path.isdir(model_id):
@@ -205,6 +212,7 @@ def _from_pretrained(
205212
decoder_with_past=decoder_with_past,
206213
config=config,
207214
model_save_dir=model_save_dir,
215+
quantization_config=quantization_config,
208216
**kwargs,
209217
)
210218

optimum/intel/openvino/modeling_decoder.py

+8-9
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ def __init__(
100100
dynamic_shapes: bool = True,
101101
ov_config: Optional[Dict[str, str]] = None,
102102
model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
103+
quantization_config: Optional[Union[OVWeightQuantizationConfig, Dict]] = None,
103104
**kwargs,
104105
):
105106
if not dynamic_shapes:
@@ -117,6 +118,7 @@ def __init__(
117118
dynamic_shapes=False,
118119
ov_config=ov_config,
119120
model_save_dir=model_save_dir,
121+
quantization_config=quantization_config,
120122
**kwargs,
121123
)
122124

@@ -224,6 +226,8 @@ def _save_pretrained(self, save_directory: Union[str, Path]):
224226
dst_path = os.path.join(save_directory, OV_XML_FILE_NAME)
225227
openvino.save_model(model_to_save, dst_path, compress_to_fp16=False)
226228

229+
self._save_openvino_config(save_directory)
230+
227231
@classmethod
228232
def _from_transformers(
229233
cls,
@@ -576,15 +580,10 @@ def _from_pretrained(
576580
local_files_only=local_files_only,
577581
)
578582

579-
# Give default quantization config if not provided and load_in_8bit=True
580-
if load_in_8bit:
581-
quantization_config = quantization_config or {"bits": 8}
582-
583-
if isinstance(quantization_config, dict):
584-
if quantization_config == {"bits": 4} and config.name_or_path in _DEFAULT_4BIT_CONFIGS:
585-
quantization_config = _DEFAULT_4BIT_CONFIGS[config.name_or_path]
583+
if isinstance(quantization_config, dict) and quantization_config == {"bits": 4}:
584+
quantization_config = _DEFAULT_4BIT_CONFIGS.get(config.name_or_path, quantization_config)
586585

587-
quantization_config = OVWeightQuantizationConfig.from_dict(quantization_config)
586+
quantization_config = self._prepare_quantization_config(quantization_config, load_in_8bit)
588587

589588
load_in_4bit = quantization_config.bits == 4 if quantization_config else False
590589
model = cls.load_model(model_cache_path, quantization_config=None if load_in_4bit else quantization_config)
@@ -603,7 +602,7 @@ def _from_pretrained(
603602

604603
enable_compilation = kwargs.pop("compile", True) and not load_in_4bit
605604
causal_model = init_cls(
606-
model=model, config=config, model_save_dir=model_cache_path.parent, compile=enable_compilation, **kwargs
605+
model=model, config=config, model_save_dir=model_cache_path.parent, compile=enable_compilation, quantization_config=quantization_config, **kwargs
607606
)
608607

609608
if load_in_4bit:

optimum/intel/openvino/modeling_diffusion.py

+10-5
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ def __init__(
8787
compile: bool = True,
8888
ov_config: Optional[Dict[str, str]] = None,
8989
model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
90+
quantization_config: Optional[Union[OVWeightQuantizationConfig, Dict]] = None,
9091
**kwargs,
9192
):
9293
self._internal_dict = config
@@ -140,6 +141,11 @@ def __init__(
140141

141142
self._internal_dict.pop("vae", None)
142143

144+
self._openvino_config = None
145+
if quantization_config:
146+
self._openvino_config = OVConfig(quantization_config=quantization_config)
147+
148+
143149
def _save_pretrained(self, save_directory: Union[str, Path]):
144150
"""
145151
Saves the model to the OpenVINO IR format so that it can be re-loaded using the
@@ -177,6 +183,8 @@ def _save_pretrained(self, save_directory: Union[str, Path]):
177183
if self.tokenizer_2 is not None:
178184
self.tokenizer_2.save_pretrained(save_directory / "tokenizer_2")
179185

186+
self._save_openvino_config(save_directory)
187+
180188
@classmethod
181189
def _from_pretrained(
182190
cls,
@@ -257,10 +265,7 @@ def _from_pretrained(
257265
else:
258266
kwargs[name] = load_method(new_model_save_dir)
259267

260-
# Give default quantization config if not provided and load_in_8bit=True
261-
if load_in_8bit:
262-
quantization_config = quantization_config or {"bits": 8}
263-
268+
quantization_config = self._prepare_quantization_config(quantization_config, load_in_8bit)
264269
unet = cls.load_model(
265270
new_model_save_dir / DIFFUSION_MODEL_UNET_SUBFOLDER / unet_file_name, quantization_config
266271
)
@@ -278,7 +283,7 @@ def _from_pretrained(
278283
if model_save_dir is None:
279284
model_save_dir = new_model_save_dir
280285

281-
return cls(unet=unet, config=config, model_save_dir=model_save_dir, **components, **kwargs)
286+
return cls(unet=unet, config=config, model_save_dir=model_save_dir, quantization_config=quantization_config, **components, **kwargs)
282287

283288
@classmethod
284289
def _from_transformers(

0 commit comments

Comments (0)