Skip to content

Commit b555a67

Browse files
committed
Fixed issues. Applied comments.
1 parent 9943624 commit b555a67

6 files changed

+23
-13
lines changed

optimum/intel/openvino/modeling_base_seq2seq.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ def _from_transformers(
253253

254254
compression_option = None
255255
if load_in_8bit is not None:
256-
compression_option = "int8" if load_in_8bit else "fp32"
256+
compression_option = "fp32"
257257
main_export(
258258
model_name_or_path=model_id,
259259
output=save_dir_path,
@@ -270,7 +270,7 @@ def _from_transformers(
270270

271271
config.save_pretrained(save_dir_path)
272272
return cls._from_pretrained(
273-
model_id=save_dir_path, config=config, use_cache=use_cache, load_in_8bit=False, **kwargs
273+
model_id=save_dir_path, config=config, use_cache=use_cache, load_in_8bit=load_in_8bit, **kwargs
274274
)
275275

276276
def _reshape(self, model: openvino.runtime.Model, batch_size: int, sequence_length: int, is_decoder=True):

optimum/intel/openvino/modeling_decoder.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,7 @@ def _from_transformers(
289289
model_id=save_dir_path,
290290
config=config,
291291
use_cache=use_cache,
292-
load_in_8bit=False,
292+
load_in_8bit=load_in_8bit,
293293
stateful=None,
294294
load_in_4bit=load_in_4bit,
295295
quantization_config=quantization_config,
@@ -360,7 +360,7 @@ class OVModelForCausalLM(OVBaseDecoderModel, GenerationMixin):
360360
checkpoint="gpt2",
361361
)
362362
)
363-
def prepare_forward_inputs(
363+
def prepare_inputs(
364364
self,
365365
input_ids: torch.LongTensor,
366366
attention_mask: Optional[torch.LongTensor] = None,

optimum/intel/openvino/weight_quantization.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ def compress_decoder_weights(model, quantization_config: Union[OVWeightQuantizat
143143

144144
dataset = get_dataset(config.dataset, tokenizer, seqlen=32)
145145
dataset = prepare_dataset(dataset)
146-
dataset = nncf.Dataset(dataset, lambda x: model.prepare_forward_inputs(**x))
146+
dataset = nncf.Dataset(dataset, lambda x: model.prepare_inputs(**x))
147147

148148
model.model = nncf.compress_weights(
149149
ov_model,

optimum/intel/utils/dummy_openvino_and_nncf_objects.py

+11
Original file line numberDiff line numberDiff line change
@@ -57,3 +57,14 @@ def __init__(self, *args, **kwargs):
5757
@classmethod
5858
def from_pretrained(cls, *args, **kwargs):
5959
requires_backends(cls, ["openvino", "nncf"])
60+
61+
62+
class OVWeightQuantizationConfig(metaclass=DummyObject):
63+
_backends = ["openvino", "nncf"]
64+
65+
def __init__(self, *args, **kwargs):
66+
requires_backends(self, ["openvino", "nncf"])
67+
68+
@classmethod
69+
def from_pretrained(cls, *args, **kwargs):
70+
requires_backends(cls, ["openvino", "nncf"])

tests/openvino/test_quantization.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -303,13 +303,12 @@ def test_ovmodel_4bit_weight_compression(self, model_cls, model_name, expected_i
303303

304304
@parameterized.expand(SUPPORTED_ARCHITECTURES_STATEFUL_WITH_EXPECTED_8BIT_COMPRESSED_MATMULS)
305305
@unittest.skipIf(not IS_SUPPORT_STATEFUL, "Stateful models supported only in 2023.3 and above")
306-
def test_ovmodel_8bit_weight_compression_stateful(self, model_cls, model_name, expected_pt_int8, expected_ov_int8):
306+
def test_ovmodel_8bit_weight_compression_stateful(self, model_cls, model_id, expected_pt_int8, expected_ov_int8):
307307
task = model_cls.export_feature
308308

309309
with tempfile.TemporaryDirectory() as tmp_dir:
310-
model_id = MODEL_NAMES[model_name]
311310
transformers_model = model_cls.from_pretrained(model_id, export=True, stateful=True)
312-
tokenizer = AutoTokenizer.from_pretrained(model_name)
311+
tokenizer = AutoTokenizer.from_pretrained(model_id)
313312
if tokenizer.pad_token is None:
314313
tokenizer.pad_token = tokenizer.eos_token
315314

tests/openvino/utils_tests.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -103,15 +103,15 @@
103103
"bert": (70,),
104104
"roberta": (68,),
105105
"albert": (84,),
106-
"vit": (62,),
106+
"vit": (64,),
107107
"blenderbot": (70,),
108108
"gpt2": (46,),
109-
"wav2vec2": (30,),
109+
"wav2vec2": (34,),
110110
"distilbert": (66,),
111111
"t5": (64, 104, 84),
112-
"stable-diffusion": (148, 8, 8, 64),
113-
"stable-diffusion-xl": (296, 8, 8, 66),
114-
"stable-diffusion-xl-refiner": (296, 8, 8, 66),
112+
"stable-diffusion": (242, 34, 42, 64),
113+
"stable-diffusion-xl": (366, 34, 42, 66),
114+
"stable-diffusion-xl-refiner": (366, 34, 42, 66),
115115
}
116116

117117
_ARCHITECTURES_TO_EXPECTED_INT4_INT8 = {"opt125m": (64, 477)}

0 commit comments

Comments (0)