
Commit 25da061

Update ov_config, change warning in .to() to debug

- Set PERFORMANCE_HINT to LATENCY if it is not specified in ov_config.
- Replace the warning log about devices in .to() with a debug log, to avoid confusing users who create a pipeline(), which always triggered this warning.

1 parent 64d9a49 · commit 25da061
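In short, loading an OpenVINO model without an explicit performance hint now defaults to latency-optimized compilation, while an explicit hint is preserved. A minimal sketch of the resulting behavior, assuming an already-exported OpenVINO model on the Hub (the model ID below is a placeholder):

    from optimum.intel import OVModelForSequenceClassification

    # With no ov_config, PERFORMANCE_HINT now defaults to LATENCY.
    model = OVModelForSequenceClassification.from_pretrained("org/ov-model")  # placeholder ID
    assert model.ov_config["PERFORMANCE_HINT"] == "LATENCY"

    # An explicit hint is left untouched.
    model = OVModelForSequenceClassification.from_pretrained(
        "org/ov-model", ov_config={"PERFORMANCE_HINT": "THROUGHPUT"}
    )
    assert model.ov_config["PERFORMANCE_HINT"] == "THROUGHPUT"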

5 files changed, +32 -5 lines changed


optimum/intel/openvino/modeling.py (+1 -1)

@@ -137,7 +137,7 @@ def to(self, device: str):
             self._device = device.upper()
             self.request = None
         else:
-            logger.warning(f"device must be of type {str} but got {type(device)} instead")
+            logger.debug(f"device must be of type {str} but got {type(device)} instead")

         return self
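Per the commit message, every user who wraps an OVModel in a transformers pipeline() saw this warning, since pipeline() moves the model with a torch.device rather than a string. A plausible reproduction of that call path, with a placeholder model ID:

    from transformers import AutoTokenizer, pipeline
    from optimum.intel import OVModelForSequenceClassification

    model_id = "org/ov-model"  # placeholder
    model = OVModelForSequenceClassification.from_pretrained(model_id)
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    # pipeline() internally calls model.to(...) with a non-string device,
    # which previously logged a warning on every pipeline creation and is
    # now only visible at DEBUG level.
    pipe = pipeline("text-classification", model=model, tokenizer=tokenizer)
    print(pipe("This is a sample input"))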

optimum/intel/openvino/modeling_base.py (+4 -1)

@@ -64,7 +64,10 @@ def __init__(
         self.model_save_dir = model_save_dir
         self._device = device.upper()
         self.is_dynamic = dynamic_shapes
-        self.ov_config = ov_config if ov_config is not None else {"PERFORMANCE_HINT": "LATENCY"}
+        self.ov_config = ov_config if ov_config is not None else {}
+        if self.ov_config.get("PERFORMANCE_HINT") is None:
+            self.ov_config["PERFORMANCE_HINT"] = "LATENCY"
+
         self.preprocessors = kwargs.get("preprocessors", [])
         enable_compilation = kwargs.get("compile", True)
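Note the behavioral difference: before, the LATENCY default applied only when ov_config was None; now a user-supplied config that merely omits the hint is completed as well. A standalone sketch of the new logic (the helper name is hypothetical, mirroring the code added to __init__ here):

    # Hypothetical helper mirroring the default-filling logic in this diff.
    def resolve_ov_config(ov_config=None):
        ov_config = ov_config if ov_config is not None else {}
        if ov_config.get("PERFORMANCE_HINT") is None:
            ov_config["PERFORMANCE_HINT"] = "LATENCY"
        return ov_config

    assert resolve_ov_config() == {"PERFORMANCE_HINT": "LATENCY"}
    # Previously this case got no hint at all; now it is completed:
    assert resolve_ov_config({"NUM_STREAMS": 2})["PERFORMANCE_HINT"] == "LATENCY"
    # An explicit hint is preserved:
    assert resolve_ov_config({"PERFORMANCE_HINT": "THROUGHPUT"})["PERFORMANCE_HINT"] == "THROUGHPUT"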

optimum/intel/openvino/modeling_diffusion.py (+3 -1)

@@ -101,6 +101,8 @@ def __init__(
         self._device = device.upper()
         self.is_dynamic = dynamic_shapes
         self.ov_config = ov_config if ov_config is not None else {}
+        if self.ov_config.get("PERFORMANCE_HINT") is None:
+            self.ov_config["PERFORMANCE_HINT"] = "LATENCY"

         # This attribute is needed to keep one reference on the temporary directory, since garbage collecting
         # would end-up removing the directory containing the underlying OpenVINO model

@@ -456,7 +458,7 @@ def to(self, device: str):
             self._device = device.upper()
             self.clear_requests()
         else:
-            logger.warning(f"device must be of type {str} but got {type(device)} instead")
+            logger.debug(f"device must be of type {str} but got {type(device)} instead")

         return self
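The diffusion pipeline compiles several submodels, so the hint only becomes observable on the compiled requests after compile(). A minimal sketch, assuming an OpenVINO-exported Stable Diffusion pipeline (placeholder model ID), following the pattern the test below uses:

    from optimum.intel import OVStableDiffusionPipeline

    pipe = OVStableDiffusionPipeline.from_pretrained("org/ov-stable-diffusion", compile=False)
    assert pipe.ov_config["PERFORMANCE_HINT"] == "LATENCY"

    pipe.compile()
    # The hint propagates to each compiled submodel, e.g. the UNet.
    assert pipe.unet.request.get_property("PERFORMANCE_HINT") == "LATENCY"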

optimum/intel/openvino/modeling_seq2seq.py (+5 -1)

@@ -253,6 +253,7 @@ def __init__(
         decoder: openvino.runtime.Model,
         decoder_with_past: openvino.runtime.Model = None,
         config: transformers.PretrainedConfig = None,
+        ov_config: Optional[Dict[str, str]] = None,
         **kwargs,
     ):
         super().__init__(

@@ -263,7 +264,10 @@ def __init__(
         enable_compilation = kwargs.get("compile", True)
         self.encoder = OVEncoder(self.encoder_model, parent_model=self)
         self.decoder = OVDecoder(self.decoder_model, parent_model=self)
+        self.ov_config = ov_config if ov_config is not None else {}

+        if self.ov_config.get("PERFORMANCE_HINT") is None:
+            self.ov_config["PERFORMANCE_HINT"] = "LATENCY"
         if self.use_cache:
             self.decoder_with_past = OVDecoder(self.decoder_with_past_model, parent_model=self)
         if enable_compilation:

@@ -285,7 +289,7 @@ def to(self, device: str):
             self.decoder_with_past._device = self._device
             self.clear_requests()
         else:
-            logger.warning(f"device must be of type {str} but got {type(device)} instead")
+            logger.debug(f"device must be of type {str} but got {type(device)} instead")

         return self
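With ov_config now accepted by the seq2seq constructor, the encoder and decoder inherit the same hint once compiled. A hedged usage sketch (placeholder model ID), mirroring the assertions in the test below:

    from optimum.intel import OVModelForSeq2SeqLM

    model = OVModelForSeq2SeqLM.from_pretrained("org/ov-t5-small", compile=False)  # placeholder ID
    model.to("cpu")
    model.compile()

    assert model.ov_config["PERFORMANCE_HINT"] == "LATENCY"
    # The decoder wraps an inference request; query the compiled model behind it.
    assert model.decoder.request.get_compiled_model().get_property("PERFORMANCE_HINT") == "LATENCY"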

tests/openvino/test_modeling.py (+19 -1)

@@ -116,6 +116,9 @@ def test_load_from_hub_and_save_model(self):
         tokens = tokenizer("This is a sample input", return_tensors="pt")
         loaded_model = OVModelForSequenceClassification.from_pretrained(self.OV_MODEL_ID)
         self.assertIsInstance(loaded_model.config, PretrainedConfig)
+        # Test that PERFORMANCE_HINT is set to LATENCY by default
+        self.assertEqual(loaded_model.ov_config.get("PERFORMANCE_HINT"), "LATENCY")
+        self.assertEqual(loaded_model.request.get_property("PERFORMANCE_HINT"), "LATENCY")
         loaded_model_outputs = loaded_model(**tokens)

         # Test specifying ov_config with throughput hint and manual cache dir

@@ -134,7 +137,10 @@ def test_load_from_hub_and_save_model(self):
         folder_contents = os.listdir(tmpdirname)
         self.assertTrue(OV_XML_FILE_NAME in folder_contents)
         self.assertTrue(OV_XML_FILE_NAME.replace(".xml", ".bin") in folder_contents)
-        model = OVModelForSequenceClassification.from_pretrained(tmpdirname)
+        model = OVModelForSequenceClassification.from_pretrained(tmpdirname, ov_config={"NUM_STREAMS": 2})
+        # Test that PERFORMANCE_HINT is set to LATENCY by default even with ov_config provided
+        self.assertEqual(model.ov_config.get("PERFORMANCE_HINT"), "LATENCY")
+        self.assertEqual(model.request.get_property("PERFORMANCE_HINT"), "LATENCY")

         outputs = model(**tokens)
         self.assertTrue(torch.equal(loaded_model_outputs.logits, outputs.logits))

@@ -150,6 +156,9 @@ def test_load_from_hub_and_save_decoder_model(self, use_cache):
         tokens = tokenizer("This is a sample input", return_tensors="pt")
         loaded_model = OVModelForCausalLM.from_pretrained(model_id, use_cache=use_cache)
         self.assertIsInstance(loaded_model.config, PretrainedConfig)
+        # Test that PERFORMANCE_HINT is set to LATENCY by default
+        self.assertEqual(loaded_model.ov_config.get("PERFORMANCE_HINT"), "LATENCY")
+        self.assertEqual(loaded_model.request.get_compiled_model().get_property("PERFORMANCE_HINT"), "LATENCY")
         loaded_model_outputs = loaded_model(**tokens)

         with tempfile.TemporaryDirectory() as tmpdirname:

@@ -172,6 +181,11 @@ def test_load_from_hub_and_save_seq2seq_model(self):
         loaded_model = OVModelForSeq2SeqLM.from_pretrained(self.OV_SEQ2SEQ_MODEL_ID, compile=False)
         self.assertIsInstance(loaded_model.config, PretrainedConfig)
         loaded_model.to("cpu")
+        loaded_model.compile()
+        # Test that PERFORMANCE_HINT is set to LATENCY by default
+        self.assertEqual(loaded_model.ov_config.get("PERFORMANCE_HINT"), "LATENCY")
+        self.assertEqual(loaded_model.decoder.request.get_compiled_model().get_property("PERFORMANCE_HINT"), "LATENCY")
+
         loaded_model_outputs = loaded_model.generate(**tokens)

         with tempfile.TemporaryDirectory() as tmpdirname:

@@ -192,6 +206,10 @@ def test_load_from_hub_and_save_stable_diffusion_model(self):
         loaded_pipeline = OVStableDiffusionPipeline.from_pretrained(self.OV_DIFFUSION_MODEL_ID, compile=False)
         self.assertIsInstance(loaded_pipeline.config, Dict)
+        # Test that PERFORMANCE_HINT is set to LATENCY by default
+        self.assertEqual(loaded_pipeline.ov_config.get("PERFORMANCE_HINT"), "LATENCY")
+        loaded_pipeline.compile()
+        self.assertEqual(loaded_pipeline.unet.request.get_property("PERFORMANCE_HINT"), "LATENCY")
         batch_size, height, width = 2, 16, 16
         np.random.seed(0)
         inputs = {
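The second test hunk pins down the partial-config case: an ov_config that sets only NUM_STREAMS still receives the LATENCY default. Roughly, as user code (the model path is a placeholder for a locally saved OpenVINO model):

    from optimum.intel import OVModelForSequenceClassification

    model = OVModelForSequenceClassification.from_pretrained(
        "path/to/saved/ov_model",  # placeholder
        ov_config={"NUM_STREAMS": 2},
    )
    assert model.ov_config["PERFORMANCE_HINT"] == "LATENCY"
    assert model.request.get_property("PERFORMANCE_HINT") == "LATENCY"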
