Small OpenVINO UX improvements #629

Merged 2 commits on Mar 22, 2024. The diff below shows the changes from 1 commit.

This PR keeps the default PERFORMANCE_HINT of "LATENCY" even when a custom ov_config is provided (previously any user-supplied config silently dropped it), threads an ov_config argument through the seq2seq constructor, and downgrades the non-string-device message in to() from warning to debug.
2 changes: 1 addition & 1 deletion optimum/intel/openvino/modeling.py
```diff
@@ -137,7 +137,7 @@ def to(self, device: str):
             self._device = device.upper()
             self.request = None
         else:
-            logger.warning(f"device must be of type {str} but got {type(device)} instead")
+            logger.debug(f"device must be of type {str} but got {type(device)} instead")
 
         return self
```
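The only functional change in this hunk is the log level: calling `to()` with a non-string device (for example a `torch.device`) is now reported at debug level instead of emitting a user-facing warning on every call. A minimal standalone sketch of the control flow (`OVModelLike` is an illustrative stand-in, not the library's class):

```python
import logging

logger = logging.getLogger(__name__)

class OVModelLike:
    """Illustrative stand-in mirroring the to() logic in the hunk above."""

    def __init__(self):
        self._device = "CPU"
        self.request = None

    def to(self, device):
        if isinstance(device, str):
            self._device = device.upper()
            self.request = None  # drop the compiled request; recompiled on next use
        else:
            # Non-string devices are now logged at debug level only
            logger.debug(f"device must be of type {str} but got {type(device)} instead")
        return self

model = OVModelLike().to("gpu")  # _device becomes "GPU"
model.to(0)                      # ignored; visible only with debug logging enabled
```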
5 changes: 4 additions & 1 deletion optimum/intel/openvino/modeling_base.py
```diff
@@ -64,7 +64,10 @@ def __init__(
         self.model_save_dir = model_save_dir
         self._device = device.upper()
         self.is_dynamic = dynamic_shapes
-        self.ov_config = ov_config if ov_config is not None else {"PERFORMANCE_HINT": "LATENCY"}
+        self.ov_config = ov_config if ov_config is not None else {}
+        if self.ov_config.get("PERFORMANCE_HINT") is None:
+            self.ov_config["PERFORMANCE_HINT"] = "LATENCY"
 
         self.preprocessors = kwargs.get("preprocessors", [])
         enable_compilation = kwargs.get("compile", True)
```
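This is the core UX fix: the old one-liner used `{"PERFORMANCE_HINT": "LATENCY"}` only as the fallback for `ov_config=None`, so passing any custom dict silently dropped the latency default. The new code merges the default in whenever the caller has not set a hint. A minimal sketch of the resulting behavior (`resolve_ov_config` is a hypothetical helper, for illustration only):

```python
from typing import Dict, Optional

def resolve_ov_config(ov_config: Optional[Dict[str, str]]) -> Dict[str, str]:
    """Mirror the defaulting logic above: LATENCY is a fallback, not an override."""
    config = ov_config if ov_config is not None else {}
    if config.get("PERFORMANCE_HINT") is None:
        config["PERFORMANCE_HINT"] = "LATENCY"
    return config

assert resolve_ov_config(None) == {"PERFORMANCE_HINT": "LATENCY"}
# Unrelated options no longer lose the default hint:
assert resolve_ov_config({"NUM_STREAMS": "2"}) == {"NUM_STREAMS": "2", "PERFORMANCE_HINT": "LATENCY"}
# An explicit hint is left untouched:
assert resolve_ov_config({"PERFORMANCE_HINT": "THROUGHPUT"}) == {"PERFORMANCE_HINT": "THROUGHPUT"}
```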
4 changes: 3 additions & 1 deletion optimum/intel/openvino/modeling_diffusion.py
```diff
@@ -101,6 +101,8 @@ def __init__(
         self._device = device.upper()
         self.is_dynamic = dynamic_shapes
         self.ov_config = ov_config if ov_config is not None else {}
+        if self.ov_config.get("PERFORMANCE_HINT") is None:
+            self.ov_config["PERFORMANCE_HINT"] = "LATENCY"
 
         # This attribute is needed to keep one reference on the temporary directory, since garbage collecting
         # would end-up removing the directory containing the underlying OpenVINO model
@@ -456,7 +458,7 @@ def to(self, device: str):
             self._device = device.upper()
             self.clear_requests()
         else:
-            logger.warning(f"device must be of type {str} but got {type(device)} instead")
+            logger.debug(f"device must be of type {str} but got {type(device)} instead")
 
         return self
```
6 changes: 5 additions & 1 deletion optimum/intel/openvino/modeling_seq2seq.py
```diff
@@ -253,6 +253,7 @@ def __init__(
         decoder: openvino.runtime.Model,
         decoder_with_past: openvino.runtime.Model = None,
         config: transformers.PretrainedConfig = None,
+        ov_config: Optional[Dict[str, str]] = None,
         **kwargs,
     ):
         super().__init__(
@@ -263,7 +264,10 @@ def __init__(
         enable_compilation = kwargs.get("compile", True)
         self.encoder = OVEncoder(self.encoder_model, parent_model=self)
         self.decoder = OVDecoder(self.decoder_model, parent_model=self)
+        self.ov_config = ov_config if ov_config is not None else {}
 
+        if self.ov_config.get("PERFORMANCE_HINT") is None:
+            self.ov_config["PERFORMANCE_HINT"] = "LATENCY"
         if self.use_cache:
             self.decoder_with_past = OVDecoder(self.decoder_with_past_model, parent_model=self)
         if enable_compilation:
```

Review comment (Collaborator), on the PERFORMANCE_HINT default above: Shouldn't this be included in OVBaseModelForSeq2SeqLM instead?

Reply (Author): Yes, thank you! I changed it.

```diff
@@ -285,7 +289,7 @@ def to(self, device: str):
             self.decoder_with_past._device = self._device
             self.clear_requests()
         else:
-            logger.warning(f"device must be of type {str} but got {type(device)} instead")
+            logger.debug(f"device must be of type {str} but got {type(device)} instead")
 
         return self
```
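With `ov_config` now a constructor argument, seq2seq models accept the same configuration dict as the other OVModel classes via `from_pretrained`. A hedged usage sketch (the model id is a placeholder, and `CACHE_DIR` is a standard OpenVINO property used here for illustration):

```python
from optimum.intel import OVModelForSeq2SeqLM

model = OVModelForSeq2SeqLM.from_pretrained(
    "path/to/openvino-seq2seq-model",  # placeholder id
    ov_config={"CACHE_DIR": "ov_cache"},
)
# The LATENCY hint is merged in next to the user-provided option:
print(model.ov_config)  # {'CACHE_DIR': 'ov_cache', 'PERFORMANCE_HINT': 'LATENCY'}
```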
20 changes: 19 additions & 1 deletion tests/openvino/test_modeling.py
```diff
@@ -116,6 +116,9 @@ def test_load_from_hub_and_save_model(self):
         tokens = tokenizer("This is a sample input", return_tensors="pt")
         loaded_model = OVModelForSequenceClassification.from_pretrained(self.OV_MODEL_ID)
         self.assertIsInstance(loaded_model.config, PretrainedConfig)
+        # Test that PERFORMANCE_HINT is set to LATENCY by default
+        self.assertEqual(loaded_model.ov_config.get("PERFORMANCE_HINT"), "LATENCY")
+        self.assertEqual(loaded_model.request.get_property("PERFORMANCE_HINT"), "LATENCY")
         loaded_model_outputs = loaded_model(**tokens)
 
         # Test specifying ov_config with throughput hint and manual cache dir
```
```diff
@@ -134,7 +137,10 @@ def test_load_from_hub_and_save_model(self):
             folder_contents = os.listdir(tmpdirname)
             self.assertTrue(OV_XML_FILE_NAME in folder_contents)
             self.assertTrue(OV_XML_FILE_NAME.replace(".xml", ".bin") in folder_contents)
-            model = OVModelForSequenceClassification.from_pretrained(tmpdirname)
+            model = OVModelForSequenceClassification.from_pretrained(tmpdirname, ov_config={"NUM_STREAMS": 2})
+            # Test that PERFORMANCE_HINT is set to LATENCY by default even with ov_config provided
+            self.assertEqual(model.ov_config.get("PERFORMANCE_HINT"), "LATENCY")
+            self.assertEqual(model.request.get_property("PERFORMANCE_HINT"), "LATENCY")
 
         outputs = model(**tokens)
         self.assertTrue(torch.equal(loaded_model_outputs.logits, outputs.logits))
```
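The assertions above check the fallback direction; the converse also holds, since the default is only applied when no hint is present. A short sketch (model path is a placeholder):

```python
from optimum.intel import OVModelForSequenceClassification

# An explicit hint wins over the LATENCY fallback:
model = OVModelForSequenceClassification.from_pretrained(
    "path/to/ov_model",  # placeholder
    ov_config={"PERFORMANCE_HINT": "THROUGHPUT"},
)
assert model.ov_config["PERFORMANCE_HINT"] == "THROUGHPUT"
```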
```diff
@@ -150,6 +156,9 @@ def test_load_from_hub_and_save_decoder_model(self, use_cache):
         tokens = tokenizer("This is a sample input", return_tensors="pt")
         loaded_model = OVModelForCausalLM.from_pretrained(model_id, use_cache=use_cache)
         self.assertIsInstance(loaded_model.config, PretrainedConfig)
+        # Test that PERFORMANCE_HINT is set to LATENCY by default
+        self.assertEqual(loaded_model.ov_config.get("PERFORMANCE_HINT"), "LATENCY")
+        self.assertEqual(loaded_model.request.get_compiled_model().get_property("PERFORMANCE_HINT"), "LATENCY")
         loaded_model_outputs = loaded_model(**tokens)
 
         with tempfile.TemporaryDirectory() as tmpdirname:
```
```diff
@@ -172,6 +181,11 @@ def test_load_from_hub_and_save_seq2seq_model(self):
         loaded_model = OVModelForSeq2SeqLM.from_pretrained(self.OV_SEQ2SEQ_MODEL_ID, compile=False)
         self.assertIsInstance(loaded_model.config, PretrainedConfig)
         loaded_model.to("cpu")
+        loaded_model.compile()
+        # Test that PERFORMANCE_HINT is set to LATENCY by default
+        self.assertEqual(loaded_model.ov_config.get("PERFORMANCE_HINT"), "LATENCY")
+        self.assertEqual(loaded_model.decoder.request.get_compiled_model().get_property("PERFORMANCE_HINT"), "LATENCY")
+
         loaded_model_outputs = loaded_model.generate(**tokens)
 
         with tempfile.TemporaryDirectory() as tmpdirname:
```
```diff
@@ -192,6 +206,10 @@ def test_load_from_hub_and_save_seq2seq_model(self):
     def test_load_from_hub_and_save_stable_diffusion_model(self):
         loaded_pipeline = OVStableDiffusionPipeline.from_pretrained(self.OV_DIFFUSION_MODEL_ID, compile=False)
         self.assertIsInstance(loaded_pipeline.config, Dict)
+        # Test that PERFORMANCE_HINT is set to LATENCY by default
+        self.assertEqual(loaded_pipeline.ov_config.get("PERFORMANCE_HINT"), "LATENCY")
+        loaded_pipeline.compile()
+        self.assertEqual(loaded_pipeline.unet.request.get_property("PERFORMANCE_HINT"), "LATENCY")
         batch_size, height, width = 2, 16, 16
         np.random.seed(0)
         inputs = {
```
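The same default reaches every compiled component of a diffusion pipeline. A sketch mirroring the test above (pipeline id is a placeholder; `compile=False` defers compilation so the config can be inspected first):

```python
from optimum.intel import OVStableDiffusionPipeline

pipe = OVStableDiffusionPipeline.from_pretrained(
    "path/to/ov-stable-diffusion",  # placeholder id
    compile=False,
)
assert pipe.ov_config.get("PERFORMANCE_HINT") == "LATENCY"  # set before compiling
pipe.compile()
# After compilation the hint is visible on each component's request, e.g. the unet:
assert pipe.unet.request.get_property("PERFORMANCE_HINT") == "LATENCY"
```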