@@ -116,6 +116,9 @@ def test_load_from_hub_and_save_model(self):
116
116
tokens = tokenizer ("This is a sample input" , return_tensors = "pt" )
117
117
loaded_model = OVModelForSequenceClassification .from_pretrained (self .OV_MODEL_ID )
118
118
self .assertIsInstance (loaded_model .config , PretrainedConfig )
119
+ # Test that PERFORMANCE_HINT is set to LATENCY by default
120
+ self .assertEqual (loaded_model .ov_config .get ("PERFORMANCE_HINT" ), "LATENCY" )
121
+ self .assertEqual (loaded_model .request .get_property ("PERFORMANCE_HINT" ), "LATENCY" )
119
122
loaded_model_outputs = loaded_model (** tokens )
120
123
121
124
# Test specifying ov_config with throughput hint and manual cache dir
@@ -134,7 +137,10 @@ def test_load_from_hub_and_save_model(self):
134
137
folder_contents = os .listdir (tmpdirname )
135
138
self .assertTrue (OV_XML_FILE_NAME in folder_contents )
136
139
self .assertTrue (OV_XML_FILE_NAME .replace (".xml" , ".bin" ) in folder_contents )
137
- model = OVModelForSequenceClassification .from_pretrained (tmpdirname )
140
+ model = OVModelForSequenceClassification .from_pretrained (tmpdirname , ov_config = {"NUM_STREAMS" : 2 })
141
+ # Test that PERFORMANCE_HINT still defaults to LATENCY when the provided ov_config does not set it
142
+ self .assertEqual (model .ov_config .get ("PERFORMANCE_HINT" ), "LATENCY" )
143
+ self .assertEqual (model .request .get_property ("PERFORMANCE_HINT" ), "LATENCY" )
138
144
139
145
outputs = model (** tokens )
140
146
self .assertTrue (torch .equal (loaded_model_outputs .logits , outputs .logits ))
@@ -150,6 +156,9 @@ def test_load_from_hub_and_save_decoder_model(self, use_cache):
150
156
tokens = tokenizer ("This is a sample input" , return_tensors = "pt" )
151
157
loaded_model = OVModelForCausalLM .from_pretrained (model_id , use_cache = use_cache )
152
158
self .assertIsInstance (loaded_model .config , PretrainedConfig )
159
+ # Test that PERFORMANCE_HINT is set to LATENCY by default
160
+ self .assertEqual (loaded_model .ov_config .get ("PERFORMANCE_HINT" ), "LATENCY" )
161
+ self .assertEqual (loaded_model .request .get_compiled_model ().get_property ("PERFORMANCE_HINT" ), "LATENCY" )
153
162
loaded_model_outputs = loaded_model (** tokens )
154
163
155
164
with tempfile .TemporaryDirectory () as tmpdirname :
@@ -172,6 +181,11 @@ def test_load_from_hub_and_save_seq2seq_model(self):
172
181
loaded_model = OVModelForSeq2SeqLM .from_pretrained (self .OV_SEQ2SEQ_MODEL_ID , compile = False )
173
182
self .assertIsInstance (loaded_model .config , PretrainedConfig )
174
183
loaded_model .to ("cpu" )
184
+ loaded_model .compile ()
185
+ # Test that PERFORMANCE_HINT is set to LATENCY by default
186
+ self .assertEqual (loaded_model .ov_config .get ("PERFORMANCE_HINT" ), "LATENCY" )
187
+ self .assertEqual (loaded_model .decoder .request .get_compiled_model ().get_property ("PERFORMANCE_HINT" ), "LATENCY" )
188
+
175
189
loaded_model_outputs = loaded_model .generate (** tokens )
176
190
177
191
with tempfile .TemporaryDirectory () as tmpdirname :
@@ -192,6 +206,10 @@ def test_load_from_hub_and_save_seq2seq_model(self):
192
206
def test_load_from_hub_and_save_stable_diffusion_model (self ):
193
207
loaded_pipeline = OVStableDiffusionPipeline .from_pretrained (self .OV_DIFFUSION_MODEL_ID , compile = False )
194
208
self .assertIsInstance (loaded_pipeline .config , Dict )
209
+ # Test that PERFORMANCE_HINT is set to LATENCY by default
210
+ self .assertEqual (loaded_pipeline .ov_config .get ("PERFORMANCE_HINT" ), "LATENCY" )
211
+ loaded_pipeline .compile ()
212
+ self .assertEqual (loaded_pipeline .unet .request .get_property ("PERFORMANCE_HINT" ), "LATENCY" )
195
213
batch_size , height , width = 2 , 16 , 16
196
214
np .random .seed (0 )
197
215
inputs = {
0 commit comments