@@ -1,25 +1,61 @@
import argparse
import difflib
import os
-
import json
import pandas as pd
+ import logging
from datasets import load_dataset
from optimum.exporters import TasksManager
from optimum.intel.openvino import OVModelForCausalLM
from optimum.utils import NormalizedConfigManager, NormalizedTextConfig
- from transformers import AutoConfig, AutoTokenizer
+ from transformers import AutoConfig, AutoTokenizer, AutoModelForCausalLM

from . import Evaluator

+ # Configure logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
TasksManager._SUPPORTED_MODEL_TYPE["stablelm-epoch"] = TasksManager._SUPPORTED_MODEL_TYPE["llama"]
NormalizedConfigManager._conf["stablelm-epoch"] = NormalizedTextConfig.with_args(
    num_layers="num_hidden_layers",
    num_attention_heads="num_attention_heads",
)


- def load_model(model_id, device="CPU", ov_config=None):
+ class GenAIModelWrapper:
+     """
+     A helper class to store additional attributes for GenAI models.
+     """
+     def __init__(self, model, model_dir):
+         self.model = model
+         self.config = AutoConfig.from_pretrained(model_dir)
+
+     def __getattr__(self, attr):
+         if attr in self.__dict__:
+             return getattr(self, attr)
+         else:
+             return getattr(self.model, attr)
+
+
+ def load_genai_pipeline(model_dir, device="CPU"):
+     try:
+         import openvino_genai
+     except ImportError:
+         logger.error("Failed to import openvino_genai package. Please install it.")
+         exit(-1)
+     logger.info("Using OpenVINO GenAI API")
+     return GenAIModelWrapper(openvino_genai.LLMPipeline(model_dir, device), model_dir)
+
+
+ def load_model(model_id, device="CPU", ov_config=None, use_hf=False, use_genai=False):
+     if use_hf:
+         logger.info("Using HF Transformers API")
+         return AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True, device_map=device.lower())
+
+     if use_genai:
+         return load_genai_pipeline(model_id, device)
+
    if ov_config:
        with open(ov_config) as f:
            ov_options = json.load(f)
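A note on the wrapper above: Python invokes `__getattr__` only after normal attribute lookup fails, so the `attr in self.__dict__` branch is effectively a safety net. In practice `model` and `config` are found by ordinary lookup, and every other attribute access falls through to the wrapped `LLMPipeline`. A minimal self-contained sketch of the same delegation pattern (the `Inner`/`Wrapper` names are illustrative, not from this patch):

```python
class Inner:
    def generate(self, prompt):
        return f"generated: {prompt}"


class Wrapper:
    def __init__(self, inner):
        self.inner = inner                # stored normally; found without __getattr__
        self.config = {"kind": "demo"}    # extra attribute the inner object lacks

    def __getattr__(self, attr):
        # Reached only when normal lookup fails: forward to the wrapped object.
        return getattr(self.inner, attr)


w = Wrapper(Inner())
print(w.config)           # own attribute -> {'kind': 'demo'}
print(w.generate("hi"))   # delegated     -> generated: hi
```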
@@ -157,6 +193,16 @@ def parse_args():
        default=None,
        help="Used to select default prompts based on the primary model language, e.g. 'en', 'ch'.",
    )
+     parser.add_argument(
+         "--hf",
+         action="store_true",
+         help="Use AutoModelForCausalLM from the transformers library to instantiate the model.",
+     )
+     parser.add_argument(
+         "--genai",
+         action="store_true",
+         help="Use LLMPipeline from the openvino_genai library to instantiate the model.",
+     )

    return parser.parse_args()

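Both new options are plain `store_true` flags: absent they default to `False`, present they flip to `True`, and `load_model` simply branches on them. A standalone sketch of that behavior (not the benchmark's actual parser):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--hf", action="store_true")
parser.add_argument("--genai", action="store_true")

print(parser.parse_args([]))            # Namespace(genai=False, hf=False)
print(parser.parse_args(["--genai"]))   # Namespace(genai=True, hf=False)
```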
@@ -211,6 +257,11 @@ def diff_strings(a: str, b: str, *, use_loguru_colors: bool = False) -> str:
    return "".join(output)


+ def genai_gen_answer(model, tokenizer, question, max_new_tokens, skip_question):
+     out = model.generate(question, max_new_tokens=max_new_tokens)
+     return out
+
+
def main():
    args = parse_args()
    check_args(args)
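`genai_gen_answer` deliberately leaves `tokenizer` and `skip_question` unused: the GenAI `LLMPipeline.generate` consumes the prompt string directly, but the function still has to match the `gen_answer_fn` callback signature that `Evaluator` accepts below. Roughly how such a callback would be consumed — the evaluator internals shown here are an assumption, not part of this patch:

```python
def default_gen_answer(model, tokenizer, question, max_new_tokens, skip_question):
    # Hypothetical HF-style default: tokenize, generate, decode.
    inputs = tokenizer(question, return_tensors="pt")
    out = model.generate(**inputs, max_new_tokens=max_new_tokens)
    text = tokenizer.decode(out[0], skip_special_tokens=True)
    return text[len(question):] if skip_question else text


def collect_answers(model, tokenizer, questions, max_new_tokens, gen_answer_fn=None):
    # Fall back to the default generation path when no callback is supplied.
    gen_answer_fn = gen_answer_fn or default_gen_answer
    return [gen_answer_fn(model, tokenizer, q, max_new_tokens, True) for q in questions]
```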
@@ -228,24 +279,25 @@ def main():
            language=args.language,
        )
    else:
-         base_model = load_model(args.base_model, args.device, args.ov_config)
+         base_model = load_model(args.base_model, args.device, args.ov_config, args.hf, args.genai)
        evaluator = Evaluator(
            base_model=base_model,
            test_data=prompts,
            tokenizer=tokenizer,
            similarity_model_id=args.text_encoder,
            num_samples=args.num_samples,
            language=args.language,
+             gen_answer_fn=genai_gen_answer if args.genai else None,
        )
        if args.gt_data:
            evaluator.dump_gt(args.gt_data)
        del base_model

    if args.target_model:
-         target_model = load_model(args.target_model, args.device, args.ov_config)
-         all_metrics_per_question, all_metrics = evaluator.score(target_model)
-         print("Metrics for model: ", args.target_model)
-         print(all_metrics)
+         target_model = load_model(args.target_model, args.device, args.ov_config, args.hf, args.genai)
+         all_metrics_per_question, all_metrics = evaluator.score(target_model, genai_gen_answer if args.genai else None)
+         logger.info("Metrics for model: %s", args.target_model)
+         logger.info(all_metrics)

    if args.output:
        if not os.path.exists(args.output):
@@ -269,11 +321,11 @@ def main():
            actual_text += l2 + "\n"
            diff += diff_strings(l1, l2) + "\n"

-         print("--------------------------------------------------------------------------------------")
-         print("## Reference text {}:\n".format(i + 1), ref_text)
-         print("## Actual text {}:\n".format(i + 1), actual_text)
-         print("## Diff {}: ".format(i + 1))
-         print(diff)
+         logger.info("--------------------------------------------------------------------------------------")
+         logger.info("## Reference text %d:\n%s", i + 1, ref_text)
+         logger.info("## Actual text %d:\n%s", i + 1, actual_text)
+         logger.info("## Diff %d: ", i + 1)
+         logger.info(diff)


if __name__ == "__main__":
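Taken together, `load_model` now dispatches across three backends: `use_hf` is checked first (so it wins if both flags are somehow set), `use_genai` second, and otherwise the function falls through to what is presumably the pre-existing `OVModelForCausalLM` path, whose import at the top of the file is unchanged. Hypothetical calls, with placeholder model paths:

```python
# HF Transformers baseline (runs the original PyTorch model):
base = load_model("some/hf-model-id", device="CPU", use_hf=True)

# OpenVINO GenAI pipeline over an exported model directory:
target = load_model("./exported-model", device="CPU", use_genai=True)

# Neither flag set: the original optimum-intel loading path.
target = load_model("./exported-model", device="CPU")
```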