                     help="Pad input ids to max length.")
 parser.add_argument("--calib_iters", default=512, type=int,
                     help="calibration iters.")
-parser.add_argument("--tasks", nargs='+', default=["lambada_openai",
-                    "hellaswag", "winogrande", "piqa", "wikitext"],
+parser.add_argument("--tasks", default="lambada_openai,hellaswag,winogrande,piqa,wikitext",
                     type=str, help="tasks list for accuracy validation")
 parser.add_argument("--peft_model_id", type=str, default=None, help="model_name_or_path of peft model")
 # ============SmoothQuant configs==============
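Note on the `--tasks` change above: the argument is now a single comma-separated string instead of an `nargs='+'` list, so consumers must split it before iterating. A minimal, self-contained sketch of that behavior (the parser and values here are illustrative, not part of this patch):

import argparse

parser = argparse.ArgumentParser()
# New style: one comma-separated string rather than nargs='+'
parser.add_argument("--tasks", type=str,
                    default="lambada_openai,hellaswag,winogrande,piqa,wikitext")
args = parser.parse_args(["--tasks", "piqa,wikitext"])

# Downstream code recovers the list by splitting on ","
for task_name in args.tasks.split(","):
    print(task_name)  # prints "piqa", then "wikitext"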
@@ -390,24 +389,27 @@ def run_fn(model):
 
 if args.accuracy:
     user_model.eval()
-    from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate
-
-    results = evaluate(
-        model="hf-causal",
+    from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+    eval_args = LMEvalParser(
+        model="hf",
         model_args='pretrained=' + args.model + ',tokenizer=' + args.model + ',dtype=float32',
         user_model=user_model,
+        tokenizer=tokenizer,
         batch_size=args.batch_size,
         tasks=args.tasks,
+        device="cpu",
     )
+    results = evaluate(eval_args)
+
     dumped = json.dumps(results, indent=2)
     if args.save_accuracy_path:
         with open(args.save_accuracy_path, "w") as f:
             f.write(dumped)
-    for task_name in args.tasks:
+    for task_name in args.tasks.split(","):
         if task_name == "wikitext":
-            acc = results["results"][task_name]["word_perplexity"]
+            acc = results["results"][task_name]["word_perplexity,none"]
         else:
-            acc = results["results"][task_name]["acc"]
+            acc = results["results"][task_name]["acc,none"]
     print("Accuracy: %.5f" % acc)
     print('Batch size = %d' % args.batch_size)
 
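For context on the key changes above: newer lm-evaluation-harness releases report each metric under a "metric,filter" key (e.g. "acc,none" for the default filter), which is why the plain "acc" and "word_perplexity" lookups were replaced. A hedged sketch of reading such a results dict (the dict and numbers below are made up for illustration):

# Hypothetical results dict shaped like newer lm-eval output,
# where every metric key carries a ",<filter>" suffix such as ",none".
results = {
    "results": {
        "piqa": {"acc,none": 0.79},
        "wikitext": {"word_perplexity,none": 12.3},
    }
}

for task_name in "piqa,wikitext".split(","):
    metrics = results["results"][task_name]
    # wikitext reports perplexity; the other tasks report accuracy
    key = "word_perplexity,none" if task_name == "wikitext" else "acc,none"
    print("%s: %.5f" % (task_name, metrics[key]))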
@@ -417,21 +419,25 @@ def run_fn(model):
     import time
 
     samples = args.iters * args.batch_size
-    start = time.time()
-    results = evaluate(
-        model="hf-causal",
+    from intel_extension_for_transformers.transformers.llm.evaluation.lm_eval import evaluate, LMEvalParser
+    eval_args = LMEvalParser(
+        model="hf",
         model_args='pretrained=' + args.model + ',tokenizer=' + args.model + ',dtype=float32',
         user_model=user_model,
+        tokenizer=tokenizer,
         batch_size=args.batch_size,
         tasks=args.tasks,
         limit=samples,
+        device="cpu",
     )
+    start = time.time()
+    results = evaluate(eval_args)
     end = time.time()
-    for task_name in args.tasks:
+    for task_name in args.tasks.split(","):
         if task_name == "wikitext":
-            acc = results["results"][task_name]["word_perplexity"]
+            acc = results["results"][task_name]["word_perplexity,none"]
         else:
-            acc = results["results"][task_name]["acc"]
+            acc = results["results"][task_name]["acc,none"]
     print("Accuracy: %.5f" % acc)
     print('Throughput: %.3f samples/sec' % (samples / (end - start)))
     print('Latency: %.3f ms' % ((end - start) * 1000 / samples))
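The throughput/latency math above is plain wall-clock arithmetic over the `limit=samples` evaluated examples; a tiny standalone sketch (the sleep is a stand-in for the timed `evaluate(eval_args)` call):

import time

batch_size, iters = 8, 4
samples = iters * batch_size  # mirrors samples = args.iters * args.batch_size

start = time.time()
time.sleep(0.1)  # placeholder for the timed evaluate(eval_args) call
end = time.time()

elapsed = end - start
print('Throughput: %.3f samples/sec' % (samples / elapsed))
print('Latency: %.3f ms' % (elapsed * 1000 / samples))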