@@ -336,8 +336,8 @@ This report is autogenerated and includes tokenizers and detokenizers tests. The
 <tbody >
 <tr>
 <td >BPE</td>
- <td >96.57 </td>
- <td >4991 </td>
+ <td >94.45 </td>
+ <td >5535 </td>
 </tr>
 <tr>
 <td >SentencePiece</td>
@@ -346,13 +346,13 @@ This report is autogenerated and includes tokenizers and detokenizers tests. The
 </tr>
 <tr>
 <td >Tiktoken</td>
- <td >98.17 </td>
- <td >218 </td>
+ <td >93.98 </td>
+ <td >266 </td>
 </tr>
 <tr>
 <td >WordPiece</td>
- <td >94.97 </td>
- <td >1053 </td>
+ <td >91.31 </td>
+ <td >1301 </td>
 </tr>
 </tbody >
 </table >
@@ -372,140 +372,140 @@ This report is autogenerated and includes tokenizers and detokenizers tests. The
 <tr>
 <td >BPE</td>
 <td >EleutherAI/gpt-j-6b</td>
- <td >98.16 </td>
- <td >217 </td>
+ <td >95.18 </td>
+ <td >249 </td>
 </tr>
 <tr>
 <td >BPE</td>
 <td >EleutherAI/gpt-neo-125m</td>
- <td >98.16 </td>
- <td >217 </td>
+ <td >95.18 </td>
+ <td >249 </td>
 </tr>
 <tr>
 <td >BPE</td>
 <td >EleutherAI/gpt-neox-20b</td>
- <td >97.24 </td>
- <td >217 </td>
+ <td >95.71 </td>
+ <td >233 </td>
 </tr>
 <tr>
 <td >BPE</td>
 <td >EleutherAI/pythia-12b-deduped</td>
- <td >97.24 </td>
- <td >217 </td>
+ <td >95.71 </td>
+ <td >233 </td>
 </tr>
 <tr>
 <td >BPE</td>
 <td >KoboldAI/fairseq-dense-13B</td>
- <td >98.16 </td>
- <td >217 </td>
+ <td >96.57 </td>
+ <td >233 </td>
 </tr>
 <tr>
 <td >BPE</td>
 <td >NousResearch/Meta-Llama-3-8B-Instruct</td>
- <td >97.24 </td>
- <td >217 </td>
+ <td >95.71 </td>
+ <td >233 </td>
 </tr>
 <tr>
 <td >BPE</td>
 <td >Salesforce/codegen-16B-multi</td>
- <td >99.08 </td>
- <td >217 </td>
+ <td >95.98 </td>
+ <td >249 </td>
 </tr>
 <tr>
 <td >BPE</td>
 <td >Xenova/gpt-4o</td>
- <td >97.24 </td>
- <td >217 </td>
+ <td >94.38 </td>
+ <td >249 </td>
 </tr>
 <tr>
 <td >BPE</td>
 <td >ai-forever/rugpt3large_based_on_gpt2</td>
- <td >96.31 </td>
- <td >217 </td>
+ <td >90.36 </td>
+ <td >249 </td>
 </tr>
 <tr>
 <td >BPE</td>
 <td >bigscience/bloom</td>
- <td >99.08 </td>
- <td >217 </td>
+ <td >97.42 </td>
+ <td >233 </td>
 </tr>
 <tr>
 <td >BPE</td>
 <td >databricks/dolly-v2-3b</td>
- <td >97.24 </td>
- <td >217 </td>
+ <td >95.71 </td>
+ <td >233 </td>
 </tr>
 <tr>
 <td >BPE</td>
 <td >facebook/bart-large-mnli</td>
- <td >98.16 </td>
- <td >217 </td>
+ <td >95.18 </td>
+ <td >249 </td>
 </tr>
 <tr>
 <td >BPE</td>
 <td >facebook/galactica-120b</td>
- <td >97.24 </td>
- <td >217 </td>
+ <td >95.71 </td>
+ <td >233 </td>
 </tr>
 <tr>
 <td >BPE</td>
 <td >facebook/opt-66b</td>
- <td >98.16 </td>
- <td >217 </td>
+ <td >96.57 </td>
+ <td >233 </td>
 </tr>
 <tr>
 <td >BPE</td>
 <td >gpt2</td>
- <td >98.16 </td>
- <td >217 </td>
+ <td >95.18 </td>
+ <td >249 </td>
 </tr>
 <tr>
 <td >BPE</td>
 <td >laion/CLIP-ViT-bigG-14-laion2B-39B-b160k</td>
- <td >70.97 </td>
- <td >217 </td>
+ <td >74.70 </td>
+ <td >249 </td>
 </tr>
 <tr>
 <td >BPE</td>
 <td >microsoft/deberta-base</td>
- <td >98.16 </td>
- <td >217 </td>
+ <td >96.57 </td>
+ <td >233 </td>
 </tr>
 <tr>
 <td >BPE</td>
 <td >roberta-base</td>
- <td >98.16 </td>
- <td >217 </td>
+ <td >95.18 </td>
+ <td >249 </td>
 </tr>
 <tr>
 <td >BPE</td>
 <td >sentence-transformers/all-roberta-large-v1</td>
- <td >98.16 </td>
- <td >217 </td>
+ <td >95.18 </td>
+ <td >249 </td>
 </tr>
 <tr>
 <td >BPE</td>
 <td >stabilityai/stablecode-completion-alpha-3b-4k</td>
- <td >97.24 </td>
- <td >217 </td>
+ <td >95.71 </td>
+ <td >233 </td>
 </tr>
 <tr>
 <td >BPE</td>
 <td >stabilityai/stablelm-2-1_6b</td>
- <td >97.24 </td>
- <td >217 </td>
+ <td >95.71 </td>
+ <td >233 </td>
 </tr>
 <tr>
 <td >BPE</td>
 <td >stabilityai/stablelm-tuned-alpha-7b</td>
- <td >97.24 </td>
- <td >217 </td>
+ <td >95.71 </td>
+ <td >233 </td>
 </tr>
 <tr>
 <td >BPE</td>
 <td >tiiuae/falcon-7b</td>
- <td >97.24 </td>
- <td >217 </td>
+ <td >94.38 </td>
+ <td >249 </td>
 </tr>
 <tr>
 <td >SentencePiece</td>
@@ -630,92 +630,92 @@ This report is autogenerated and includes tokenizers and detokenizers tests. The
 <tr>
 <td >Tiktoken</td>
 <td >Qwen/Qwen-14B-Chat</td>
- <td >98.17 </td>
- <td >109 </td>
+ <td >92.91 </td>
+ <td >141 </td>
 </tr>
 <tr>
 <td >Tiktoken</td>
 <td >Salesforce/xgen-7b-8k-base</td>
- <td >98.17 </td>
- <td >109 </td>
+ <td >95.20 </td>
+ <td >125 </td>
 </tr>
 <tr>
 <td >WordPiece</td>
 <td >ProsusAI/finbert</td>
- <td >97.53 </td>
- <td >81 </td>
+ <td >91.43 </td>
+ <td >105 </td>
 </tr>
 <tr>
 <td >WordPiece</td>
 <td >bert-base-multilingual-cased</td>
- <td >97.53 </td>
- <td >81 </td>
+ <td >91.43 </td>
+ <td >105 </td>
 </tr>
 <tr>
 <td >WordPiece</td>
 <td >bert-base-uncased</td>
- <td >97.53 </td>
- <td >81 </td>
+ <td >91.43 </td>
+ <td >105 </td>
 </tr>
 <tr>
 <td >WordPiece</td>
 <td >cointegrated/rubert-tiny2</td>
- <td >91.36 </td>
- <td >81 </td>
+ <td >91.43 </td>
+ <td >105 </td>
 </tr>
 <tr>
 <td >WordPiece</td>
 <td >distilbert-base-uncased-finetuned-sst-2-english</td>
- <td >97.53 </td>
- <td >81 </td>
+ <td >91.43 </td>
+ <td >105 </td>
 </tr>
 <tr>
 <td >WordPiece</td>
 <td >google/electra-base-discriminator</td>
- <td >97.53 </td>
- <td >81 </td>
+ <td >91.43 </td>
+ <td >105 </td>
 </tr>
 <tr>
 <td >WordPiece</td>
 <td >google/mobilebert-uncased</td>
- <td >97.53 </td>
- <td >81 </td>
+ <td >94.38 </td>
+ <td >89 </td>
 </tr>
 <tr>
 <td >WordPiece</td>
 <td >jhgan/ko-sbert-sts</td>
- <td >87.65 </td>
- <td >81 </td>
+ <td >91.43 </td>
+ <td >105 </td>
 </tr>
 <tr>
 <td >WordPiece</td>
 <td >prajjwal1/bert-mini</td>
- <td >97.53 </td>
- <td >81 </td>
+ <td >94.38 </td>
+ <td >89 </td>
 </tr>
 <tr>
 <td >WordPiece</td>
 <td >rajiv003/ernie-finetuned-qqp</td>
- <td >97.53 </td>
- <td >81 </td>
+ <td >94.38 </td>
+ <td >89 </td>
 </tr>
 <tr>
 <td >WordPiece</td>
 <td >rasa/LaBSE</td>
- <td >90.12 </td>
- <td >81 </td>
+ <td >80.00 </td>
+ <td >105 </td>
 </tr>
 <tr>
 <td >WordPiece</td>
 <td >sentence-transformers/all-MiniLM-L6-v2</td>
- <td >87.65 </td>
- <td >81 </td>
+ <td >91.43 </td>
+ <td >105 </td>
 </tr>
 <tr>
 <td >WordPiece</td>
 <td >squeezebert/squeezebert-uncased</td>
- <td >97.53 </td>
- <td >81 </td>
+ <td >94.38 </td>
+ <td >89 </td>
 </tr>
 </tbody >
 </table >
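
For readers comparing the per-tokenizer summary against the per-model rows, a minimal sketch of how such rows aggregate, assuming the two numeric columns are the output-match percentage and the number of tests per tokenizer/model pair. The `rows` values are copied from the updated table above; the `overall_pass_rate` helper is hypothetical and not part of the report generator:

```python
# Minimal sketch, not part of the report generator: re-derive a combined
# pass rate from (percent matched, number of tests) pairs taken from the
# updated table. Percentages cannot be averaged directly, because rows
# with more tests should carry more weight in the summary.

rows = [
    ("BPE", "gpt2", 95.18, 249),
    ("BPE", "tiiuae/falcon-7b", 94.38, 249),
]

def overall_pass_rate(rows: list[tuple[str, str, float, int]]) -> float:
    # Recover absolute matched-test counts from the rounded percentages,
    # then compute a test-count-weighted rate across all rows.
    matched = sum(round(pct / 100 * n_tests) for _, _, pct, n_tests in rows)
    total = sum(n_tests for _, _, _, n_tests in rows)
    return 100 * matched / total

print(f"{overall_pass_rate(rows):.2f}")  # ~94.78 for the two rows above
```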