From feba5bf2258481936348c0f86071e83fb2ff67f2 Mon Sep 17 00:00:00 2001
From: Sergey Lyalin <sergey.lyalin@intel.com>
Date: Fri, 5 Jan 2024 13:44:09 +0000
Subject: [PATCH 1/2] Convert tokenizers with openvino_tokenizers

---
 optimum/exporters/openvino/__main__.py | 27 ++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py
index 54fe1193e5..cc35620357 100644
--- a/optimum/exporters/openvino/__main__.py
+++ b/optimum/exporters/openvino/__main__.py
@@ -19,6 +19,7 @@
 
 from requests.exceptions import ConnectionError as RequestsConnectionError
 from transformers import AutoConfig, AutoTokenizer
+from openvino import save_model
 
 from optimum.exporters import TasksManager
 from optimum.exporters.onnx import __main__ as optimum_main
@@ -46,6 +47,24 @@
 logger = logging.getLogger(__name__)
 
 
+def tokenizer_export(tokenizer, output: Union[str, Path], suffix: Optional[str] = ""):
+    """Convert `tokenizer` with openvino_tokenizers and save both resulting models into directory `output`.
+
+    Writes openvino_tokenizer<suffix>.xml and openvino_detokenizer<suffix>.xml; failures are logged, not raised.
+    """
+    try:
+        from openvino_tokenizers import convert_tokenizer
+        # Imported lazily inside the try so a missing openvino_tokenizers package only skips this step.
+        ov_tokenizer, ov_detokenizer = convert_tokenizer(tokenizer, with_detokenizer=True)
+        output, suffix = Path(output), suffix or ""  # accept any path-like output and a None suffix
+        save_model(ov_tokenizer, output.joinpath("openvino_tokenizer" + suffix + ".xml"))
+        save_model(ov_detokenizer, output.joinpath("openvino_detokenizer" + suffix + ".xml"))
+    except Exception as exception:
+        logger.warning(
+            "OpenVINO tokenizer/detokenizer models couldn't be exported because of exception: %s", exception
+        )
+
+
 def main_export(
     model_name_or_path: str,
     output: Union[str, Path],
@@ -328,6 +347,12 @@ class StoreAttr(object):
         if generation_config is not None:
             generation_config.save_pretrained(output)
         maybe_save_preprocessors(model_name_or_path, output)
+        try:
+            # Avoid loding it for the second time if loaded before
+            tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
+            tokenizer_export(tokenizer, output)
+        except:
+            print("[ WARNING ] Could not load tokenizer using specified model ID or path. OpenVINO tokenizer/detokenizer models won't be generated.")
 
         if model.config.is_encoder_decoder and task.startswith("text-generation"):
             raise ValueError(
@@ -358,10 +383,12 @@ class StoreAttr(object):
         tokenizer = getattr(model, "tokenizer", None)
         if tokenizer is not None:
             tokenizer.save_pretrained(output.joinpath("tokenizer"))
+            tokenizer_export(tokenizer, output)
 
         tokenizer_2 = getattr(model, "tokenizer_2", None)
         if tokenizer_2 is not None:
             tokenizer_2.save_pretrained(output.joinpath("tokenizer_2"))
+            tokenizer_export(tokenizer_2, output, "_2")  # "_2" suffix keeps file names distinct from the first tokenizer's
 
         model.save_config(output)
 

From 4e7bfa94659468e1d1d3ff3348bb8b544c27b548 Mon Sep 17 00:00:00 2001
From: Sergey Lyalin <sergey.lyalin@intel.com>
Date: Fri, 5 Jan 2024 19:22:21 +0400
Subject: [PATCH 2/2] Fix typo and mark tokenizer reload comment as TODO

---
 optimum/exporters/openvino/__main__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/optimum/exporters/openvino/__main__.py b/optimum/exporters/openvino/__main__.py
index cc35620357..3c594679dd 100644
--- a/optimum/exporters/openvino/__main__.py
+++ b/optimum/exporters/openvino/__main__.py
@@ -348,7 +348,7 @@ class StoreAttr(object):
             generation_config.save_pretrained(output)
         maybe_save_preprocessors(model_name_or_path, output)
         try:
-            # Avoid loding it for the second time if loaded before
+            # TODO: Avoid loading the tokenizer again if loaded before
             tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
             tokenizer_export(tokenizer, output)
         except: