image_generation/lcm_dreamshaper_v7/cpp/README.md (+2 -7)

@@ -37,13 +37,8 @@ conda env config vars set LD_LIBRARY_PATH=$CONDA_PREFIX/lib:$LD_LIBRARY_PATH
 ```
+2. Download the model from Hugging Face and convert it to OpenVINO IR via [optimum-intel CLI](https://github.com/huggingface/optimum-intel). Example command for downloading and exporting FP16 model:
-# Converting tokenizer manually (`--convert-tokenizer` flag of `optimum-cli` results in "OpenVINO Tokenizer export for CLIPTokenizer is not supported.")
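The hunk is truncated before the command that the new step refers to. A plausible shape for it, assuming optimum-intel's documented `export openvino` subcommand and the model id this sample is named after, would be the sketch below; the flags and output path are illustrative assumptions, not text from this PR:

```sh
# Hedged sketch: download LCM Dreamshaper v7 from Hugging Face and export it
# as FP16 OpenVINO IR. The output directory is an arbitrary placeholder.
optimum-cli export openvino --model SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 models/lcm_dreamshaper_v7/FP16
```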
text_generation/causal_lm/cpp/README.md (+2 -4)
@@ -1,6 +1,6 @@
 # Text generation C++ samples that support most popular models like LLaMA 2

-These examples showcase inference of text-generation Large Language Models (LLMs): `chatglm`, `LLaMA`, `Qwen` and other models with the same signature. The applications don't have many configuration options to encourage the reader to explore and modify the source code. Loading `openvino_tokenizers` to `ov::Core` enables tokenization. Run `convert_tokenizer` to generate IRs for the samples. [group_beam_searcher.hpp](group_beam_searcher.hpp) implements the algorithm of the same name, which is used by `beam_search_causal_lm`. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/254-llm-chatbot) which provides an example of LLM-powered Chatbot in Python.
+These examples showcase inference of text-generation Large Language Models (LLMs): `chatglm`, `LLaMA`, `Qwen` and other models with the same signature. The applications don't have many configuration options to encourage the reader to explore and modify the source code. Loading `openvino_tokenizers` to `ov::Core` enables tokenization. Run `optimum-cli` to generate IRs for the samples. [group_beam_searcher.hpp](group_beam_searcher.hpp) implements the algorithm of the same name, which is used by `beam_search_causal_lm`. There is also a Jupyter [notebook](https://github.com/openvinotoolkit/openvino_notebooks/tree/main/notebooks/254-llm-chatbot) which provides an example of LLM-powered Chatbot in Python.

 ## How it works
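For context (not part of the diff): the `optimum-cli` invocation the new wording points to would look roughly like the sketch below. The model id is an illustrative assumption based on optimum-intel's documented CLI, not copied from this PR:

```sh
# Hedged sketch: export a causal LM (model id is an illustrative choice) to
# OpenVINO IR. With openvino-tokenizers installed, optimum-cli typically also
# emits the tokenizer/detokenizer IRs that `ov::Core` loads for tokenization.
optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
```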
@@ -49,7 +49,7 @@ This approach reduces the need for multiple infer requests to the main model, en

 ## Install OpenVINO

-Install [OpenVINO Archives >= 2024.0](docs.openvino.ai/install). `master` and possibly the latest `releases/*` branch correspond to not yet released OpenVINO versions. https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/ can be used for these branches early testing. `<INSTALL_DIR>` below refers to the extraction location.
+Install [OpenVINO Archives >= 2024.1](docs.openvino.ai/install). `master` and possibly the latest `releases/*` branch correspond to not yet released OpenVINO versions. https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/ can be used for early testing of these branches. `<INSTALL_DIR>` below refers to the extraction location.

 ## Build `greedy_causal_lm`, `beam_search_causal_lm` and `openvino_tokenizers`
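The diff view ends at this heading, so the build commands themselves are not shown. For orientation, OpenVINO archive-based sample builds typically follow the pattern sketched here; this is an assumption about the surrounding README, with `<INSTALL_DIR>` kept as a placeholder exactly as the text above uses it:

```sh
# Hedged sketch: activate the environment from the extracted OpenVINO archive,
# then configure and build the samples with CMake.
source <INSTALL_DIR>/setupvars.sh
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
cmake --build ./build/ --config Release -j
```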