Skip to content

Commit 09b067f

Browse files
committed
Add --convert-tokenizer Option to CLI
1 parent 8c029e0 commit 09b067f

File tree

3 files changed

+11
-4
lines changed

optimum/commands/export/openvino.py

+6
Original file line number | Diff line number | Diff line change
@@ -103,6 +103,11 @@ def parse_args_openvino(parser: "ArgumentParser"):
103103
"OpenVINO native inference code that expects kv-cache inputs and outputs in the model."
104104
),
105105
)
106+
optional_group.add_argument(
107+
"--convert-tokenizer",
108+
action="store_true",
109+
help="Add converted tokenizer and detokenizer with OpenVINO Tokenizers",
110+
)
106111

107112

108113
class OVExportCommand(BaseOptimumCLICommand):
@@ -151,5 +156,6 @@ def run(self):
151156
compression_option=self.args.weight_format,
152157
compression_ratio=self.args.ratio,
153158
stateful=not self.args.disable_stateful,
159+
convert_tokenizer=self.args.convert_tokenizer,
154160
# **input_shapes,
155161
)

optimum/exporters/openvino/__main__.py

+4 -3
Original file line number | Diff line number | Diff line change
@@ -71,6 +71,7 @@ def main_export(
7171
compression_option: Optional[str] = None,
7272
compression_ratio: Optional[float] = None,
7373
stateful: bool = True,
74+
convert_tokenizer: bool = False,
7475
**kwargs_shapes,
7576
):
7677
"""
@@ -344,7 +345,7 @@ class StoreAttr(object):
344345
generation_config.save_pretrained(output)
345346
maybe_save_preprocessors(model_name_or_path, output)
346347

347-
if tokenizer is not None and is_openvino_tokenizers_available():
348+
if convert_tokenizer and tokenizer is not None and is_openvino_tokenizers_available():
348349
try:
349350
export_tokenizer(tokenizer, output)
350351
except Exception as exception:
@@ -380,12 +381,12 @@ class StoreAttr(object):
380381
feature_extractor.save_pretrained(output.joinpath("feature_extractor"))
381382

382383
tokenizer = getattr(model, "tokenizer", None)
383-
if tokenizer is not None and is_openvino_tokenizers_available():
384+
if convert_tokenizer and tokenizer is not None and is_openvino_tokenizers_available():
384385
tokenizer.save_pretrained(output.joinpath("tokenizer"))
385386
export_tokenizer(tokenizer, output)
386387

387388
tokenizer_2 = getattr(model, "tokenizer_2", None)
388-
if tokenizer_2 is not None and is_openvino_tokenizers_available():
389+
if convert_tokenizer and tokenizer_2 is not None and is_openvino_tokenizers_available():
389390
tokenizer_2.save_pretrained(output.joinpath("tokenizer_2"))
390391
export_tokenizer(tokenizer, output, suffix="_2")
391392

tests/openvino/test_exporters_cli.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -122,7 +122,7 @@ def test_exporters_cli(self, task: str, model_type: str):
122122
def test_exporters_cli_tokenizers(self, task: str, model_type: str):
123123
with TemporaryDirectory() as tmpdir:
124124
output = subprocess.check_output(
125-
f"optimum-cli export openvino --model {MODEL_NAMES[model_type]} --task {task} {tmpdir}",
125+
f"optimum-cli export openvino --model {MODEL_NAMES[model_type]} --convert-tokenizer --task {task} {tmpdir}",
126126
shell=True,
127127
stderr=subprocess.STDOUT,
128128
).decode()

0 commit comments

Comments
 (0)