Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

resolve complicated chat templates during tokenizer saving #1151

Merged
merged 9 commits on
Feb 17, 2025
9 changes: 7 additions & 2 deletions optimum/exporters/openvino/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
from requests.exceptions import ConnectionError as RequestsConnectionError
from transformers import AutoConfig, AutoTokenizer, PreTrainedTokenizerBase
from transformers import AutoConfig, AutoTokenizer, PreTrainedTokenizerBase, ProcessorMixin
from transformers.utils import is_torch_available

from openvino.runtime import Core, Type, save_model
Expand Down Expand Up @@ -531,10 +531,15 @@ def maybe_convert_tokenizers(library_name: str, output: Path, model=None, prepro

if is_openvino_tokenizers_available():
if library_name != "diffusers" and preprocessors:
processor_chat_template = None
tokenizer = next(filter(lambda it: isinstance(it, PreTrainedTokenizerBase), preprocessors), None)
if len(preprocessors) > 1:
for processor in preprocessors:
if isinstance(processor, ProcessorMixin) and hasattr(processor, "chat_template"):
processor_chat_template = processor.chat_template
if tokenizer:
try:
export_tokenizer(tokenizer, output, task=task)
export_tokenizer(tokenizer, output, task=task, processor_chat_template=processor_chat_template)
except Exception as exception:
logger.warning(
"Could not load tokenizer using specified model ID or path. OpenVINO tokenizer/detokenizer "
Expand Down
6 changes: 5 additions & 1 deletion optimum/exporters/openvino/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@
remove_none_from_dummy_inputs,
save_config,
save_preprocessors,
set_simplified_chat_template,
)


Expand Down Expand Up @@ -825,6 +826,7 @@ def export_tokenizer(
output: Union[str, Path],
suffix: Optional[str] = "",
task: Optional[str] = None,
processor_chat_template: Optional[str] = None,
):
# avoid circular imports
from optimum.intel.openvino import OV_DETOKENIZER_NAME, OV_TOKENIZER_NAME
Expand All @@ -849,7 +851,7 @@ def export_tokenizer(

if (
task is not None
and task.startswith("text-generation")
and (task.startswith("text-generation") or task.startswith("image-text-to-text"))
and compare_versions("openvino-tokenizers", ">=", "2024.3.0.0")
):
logger.info(f"Set tokenizer padding side to left for `{task}` task.")
Expand All @@ -858,6 +860,8 @@ def export_tokenizer(

try:
converted = convert_tokenizer(tokenizer, with_detokenizer=True)
set_simplified_chat_template(converted[0], processor_chat_template)

except NotImplementedError:
logger.info("Detokenizer is not supported, convert tokenizer only.")
converted = convert_tokenizer(tokenizer, with_detokenizer=False)
Expand Down
Loading
Loading