Infer if the model needs to be exported #825

Merged · 14 commits · Sep 5, 2024
2 changes: 1 addition & 1 deletion docs/source/openvino/reference.mdx
@@ -19,7 +19,7 @@ limitations under the License.
## Generic model classes

[[autodoc]] openvino.modeling_base.OVBaseModel
- _from_pretrained
- from_pretrained
- reshape

## Natural Language Processing
10 changes: 3 additions & 7 deletions docs/source/openvino/tutorials/diffusers.mdx
@@ -50,18 +50,14 @@ To further speed up inference, the model can be statically reshaped:

```python
# Define the shapes related to the inputs and desired outputs
batch_size = 1
num_images_per_prompt = 1
height = 512
width = 512

batch_size, num_images, height, width = 1, 1, 512, 512
# Statically reshape the model
pipeline.reshape(batch_size=batch_size, height=height, width=width, num_images_per_prompt=num_images_per_prompt)
pipeline.reshape(batch_size=batch_size, height=height, width=width, num_images_per_prompt=num_images)
# Compile the model before the first inference
pipeline.compile()

# Run inference
images = pipeline(prompt, height=height, width=width, num_images_per_prompt=num_images_per_prompt).images
images = pipeline(prompt, height=height, width=width, num_images_per_prompt=num_images).images
```

If you later want to change any of these parameters, such as the output height or width, you'll need to statically reshape your model once again.
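As an illustration (not part of the diff above), moving the already-compiled pipeline to a new 768×768 output repeats the same two steps; `batch_size`, `num_images`, and `prompt` are the variables defined earlier in the tutorial, and the resolution is an arbitrary choice:

```python
# Pick a new output resolution (768x768 is an arbitrary example)
height, width = 768, 768

# Reshape, then recompile before running inference again
pipeline.reshape(batch_size=batch_size, height=height, width=width, num_images_per_prompt=num_images)
pipeline.compile()

images = pipeline(prompt, height=height, width=width, num_images_per_prompt=num_images).images
```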
21 changes: 0 additions & 21 deletions optimum/intel/openvino/modeling.py
@@ -14,7 +14,6 @@

import logging
import os
import warnings
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Dict, Optional, Union
@@ -417,7 +416,6 @@ def _from_transformers(
cls,
model_id: str,
config: PretrainedConfig,
use_auth_token: Optional[Union[bool, str]] = None,
token: Optional[Union[bool, str]] = None,
revision: Optional[str] = None,
force_download: bool = False,
@@ -430,15 +428,6 @@ def _from_transformers(
quantization_config: Union[OVWeightQuantizationConfig, Dict] = None,
**kwargs,
):
if use_auth_token is not None:
warnings.warn(
"The `use_auth_token` argument is deprecated and will be removed soon. Please use the `token` argument instead.",
FutureWarning,
)
if token is not None:
raise ValueError("You cannot use both `use_auth_token` and `token` arguments at the same time.")
token = use_auth_token

save_dir = TemporaryDirectory()
save_dir_path = Path(save_dir.name)
# This attribute is needed to keep one reference on the temporary directory, since garbage collecting
@@ -591,7 +580,6 @@ def from_pretrained(
model_id: Union[str, Path],
export: bool = False,
config: Optional["PretrainedConfig"] = None,
use_auth_token: Optional[Union[bool, str]] = None,
token: Optional[Union[bool, str]] = None,
revision: Optional[str] = None,
force_download: bool = False,
@@ -602,15 +590,6 @@
trust_remote_code: bool = False,
**kwargs,
):
if use_auth_token is not None:
warnings.warn(
"The `use_auth_token` argument is deprecated and will be removed soon. Please use the `token` argument instead.",
FutureWarning,
)
if token is not None:
raise ValueError("You cannot use both `use_auth_token` and `token` arguments at the same time.")
token = use_auth_token

# Fix the mismatch between timm_config and huggingface_config
local_timm_model = _is_timm_ov_dir(model_id)
if local_timm_model or (not os.path.isdir(model_id) and model_info(model_id).library_name == "timm"):
129 changes: 84 additions & 45 deletions optimum/intel/openvino/modeling_base.py
@@ -28,12 +28,14 @@
from transformers import GenerationConfig, PretrainedConfig
from transformers.file_utils import add_start_docstrings
from transformers.generation import GenerationMixin
from transformers.utils import is_offline_mode

from optimum.exporters.onnx import OnnxConfig
from optimum.modeling_base import OptimizedModel
from optimum.modeling_base import FROM_PRETRAINED_START_DOCSTRING, OptimizedModel

from ...exporters.openvino import export, main_export
from ..utils.import_utils import is_nncf_available
from ..utils.modeling_utils import _find_files_matching_pattern
from .configuration import OVConfig, OVDynamicQuantizationConfig, OVWeightQuantizationConfig
from .utils import ONNX_WEIGHTS_NAME, OV_TO_PT_TYPE, OV_XML_FILE_NAME, _print_compiled_model_properties

@@ -220,7 +222,6 @@ def _from_pretrained(
cls,
model_id: Union[str, Path],
config: PretrainedConfig,
use_auth_token: Optional[Union[bool, str]] = None,
token: Optional[Union[bool, str]] = None,
revision: Optional[str] = None,
force_download: bool = False,
@@ -242,8 +243,6 @@
Can be either:
- The model id of a pretrained model hosted inside a model repo on huggingface.co.
- The path to a directory containing the model weights.
use_auth_token (Optional[Union[bool, str]], defaults to `None`):
Deprecated. Please use `token` instead.
token (Optional[Union[bool, str]], defaults to `None`):
The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
when running `huggingface-cli login` (stored in `~/.huggingface`).
@@ -263,15 +262,6 @@
load_in_8bit (`bool`, *optional*, defaults to `False`):
Whether or not to apply 8-bit weight quantization.
"""
if use_auth_token is not None:
warnings.warn(
"The `use_auth_token` argument is deprecated and will be removed soon. Please use the `token` argument instead.",
FutureWarning,
)
if token is not None:
raise ValueError("You cannot use both `use_auth_token` and `token` arguments at the same time.")
token = use_auth_token

model_path = Path(model_id)
default_file_name = ONNX_WEIGHTS_NAME if from_onnx else OV_XML_FILE_NAME
file_name = file_name or default_file_name
@@ -312,6 +302,87 @@
**kwargs,
)

@classmethod
@add_start_docstrings(FROM_PRETRAINED_START_DOCSTRING)
def from_pretrained(
cls,
model_id: Union[str, Path],
export: bool = False,
force_download: bool = False,
use_auth_token: Optional[Union[bool, str]] = None,
token: Optional[Union[bool, str]] = None,
cache_dir: str = HUGGINGFACE_HUB_CACHE,
subfolder: str = "",
config: Optional[PretrainedConfig] = None,
local_files_only: bool = False,
trust_remote_code: bool = False,
revision: Optional[str] = None,
**kwargs,
):
if use_auth_token is not None:
warnings.warn(
"The `use_auth_token` argument is deprecated and will be removed soon. Please use the `token` argument instead.",
FutureWarning,
)
if token is not None:
raise ValueError("You cannot use both `use_auth_token` and `token` arguments at the same time.")
token = use_auth_token

if is_offline_mode() and not local_files_only:
logger.info("Offline mode: forcing local_files_only=True")
local_files_only = True

_export = export
try:
if local_files_only:
object_id = model_id.replace("/", "--")
cached_model_dir = os.path.join(cache_dir, f"models--{object_id}")
refs_file = os.path.join(os.path.join(cached_model_dir, "refs"), revision or "main")
with open(refs_file) as f:
revision = f.read()
model_dir = os.path.join(cached_model_dir, "snapshots", revision)
else:
model_dir = model_id

ov_files = _find_files_matching_pattern(
model_dir,
pattern=r"(.*)?openvino(.*)?\_model.xml",
subfolder=subfolder,
use_auth_token=token,
revision=revision,
)
_export = len(ov_files) == 0
if _export ^ export:
if export:
logger.warning(
f"The model {model_id} was already converted to the OpenVINO IR but got `export=True`, the model will be converted to OpenVINO once again. "
"Don't forget to save the resulting model with `.save_pretrained()`"
)
_export = True
else:
logger.warning(
f"No OpenVINO files were found for {model_id}, setting `export=True` to convert the model to the OpenVINO IR. "
"Don't forget to save the resulting model with `.save_pretrained()`"
)
except Exception as exception:
logger.warning(
f"Could not infer whether the model was already converted or not to the OpenVINO IR, keeping `export={export}`.\n{exception}"
)

return super().from_pretrained(
model_id,
export=_export,
force_download=force_download,
token=token,
cache_dir=cache_dir,
subfolder=subfolder,
config=config,
local_files_only=local_files_only,
trust_remote_code=trust_remote_code,
revision=revision,
**kwargs,
)
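The `local_files_only` branch above resolves the snapshot directory by hand, relying on the Hugging Face Hub cache layout, where `models--<org>--<name>/refs/<revision>` stores the snapshot commit hash. A standalone sketch of the same lookup (the model id is illustrative, not part of the diff):

```python
import os

from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE

model_id = "gpt2"  # illustrative
object_id = model_id.replace("/", "--")
cached_model_dir = os.path.join(HUGGINGFACE_HUB_CACHE, f"models--{object_id}")

# The refs file maps a symbolic revision ("main") to a snapshot commit hash...
refs_file = os.path.join(cached_model_dir, "refs", "main")
with open(refs_file) as f:
    commit_hash = f.read()

# ...and the snapshot directory under that hash holds the cached model files,
# which is where from_pretrained then looks for OpenVINO IR files.
model_dir = os.path.join(cached_model_dir, "snapshots", commit_hash)
print(model_dir)
```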

@staticmethod
def _prepare_weight_quantization_config(
quantization_config: Optional[Union[OVWeightQuantizationConfig, Dict]] = None, load_in_8bit: bool = False
@@ -337,7 +408,6 @@ def _set_ov_config_parameters(self):
@staticmethod
def _cached_file(
model_path: Union[Path, str],
use_auth_token: Optional[Union[bool, str]] = None,
token: Optional[Union[bool, str]] = None,
revision: Optional[str] = None,
force_download: bool = False,
@@ -346,15 +416,6 @@ def _cached_file(
subfolder: str = "",
local_files_only: bool = False,
):
if use_auth_token is not None:
warnings.warn(
"The `use_auth_token` argument is deprecated and will be removed soon. Please use the `token` argument instead.",
FutureWarning,
)
if token is not None:
raise ValueError("You cannot use both `use_auth_token` and `token` arguments at the same time.")
token = use_auth_token

# locates a file in a local folder and repo, downloads and cache it if necessary.
model_path = Path(model_path)
if model_path.is_dir():
@@ -385,7 +446,6 @@ def _from_transformers(
cls,
model_id: str,
config: PretrainedConfig,
use_auth_token: Optional[Union[bool, str]] = None,
token: Optional[Union[bool, str]] = None,
revision: Optional[str] = None,
force_download: bool = False,
@@ -409,8 +469,6 @@
- The path to a directory containing the model weights.
save_dir (`str` or `Path`):
The directory where the exported ONNX model should be saved, default to
`transformers.file_utils.default_cache_path`, which is the cache directory for transformers.
use_auth_token (`Optional[str]`, defaults to `None`):
Deprecated. Please use `token` instead.
token (Optional[Union[bool, str]], defaults to `None`):
The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
when running `huggingface-cli login` (stored in `~/.huggingface`).
@@ -419,15 +477,6 @@
kwargs (`Dict`, *optional*):
kwargs will be passed to the model during initialization
"""
if use_auth_token is not None:
warnings.warn(
"The `use_auth_token` argument is deprecated and will be removed soon. Please use the `token` argument instead.",
FutureWarning,
)
if token is not None:
raise ValueError("You cannot use both `use_auth_token` and `token` arguments at the same time.")
token = use_auth_token

save_dir = TemporaryDirectory()
save_dir_path = Path(save_dir.name)
# This attribute is needed to keep one reference on the temporary directory, since garbage collecting
@@ -469,7 +518,6 @@ def _to_load(
model,
config: PretrainedConfig,
onnx_config: OnnxConfig,
use_auth_token: Optional[Union[bool, str]] = None,
token: Optional[Union[bool, str]] = None,
revision: Optional[str] = None,
force_download: bool = False,
@@ -478,15 +526,6 @@
stateful: bool = False,
**kwargs,
):
if use_auth_token is not None:
warnings.warn(
"The `use_auth_token` argument is deprecated and will be removed soon. Please use the `token` argument instead.",
FutureWarning,
)
if token is not None:
raise ValueError("You cannot use both `use_auth_token` and `token` arguments at the same time.")
token = use_auth_token

save_dir = TemporaryDirectory()
save_dir_path = Path(save_dir.name)

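Taken together, the new `OVBaseModel.from_pretrained` means callers no longer need to pass `export=True` by hand when loading a checkpoint that has not been converted yet: if no `openvino_model.xml` is found, the export is inferred. A minimal usage sketch (model ids and the `OVModelForCausalLM` subclass are illustrative):

```python
from optimum.intel import OVModelForCausalLM

# No openvino_model.xml in this repo: `export` is inferred as True and the
# checkpoint is converted to the OpenVINO IR on the fly.
model = OVModelForCausalLM.from_pretrained("gpt2")

# Persist the converted IR so the export step is skipped next time.
model.save_pretrained("gpt2-openvino")

# This directory already contains the IR: `export` stays False and the
# model is loaded directly.
model = OVModelForCausalLM.from_pretrained("gpt2-openvino")
```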
25 changes: 0 additions & 25 deletions optimum/intel/openvino/modeling_base_seq2seq.py
@@ -14,7 +14,6 @@

import logging
import os
import warnings
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Dict, Optional, Union
@@ -120,7 +119,6 @@ def _from_pretrained(
cls,
model_id: Union[str, Path],
config: PretrainedConfig,
use_auth_token: Optional[Union[bool, str]] = None,
token: Optional[Union[bool, str]] = None,
revision: Optional[str] = None,
force_download: bool = False,
@@ -144,8 +142,6 @@
Can be either:
- The model id of a pretrained model hosted inside a model repo on huggingface.co.
- The path to a directory containing the model weights.
use_auth_token (Optional[Union[bool, str]], defaults to `None`):
Deprecated. Please use `token` instead.
token (Optional[Union[bool, str]], defaults to `None`):
The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
when running `huggingface-cli login` (stored in `~/.huggingface`).
@@ -169,15 +165,6 @@
local_files_only(`bool`, *optional*, defaults to `False`):
Whether or not to only look at local files (i.e., do not try to download the model).
"""
if use_auth_token is not None:
warnings.warn(
"The `use_auth_token` argument is deprecated and will be removed soon. Please use the `token` argument instead.",
FutureWarning,
)
if token is not None:
raise ValueError("You cannot use both `use_auth_token` and `token` arguments at the same time.")
token = use_auth_token

default_encoder_file_name = ONNX_ENCODER_NAME if from_onnx else OV_ENCODER_NAME
default_decoder_file_name = ONNX_DECODER_NAME if from_onnx else OV_DECODER_NAME
default_decoder_with_past_file_name = ONNX_DECODER_WITH_PAST_NAME if from_onnx else OV_DECODER_WITH_PAST_NAME
@@ -256,7 +243,6 @@ def _from_transformers(
cls,
model_id: str,
config: PretrainedConfig,
use_auth_token: Optional[Union[bool, str]] = None,
token: Optional[Union[bool, str]] = None,
revision: Optional[str] = None,
force_download: bool = False,
@@ -282,8 +268,6 @@
save_dir (`str` or `Path`):
The directory where the exported ONNX model should be saved, defaults to
`transformers.file_utils.default_cache_path`, which is the cache directory for transformers.
use_auth_token (`Optional[str]`, defaults to `None`):
Deprecated. Please use `token` instead.
token (Optional[Union[bool, str]], defaults to `None`):
The token to use as HTTP bearer authorization for remote files. If `True`, will use the token generated
when running `huggingface-cli login` (stored in `~/.huggingface`).
@@ -292,15 +276,6 @@
kwargs (`Dict`, *optional*):
kwargs will be passed to the model during initialization
"""
if use_auth_token is not None:
warnings.warn(
"The `use_auth_token` argument is deprecated and will be removed soon. Please use the `token` argument instead.",
FutureWarning,
)
if token is not None:
raise ValueError("You cannot use both `use_auth_token` and `token` arguments at the same time.")
token = use_auth_token

save_dir = TemporaryDirectory()
save_dir_path = Path(save_dir.name)

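Every block removed across these files is the same `use_auth_token` deprecation shim, so the net effect is that callers must pass `token` directly. A hedged before/after sketch (`OVModelForSeq2SeqLM` and the model id are illustrative):

```python
from optimum.intel import OVModelForSeq2SeqLM

# Before: deprecated keyword, previously remapped to `token` with a FutureWarning.
# model = OVModelForSeq2SeqLM.from_pretrained("org/private-model", use_auth_token=True)

# After: pass `token` directly. `token=True` reuses the token saved by
# `huggingface-cli login`; a token string can also be passed explicitly.
model = OVModelForSeq2SeqLM.from_pretrained("org/private-model", token=True)
```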