Skip to content

Commit e7566fb

Browse files
authored
Merge branch 'main' into ea/relax_accelerate_deps
2 parents 432abf2 + 6c8fa79 commit e7566fb

37 files changed

+1341
-518
lines changed

.github/workflows/test_ipex.yml

-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ jobs:
3030
- name: Install dependencies
3131
run: |
3232
python -m pip install --upgrade pip
33-
pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cpu
3433
pip install .[ipex,tests]
3534
- name: Test with Pytest
3635
run: |

.github/workflows/test_openvino.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ jobs:
3232
python -m pip install --upgrade pip
3333
# install PyTorch CPU version to avoid installing CUDA packages on GitHub runner without GPU
3434
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
35-
pip install .[openvino,nncf,tests,diffusers]
35+
pip install .[openvino,openvino-tokenizers,nncf,tests,diffusers]
3636
- name: Test with Pytest
3737
run: |
3838
pytest tests/openvino/ --ignore test_modeling_basic

README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -78,10 +78,10 @@ It is possible to export your model to the [OpenVINO](https://docs.openvino.ai/2
7878
optimum-cli export openvino --model gpt2 ov_model
7979
```
8080

81-
If you add `--int8`, the model linear and embedding weights will be quantized to INT8, the activations will be kept in floating point precision.
81+
You can also apply 8-bit weight-only quantization when exporting your model: the model linear and embedding weights will be quantized to INT8, the activations will be kept in floating point precision.
8282

8383
```plain
84-
optimum-cli export openvino --model gpt2 --int8 ov_model
84+
optimum-cli export openvino --model gpt2 --weight-format int8 ov_model
8585
```
8686

8787
To apply quantization on both weights and activations, you can find more information in the [documentation](https://huggingface.co/docs/optimum/main/en/intel/optimization_ov).

examples/openvino/stable-diffusion/requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@ accelerate
22
diffusers
33
torch~=1.13
44
nncf @ git+https://github.com/openvinotoolkit/nncf.git
5-
tomesd @ git+https://github.com/AlexKoff88/tomesd/tree/openvino
5+
tomesd @ git+https://github.com/AlexKoff88/tomesd.git@openvino

examples/openvino/stable-diffusion/train_text_to_image_qat.py

+9-58
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
import math
2020
import os
2121
import random
22-
import tempfile
2322
from copy import deepcopy
2423
from functools import partial
2524
from io import BytesIO
@@ -34,7 +33,7 @@
3433
import torch.utils.checkpoint
3534
from accelerate import Accelerator
3635
from accelerate.logging import get_logger
37-
from accelerate.utils import set_seed
36+
from accelerate.utils import ProjectConfiguration, set_seed
3837
from datasets import load_dataset
3938
from diffusers import DDIMScheduler, DDPMScheduler, DiffusionPipeline, LMSDiscreteScheduler, StableDiffusionPipeline
4039
from diffusers.optimization import get_scheduler
@@ -44,20 +43,12 @@
4443
from nncf.torch import create_compressed_model, register_default_init_args
4544
from nncf.torch.initialization import PTInitializingDataLoader
4645
from nncf.torch.layer_utils import CompressionParameter
47-
from openvino._offline_transformations import apply_moc_transformations, compress_quantize_weights_transformation
4846
from PIL import Image
4947
from requests.packages.urllib3.exceptions import InsecureRequestWarning
5048
from torchvision import transforms
5149
from tqdm import tqdm
5250

53-
from optimum.exporters.onnx import export_models, get_stable_diffusion_models_for_export
54-
from optimum.intel import OVStableDiffusionPipeline
55-
from optimum.utils import (
56-
DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER,
57-
DIFFUSION_MODEL_UNET_SUBFOLDER,
58-
DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER,
59-
DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER,
60-
)
51+
from optimum.exporters.openvino import export_from_model
6152

6253

6354
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
@@ -583,47 +574,6 @@ def get_noise_scheduler(args):
583574
return noise_scheduler
584575

585576

586-
def export_to_onnx(pipeline, save_dir):
587-
unet = pipeline.unet
588-
vae = pipeline.vae
589-
text_encoder = pipeline.text_encoder
590-
591-
unet.eval().cpu()
592-
vae.eval().cpu()
593-
text_encoder.eval().cpu()
594-
595-
ONNX_WEIGHTS_NAME = "model.onnx"
596-
597-
output_names = [
598-
os.path.join(DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER, ONNX_WEIGHTS_NAME),
599-
os.path.join(DIFFUSION_MODEL_UNET_SUBFOLDER, ONNX_WEIGHTS_NAME),
600-
os.path.join(DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER, ONNX_WEIGHTS_NAME),
601-
os.path.join(DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER, ONNX_WEIGHTS_NAME),
602-
]
603-
604-
with torch.no_grad():
605-
models_and_onnx_configs = get_stable_diffusion_models_for_export(pipeline)
606-
pipeline.save_config(save_dir)
607-
export_models(
608-
models_and_onnx_configs=models_and_onnx_configs, output_dir=Path(save_dir), output_names=output_names
609-
)
610-
611-
612-
def export_to_openvino(pipeline, onnx_dir, save_dir):
613-
ov_pipe = OVStableDiffusionPipeline.from_pretrained(
614-
model_id=onnx_dir,
615-
from_onnx=True,
616-
model_save_dir=save_dir,
617-
tokenizer=pipeline.tokenizer,
618-
scheduler=pipeline.scheduler,
619-
feature_extractor=pipeline.feature_extractor,
620-
compile=False,
621-
)
622-
apply_moc_transformations(ov_pipe.unet.model, cf=False)
623-
compress_quantize_weights_transformation(ov_pipe.unet.model)
624-
ov_pipe.save_pretrained(save_dir)
625-
626-
627577
class UnetInitDataset(torch.utils.data.Dataset):
628578
def __init__(self, data):
629579
super().__init__()
@@ -700,7 +650,7 @@ def get_nncf_config(pipeline, dataloader, args):
700650
"ignored_scopes": [
701651
"{re}.*__add___[0-2]",
702652
"{re}.*layer_norm_0",
703-
"{re}.*Attention.*/bmm_0",
653+
# "{re}.*Attention.*/bmm_0",
704654
"{re}.*__truediv__*",
705655
"{re}.*group_norm_0",
706656
"{re}.*mul___[0-2]",
@@ -771,11 +721,13 @@ def main():
771721

772722
logging_dir = os.path.join(args.output_dir, args.logging_dir)
773723

724+
accelerator_project_config = ProjectConfiguration(project_dir=args.output_dir, logging_dir=logging_dir)
725+
774726
accelerator = Accelerator(
775727
gradient_accumulation_steps=args.gradient_accumulation_steps,
776728
mixed_precision=args.mixed_precision,
777729
log_with=args.report_to,
778-
logging_dir=logging_dir,
730+
project_config=accelerator_project_config,
779731
)
780732

781733
logging.basicConfig(
@@ -922,7 +874,7 @@ def tokenize_captions(examples, is_train=True):
922874

923875
with accelerator.main_process_first():
924876
if args.max_train_samples is not None:
925-
dataset["train"] = dataset["train"].shuffle(seed=42, buffer_size=args.max_train_samples)
877+
dataset["train"] = dataset["train"].shuffle(seed=42).select(range(args.max_train_samples))
926878
# Set the training transforms
927879
train_dataset = dataset["train"]
928880

@@ -1132,9 +1084,8 @@ def collate_fn(examples):
11321084
feature_extractor=pipeline.feature_extractor,
11331085
)
11341086

1135-
with tempfile.TemporaryDirectory() as tmpdirname:
1136-
export_to_onnx(export_pipeline, tmpdirname)
1137-
export_to_openvino(export_pipeline, tmpdirname, Path(args.output_dir) / "openvino")
1087+
save_directory = Path(args.output_dir) / "openvino"
1088+
export_from_model(export_pipeline, output=save_directory, task="stable-diffusion")
11381089

11391090

11401091
if __name__ == "__main__":

optimum/commands/export/openvino.py

+6
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,11 @@ def parse_args_openvino(parser: "ArgumentParser"):
103103
"OpenVINO native inference code that expects kv-cache inputs and outputs in the model."
104104
),
105105
)
106+
optional_group.add_argument(
107+
"--convert-tokenizer",
108+
action="store_true",
109+
help="Add converted tokenizer and detokenizer with OpenVINO Tokenizers",
110+
)
106111

107112

108113
class OVExportCommand(BaseOptimumCLICommand):
@@ -151,5 +156,6 @@ def run(self):
151156
compression_option=self.args.weight_format,
152157
compression_ratio=self.args.ratio,
153158
stateful=not self.args.disable_stateful,
159+
convert_tokenizer=self.args.convert_tokenizer,
154160
# **input_shapes,
155161
)

optimum/exporters/openvino/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from .__main__ import main_export
2-
from .convert import export, export_models, export_pytorch_via_onnx
2+
from .convert import export, export_from_model, export_models, export_pytorch_via_onnx
33
from .stateful import ensure_stateful_is_available, patch_stateful
44

55

0 commit comments

Comments (0)