Refactor OV weight compression call inside from_pretrained #683

Merged
optimum/intel/openvino/modeling_decoder.py (23 changes: 7 additions & 16 deletions)

@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
+import copy
import logging
import os
from pathlib import Path
@@ -24,7 +24,7 @@
from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
from openvino.preprocess import PrePostProcessor
from openvino.runtime import Core, Tensor, Type
-from transformers import AutoModelForCausalLM, AutoTokenizer, PretrainedConfig
+from transformers import AutoModelForCausalLM, PretrainedConfig
from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward
from transformers.generation import GenerationMixin
from transformers.modeling_outputs import CausalLMOutputWithPast
@@ -625,9 +625,8 @@ def _from_pretrained(
                raise ImportError(
                    "Quantization of the weights requires nncf, please install it with `pip install nncf`"
                )
-            import nncf

-            from .quantization import _weight_only_quantization
+            from optimum.intel.openvino.quantization import OVQuantizer

            default_config = _check_default_4bit_configs(config)

@@ -636,18 +635,10 @@ def _from_pretrained(
                    f"For the given model, we recommend the following `quantization_config` : {default_config}"
                )

-            calibration_dataset = None
-            if isinstance(quantization_config.dataset, str):
-                tokenizer = quantization_config.tokenizer or AutoTokenizer.from_pretrained(model_id)
-
-                from optimum.gptq.data import get_dataset, prepare_dataset
-
-                nsamples = quantization_config.num_samples or 128
-                dataset = get_dataset(quantization_config.dataset, tokenizer, seqlen=32, nsamples=nsamples)
-                dataset = prepare_dataset(dataset)
-                calibration_dataset = nncf.Dataset(dataset, lambda x: causal_model.prepare_inputs(**x))
-
-            _weight_only_quantization(model, quantization_config, calibration_dataset)
+            quantizer = OVQuantizer(causal_model)
+            quantization_config_copy = copy.deepcopy(quantization_config)
+            quantization_config_copy.tokenizer = quantization_config.tokenizer or model_id
+            quantizer.quantize(ov_config=OVConfig(quantization_config=quantization_config_copy))

        return causal_model
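For context, the hunk above replaces the hand-rolled dataset preparation inside from_pretrained with a single OVQuantizer call; the user-facing entry point is unchanged. A minimal usage sketch, assuming an illustrative model id ("gpt2") and a dataset name supported by optimum.gptq.data ("wikitext2"):

# Hedged usage sketch; the model id and config values are illustrative only.
from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig

# A string dataset now flows through OVQuantizer rather than being tokenized
# inside from_pretrained itself.
quantization_config = OVWeightQuantizationConfig(bits=4, dataset="wikitext2")
model = OVModelForCausalLM.from_pretrained(
    "gpt2", export=True, quantization_config=quantization_config
)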

optimum/intel/openvino/quantization.py (55 changes: 34 additions & 21 deletions)

@@ -199,7 +199,7 @@ def from_pretrained(cls, model: PreTrainedModel, **kwargs):
    def quantize(
        self,
        calibration_dataset: Optional[Union[datasets.Dataset, nncf.Dataset, Iterable]] = None,
-        save_directory: Union[str, Path] = None,
+        save_directory: Optional[Union[str, Path]] = None,
        ov_config: OVConfig = None,
        file_name: Optional[str] = None,
        batch_size: int = 1,
@@ -215,7 +215,7 @@ def quantize(
            calibration_dataset (`datasets.Dataset` or `nncf.Dataset` or `Iterable`, *optional*):
                A collection of data samples to use for quantization calibration. Is optional for weight-only
                quantization and is required for full quantization.
-            save_directory (`Union[str, Path]`):
+            save_directory (`Union[str, Path]`, *optional*):
                The directory where the quantized model should be saved.
            ov_config (`OVConfig`, *optional*):
                The configuration containing the parameters related to quantization. If not provided, 8-bit symmetric
@@ -263,10 +263,6 @@ def quantize(
                "as an instance of `OVWeightQuantizationConfig` for weight-only compression or as an instance of `OVQuantizationConfig` for full model quantization."
            )

-        if save_directory is None:
-            # TODO : can be set to self.model.config.name_or_path for OVModels when not provided
-            raise ValueError("`save_directory` needs to be specified")
-
        if ov_config is None:
            ov_config = OVConfig()
        if not isinstance(ov_config, OVConfig):
@@ -319,21 +315,41 @@
    def _quantize_ovbasemodel(
        self,
        ov_config: OVConfig,
-        save_directory: Union[str, Path],
+        save_directory: Union[str, Path] = None,
        calibration_dataset: Optional[Union[datasets.Dataset, nncf.Dataset, Iterable]] = None,
        batch_size: int = 1,
        data_collator: Optional[DataCollator] = None,
        remove_unused_columns: bool = True,
        **kwargs,
    ):
-        save_directory = Path(save_directory)
-        save_directory.mkdir(parents=True, exist_ok=True)
+        if save_directory is not None:
+            save_directory = Path(save_directory)
+            save_directory.mkdir(parents=True, exist_ok=True)

        quantization_config = ov_config.quantization_config
        if isinstance(quantization_config, OVWeightQuantizationConfig):
+            if calibration_dataset is None and isinstance(quantization_config.dataset, str):
+                from optimum.intel import OVModelForCausalLM
+
+                if isinstance(self.model, OVModelForCausalLM):
+                    from optimum.gptq.data import get_dataset, prepare_dataset
+
+                    tokenizer = AutoTokenizer.from_pretrained(quantization_config.tokenizer)
+                    nsamples = quantization_config.num_samples if quantization_config.num_samples else 128
+                    calibration_dataset = get_dataset(
+                        quantization_config.dataset, tokenizer, seqlen=32, nsamples=nsamples
+                    )
+                    calibration_dataset = prepare_dataset(calibration_dataset)
Review comment on lines +335 to +342:

Collaborator: This should be done for every OVModel, no?

nikita-savelyevv (Collaborator, Author), Apr 26, 2024: This particular part is for OVModelForCausalLM only. First, because GPTQ dataset creation logic is employed, which is applicable to LLMs only. Second, self.model is required to have a prepare_inputs method, which is specific to OVModelForCausalLM.

In theory we could extend this part to other model classes. There is some logic for the SD model class and I plan to migrate it to OVQuantizer in a future PR. There is also the get_calibration_dataset method; maybe this logic should actually go there, or be extended to multiple model types. I will need to think about it.

For other model types there is no such logic in the codebase at the moment, if I'm not mistaken, so I'm not yet sure about those. Maybe we could add it in the future.

Collaborator: Yes, I think it makes sense to make it available for other OVModels and to also extend get_calibration_dataset, but this can be done in a following PR!

Collaborator: Also, we could add a warning that the dataset config argument will be ignored for models that are not instances of OVModelForCausalLM.

+                    calibration_dataset = nncf.Dataset(calibration_dataset, lambda x: self.model.prepare_inputs(**x))
+                else:
+                    raise ValueError(
+                        f"Can't create weight compression calibration dataset from string for {type(self.model)}"
+                    )

            _weight_only_quantization(self.model.model, quantization_config, calibration_dataset)
-            self.model.save_pretrained(save_directory)
-            ov_config.save_pretrained(save_directory)
+            if save_directory is not None:
+                self.model.save_pretrained(save_directory)
+                ov_config.save_pretrained(save_directory)
            return
        if not isinstance(quantization_config, OVQuantizationConfig):
            raise ValueError(f"Unsupported type of quantization config: {type(quantization_config)}")
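Because save_directory is now optional in _quantize_ovbasemodel, weight compression can be applied to an already-loaded OpenVINO model without writing it to disk. A minimal sketch of that call path, using only names that appear in this diff (the model id, dataset, and config values are illustrative):

# Hedged sketch: in-place weight-only compression via OVQuantizer.
from optimum.intel import OVConfig, OVModelForCausalLM, OVQuantizer, OVWeightQuantizationConfig

model = OVModelForCausalLM.from_pretrained("gpt2", export=True, load_in_8bit=False)
quantizer = OVQuantizer(model)
config = OVWeightQuantizationConfig(bits=4, dataset="wikitext2", tokenizer="gpt2")
# No save_directory: the model's weights are compressed in memory;
# pass save_directory to also serialize the result.
quantizer.quantize(ov_config=OVConfig(quantization_config=config))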
@@ -385,8 +401,9 @@ def _quantize_ovbasemodel(
            **kwargs,
        )
        self.model.model = quantized_model
-        self.model.save_pretrained(save_directory)
-        ov_config.save_pretrained(save_directory)
+        if save_directory is not None:
+            self.model.save_pretrained(save_directory)
+            ov_config.save_pretrained(save_directory)

    def _quantize_torchmodel(
        self,
@@ -399,6 +416,10 @@ def _quantize_torchmodel(
        remove_unused_columns: bool = True,
        **kwargs,
    ):
+        if save_directory is None:
+            # TODO : can be set to self.model.config.name_or_path for OVModels when not provided
+            raise ValueError("`save_directory` needs to be specified")
+
        self._set_task()
        save_directory = Path(save_directory)
        save_directory.mkdir(parents=True, exist_ok=True)
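The torch-model path keeps the original requirement: the check added above raises as soon as save_directory is omitted, since a transformers model has to be exported and serialized. A hedged sketch of that path (model id and output directory are illustrative):

# Sketch: for a transformers model, save_directory remains mandatory.
from transformers import AutoModelForSequenceClassification
from optimum.intel import OVConfig, OVQuantizer, OVWeightQuantizationConfig

torch_model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased")
quantizer = OVQuantizer(torch_model)
quantizer.quantize(
    ov_config=OVConfig(quantization_config=OVWeightQuantizationConfig(bits=8)),
    save_directory="distilbert-int8-ov",  # omitting this now raises ValueError
)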
@@ -645,14 +666,6 @@ def _weight_only_quantization(
            dataset = calibration_dataset
        else:
            dataset = nncf.Dataset(calibration_dataset)
-    elif config.dataset is not None and isinstance(config.dataset, str):
-        tokenizer = AutoTokenizer.from_pretrained(config.tokenizer)
-
-        from optimum.gptq.data import get_dataset, prepare_dataset
-
-        nsamples = config.num_samples if config.num_samples else 128
-        dataset = get_dataset(config.dataset, tokenizer, seqlen=32, nsamples=nsamples)
-        dataset = prepare_dataset(dataset)
Review comment on lines -648 to -655:

Collaborator: _weight_only_quantization is still used here and here; we might need to update those places as well to ensure compatibility.

nikita-savelyevv (Collaborator, Author): Yep, I did want to do that, but the difference there is that only a raw openvino.runtime.Model is available, not an instance of transformers.PreTrainedModel, and the latter is required to initialize OVQuantizer. We could extend OVQuantizer to accept an instance of openvino.runtime.Model, but that's a rather serious API change.
    sensitivity_metric = None
    if isinstance(config.sensitivity_metric, str):
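The call sites the reviewer mentions invoke _weight_only_quantization directly on a raw openvino.runtime.Model, which OVQuantizer cannot currently wrap. A hedged sketch of that remaining direct use, with an illustrative model path and the argument order taken from this diff:

# Sketch: data-free 8-bit weight compression of a raw OpenVINO model via the
# private helper (internal API, subject to change; no calibration dataset).
import openvino.runtime as ov
from optimum.intel import OVWeightQuantizationConfig
from optimum.intel.openvino.quantization import _weight_only_quantization

core = ov.Core()
raw_model = core.read_model("model.xml")  # illustrative path
_weight_only_quantization(raw_model, OVWeightQuantizationConfig(bits=8), None)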