-
Notifications
You must be signed in to change notification settings - Fork 125
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Refactor OV weight compression call inside from_pretrained #683
Changes from all commits
d78950f
99471b2
b986830
cdbedb4
fa4065f
ea3f211
d1f4149
c35cd20
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -199,7 +199,7 @@ def from_pretrained(cls, model: PreTrainedModel, **kwargs): | |
def quantize( | ||
self, | ||
calibration_dataset: Optional[Union[datasets.Dataset, nncf.Dataset, Iterable]] = None, | ||
save_directory: Union[str, Path] = None, | ||
save_directory: Optional[Union[str, Path]] = None, | ||
ov_config: OVConfig = None, | ||
file_name: Optional[str] = None, | ||
batch_size: int = 1, | ||
|
@@ -215,7 +215,7 @@ def quantize( | |
calibration_dataset (`datasets.Dataset` or `nncf.Dataset` or `Iterable`, *optional*): | ||
A collection of data samples to use for quantization calibration. Is optional for weight-only | ||
quantization and is required for full quantization. | ||
save_directory (`Union[str, Path]`): | ||
save_directory (`Union[str, Path]`, *optional*): | ||
The directory where the quantized model should be saved. | ||
ov_config (`OVConfig`, *optional*): | ||
The configuration containing the parameters related to quantization. If not provided, 8-bit symmetric | ||
|
@@ -263,10 +263,6 @@ def quantize( | |
"as an instance of `OVWeightQuantizationConfig` for weight-only compression or as an instance of `OVQuantizationConfig` for full model quantization." | ||
) | ||
|
||
if save_directory is None: | ||
# TODO : can be set to self.model.config.name_or_path for OVModels when not provided | ||
raise ValueError("`save_directory` needs to be specified") | ||
|
||
if ov_config is None: | ||
ov_config = OVConfig() | ||
if not isinstance(ov_config, OVConfig): | ||
|
@@ -319,21 +315,41 @@ def quantize( | |
def _quantize_ovbasemodel( | ||
self, | ||
ov_config: OVConfig, | ||
save_directory: Union[str, Path], | ||
save_directory: Union[str, Path] = None, | ||
calibration_dataset: Optional[Union[datasets.Dataset, nncf.Dataset, Iterable]] = None, | ||
batch_size: int = 1, | ||
data_collator: Optional[DataCollator] = None, | ||
remove_unused_columns: bool = True, | ||
**kwargs, | ||
): | ||
save_directory = Path(save_directory) | ||
save_directory.mkdir(parents=True, exist_ok=True) | ||
if save_directory is not None: | ||
save_directory = Path(save_directory) | ||
save_directory.mkdir(parents=True, exist_ok=True) | ||
|
||
quantization_config = ov_config.quantization_config | ||
if isinstance(quantization_config, OVWeightQuantizationConfig): | ||
if calibration_dataset is None and isinstance(quantization_config.dataset, str): | ||
from optimum.intel import OVModelForCausalLM | ||
|
||
if isinstance(self.model, OVModelForCausalLM): | ||
from optimum.gptq.data import get_dataset, prepare_dataset | ||
|
||
tokenizer = AutoTokenizer.from_pretrained(quantization_config.tokenizer) | ||
nsamples = quantization_config.num_samples if quantization_config.num_samples else 128 | ||
calibration_dataset = get_dataset( | ||
quantization_config.dataset, tokenizer, seqlen=32, nsamples=nsamples | ||
) | ||
calibration_dataset = prepare_dataset(calibration_dataset) | ||
calibration_dataset = nncf.Dataset(calibration_dataset, lambda x: self.model.prepare_inputs(**x)) | ||
else: | ||
raise ValueError( | ||
f"Can't create weight compression calibration dataset from string for {type(self.model)}" | ||
) | ||
|
||
_weight_only_quantization(self.model.model, quantization_config, calibration_dataset) | ||
self.model.save_pretrained(save_directory) | ||
ov_config.save_pretrained(save_directory) | ||
if save_directory is not None: | ||
self.model.save_pretrained(save_directory) | ||
ov_config.save_pretrained(save_directory) | ||
return | ||
if not isinstance(quantization_config, OVQuantizationConfig): | ||
raise ValueError(f"Unsupported type of quantization config: {type(quantization_config)}") | ||
|
@@ -385,8 +401,9 @@ def _quantize_ovbasemodel( | |
**kwargs, | ||
) | ||
self.model.model = quantized_model | ||
self.model.save_pretrained(save_directory) | ||
ov_config.save_pretrained(save_directory) | ||
if save_directory is not None: | ||
self.model.save_pretrained(save_directory) | ||
ov_config.save_pretrained(save_directory) | ||
|
||
def _quantize_torchmodel( | ||
self, | ||
|
@@ -399,6 +416,10 @@ def _quantize_torchmodel( | |
remove_unused_columns: bool = True, | ||
**kwargs, | ||
): | ||
if save_directory is None: | ||
# TODO : can be set to self.model.config.name_or_path for OVModels when not provided | ||
raise ValueError("`save_directory` needs to be specified") | ||
|
||
self._set_task() | ||
save_directory = Path(save_directory) | ||
save_directory.mkdir(parents=True, exist_ok=True) | ||
|
@@ -645,14 +666,6 @@ def _weight_only_quantization( | |
dataset = calibration_dataset | ||
else: | ||
dataset = nncf.Dataset(calibration_dataset) | ||
elif config.dataset is not None and isinstance(config.dataset, str): | ||
tokenizer = AutoTokenizer.from_pretrained(config.tokenizer) | ||
|
||
from optimum.gptq.data import get_dataset, prepare_dataset | ||
|
||
nsamples = config.num_samples if config.num_samples else 128 | ||
dataset = get_dataset(config.dataset, tokenizer, seqlen=32, nsamples=nsamples) | ||
dataset = prepare_dataset(dataset) | ||
Comment on lines
-648
to
-655
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yep, I did want to do that, but the difference there is that only a raw |
||
|
||
sensitivity_metric = None | ||
if isinstance(config.sensitivity_metric, str): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this should be done for every OVModel no ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This particular part is for
OVModelForCausalLM
only. First of all, because GPTQ dataset creation logic is employed, which is applicable for LLMs only. Secondly, self.model
is required to have a prepare_inputs
method, which is specific to OVModelForCausalLM.
In theory we could extend this part to other model classes. There is some logic for the SD model class and I plan to migrate it to
OVQuantizer
in a future PR. There's also the get_calibration_dataset
method; maybe it should actually go there / be extended to multiple model types. Will need to think about it. For other model types there is no such logic in the codebase at the moment if I'm not mistaken, so I'm not yet sure about those. Maybe we could add it in the future.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes I think it makes sense to make it available for other
OVModel
classes and to also extend get_calibration_dataset
, but this can be done in a following PR! There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
also we could add a warning that the dataset config argument will be ignored for models that are not instances of
OVModelForCausalLM