|
18 | 18 | import math
|
19 | 19 | import os
|
20 | 20 | import types
|
| 21 | +import re |
21 | 22 |
|
22 | 23 | from datasets import load_dataset
|
23 | 24 |
|
|
40 | 41 |
|
41 | 42 | if is_package_available("auto_round"):
|
42 | 43 | import auto_round
|
| 44 | + import transformers |
43 | 45 | from auto_round.export.export_to_itrex.model_wrapper import WeightOnlyLinear as auto_round_woq_linear
|
44 | 46 |
|
45 | 47 |
|
@@ -132,18 +134,18 @@ def _replace_linear(
|
132 | 134 | isinstance(module, torch.nn.Linear)
|
133 | 135 | or isinstance(module, INCWeightOnlyLinear)
|
134 | 136 | or (is_package_available("auto_round") and isinstance(module, auto_round_woq_linear))
|
135 |
| - or (is_ipex_available() and isinstance(module, ipex.nn.utils._weight_prepack._IPEXLinear)) |
136 | 137 | ) and (name not in modules_to_not_convert):
|
137 | 138 | # Check if the current key is not in the `modules_to_not_convert`
|
138 |
| - if not any(key in ".".join(current_key_name) for key in modules_to_not_convert): |
| 139 | + if not any(key in ".".join(current_key_name) for key in modules_to_not_convert) and \ |
| 140 | + not any(re.match(pattern, ".".join(current_key_name)) for pattern in modules_to_not_convert): |
139 | 141 | in_features = module.in_features
|
140 | 142 | out_features = module.out_features
|
141 | 143 | if device == "cpu" or device == torch.device("cpu") or device == "auto":
|
142 | 144 | from intel_extension_for_pytorch.nn.modules import WeightOnlyQuantizedLinear as ipex_linear
|
143 | 145 | from intel_extension_for_pytorch.utils.weight_only_quantization import (
|
144 | 146 | _convert_optimum_format_to_desired,
|
145 | 147 | )
|
146 |
| - |
| 148 | + |
147 | 149 | qweight = module.qweight
|
148 | 150 | scales = module.scales
|
149 | 151 | qzeros = module.qzeros
|
@@ -550,7 +552,41 @@ def convert_to_quantized_model(model, config, device="cpu"):
|
550 | 552 | gradient_accumulate_steps=config.gradient_accumulate_steps,
|
551 | 553 | export_format=config.export_format,
|
552 | 554 | )
|
553 |
| - |
| 555 | + |
| 556 | + # vlm set non-text module config |
| 557 | + if config.is_vlm is True: |
| 558 | + from neural_compressor.torch.utils.utility import ( |
| 559 | + get_multimodal_block_names, |
| 560 | + find_matching_blocks, |
| 561 | + get_layer_names_in_block, |
| 562 | + ) |
def set_nontext_module_config(model, to_quant_block_names, config):
    """Extend ``config.modules_to_not_convert`` with every layer that lies
    outside the blocks selected for quantization, so those (presumably
    non-text / vision) blocks are kept at full precision.

    Args:
        model: the multimodal model being quantized.
        to_quant_block_names: groups of block names that WILL be quantized.
        config: quantization config whose ``modules_to_not_convert`` list is
            mutated in place.
    """
    # Every candidate block in the model, vision blocks included.
    every_block = get_multimodal_block_names(model, quant_vision=True)
    # Blocks present in the model but absent from the to-quantize selection
    # are the ones to leave at full precision (compare as hashable tuples).
    selected = {tuple(group) for group in to_quant_block_names}
    keep_full_precision = list({tuple(group) for group in every_block} - selected)
    # Expand the leftover block names into the individual layer names they
    # contain, then register each one as not-to-convert.
    layer_names = get_layer_names_in_block(model, to_quant_block_names=keep_full_precision)
    config.modules_to_not_convert.extend(layer_names)
| 571 | + |
| 572 | + # skip layers not in blocks |
| 573 | + config.modules_to_not_convert.append("model.vision_embed_tokens.img_projection*") |
| 574 | + config.modules_to_not_convert.append("transformer.visual.attn_pool.*_proj") |
| 575 | + config.modules_to_not_convert.append("model.mm_projector*") |
| 576 | + config.modules_to_not_convert.append("multi_modal_projector") |
| 577 | + config.modules_to_not_convert.append("visual.merger") |
| 578 | + |
| 579 | + all_blocks = get_multimodal_block_names(model, quant_config.quant_nontext_module) |
| 580 | + to_quant_block_names = find_matching_blocks(model, all_blocks, quant_config.to_quant_block_names) |
| 581 | + set_nontext_module_config(model, to_quant_block_names, config) |
| 582 | + |
| 583 | + for n, m in model.named_modules(): |
| 584 | + if isinstance(m, torch.nn.Linear) or isinstance(m, transformers.modeling_utils.Conv1D): |
| 585 | + if m.weight.shape[0] % 32 != 0 or m.weight.shape[1] % 32 != 0: |
| 586 | + config.modules_to_not_convert.append(n) |
| 587 | + print( |
| 588 | + f"{n} will not be quantized due to its shape not being divisible by 32," |
| 589 | + " resulting in an exporting issue to autogptq") |
554 | 590 | if config.modules_to_not_convert != []:
|
555 | 591 | for module in config.modules_to_not_convert:
|
556 | 592 | module_name = ".*" + module
|
|
0 commit comments