91
91
"text-classification" : ("glue" , "sst2" , "sentence" ),
92
92
}
93
93
94
+ pattern_prefix = "^__module.model.model" if is_transformers_version (">=" , "4.49" ) else "^__module.model"
95
+
94
96
95
97
class OVQuantizerTest (unittest .TestCase ):
96
98
SUPPORTED_ARCHITECTURES_TORCH_MODEL = (
@@ -158,12 +160,12 @@ class OVQuantizerTest(unittest.TestCase):
158
160
dtype = "nf4" ,
159
161
group_size = 16 ,
160
162
ratio = 0.5 ,
161
- ignored_scope = {"patterns" : ["^__module.model .layers.0.self_attn" ]},
163
+ ignored_scope = {"patterns" : [f" { pattern_prefix } .layers.0.self_attn" ]},
162
164
),
163
165
full_quantization_config = OVQuantizationConfig (
164
- dtype = "f8e4m3" , ignored_scope = {"patterns" : ["^__module.model .layers.0.mlp" ]}
166
+ dtype = "f8e4m3" , ignored_scope = {"patterns" : [f" { pattern_prefix } .layers.0.mlp" ]}
165
167
),
166
- ignored_scope = {"patterns" : ["^__module.model .layers.1.self_attn" ]},
168
+ ignored_scope = {"patterns" : [f" { pattern_prefix } .layers.1.self_attn" ]},
167
169
dataset = "wikitext2" ,
168
170
num_samples = 1 ,
169
171
),
@@ -183,12 +185,12 @@ class OVQuantizerTest(unittest.TestCase):
183
185
dtype = "nf4" ,
184
186
group_size = 16 ,
185
187
ratio = 0.5 ,
186
- ignored_scope = {"patterns" : ["^__module.model .layers.0.self_attn" ]},
188
+ ignored_scope = {"patterns" : [f" { pattern_prefix } .layers.0.self_attn" ]},
187
189
),
188
190
full_quantization_config = OVQuantizationConfig (
189
- dtype = "f8e5m2" , ignored_scope = {"patterns" : ["^__module.model .layers.0.mlp" ]}
191
+ dtype = "f8e5m2" , ignored_scope = {"patterns" : [f" { pattern_prefix } .layers.0.mlp" ]}
190
192
),
191
- ignored_scope = {"patterns" : ["^__module.model .layers.1.self_attn" ]},
193
+ ignored_scope = {"patterns" : [f" { pattern_prefix } .layers.1.self_attn" ]},
192
194
dataset = "wikitext2" ,
193
195
num_samples = 1 ,
194
196
),
@@ -435,7 +437,7 @@ class OVWeightCompressionTest(unittest.TestCase):
435
437
sensitivity_metric = "mean_activation_magnitude" ,
436
438
dataset = "c4" ,
437
439
),
438
- [{"int8" : 14 , "int4" : 25 }],
440
+ [{"int8" : 18 , "int4" : 23 }] if is_transformers_version ( ">=" , "4.49" ) else [{ "int8" : 14 , "int4" : 25 }],
439
441
),
440
442
(
441
443
OVModelForCausalLM ,
@@ -449,7 +451,7 @@ class OVWeightCompressionTest(unittest.TestCase):
449
451
sensitivity_metric = "mean_activation_magnitude" ,
450
452
dataset = ["one two, " * i for i in range (10 )],
451
453
),
452
- [{"int8" : 16 , "int4" : 24 }],
454
+ [{"int8" : 18 , "int4" : 23 }] if is_transformers_version ( ">=" , "4.49" ) else [{ "int8" : 16 , "int4" : 24 }],
453
455
),
454
456
(
455
457
OVModelForCausalLM ,
@@ -612,21 +614,23 @@ class OVWeightCompressionTest(unittest.TestCase):
612
614
),
613
615
[{"int8" : 8 , "int4" : 22 }, {"int8" : 1 }, {"int8" : 11 }],
614
616
),
615
- (
616
- OVModelForVisualCausalLM ,
617
- "phi3_v" ,
618
- True ,
619
- dict (
620
- bits = 4 ,
621
- group_size = 16 ,
622
- dataset = "contextual" ,
623
- ratio = 0.8 ,
624
- sensitivity_metric = "mean_activation_magnitude" ,
625
- num_samples = 1 ,
626
- trust_remote_code = True ,
627
- ),
628
- [{"int8" : 4 , "int4" : 14 }, {"int8" : 1 }, {"int8" : 7 }, {"int8" : 2 }],
629
- ),
617
+ # TODO: add back once https://huggingface.co/katuni4ka/tiny-random-phi3-vision/blob/main/processing_phi3_v.py#L313 is modified to add chat_template
618
+ # currently incompatible with transformers >= v4.49
619
+ # (
620
+ # OVModelForVisualCausalLM,
621
+ # "phi3_v",
622
+ # True,
623
+ # dict(
624
+ # bits=4,
625
+ # group_size=16,
626
+ # dataset="contextual",
627
+ # ratio=0.8,
628
+ # sensitivity_metric="mean_activation_magnitude",
629
+ # num_samples=1,
630
+ # trust_remote_code=True,
631
+ # ),
632
+ # [{"int8": 4, "int4": 14}, {"int8": 1}, {"int8": 7}, {"int8": 2}],
633
+ # ),
630
634
(
631
635
OVModelForVisualCausalLM ,
632
636
"qwen2_vl" ,
0 commit comments