@@ -1251,16 +1251,8 @@ def __init__(self, detr_layer, config):
         self.norm_first = True
 
         self.original_layers_mapping = {
-            "in_proj_weight": [
-                "self_attn.q_proj.weight",
-                "self_attn.k_proj.weight",
-                "self_attn.v_proj.weight"
-            ],
-            "in_proj_bias": [
-                "self_attn.q_proj.bias",
-                "self_attn.k_proj.bias",
-                "self_attn.v_proj.bias"
-            ],
+            "in_proj_weight": ["self_attn.q_proj.weight", "self_attn.k_proj.weight", "self_attn.v_proj.weight"],
+            "in_proj_bias": ["self_attn.q_proj.bias", "self_attn.k_proj.bias", "self_attn.v_proj.bias"],
             "out_proj_weight": "self_attn.out_proj.weight",
             "out_proj_bias": "self_attn.out_proj.bias",
             "linear1_weight": "fc1.weight",
@@ -1272,7 +1264,7 @@ def __init__(self, detr_layer, config):
             "norm2_weight": "final_layer_norm.weight",
             "norm2_bias": "final_layer_norm.bias",
         }
-
+
         self.validate_bettertransformer()
 
     def forward(self, hidden_states, attention_mask, output_attentions: bool, *_, **__):
@@ -1303,15 +1295,15 @@ def forward(self, hidden_states, attention_mask, output_attentions: bool, *_, **__):
                 self.linear2_bias,
                 attention_mask,
             )
-
+
             if hidden_states.is_nested and self.is_last_layer:
                 hidden_states = hidden_states.to_padded_tensor(0.0)
-
+
         else:
             raise NotImplementedError(
                 "Training and Autocast are not implemented for BetterTransformer + Detr. Please open an issue."
             )
-
+
         return (hidden_states,)
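For context, the `original_layers_mapping` in the diff above pairs each fused BetterTransformer parameter with the original DETR sub-module parameters it is built from: `in_proj_weight`/`in_proj_bias` come from the separate q/k/v projections, while the remaining entries map one-to-one. The snippet below is a minimal sketch of that relationship, not the library's implementation; the hidden size is an assumed value for illustration only.

```python
import torch
import torch.nn as nn

hidden_size = 256  # assumed embedding dimension, for illustration

# Stand-ins for the original `self_attn.q_proj` / `k_proj` / `v_proj` layers.
q_proj = nn.Linear(hidden_size, hidden_size)
k_proj = nn.Linear(hidden_size, hidden_size)
v_proj = nn.Linear(hidden_size, hidden_size)

# Fused projection parameters, corresponding to the `in_proj_weight` and
# `in_proj_bias` keys of the mapping: a row-wise concatenation of q, k, v.
in_proj_weight = torch.cat([q_proj.weight, k_proj.weight, v_proj.weight], dim=0)
in_proj_bias = torch.cat([q_proj.bias, k_proj.bias, v_proj.bias], dim=0)

print(in_proj_weight.shape)  # torch.Size([768, 256]) -> (3 * hidden_size, hidden_size)
print(in_proj_bias.shape)    # torch.Size([768])
```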