# since the model classes inherit torch.nn.Module.
import math

+import numba
import numpy as np
import torch
from torch.autograd import Function
from torch.nn import functional as F
-import numba

from neural_compressor.torch.utils import accelerator, logger
@@ -301,11 +301,11 @@ def unpack_tensor_with_torch(self, packed_tensor):
                unpacked_tensor[:, index].copy_(tmp.type(target_dtype))
        accelerator.synchronize()
        return unpacked_tensor
-
+
    @staticmethod
    @numba.jit(nopython=True, parallel=True)
    def pack_array_with_numba_b4_c32(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
    ) -> np.ndarray:
        for i in range(new_in_features):
            packed_array[:, i] = (
@@ -319,11 +319,11 @@ def pack_array_with_numba_b4_c32(
                | (raw_array[:, i * n_pack] & 0b1111)
            )
        return packed_array
-
+
    @staticmethod
    @numba.jit(nopython=True, parallel=True)
    def pack_array_with_numba_b4_c16(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
    ) -> np.ndarray:
        for i in range(new_in_features):
            packed_array[:, i] = (
@@ -333,23 +333,20 @@ def pack_array_with_numba_b4_c16(
                | (raw_array[:, i * n_pack] & 0b1111)
            )
        return packed_array
-
+
    @staticmethod
    @numba.jit(nopython=True, parallel=True)
    def pack_array_with_numba_b4_c8(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
    ) -> np.ndarray:
        for i in range(new_in_features):
-            packed_array[:, i] = (
-                ((raw_array[:, i * n_pack + 1] & 0b1111) << 4)
-                | (raw_array[:, i * n_pack] & 0b1111)
-            )
+            packed_array[:, i] = ((raw_array[:, i * n_pack + 1] & 0b1111) << 4) | (raw_array[:, i * n_pack] & 0b1111)
        return packed_array
-
+
    @staticmethod
    @numba.jit(nopython=True, parallel=True)
    def pack_array_with_numba_b4_c64(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
    ) -> np.ndarray:
        for i in range(new_in_features):
            packed_array[:, i] = (
@@ -372,11 +369,10 @@ def pack_array_with_numba_b4_c64(
            )
        return packed_array

-
    @staticmethod
    @numba.jit(nopython=True, parallel=True)
    def pack_array_with_numba_b8_c32(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
    ) -> np.ndarray:
        for i in range(new_in_features):
            packed_array[:, i] = (
@@ -386,11 +382,11 @@ def pack_array_with_numba_b8_c32(
                | (raw_array[:, i * n_pack] & 0b11111111)
            )
        return packed_array
-
+
    @staticmethod
    @numba.jit(nopython=True, parallel=True)
    def pack_array_with_numba_b8_c16(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
    ) -> np.ndarray:
        for i in range(new_in_features):
            packed_array[:, i] = (
@@ -400,20 +396,20 @@ def pack_array_with_numba_b8_c16(
                | (raw_array[:, i * n_pack] & 0b11111111)
            )
        return packed_array
-
+
    @staticmethod
    @numba.jit(nopython=True, parallel=True)
    def pack_array_with_numba_b8_c8(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
    ) -> np.ndarray:
        for i in range(new_in_features):
-            packed_array[:, i] = (raw_array[:, i * n_pack] & 0b11111111)
+            packed_array[:, i] = raw_array[:, i * n_pack] & 0b11111111
        return packed_array
-
+
    @staticmethod
    @numba.jit(nopython=True, parallel=True)
    def pack_array_with_numba_b8_c64(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
    ) -> np.ndarray:
        for i in range(new_in_features):
            packed_array[:, i] = (
@@ -427,11 +423,11 @@ def pack_array_with_numba_b8_c64(
                | (raw_array[:, i * n_pack] & 0b11111111)
            )
        return packed_array
-
+
    @staticmethod
    @numba.jit(nopython=True, parallel=True)
    def pack_array_with_numba_b2_c32(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
    ) -> np.ndarray:
        for i in range(new_in_features):
            packed_array[:, i] = (
@@ -457,7 +453,7 @@ def pack_array_with_numba_b2_c32(
    @staticmethod
    @numba.jit(nopython=True, parallel=True)
    def pack_array_with_numba_b2_c16(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
    ) -> np.ndarray:
        for i in range(new_in_features):
            packed_array[:, i] = (
@@ -471,11 +467,11 @@ def pack_array_with_numba_b2_c16(
                | (raw_array[:, i * n_pack] & 0b11)
            )
        return packed_array
-
+
    @staticmethod
    @numba.jit(nopython=True, parallel=True)
    def pack_array_with_numba_b2_c8(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
    ) -> np.ndarray:
        for i in range(new_in_features):
            packed_array[:, i] = (
@@ -485,11 +481,11 @@ def pack_array_with_numba_b2_c8(
                | (raw_array[:, i * n_pack] & 0b11)
            )
        return packed_array
-
+
    @staticmethod
    @numba.jit(nopython=True, parallel=True)
    def pack_array_with_numba_b2_c64(
-        raw_array: np.ndarray, packed_array:np.ndarray, n_pack: int, new_in_features:int
+        raw_array: np.ndarray, packed_array: np.ndarray, n_pack: int, new_in_features: int
    ) -> np.ndarray:
        for i in range(new_in_features):
            packed_array[:, i] = (
@@ -527,7 +523,7 @@ def pack_array_with_numba_b2_c64(
                | (raw_array[:, i * n_pack] & 0b11)
            )
        return packed_array
-
+
    def pack_array_with_numba(
        self, raw_array: np.ndarray, n_pack: int, bits: int, compress_bits: int, compression_dtype=np.int32
    ) -> np.ndarray:
@@ -547,17 +543,18 @@ def pack_array_with_numba(
        new_in_features = (in_features + n_pack - 1) // n_pack
        packed_array = np.zeros((out_features, new_in_features), dtype=compression_dtype)
        raw_array = raw_array.astype(compression_dtype)
-
+
        pack_method_name = f"pack_array_with_numba_b{bits}_c{compress_bits}"
        pack_method = getattr(self, pack_method_name)
        return pack_method(raw_array, packed_array, n_pack, new_in_features)
-
+
    @staticmethod
    @numba.jit(nopython=True)
    def pack_array_with_numba_yi(
        raw_tensor: np.ndarray, n_pack: int, bits: int, compression_dtype=np.int32
    ) -> np.ndarray:
        """Packs the input tensor by combining elements into a specified bit-width format using NumPy.
+
        Args:
            raw_tensor (np.ndarray): The tensor to be packed. Shape: [out_features, in_features] or [1, in_features].
            n_pack (int): The number of elements to be packed together.
@@ -575,7 +572,7 @@ def pack_array_with_numba_yi(
        for i in range(new_in_features):
            packed_tensor[:, i] = (
                (raw_tensor[:, i * n_pack + 7] << 28)
-                | (raw_tensor[:, i * n_pack + 6] << 24)
+                | (raw_tensor[:, i * n_pack + 6] << 24)
                | (raw_tensor[:, i * n_pack + 5] << 20)
                | (raw_tensor[:, i * n_pack + 4] << 16)
                | (raw_tensor[:, i * n_pack + 3] << 12)
@@ -585,25 +582,29 @@ def pack_array_with_numba_yi(
            )

        return packed_tensor
-
+
    def pack_tensor_with_reshape(self, raw_tensor):
        raw_array = raw_tensor.cpu().numpy()
        target_len = np.ceil(raw_array.shape[1] / self.n_pack).astype(int)
        target_dtype = torch.tensor(0, dtype=self.compression_dtype).numpy().dtype
        reshaped = raw_array.reshape(-1, self.n_pack)
        packed_array = np.zeros(reshaped.shape[0], dtype=target_dtype)
        for i in range(self.n_pack):
-            packed_array |= (reshaped[:, i].astype(target_dtype) << (self.bits * i))
-
-        packed_tensor = torch.from_numpy(packed_array.reshape((raw_array.shape[0], target_len))).to(device=raw_tensor.device)
+            packed_array |= reshaped[:, i].astype(target_dtype) << (self.bits * i)
+
+        packed_tensor = torch.from_numpy(packed_array.reshape((raw_array.shape[0], target_len))).to(
+            device=raw_tensor.device
+        )
        return packed_tensor

    def pack_tensor_with_numpy(self, raw_tensor):
        if self.bits not in [2, 4, 8]:
            return self.pack_tensor_with_reshape(raw_tensor)
        compression_dtype = torch.tensor(0, dtype=self.compression_dtype).numpy().dtype
        # packed_array = self.pack_array_with_numba_yi(raw_tensor.cpu().numpy(), self.n_pack, self.bits, compression_dtype)
-        packed_array = self.pack_array_with_numba(raw_tensor.cpu().numpy(), self.n_pack, self.bits, self.compress_bits, compression_dtype)
+        packed_array = self.pack_array_with_numba(
+            raw_tensor.cpu().numpy(), self.n_pack, self.bits, self.compress_bits, compression_dtype
+        )
        return torch.from_numpy(packed_array).to(device=raw_tensor.device)

    def unpack_tensor_with_numpy(self, packed_tensor):
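For reference, the `pack_array_with_numba_b{bits}_c{compress_bits}` kernels in this diff all follow the same scheme: each group of `n_pack` low-bit values is masked to `bits` bits and OR-ed into consecutive bit positions of one wider integer. Below is a minimal NumPy-only sketch of the 4-bit-into-int32 case with a matching unpack for a round-trip check. The helper names (`pack_b4_c32`, `unpack_b4_c32`) are illustrative and not part of this commit, and the sketch assumes `in_features` is a multiple of `n_pack`.

```python
import numpy as np


def pack_b4_c32(raw: np.ndarray) -> np.ndarray:
    """Pack eight 4-bit values per group into one int32 column (illustrative sketch)."""
    n_pack = 32 // 4  # 8 values per 32-bit word
    out_features, in_features = raw.shape
    new_in_features = in_features // n_pack  # assumes in_features % n_pack == 0
    raw = raw.astype(np.int32)
    packed = np.zeros((out_features, new_in_features), dtype=np.int32)
    for i in range(new_in_features):
        for e in range(n_pack):
            # Mask to 4 bits, then shift into bit positions [4*e, 4*e+3] of the word.
            packed[:, i] |= (raw[:, i * n_pack + e] & 0b1111) << (4 * e)
    return packed


def unpack_b4_c32(packed: np.ndarray, in_features: int) -> np.ndarray:
    """Recover the original unsigned 4-bit values from the packed int32 array."""
    n_pack = 8
    out = np.zeros((packed.shape[0], packed.shape[1] * n_pack), dtype=np.int32)
    for i in range(packed.shape[1]):
        for e in range(n_pack):
            # Shift the target nibble down and mask off everything else.
            out[:, i * n_pack + e] = (packed[:, i] >> (4 * e)) & 0b1111
    return out[:, :in_features]


if __name__ == "__main__":
    rng = np.random.default_rng(0)
    w = rng.integers(0, 16, size=(4, 32), dtype=np.int32)  # unsigned 4-bit values
    assert np.array_equal(unpack_b4_c32(pack_b4_c32(w), w.shape[1]), w)
```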