@@ -253,27 +253,27 @@ Adding x*x to a few carries will not cascade the carry
 
 void shl_96(int96_v * const a)
 /* shiftleft a one bit */
-{ /* here, bitalign improves the 92-bit kernel, and slows down 76-bit */
+{ /* here, amd_bitalign improves the 92-bit kernel, and slows down 76-bit */
   a->d2 = amd_bitalign(a->d2, a->d1, 31);
   a->d1 = amd_bitalign(a->d1, a->d0, 31);
-  // a->d2 = (a->d2 << 1) | (a->d1 >> 31);
-  // a->d1 = (a->d1 << 1) | (a->d0 >> 31);
+  // a->d2 = generic_bitalign(a->d2, a->d1, 31);
+  // a->d1 = generic_bitalign(a->d1, a->d0, 31);
   a->d0 = a->d0 << 1;
 }
 
 void shl_192(int192_v * const a)
 /* shiftleft a one bit */
-{ /* in this function, bitalign slows down all kernels */
+{ /* in this function, amd_bitalign slows down all kernels */
   // a->d5 = amd_bitalign(a->d5, a->d4, 31);
   // a->d4 = amd_bitalign(a->d4, a->d3, 31);
   // a->d3 = amd_bitalign(a->d3, a->d2, 31);
   // a->d2 = amd_bitalign(a->d2, a->d1, 31);
   // a->d1 = amd_bitalign(a->d1, a->d0, 31);
-  a->d5 = (a->d5 << 1) | (a->d4 >> 31);
-  a->d4 = (a->d4 << 1) | (a->d3 >> 31);
-  a->d3 = (a->d3 << 1) | (a->d2 >> 31);
-  a->d2 = (a->d2 << 1) | (a->d1 >> 31);
-  a->d1 = (a->d1 << 1) | (a->d0 >> 31);
+  a->d5 = generic_bitalign(a->d5, a->d4, 31);
+  a->d4 = generic_bitalign(a->d4, a->d3, 31);
+  a->d3 = generic_bitalign(a->d3, a->d2, 31);
+  a->d2 = generic_bitalign(a->d2, a->d1, 31);
+  a->d1 = generic_bitalign(a->d1, a->d0, 31);
   a->d0 = a->d0 << 1;
 }
 
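generic_bitalign itself is defined elsewhere in the tree, not in this diff. Assuming it mirrors amd_bitalign's semantics (take 32 bits out of the 64-bit concatenation hi:lo, starting shift bits above the bottom of lo), a minimal portable fallback could look like the sketch below; with such a definition the replaced shift/or lines are bit-for-bit equivalent.

/* Sketch of an assumed fallback definition, for illustration only;
   the real generic_bitalign is not shown in this diff. */
#ifndef generic_bitalign
#define generic_bitalign(hi, lo, shift) \
        ((((hi) << (32 - (shift))) | ((lo) >> (shift))))   /* valid for 0 < shift < 32 */
#endif

/* Example equivalence from shl_96 above, under that assumption:
     (a->d2 << 1) | (a->d1 >> 31)  ==  generic_bitalign(a->d2, a->d1, 31)  */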
@@ -442,12 +442,12 @@ void div_192_96(int96_v * const res, __private uint qd5, const int96_v n, const
 
   // shiftleft nn 11 bits
 #ifndef DIV_160_96
-  nn.d3 = (nn.d3 << 11) + (nn.d2 >> 21);
+  nn.d3 = generic_bitalign(nn.d3, nn.d2, 21);
 #endif
   nn.d2 = amd_bitalign(nn.d2, nn.d1, 21);
   nn.d1 = amd_bitalign(nn.d1, nn.d0, 21);
-  // nn.d2 = (nn.d2 << 11) + (nn.d1 >> 21);
-  // nn.d1 = (nn.d1 << 11) + (nn.d0 >> 21);
+  // nn.d2 = generic_bitalign(nn.d2, nn.d1, 21);
+  // nn.d1 = generic_bitalign(nn.d1, nn.d0, 21);
   nn.d0 = nn.d0 << 11;
 
   // q = q - nn
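The bitalign shift argument counts from the bottom of the low word, so a left shift of the multi-word value by n bits passes a shift of 32 - n: the 11-bit shift here uses 21 (32 - 11), the 23-bit shift below uses 9, and the 15-bit shift uses 17. Assuming the fallback definition sketched earlier, generic_bitalign(nn.d3, nn.d2, 21) expands to (nn.d3 << 11) | (nn.d2 >> 21), which matches the line it replaces; the + in the old code never carries because the two operands have no overlapping bits.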
@@ -510,11 +510,11 @@ void div_192_96(int96_v * const res, __private uint qd5, const int96_v n, const
   nn.d4 = nn.d3 >> 9;
 #endif
   // nn.d3 = amd_bitalign(nn.d3, nn.d2, 9);
-  nn.d3 = (nn.d3 << 23) + (nn.d2 >> 9);
+  nn.d3 = generic_bitalign(nn.d3, nn.d2, 9);
   nn.d2 = amd_bitalign(nn.d2, nn.d1, 9);
-  // nn.d2 = (nn.d2 << 23) + (nn.d1 >> 9);
+  // nn.d2 = generic_bitalign(nn.d2, nn.d1, 9);
   // nn.d1 = amd_bitalign(nn.d1, nn.d0, 9);
-  nn.d1 = (nn.d1 << 23) + (nn.d0 >> 9);
+  nn.d1 = generic_bitalign(nn.d1, nn.d0, 9);
   nn.d0 = nn.d0 << 23;
 
   // q = q - nn
@@ -642,9 +642,9 @@ void div_192_96(int96_v * const res, __private uint qd5, const int96_v n, const
 #ifdef CHECKS_MODBASECASE
   nn.d4 = nn.d3 >> 17;
 #endif
-  nn.d3 = (nn.d3 << 15) + (nn.d2 >> 17);
-  nn.d2 = (nn.d2 << 15) + (nn.d1 >> 17);
-  nn.d1 = (nn.d1 << 15) + (nn.d0 >> 17);
+  nn.d3 = generic_bitalign(nn.d3, nn.d2, 17);
+  nn.d2 = generic_bitalign(nn.d2, nn.d1, 17);
+  nn.d1 = generic_bitalign(nn.d1, nn.d0, 17);
   nn.d0 = nn.d0 << 15;
 
   // q = q - nn
@@ -877,12 +877,12 @@ DIV_160_96 here. */
 
   // shiftleft nn 11 bits
 #ifndef DIV_160_96
-  nn.d3 = (nn.d3 << 11) + (nn.d2 >> 21);
+  nn.d3 = generic_bitalign(nn.d3, nn.d2, 21);
 #endif
   nn.d2 = amd_bitalign(nn.d2, nn.d1, 21);
   nn.d1 = amd_bitalign(nn.d1, nn.d0, 21);
-  // nn.d2 = (nn.d2 << 11) + (nn.d1 >> 21);
-  // nn.d1 = (nn.d1 << 11) + (nn.d0 >> 21);
+  // nn.d2 = generic_bitalign(nn.d2, nn.d1, 21);
+  // nn.d1 = generic_bitalign(nn.d1, nn.d0, 21);
   nn.d0 = nn.d0 << 11;
 
   // q = q - nn
@@ -945,11 +945,11 @@ DIV_160_96 here. */
   nn.d4 = nn.d3 >> 9;
 #endif
   // nn.d3 = amd_bitalign(nn.d3, nn.d2, 9);
-  nn.d3 = (nn.d3 << 23) + (nn.d2 >> 9);
+  nn.d3 = generic_bitalign(nn.d3, nn.d2, 9);
   nn.d2 = amd_bitalign(nn.d2, nn.d1, 9);
-  // nn.d2 = (nn.d2 << 23) + (nn.d1 >> 9);
+  // nn.d2 = generic_bitalign(nn.d2, nn.d1, 9);
   // nn.d1 = amd_bitalign(nn.d1, nn.d0, 9);
-  nn.d1 = (nn.d1 << 23) + (nn.d0 >> 9);
+  nn.d1 = generic_bitalign(nn.d1, nn.d0, 9);
   nn.d0 = nn.d0 << 23;
 
   // q = q - nn
@@ -1077,9 +1077,9 @@ DIV_160_96 here. */
 #ifdef CHECKS_MODBASECASE
   nn.d4 = nn.d3 >> 17;
 #endif
-  nn.d3 = (nn.d3 << 15) + (nn.d2 >> 17);
-  nn.d2 = (nn.d2 << 15) + (nn.d1 >> 17);
-  nn.d1 = (nn.d1 << 15) + (nn.d0 >> 17);
+  nn.d3 = generic_bitalign(nn.d3, nn.d2, 17);
+  nn.d2 = generic_bitalign(nn.d2, nn.d1, 17);
+  nn.d1 = generic_bitalign(nn.d1, nn.d0, 17);
   nn.d0 = nn.d0 << 15;
 
   // q = q - nn