Skip to content

Commit

Permalink
[prog-pi-digits] faster check if factor should be reduced due to modular…
Browse files Browse the repository at this point in the history
… multiplication
  • Loading branch information
markablov committed Oct 27, 2023
1 parent 99c4e70 commit 15db96b
Show file tree
Hide file tree
Showing 3 changed files with 204 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ mulMod:
mulMod_shiftedFactorLoaded:
CLB
DCL
XCH rr3

FIM r4, 0x00
FIM r5, 0x00 # res = 0
Expand Down Expand Up @@ -355,6 +356,9 @@ mulMod_updateResultAndFactor_return:
# [rr0, rr1, rr4, rr2] - multipliedFactor
# OUTPUT:
# [rr0, rr1, rr4, rr2] - multipliedFactor
# NOTES:
# block size is 13 bytes
__location(0x4:0x01)
mulMod_updateMultipliedFactor:
LD rr0
ADD rr0
Expand All @@ -368,9 +372,175 @@ mulMod_updateMultipliedFactor:
LD rr2
ADD rr2
XCH rr2 # multipliedFactor = multipliedFactor + multipliedFactor
AN7 # check if previous value of multipliedFactor[3] < 4, then new multipliedFactor[3] < 8
JCN z, mulMod_updateMultipliedFactor_return # if (multipliedFactor[3] < 0x8)
JIN r1 # jump table, based on multipliedFactor[3], to check when multipliedFactor[3] >= 8
# JIN takes 1 cycle, while AN7/JCN take 3 cycles, so we save 2 cycles on every call!
# Jump-table entries for `JIN r1` in mulMod_updateMultipliedFactor, one per value 0x0..0x7 of multipliedFactor[3].
# Every entry is pinned to a 0x10-aligned address in page 0x4 and returns immediately,
# leaving [rr0, rr1, rr4, rr2] untouched.
# NOTE(review): the 0x10 alignment matches r1 == (rr2 << 4) only if rr3 == 0 at the jump — confirm against mulMod setup.
__location(0x4:0x00)
mulMod_updateMultipliedFactor_factor0:
BBL 0 # return with acc = 0

__location(0x4:0x10)
mulMod_updateMultipliedFactor_factor1:
BBL 0 # return with acc = 0

__location(0x4:0x20)
mulMod_updateMultipliedFactor_factor2:
BBL 0 # return with acc = 0

__location(0x4:0x30)
mulMod_updateMultipliedFactor_factor3:
BBL 0 # return with acc = 0

__location(0x4:0x40)
mulMod_updateMultipliedFactor_factor4:
BBL 0 # return with acc = 0

__location(0x4:0x50)
mulMod_updateMultipliedFactor_factor5:
BBL 0 # return with acc = 0

__location(0x4:0x60)
mulMod_updateMultipliedFactor_factor6:
BBL 0 # return with acc = 0

__location(0x4:0x70)
mulMod_updateMultipliedFactor_factor7:
BBL 0 # return with acc = 0

# Jump-table entry pinned at 0x4:0x80, the `JIN r1` target for multipliedFactor[3] == 0x8
# (assumes rr3 == 0 at the jump — TODO confirm against mulMod setup).
# Adds the 4-digit number kept in status characters 0..3 of the RAM register addressed by r1
# to [rr0, rr1, rr4, rr2], lowest digit first, chaining the carry between digits.
# NOTE(review): the stored number is presumably a precomputed correction (negated multiple of m) — confirm.
# OUTPUT:
# [rr0, rr1, rr4, rr2] - multipliedFactor after the correction, carry is cleared
__location(0x4:0x80)
mulMod_updateMultipliedFactor_factor8:
SRC r1 # address the RAM register by r1, must happen before rr2 is overwritten below
RD0 # acc = status character 0
ADD rr0 # acc = acc + rr0 + carry
XCH rr0
RD1 # acc = status character 1
ADD rr1
XCH rr1
RD2 # acc = status character 2
ADD rr4
XCH rr4
RD3 # acc = status character 3
ADD rr2
XCH rr2 # multipliedFactor = multipliedFactor - m * floor((multipliedFactor[3] * 0x1000) / m)
CLC # discard the carry produced by the highest digit
BBL 0 # return with acc = 0


# Jump-table entry pinned at 0x4:0x90, the `JIN r1` target for multipliedFactor[3] == 0x9
# (assumes rr3 == 0 at the jump — TODO confirm against mulMod setup).
# Adds the 4-digit number kept in status characters 0..3 of the RAM register addressed by r1
# to [rr0, rr1, rr4, rr2], lowest digit first, chaining the carry between digits.
# NOTE(review): the stored number is presumably a precomputed correction (negated multiple of m) — confirm.
# OUTPUT:
# [rr0, rr1, rr4, rr2] - multipliedFactor after the correction, carry is cleared
__location(0x4:0x90)
mulMod_updateMultipliedFactor_factor9:
SRC r1 # address the RAM register by r1, must happen before rr2 is overwritten below
RD0 # acc = status character 0
ADD rr0 # acc = acc + rr0 + carry
XCH rr0
RD1 # acc = status character 1
ADD rr1
XCH rr1
RD2 # acc = status character 2
ADD rr4
XCH rr4
RD3 # acc = status character 3
ADD rr2
XCH rr2 # multipliedFactor = multipliedFactor - m * floor((multipliedFactor[3] * 0x1000) / m)
CLC # discard the carry produced by the highest digit
BBL 0 # return with acc = 0


# Jump-table entry pinned at 0x4:0xA0, the `JIN r1` target for multipliedFactor[3] == 0xA
# (assumes rr3 == 0 at the jump — TODO confirm against mulMod setup).
# Adds the 4-digit number kept in status characters 0..3 of the RAM register addressed by r1
# to [rr0, rr1, rr4, rr2], lowest digit first, chaining the carry between digits.
# NOTE(review): the stored number is presumably a precomputed correction (negated multiple of m) — confirm.
# OUTPUT:
# [rr0, rr1, rr4, rr2] - multipliedFactor after the correction, carry is cleared
__location(0x4:0xA0)
mulMod_updateMultipliedFactor_factorA:
SRC r1 # address the RAM register by r1, must happen before rr2 is overwritten below
RD0 # acc = status character 0
ADD rr0 # acc = acc + rr0 + carry
XCH rr0
RD1 # acc = status character 1
ADD rr1
XCH rr1
RD2 # acc = status character 2
ADD rr4
XCH rr4
RD3 # acc = status character 3
ADD rr2
XCH rr2 # multipliedFactor = multipliedFactor - m * floor((multipliedFactor[3] * 0x1000) / m)
CLC # discard the carry produced by the highest digit
BBL 0 # return with acc = 0


# Jump-table entry pinned at 0x4:0xB0, the `JIN r1` target for multipliedFactor[3] == 0xB
# (assumes rr3 == 0 at the jump — TODO confirm against mulMod setup).
# Adds the 4-digit number kept in status characters 0..3 of the RAM register addressed by r1
# to [rr0, rr1, rr4, rr2], lowest digit first, chaining the carry between digits.
# NOTE(review): the stored number is presumably a precomputed correction (negated multiple of m) — confirm.
# OUTPUT:
# [rr0, rr1, rr4, rr2] - multipliedFactor after the correction, carry is cleared
__location(0x4:0xB0)
mulMod_updateMultipliedFactor_factorB:
SRC r1 # address the RAM register by r1, must happen before rr2 is overwritten below
RD0 # acc = status character 0
ADD rr0 # acc = acc + rr0 + carry
XCH rr0
RD1 # acc = status character 1
ADD rr1
XCH rr1
RD2 # acc = status character 2
ADD rr4
XCH rr4
RD3 # acc = status character 3
ADD rr2
XCH rr2 # multipliedFactor = multipliedFactor - m * floor((multipliedFactor[3] * 0x1000) / m)
CLC # discard the carry produced by the highest digit
BBL 0 # return with acc = 0


# Jump-table entry pinned at 0x4:0xC0, the `JIN r1` target for multipliedFactor[3] == 0xC
# (assumes rr3 == 0 at the jump — TODO confirm against mulMod setup).
# Adds the 4-digit number kept in status characters 0..3 of the RAM register addressed by r1
# to [rr0, rr1, rr4, rr2], lowest digit first, chaining the carry between digits.
# NOTE(review): the stored number is presumably a precomputed correction (negated multiple of m) — confirm.
# OUTPUT:
# [rr0, rr1, rr4, rr2] - multipliedFactor after the correction, carry is cleared
__location(0x4:0xC0)
mulMod_updateMultipliedFactor_factorC:
SRC r1 # address the RAM register by r1, must happen before rr2 is overwritten below
RD0 # acc = status character 0
ADD rr0 # acc = acc + rr0 + carry
XCH rr0
RD1 # acc = status character 1
ADD rr1
XCH rr1
RD2 # acc = status character 2
ADD rr4
XCH rr4
RD3 # acc = status character 3
ADD rr2
XCH rr2 # multipliedFactor = multipliedFactor - m * floor((multipliedFactor[3] * 0x1000) / m)
CLC # discard the carry produced by the highest digit
BBL 0 # return with acc = 0


# Jump-table entry pinned at 0x4:0xD0, the `JIN r1` target for multipliedFactor[3] == 0xD
# (assumes rr3 == 0 at the jump — TODO confirm against mulMod setup).
# Adds the 4-digit number kept in status characters 0..3 of the RAM register addressed by r1
# to [rr0, rr1, rr4, rr2], lowest digit first, chaining the carry between digits.
# NOTE(review): the stored number is presumably a precomputed correction (negated multiple of m) — confirm.
# OUTPUT:
# [rr0, rr1, rr4, rr2] - multipliedFactor after the correction, carry is cleared
__location(0x4:0xD0)
mulMod_updateMultipliedFactor_factorD:
SRC r1 # address the RAM register by r1, must happen before rr2 is overwritten below
RD0 # acc = status character 0
ADD rr0 # acc = acc + rr0 + carry
XCH rr0
RD1 # acc = status character 1
ADD rr1
XCH rr1
RD2 # acc = status character 2
ADD rr4
XCH rr4
RD3 # acc = status character 3
ADD rr2
XCH rr2 # multipliedFactor = multipliedFactor - m * floor((multipliedFactor[3] * 0x1000) / m)
CLC # discard the carry produced by the highest digit
BBL 0 # return with acc = 0


# Jump-table entry pinned at 0x4:0xE0, the `JIN r1` target for multipliedFactor[3] == 0xE
# (assumes rr3 == 0 at the jump — TODO confirm against mulMod setup).
# Adds the 4-digit number kept in status characters 0..3 of the RAM register addressed by r1
# to [rr0, rr1, rr4, rr2], lowest digit first, chaining the carry between digits.
# NOTE(review): the stored number is presumably a precomputed correction (negated multiple of m) — confirm.
# OUTPUT:
# [rr0, rr1, rr4, rr2] - multipliedFactor after the correction, carry is cleared
__location(0x4:0xE0)
mulMod_updateMultipliedFactor_factorE:
SRC r1 # address the RAM register by r1, must happen before rr2 is overwritten below
RD0 # acc = status character 0
ADD rr0 # acc = acc + rr0 + carry
XCH rr0
RD1 # acc = status character 1
ADD rr1
XCH rr1
RD2 # acc = status character 2
ADD rr4
XCH rr4
RD3 # acc = status character 3
ADD rr2
XCH rr2 # multipliedFactor = multipliedFactor - m * floor((multipliedFactor[3] * 0x1000) / m)
CLC # discard the carry produced by the highest digit
BBL 0 # return with acc = 0


__location(0x4:0xF0)
mulMod_updateMultipliedFactor_factorF:
SRC r1
RD0
ADD rr0
Expand All @@ -385,7 +555,6 @@ mulMod_updateMultipliedFactor:
ADD rr2
XCH rr2 # multipliedFactor = multipliedFactor - m * floor((multipliedFactor[3] * 0x1000) / m)
CLC
mulMod_updateMultipliedFactor_return:
BBL 0

# multipliedFactor = multipliedFactor * 16
Expand Down
26 changes: 13 additions & 13 deletions packages/i40xx-program-pi-digits/src/submodules/shift4_table.i4040
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# acc - value
# OUTPUT:
# rr12 or rr13 - shifted value, depends on shift direction
__location(0x4:0x00)
__location(0x5:0x00)
shift4ByR1:
JIN r1

Expand All @@ -19,7 +19,7 @@ shift4ByR1:
# acc - value
# OUTPUT:
# rr12 or rr13 - shifted value, depends on shift direction
__location(0x4:0x02)
__location(0x5:0x02)
shift4ByR2:
JIN r2

Expand All @@ -29,7 +29,7 @@ shift4ByR2:
# acc - value
# OUTPUT:
# rr12 or rr13 - shifted value, depends on shift direction
__location(0x4:0x04)
__location(0x5:0x04)
shift4ByR5:
JIN r5

Expand All @@ -39,7 +39,7 @@ shift4ByR5:
# acc - value
# OUTPUT:
# rr12 or rr13 - shifted value, depends on shift direction
__location(0x4:0x06)
__location(0x5:0x06)
shift4ByR7:
JIN r7

Expand All @@ -49,25 +49,25 @@ shift4ByR7:
# acc - value
# OUTPUT:
# rr12 or rr13 - shifted value, depends on shift direction
__location(0x4:0x08)
__location(0x5:0x08)
shift4ByR4:
JIN r4

__location(0x4:0x10)
__location(0x5:0x10)
# Shifts the 4-bit value in acc right by one bit and stores the result in rr12.
# NOTE(review): RAR rotates through carry, so the top bit of the result is the incoming carry —
# assumes carry is clear on entry, TODO confirm against callers.
shift_right_1:
RAR # bit 0 -> carry, carry -> bit 3
CLC # drop the bit that was shifted out
XCH rr12 # rr12 = shifted value
BBL 0 # return with acc = 0

__location(0x4:0x18)
__location(0x5:0x18)
# Shifts the 4-bit value in acc left by one bit and stores the result in rr13.
# NOTE(review): RAL rotates through carry, so the lowest bit of the result is the incoming carry —
# assumes carry is clear on entry, TODO confirm against callers.
shift_left_1:
RAL # bit 3 -> carry, carry -> bit 0
CLC # drop the bit that was shifted out
XCH rr13 # rr13 = shifted value
BBL 0 # return with acc = 0

__location(0x4:0x20)
__location(0x5:0x20)
shift_right_2:
RAR
CLC
Expand All @@ -76,7 +76,7 @@ shift_right_2:
XCH rr12
BBL 0

__location(0x4:0x28)
__location(0x5:0x28)
shift_left_2:
RAL
CLC
Expand All @@ -85,29 +85,29 @@ shift_left_2:
XCH rr13
BBL 0

__location(0x4:0x30)
__location(0x5:0x30)
# Shifts the 4-bit value in acc right by three bits and stores the result in rr12.
# Only the top bit of the value survives, so it is moved through carry into bit 0 of a zeroed accumulator.
shift_right_3:
RAL # carry = bit 3 of the value
LDM 0x0 # acc = 0, carry is kept
RAL # acc bit 0 = carry, carry = 0
XCH rr12 # rr12 = shifted value
BBL 0 # return with acc = 0

__location(0x4:0x38)
__location(0x5:0x38)
# Shifts the 4-bit value in acc left by three bits and stores the result in rr13.
# Only the lowest bit of the value survives, so it is moved through carry into bit 3 of a zeroed accumulator.
shift_left_3:
RAR # carry = bit 0 of the value
LDM 0x0 # acc = 0, carry is kept
RAR # acc bit 3 = carry, carry = 0
XCH rr13 # rr13 = shifted value
BBL 0 # return with acc = 0

__location(0x4:0x40)
__location(0x5:0x40)
# Shifting a 4-bit value right by four bits always yields zero, so rr12 is simply cleared.
shift_right_4:
LDM 0x0 # acc = 0, the incoming value is discarded
XCH rr12 # rr12 = 0
BBL 0 # return with acc = 0

__location(0x4:0x48)
__location(0x5:0x48)
shift_left_4:
LDM 0x0
XCH rr13
Expand Down
20 changes: 19 additions & 1 deletion packages/i40xx-program-pi-digits/src/tests/computeF/partial.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import {

import {
updateCodeForUseInEmulator, generateMemoryBankSwitch, generateMemoryStatusCharactersInitialization,
generateAccumulatorInitialization,
generateAccumulatorInitialization, generateMemoryMainCharactersInitialization,
} from '#utilities/codeGenerator.js';

import RAM_DUMP from '#data/multiplicationStaticData/ramWithLookupTables.json' assert { type: 'json' };
Expand Down Expand Up @@ -209,11 +209,29 @@ const testComputeF = () => {
console.log('Code to reproduce:');
const { N, m, a, vmax } = input;
const negativeN = numToHWNumber(0x10000 - (parseInt(N, 16) + 1));
// Characters for RAM registers 0x2 and 0x3 of the generated reproduction code:
// powers a^2..a^5 go to register 0x2 and a^6..a^7 to register 0x3, 4 digits per power.
// A power that is not smaller than the modulus is stored as zeros.
// Both arrays stay empty when a === m, so no initialization is generated for them.
const expRegister2 = [];
const expRegister3 = [];
if (a !== m) {
  const aNum = parseInt(a, 16);
  // m is a hex string (it is parsed with radix 16 elsewhere), so it must be parsed before
  // a numeric comparison; comparing against the raw string coerces it as decimal or NaN.
  const mNum = parseInt(m, 16);
  const powersToChars = (exponents) => exponents.flatMap(
    (exp) => [...(aNum ** exp < mNum ? numToHWNumber(aNum ** exp, 4) : [0, 0, 0, 0])],
  );
  expRegister2.push(...powersToChars([2, 3, 4, 5]));
  expRegister3.push(...powersToChars([6, 7]));
}

const initializators = [
generateMemoryBankSwitch(0x7),
generateMemoryStatusCharactersInitialization(VARIABLES.STATUS_MEM_VARIABLE_V, [0x0, 0x0, vmax, 0x00]),
generateMemoryStatusCharactersInitialization(VARIABLES.STATUS_MEM_VARIABLE_N_NEG, negativeN),
generateMemoryStatusCharactersInitialization(VARIABLES.STATUS_MEM_VARIABLE_CURRENT_PRIME, hexToHWNumber(a)),
...(expRegister2.length ? [generateMemoryMainCharactersInitialization(0x2, expRegister2)] : []),
...(expRegister3.length ? [generateMemoryMainCharactersInitialization(0x3, expRegister3)] : []),
...generateCodeToPrepareModulusBasedDataForEmulator(parseInt(m, 16)),
generateAccumulatorInitialization(vmax === 1 ? 1 : 0),
];
Expand Down

0 comments on commit 15db96b

Please sign in to comment.