Skip to content

Commit

Permalink
Optimize the SM2 P-256 amd64 implementation by reducing the number of multiplications
Browse files Browse the repository at this point in the history
  • Loading branch information
Emman committed Dec 27, 2021
1 parent 4ff0c45 commit 381476a
Showing 1 changed file with 52 additions and 68 deletions.
120 changes: 52 additions & 68 deletions sm2/p256_asm_amd64.s
Original file line number | Diff line number | Diff line change
Expand Up @@ -848,24 +848,20 @@ TEXT ·p256OrdMul(SB),NOSPLIT,$0
ADDQ t1, acc1
ADCQ $0, DX
ADDQ AX, acc1
ADCQ $0, DX
MOVQ DX, t1

MOVQ p256ord<>+0x10(SB), AX
MULQ t0
ADDQ t1, acc2
ADCQ $0, DX
ADDQ AX, acc2
ADCQ $0, DX
MOVQ DX, t1

MOVQ p256ord<>+0x18(SB), AX
MULQ t0
ADDQ t1, acc3
ADCQ $0, DX
ADDQ AX, acc3
ADCQ DX, acc4
ADCQ DX, acc2
ADCQ $0, acc3
ADCQ t0, acc4
ADCQ $0, acc5

MOVQ t0, AX
MOVQ t0, DX
SHLQ $32, AX
SHRQ $32, DX

SUBQ t0, acc2
SBBQ AX, acc3
SBBQ DX, acc4
SBBQ $0, acc5
// x * y[1]
MOVQ (8*1)(y_ptr), t0

Expand Down Expand Up @@ -914,24 +910,20 @@ TEXT ·p256OrdMul(SB),NOSPLIT,$0
ADDQ t1, acc2
ADCQ $0, DX
ADDQ AX, acc2
ADCQ $0, DX
MOVQ DX, t1

MOVQ p256ord<>+0x10(SB), AX
MULQ t0
ADDQ t1, acc3
ADCQ $0, DX
ADDQ AX, acc3
ADCQ $0, DX
MOVQ DX, t1

MOVQ p256ord<>+0x18(SB), AX
MULQ t0
ADDQ t1, acc4
ADCQ $0, DX
ADDQ AX, acc4
ADCQ DX, acc5
ADCQ DX, acc3
ADCQ $0, acc4
ADCQ t0, acc5
ADCQ $0, acc0

MOVQ t0, AX
MOVQ t0, DX
SHLQ $32, AX
SHRQ $32, DX

SUBQ t0, acc3
SBBQ AX, acc4
SBBQ DX, acc5
SBBQ $0, acc0
// x * y[2]
MOVQ (8*2)(y_ptr), t0

Expand Down Expand Up @@ -980,24 +972,20 @@ TEXT ·p256OrdMul(SB),NOSPLIT,$0
ADDQ t1, acc3
ADCQ $0, DX
ADDQ AX, acc3
ADCQ $0, DX
MOVQ DX, t1

MOVQ p256ord<>+0x10(SB), AX
MULQ t0
ADDQ t1, acc4
ADCQ $0, DX
ADDQ AX, acc4
ADCQ $0, DX
MOVQ DX, t1

MOVQ p256ord<>+0x18(SB), AX
MULQ t0
ADDQ t1, acc5
ADCQ $0, DX
ADDQ AX, acc5
ADCQ DX, acc0
ADCQ DX, acc4
ADCQ $0, acc5
ADCQ t0, acc0
ADCQ $0, acc1

MOVQ t0, AX
MOVQ t0, DX
SHLQ $32, AX
SHRQ $32, DX

SUBQ t0, acc4
SBBQ AX, acc5
SBBQ DX, acc0
SBBQ $0, acc1
// x * y[3]
MOVQ (8*3)(y_ptr), t0

Expand Down Expand Up @@ -1046,24 +1034,20 @@ TEXT ·p256OrdMul(SB),NOSPLIT,$0
ADDQ t1, acc4
ADCQ $0, DX
ADDQ AX, acc4
ADCQ $0, DX
MOVQ DX, t1

MOVQ p256ord<>+0x10(SB), AX
MULQ t0
ADDQ t1, acc5
ADCQ $0, DX
ADDQ AX, acc5
ADCQ $0, DX
MOVQ DX, t1

MOVQ p256ord<>+0x18(SB), AX
MULQ t0
ADDQ t1, acc0
ADCQ $0, DX
ADDQ AX, acc0
ADCQ DX, acc1
ADCQ DX, acc5
ADCQ $0, acc0
ADCQ t0, acc1
ADCQ $0, acc2

MOVQ t0, AX
MOVQ t0, DX
SHLQ $32, AX
SHRQ $32, DX

SUBQ t0, acc5
SBBQ AX, acc0
SBBQ DX, acc1
SBBQ $0, acc2
// Copy result [255:0]
MOVQ acc4, x_ptr
MOVQ acc5, acc3
Expand Down

0 comments on commit 381476a

Please sign in to comment.