Add SM2 X86_64 asm
Not working yet
guanzhi committed May 24, 2024
1 parent 3281949 commit 1fd9893
Showing 3 changed files with 62 additions and 34 deletions.
10 changes: 10 additions & 0 deletions CMakeLists.txt
@@ -27,6 +27,9 @@ option(ENABLE_SM9_ARM64 "Enable SM9_Z256 ARMv8 assembly" OFF)
option(ENABLE_GMUL_ARM64 "Enable GF(2^128) Multiplication AArch64 assembly" OFF)


option(ENABLE_SM2_AMD64 "Enable SM2_Z256 X86_64 assembly" OFF)


option(ENABLE_SM3_SSE "Enable SM3 SSE assembly implementation" OFF)

option(ENABLE_SM4_CTR_AESNI_AVX "Enable SM4 CTR AESNI+AVX assembly implementation" OFF)
@@ -259,6 +262,13 @@ if (ENABLE_SM2_ARM64)
list(APPEND src src/sm2_z256_arm64.S)
endif()

if (ENABLE_SM2_AMD64)
message(STATUS "ENABLE_SM2_AMD64 is ON")
add_definitions(-DENABLE_SM2_AMD64)
enable_language(ASM)
list(APPEND src src/sm2_z256_amd64.S)
endif()

if (ENABLE_SM2_NEON)
message(STATUS "ENABLE_SM2_NEON is ON")
add_definitions(-DENABLE_SM2_NEON)
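Note (not part of the diff): like the other assembly switches, ENABLE_SM2_AMD64 defaults to OFF, so the X86_64 code is only compiled in when it is requested explicitly at configure time, for example with cmake -DENABLE_SM2_AMD64=ON followed by a normal build.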
16 changes: 8 additions & 8 deletions src/sm2_z256.c
@@ -400,7 +400,7 @@ const uint64_t SM2_Z256_NEG_P[4] = {
1, ((uint64_t)1 << 32) - 1, 0, ((uint64_t)1 << 32),
};

#ifndef ENABLE_SM2_ARM64
#if !defined(ENABLE_SM2_ARM64) && !defined(ENABLE_SM2_AMD64)
void sm2_z256_modp_add(sm2_z256_t r, const sm2_z256_t a, const sm2_z256_t b)
{
uint64_t c;
@@ -481,7 +481,7 @@ const uint64_t SM2_Z256_P_PRIME[4] = {
// mont(1) (mod p) = 2^256 mod p = 2^256 - p
const uint64_t *SM2_Z256_MODP_MONT_ONE = SM2_Z256_NEG_P;

#if defined(ENABLE_SM2_ARM64)
#if defined(ENABLE_SM2_ARM64) || defined(ENABLE_SM2_AMD64)
// implemented in src/sm2_z256_arm64.S or src/sm2_z256_amd64.S
#elif defined(ENABLE_SM2_Z256_NEON)
#include <arm_neon.h>
@@ -812,7 +812,7 @@ const uint64_t SM2_Z256_NEG_N[4] = {
0xac440bf6c62abedd, 0x8dfc2094de39fad4, 0x0000000000000000, 0x0000000100000000,
};

#ifndef ENABLE_SM2_ARM64
#if !defined(ENABLE_SM2_ARM64) && !defined(ENABLE_SM2_AMD64)
void sm2_z256_modn_add(sm2_z256_t r, const sm2_z256_t a, const sm2_z256_t b)
{
uint64_t c;
@@ -868,7 +868,7 @@ const uint64_t *sm2_z256_order_minus_one(void) {
const uint64_t *SM2_Z256_MODN_MONT_ONE = SM2_Z256_NEG_N;


#ifndef ENABLE_SM2_ARM64
#if !defined(ENABLE_SM2_ARM64) && !defined(ENABLE_SM2_AMD64)
void sm2_z256_modn_mont_mul(sm2_z256_t r, const sm2_z256_t a, const sm2_z256_t b)
{
sm2_z512_t z;
@@ -917,7 +917,7 @@ void sm2_z256_modn_mul(sm2_z256_t r, const sm2_z256_t a, const sm2_z256_t b)
sm2_z256_modn_from_mont(r, r);
}

#ifndef ENABLE_SM2_ARM64
#if !defined(ENABLE_SM2_ARM64) && !defined(ENABLE_SM2_AMD64)
void sm2_z256_modn_mont_sqr(sm2_z256_t r, const sm2_z256_t a)
{
sm2_z256_modn_mont_mul(r, a, a);
@@ -1020,7 +1020,7 @@ void sm2_z256_modn_inv(sm2_z256_t r, const sm2_z256_t a)
}


#ifndef ENABLE_SM2_ARM64
#if !defined(ENABLE_SM2_ARM64) && !defined(ENABLE_SM2_AMD64)

// mont(mont(a), 1) = aR * 1 * R^-1 (mod n) = a (mod p)
void sm2_z256_modn_from_mont(sm2_z256_t r, const sm2_z256_t a)
@@ -1149,7 +1149,7 @@ int sm2_z256_point_get_xy(const SM2_Z256_POINT *P, uint64_t x[4], uint64_t y[4])
return 1;
}

#ifndef ENABLE_SM2_ARM64
#if !defined(ENABLE_SM2_ARM64) && !defined(ENABLE_SM2_AMD64)
void sm2_z256_point_dbl(SM2_Z256_POINT *R, const SM2_Z256_POINT *A)
{
const uint64_t *X1 = A->X;
@@ -1475,7 +1475,7 @@ void sm2_z256_point_copy_affine(SM2_Z256_POINT *R, const SM2_Z256_AFFINE_POINT *
sm2_z256_copy(R->Z, SM2_Z256_MODP_MONT_ONE);
}

#ifndef ENABLE_SM2_ARM64
#if !defined(ENABLE_SM2_ARM64) && !defined(ENABLE_SM2_AMD64)
void sm2_z256_point_add_affine(SM2_Z256_POINT *r, const SM2_Z256_POINT *a, const SM2_Z256_AFFINE_POINT *b)
{
sm2_z256_t U2, S2;
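Note (not part of the diff): wherever a guard now reads #if !defined(ENABLE_SM2_ARM64) && !defined(ENABLE_SM2_AMD64), enabling either backend compiles the C fallback out, so the assembly file has to export the same symbols. A sketch of the interface this implies is given below; the signatures are copied from the C definitions visible in this diff, sm2_z256_t is assumed to be a four-limb uint64_t array, and the remaining modp_* and point routines exported by sm2_z256_amd64.S are assumed to follow the same pattern.

/* Sketch: symbols an assembly backend must provide once the guarded C
 * fallbacks in sm2_z256.c are compiled out. Signatures are taken from the
 * C definitions shown in this diff. */
void sm2_z256_modp_add(sm2_z256_t r, const sm2_z256_t a, const sm2_z256_t b);
void sm2_z256_modn_add(sm2_z256_t r, const sm2_z256_t a, const sm2_z256_t b);
void sm2_z256_modn_mont_mul(sm2_z256_t r, const sm2_z256_t a, const sm2_z256_t b);
void sm2_z256_modn_mont_sqr(sm2_z256_t r, const sm2_z256_t a);
void sm2_z256_modn_from_mont(sm2_z256_t r, const sm2_z256_t a);
void sm2_z256_point_dbl(SM2_Z256_POINT *R, const SM2_Z256_POINT *A);
void sm2_z256_point_add_affine(SM2_Z256_POINT *r, const SM2_Z256_POINT *a,
                               const SM2_Z256_AFFINE_POINT *b);

The guards also cover the modn_* routines, but no modn_* symbols appear in the sm2_z256_amd64.S hunks shown below, which may be one reason for the "Not working yet" remark.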
70 changes: 44 additions & 26 deletions src/sm2_z256_amd64.S
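Note (an assumption, not shown in this commit): the func() wrapper used in the .globl directives and labels below is presumably a preprocessor macro that applies the platform's symbol decoration, since Mach-O prepends an underscore to C symbol names while ELF does not; the old hard-coded _ecp_sm2z256_* names and the L$ label prefixes suggest the file started out as Mach-O flavored perlasm output. A minimal sketch of such a macro, for illustration only:

/* Hypothetical definition of func(); the real one in the repository may differ. */
#if defined(__APPLE__)
# define func(name) _##name   /* Mach-O: C symbols carry a leading underscore */
#else
# define func(name) name      /* ELF: no decoration */
#endif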
@@ -31,10 +31,10 @@ L$Three:
L$ONE_mont:
.quad 0x0000000000000001, 0x00000000ffffffff, 0x0000000000000000, 0x0000000100000000

.globl _ecp_sm2z256_mul_by_2
.globl func(sm2_z256_modp_dbl)

.p2align 6
_ecp_sm2z256_mul_by_2:
func(sm2_z256_modp_dbl):
pushq %r12
pushq %r13

@@ -76,10 +76,10 @@ _ecp_sm2z256_mul_by_2:



.globl _ecp_sm2z256_div_by_2
.globl func(sm2_z256_modp_haf)

.p2align 5
_ecp_sm2z256_div_by_2:
func(sm2_z256_modp_haf):
pushq %r12
pushq %r13

@@ -136,10 +136,10 @@ _ecp_sm2z256_div_by_2:



.globl _ecp_sm2z256_mul_by_3
.globl func(sm2_z256_modp_tri)

.p2align 5
_ecp_sm2z256_mul_by_3:
func(sm2_z256_modp_tri):
pushq %r12
pushq %r13

@@ -202,10 +202,10 @@ _ecp_sm2z256_mul_by_3:



.globl _ecp_sm2z256_add
.globl func(sm2_z256_modp_add)

.p2align 5
_ecp_sm2z256_add:
func(sm2_z256_modp_add):
pushq %r12
pushq %r13

@@ -248,10 +248,10 @@ _ecp_sm2z256_add:



.globl _ecp_sm2z256_sub
.globl func(sm2_z256_modp_sub)

.p2align 5
_ecp_sm2z256_sub:
func(sm2_z256_modp_sub):
pushq %r12
pushq %r13

@@ -294,10 +294,10 @@ _ecp_sm2z256_sub:



.globl _ecp_sm2z256_neg
.globl func(sm2_z256_modp_neg)

.p2align 5
_ecp_sm2z256_neg:
func(sm2_z256_modp_neg):
pushq %r12
pushq %r13

@@ -341,10 +341,10 @@ _ecp_sm2z256_neg:



.globl _ecp_sm2z256_to_mont
.globl func(sm2_z256_modp_to_mont)

.p2align 5
_ecp_sm2z256_to_mont:
func(sm2_z256_modp_to_mont):
// FIXME: swap arg1 arg2
leaq L$RR(%rip),%rdx
jmp L$mul_mont
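Note (not part of the diff, assuming the usual Montgomery convention in which L$RR holds 2^512 mod p): to_mont is just mont_mul with RR as the second operand, since mont_mul(a, RR) = a * 2^512 * 2^-256 = a * 2^256 (mod p), which matches the Montgomery form used in sm2_z256.c (mont(1) = 2^256 mod p). The FIXME above apparently records that the register order of this assembly entry point still has to be reconciled with the argument order of the C prototype.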

@@ -355,10 +356,10 @@



.globl _ecp_sm2z256_mul_mont
.globl func(sm2_z256_modp_mont_mul)

.p2align 5
_ecp_sm2z256_mul_mont:
func(sm2_z256_modp_mont_mul):
L$mul_mont:
pushq %rbp
pushq %rbx
@@ -633,10 +634,10 @@ __ecp_sm2z256_mul_montq:



.globl _ecp_sm2z256_sqr_mont
.globl func(sm2_z256_modp_mont_sqr)

.p2align 5
_ecp_sm2z256_sqr_mont:
func(sm2_z256_modp_mont_sqr):
pushq %rbp
pushq %rbx
pushq %r12
@@ -842,10 +843,10 @@ __ecp_sm2z256_sqr_montq:



.globl _ecp_sm2z256_from_mont
.globl func(sm2_z256_modp_from_mont)

.p2align 5
_ecp_sm2z256_from_mont:
func(sm2_z256_modp_from_mont):
pushq %r12
pushq %r13

@@ -1218,10 +1219,16 @@ __ecp_sm2z256_mul_by_2q:

.byte 0xf3,0xc3 // repz ret

.globl _ecp_sm2z256_point_double






.globl func(sm2_z256_point_dbl)

.p2align 5
_ecp_sm2z256_point_double:
func(sm2_z256_point_dbl):
pushq %rbp
pushq %rbx
pushq %r12
@@ -1420,10 +1427,17 @@ L$point_double_shortcutq:
popq %rbp
.byte 0xf3,0xc3 // repz ret

.globl _ecp_sm2z256_point_add







.globl func(sm2_z256_point_add)

.p2align 5
_ecp_sm2z256_point_add:
func(sm2_z256_point_add):
pushq %rbp
pushq %rbx
pushq %r12
@@ -1816,10 +1830,14 @@ L$add_doneq:
popq %rbp
.byte 0xf3,0xc3 // repz ret

.globl _ecp_sm2z256_point_add_affine




.globl func(sm2_z256_point_add_affine)

.p2align 5
_ecp_sm2z256_point_add_affine:
func(sm2_z256_point_add_affine):
pushq %rbp
pushq %rbx
pushq %r12