Skip to content

Commit

Permalink
Change sm4_cl_encrypt to sm4_cl_ctr32_encrypt
Browse files Browse the repository at this point in the history
  • Loading branch information
guanzhi committed Apr 23, 2024
1 parent 2e45b3f commit 2e6cef2
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 69 deletions.
4 changes: 2 additions & 2 deletions include/gmssl/sm4_cl.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ extern "C" {

typedef struct {
uint32_t rk[32];
size_t workgroup_size;
//size_t workgroup_size;
cl_context context;
cl_command_queue queue;
cl_program program;
Expand All @@ -40,7 +40,7 @@ typedef struct {

int sm4_cl_set_encrypt_key(SM4_CL_CTX *ctx, const uint8_t key[16]);
int sm4_cl_set_decrypt_key(SM4_CL_CTX *ctx, const uint8_t key[16]);
int sm4_cl_encrypt(SM4_CL_CTX *ctx, const uint8_t *in, size_t nblocks, uint8_t *out);
int sm4_cl_ctr32_encrypt(SM4_CL_CTX *ctx, uint8_t iv[16], const uint8_t *in, size_t nblocks, uint8_t *out);
void sm4_cl_cleanup(SM4_CL_CTX *ctx);


Expand Down
82 changes: 52 additions & 30 deletions src/sm4_cl.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <stdlib.h>
#include <stdint.h>
#include <gmssl/sm4_cl.h>
#include <gmssl/endian.h>
#include <gmssl/error.h>


Expand Down Expand Up @@ -197,15 +198,18 @@ static int sm4_cl_set_key(SM4_CL_CTX *ctx, const uint8_t key[16], int enc)
free(log);
goto end;
}
if (!(ctx->kernel = clCreateKernel(ctx->program, "sm4_encrypt", &err))) {
if (!(ctx->kernel = clCreateKernel(ctx->program, "sm4_ctr32_encrypt", &err))) {
cl_error_print(err);
goto end;
}
/*
// Apple M2 the CL_KERNEL_WORK_GROUP_SIZE is 256, but the valid work_group_size is 32
if ((err = clGetKernelWorkGroupInfo(ctx->kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
sizeof(ctx->workgroup_size), &ctx->workgroup_size, NULL)) != CL_SUCCESS) {
cl_error_print(err);
goto end;
}
*/

if (enc) {
sm4_set_encrypt_key((SM4_KEY *)ctx->rk, key);
Expand Down Expand Up @@ -239,51 +243,71 @@ int sm4_cl_set_decrypt_key(SM4_CL_CTX *ctx, const uint8_t key[16])
return sm4_cl_set_key(ctx, key, 0);
}

int sm4_cl_encrypt(SM4_CL_CTX *ctx, const uint8_t *in, size_t nblocks, uint8_t *out)
int sm4_cl_ctr32_encrypt(SM4_CL_CTX *ctx, uint8_t iv[16], const uint8_t *in, size_t nblocks, uint8_t *out)
{
int ret = -1;
cl_mem mem;
cl_int err;
size_t len = 16 * nblocks;
uint32_t ctr[4];
cl_mem mem_ctr = NULL;
size_t inlen = SM4_BLOCK_SIZE * nblocks;
cl_mem mem_buf = NULL;
cl_uint dim = 1;
size_t global_work_size = nblocks;
size_t local_work_size = 32; //ctx->workgroup_size;
void *p;
size_t local_work_size = 32;

if (out != in)
memcpy(out, in, len);
if (global_work_size % local_work_size) {
error_print();
return -1;
}

ctr[0] = GETU32(iv);
ctr[1] = GETU32(iv + 4);
ctr[2] = GETU32(iv + 8);
ctr[3] = GETU32(iv + 12);

if (!(mem = clCreateBuffer(ctx->context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, len, out, &err))) {
if (!(mem_ctr = clCreateBuffer(ctx->context, CL_MEM_READ_ONLY|CL_MEM_USE_HOST_PTR, sizeof(ctr), ctr, &err))) {
cl_error_print(err);
return -1;
}
if ((err = clSetKernelArg(ctx->kernel, 1, sizeof(cl_mem), &mem)) != CL_SUCCESS) {
if ((err = clSetKernelArg(ctx->kernel, 1, sizeof(cl_mem), &mem_ctr)) != CL_SUCCESS) {
cl_error_print(err);
goto end;
}

if (out != in) {
memcpy(out, in, inlen);
}
if (!(mem_buf = clCreateBuffer(ctx->context, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, inlen, out, &err))) {
cl_error_print(err);
return -1;
}
if ((err = clSetKernelArg(ctx->kernel, 2, sizeof(cl_mem), &mem_buf)) != CL_SUCCESS) {
cl_error_print(err);
goto end;
}

// on Apple M2, CL_KERNEL_WORK_GROUP_SIZE = 256
// but kernel will fail when local_work_size > 32.
// local_work_size might be restricted by the resources the kernel used.
if ((err = clEnqueueNDRangeKernel(ctx->queue, ctx->kernel,
dim, NULL, &global_work_size, &local_work_size, 0, NULL, NULL)) != CL_SUCCESS) {
cl_error_print(err);
goto end;
}
if (!(p = clEnqueueMapBuffer(ctx->queue, mem, CL_TRUE, 0, 0, len, 0, NULL, NULL, &err))) {
if (!clEnqueueMapBuffer(ctx->queue, mem_buf, CL_TRUE, 0, 0, inlen, 0, NULL, NULL, &err)) {
cl_error_print(err);
goto end;
}
if (p != out) {
fprintf(stderr, "%s %d: shit\n", __FILE__, __LINE__);
goto end;
}

ctr[3] += (uint32_t)nblocks;
PUTU32(iv + 12, ctr[3]);
ret = 1;
end:
clReleaseMemObject(mem);
if (mem_ctr) clReleaseMemObject(mem_ctr);
if (mem_buf) clReleaseMemObject(mem_buf);
return ret;
}



#define KERNEL(...) #__VA_ARGS__
static const char *sm4_cl_src = KERNEL(

Expand All @@ -306,16 +330,14 @@ __constant unsigned char SBOX[256] = {
0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20, 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48,
};

__kernel void sm4_encrypt(__global const unsigned int *rkey, __global unsigned char *data)
__kernel void sm4_ctr32_encrypt(__global const unsigned int *rkey, __global const unsigned int *ctr, __global unsigned char *data)
{
__local unsigned char S[256];
__local unsigned int rk[32];

unsigned int x0, x1, x2, x3, x4, i, t;
uint global_id = get_global_id(0);
__global unsigned char *p = data + 16 * global_id;
__global unsigned int *in = (__global unsigned int *)p;
__global unsigned int *out = (__global unsigned int *)p;
__global unsigned int *out = (__global unsigned int *)(data + 16 * global_id);

if (get_local_id(0) == 0) {
for (i = 0; i < 256; i++) {
Expand All @@ -326,10 +348,10 @@ __kernel void sm4_encrypt(__global const unsigned int *rkey, __global unsigned c
}
}

x0 = (in[0] >> 24) | ((in[0] >> 8) & 0xff00) | ((in[0] << 8) & 0xff0000) | (in[0] << 24);
x1 = (in[1] >> 24) | ((in[1] >> 8) & 0xff00) | ((in[1] << 8) & 0xff0000) | (in[1] << 24);
x2 = (in[2] >> 24) | ((in[2] >> 8) & 0xff00) | ((in[2] << 8) & 0xff0000) | (in[2] << 24);
x3 = (in[3] >> 24) | ((in[3] >> 8) & 0xff00) | ((in[3] << 8) & 0xff0000) | (in[3] << 24);
x0 = ctr[0];
x1 = ctr[1];
x2 = ctr[2];
x3 = ctr[3] + global_id;

for (i = 0; i < 31; i++) {
x4 = x1 ^ x2 ^ x3 ^ rk[i];
Expand Down Expand Up @@ -357,10 +379,10 @@ __kernel void sm4_encrypt(__global const unsigned int *rkey, __global unsigned c
((x4 << 18) | (x4 >> (32 - 18))) ^
((x4 << 24) | (x4 >> (32 - 24))));

out[0] = (x4 >> 24) | ((x4 >> 8) & 0xff00) | ((x4 << 8) & 0xff0000) | (x4 << 24);
out[1] = (x3 >> 24) | ((x3 >> 8) & 0xff00) | ((x3 << 8) & 0xff0000) | (x3 << 24);
out[2] = (x2 >> 24) | ((x2 >> 8) & 0xff00) | ((x2 << 8) & 0xff0000) | (x2 << 24);
out[3] = (x1 >> 24) | ((x1 >> 8) & 0xff00) | ((x1 << 8) & 0xff0000) | (x1 << 24);
out[0] ^= (x4 >> 24) | ((x4 >> 8) & 0xff00) | ((x4 << 8) & 0xff0000) | (x4 << 24);
out[1] ^= (x3 >> 24) | ((x3 >> 8) & 0xff00) | ((x3 << 8) & 0xff0000) | (x3 << 24);
out[2] ^= (x2 >> 24) | ((x2 >> 8) & 0xff00) | ((x2 << 8) & 0xff0000) | (x2 << 24);
out[3] ^= (x1 >> 24) | ((x1 >> 8) & 0xff00) | ((x1 << 8) & 0xff0000) | (x1 << 24);
}

);
Expand Down
71 changes: 34 additions & 37 deletions tests/sm4_cltest.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,64 +18,64 @@
#include <gmssl/error.h>



int test_sm4_cl(void)
static int test_sm4_cl_ctr32_encrypt(void)
{
const uint8_t key[16] = {
0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10,
};
const uint8_t plaintext[16] = {
0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10,
};
const uint8_t ciphertext[16] = {
0x68, 0x1e, 0xdf, 0x34, 0xd2, 0x06, 0x96, 0x5e,
0x86, 0xb3, 0xe9, 0x4f, 0x53, 0x6e, 0x42, 0x46,
};
const char *key_hex = "0123456789abcdeffedcba9876543210";
const char *iv_hex = "0123456789abcdeffedcba9876543210";
const char *plain_hex = "aaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbccccccccccccccccddddddddddddddddeeeeeeeeeeeeeeeeffffffffffffffffeeeeeeeeeeeeeeeeaaaaaaaaaaaaaaaa";
const char *cipher_hex = "c2b4759e78ac3cf43d0852f4e8d5f9fd7256e8a5fcb65a350ee00630912e44492a0b17e1b85b060d0fba612d8a95831638b361fd5ffacd942f081485a83ca35d";

int ret = -1;
SM4_CL_CTX ctx;
size_t nblocks = 1024;
uint8_t key[16];
uint8_t iv[16];
uint8_t ctr[16];
size_t nblocks = 64;
uint8_t *buf = NULL;
uint8_t *ciphertext = NULL;
size_t len;
size_t i;


if (!(buf = (uint8_t *)malloc(16 * nblocks))) {
error_print();
return -1;
}
for (i = 0; i < nblocks; i++) {
memcpy(buf + 16 * i, plaintext, 16);
if (!(ciphertext = (uint8_t *)malloc(16 * nblocks))) {
error_print();
goto end;
}

hex_to_bytes(key_hex, strlen(key_hex), key, &len);
hex_to_bytes(iv_hex, strlen(iv_hex), iv, &len);
hex_to_bytes(plain_hex, strlen(plain_hex), buf, &len);
hex_to_bytes(cipher_hex, strlen(cipher_hex), ciphertext, &len);

if (sm4_cl_set_encrypt_key(&ctx, key) != 1) {
error_print();
goto end;
}
if (sm4_cl_encrypt(&ctx, buf, nblocks, buf) != 1) {

memcpy(ctr, iv, sizeof(iv));
if (sm4_cl_ctr32_encrypt(&ctx, ctr, buf, nblocks, buf) != 1) {
error_print();
goto end;
}

for (i = 0; i < nblocks; i++) {
//fprintf(stderr, "%zu ", i);
//format_bytes(stderr, 0, 0, "ciphertext", buf + 16*i, 16);
if (memcmp(buf + 16 * i, ciphertext, 16) != 0) {
error_print();
goto end;
}
if (memcmp(buf, ciphertext, len) != 0) {
error_print();
goto end;
}

printf("%s() ok\n", __FUNCTION__);
ret = 1;
end:
if (buf) free(buf);
sm4_cl_cleanup(&ctx);
if (buf) free(buf);
if (ciphertext) free(ciphertext);
return ret;
}


int test_sm4_cl_speed(void)
static int test_sm4_cl_ctr32_encrypt_speed(void)
{
const uint8_t key[16] = {
0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
Expand All @@ -90,15 +90,16 @@ int test_sm4_cl_speed(void)
0x86, 0xb3, 0xe9, 0x4f, 0x53, 0x6e, 0x42, 0x46,
};


int ret = -1;
SM4_CL_CTX ctx;
uint8_t ctr[16];
size_t nblocks = 1024*1024;
uint8_t *buf = NULL;
clock_t start, end;
double seconds;
size_t i;


if (!(buf = (uint8_t *)malloc(16 * nblocks))) {
error_print();
return -1;
Expand All @@ -113,19 +114,15 @@ int test_sm4_cl_speed(void)
}

start = clock();
if (sm4_cl_encrypt(&ctx, buf, nblocks, buf) != 1) {
if (sm4_cl_ctr32_encrypt(&ctx, ctr, buf, nblocks, buf) != 1) {
error_print();
goto end;
}
end = clock();

seconds = (double)(end - start)/CLOCKS_PER_SEC;

fprintf(stderr, "sm4_cl_encrypt: %f-MiB per seconds\n", 16/seconds);




ret = 1;
end:
if (buf) free(buf);
Expand All @@ -135,8 +132,8 @@ int test_sm4_cl_speed(void)

int main(void)
{
if (test_sm4_cl() != 1) goto err;
if (test_sm4_cl_speed() != 1) goto err;
if (test_sm4_cl_ctr32_encrypt() != 1) goto err;
if (test_sm4_cl_ctr32_encrypt_speed() != 1) goto err;
printf("%s all tests passed\n", __FILE__);
return 0;
err:
Expand Down

0 comments on commit 2e6cef2

Please sign in to comment.