diff --git a/jsrc/CMakeLists.txt b/jsrc/CMakeLists.txt index 5cd4a6c4..936b3a38 100644 --- a/jsrc/CMakeLists.txt +++ b/jsrc/CMakeLists.txt @@ -3,7 +3,6 @@ add_library(j) target_include_directories(j PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) target_compile_definitions(j PRIVATE) target_link_libraries(j PRIVATE ${STANDARD_MATH_LIBRARY}) -set_source_files_properties(aes-ni.c PROPERTIES COMPILE_FLAGS -maes) target_sources(j PRIVATE adverbs/a.c adverbs/ab.c @@ -77,8 +76,6 @@ target_sources(j PRIVATE verbs/vt.c verbs/vx.c verbs/vz.c - aes-c.c - aes-ni.c cpuinfo.c d.c dc.c @@ -116,7 +113,6 @@ target_sources(j PRIVATE x.c x15.c xa.c - xaes.c xb.c xc.c xcrc.c diff --git a/jsrc/aes-arm.c b/jsrc/aes-arm.c deleted file mode 100644 index 510bbc5c..00000000 --- a/jsrc/aes-arm.c +++ /dev/null @@ -1,316 +0,0 @@ -#if defined(__aarch64__) - -#include -#include -/* GCC and LLVM Clang, but not Apple Clang */ -# if defined(__GNUC__) && !defined(__apple_build_version__) -# if defined(__ARM_ACLE) || defined(__ARM_FEATURE_CRYPTO) -# include -# endif -# endif - -#include "j.h" -#include "aes-arm_table.h" - -#define AES_ENCRYPT 1 -#define AES_DECRYPT 0 - -#define BLOCK_SIZE 16 - -#define AES_RKSIZE 272 - -typedef struct { - uint8_t __attribute__ ((aligned (16))) rk[AES_RKSIZE]; - uint8_t Nk; //For this standard, Nk = 4, 6, or 8. (Also see Sec. 6.3.) - uint8_t Nr; //For this standard, Nr = 10, 12, or 14. (Also see Sec. 6.3.) -} block_state; - -#define Nb 4u -#define Nbb Nb*4 - -#ifndef GET_UINT32_LE -#define GET_UINT32_LE(n,b,i) \ -{ \ - (n) = ( (uint32_t) (b)[(i) ] ) \ - | ( (uint32_t) (b)[(i) + 1] << 8 ) \ - | ( (uint32_t) (b)[(i) + 2] << 16 ) \ - | ( (uint32_t) (b)[(i) + 3] << 24 ); \ -} -#endif - -static void aes_setkey_enc(block_state* self, const uint8_t *key, int keyn) -{ - unsigned int i; - uint32_t *RK; - -// keyn 16 24 32 -// AES_KEYSIZE 128 192 256 *&8 << 3 -// Nk 4 6 8 %& >> 2 -// Nr 10 12 14 6+%&4 6 + >>2 - - uint32_t AES_KEYSIZE = keyn << 3; - self->Nk = keyn >> 2; - self->Nr = (self->Nk + 6); - - RK = (uint32_t *) self->rk; - - for( i = 0; i < (self->Nk); i++ ) { - GET_UINT32_LE( RK[i], key, i << 2 ); - } - - switch( self->Nr ) { - case 10: - - for( i = 0; i < 10; i++, RK += 4 ) { - RK[4] = RK[0] ^ RCON[i] ^ - ( (uint32_t) FSb[ ( RK[3] >> 8 ) & 0xFF ] ) ^ - ( (uint32_t) FSb[ ( RK[3] >> 16 ) & 0xFF ] << 8 ) ^ - ( (uint32_t) FSb[ ( RK[3] >> 24 ) & 0xFF ] << 16 ) ^ - ( (uint32_t) FSb[ ( RK[3] ) & 0xFF ] << 24 ); - - RK[5] = RK[1] ^ RK[4]; - RK[6] = RK[2] ^ RK[5]; - RK[7] = RK[3] ^ RK[6]; - } - break; - - case 12: - - for( i = 0; i < 8; i++, RK += 6 ) { - RK[6] = RK[0] ^ RCON[i] ^ - ( (uint32_t) FSb[ ( RK[5] >> 8 ) & 0xFF ] ) ^ - ( (uint32_t) FSb[ ( RK[5] >> 16 ) & 0xFF ] << 8 ) ^ - ( (uint32_t) FSb[ ( RK[5] >> 24 ) & 0xFF ] << 16 ) ^ - ( (uint32_t) FSb[ ( RK[5] ) & 0xFF ] << 24 ); - - RK[7] = RK[1] ^ RK[6]; - RK[8] = RK[2] ^ RK[7]; - RK[9] = RK[3] ^ RK[8]; - RK[10] = RK[4] ^ RK[9]; - RK[11] = RK[5] ^ RK[10]; - } - break; - - case 14: - - for( i = 0; i < 7; i++, RK += 8 ) { - RK[8] = RK[0] ^ RCON[i] ^ - ( (uint32_t) FSb[ ( RK[7] >> 8 ) & 0xFF ] ) ^ - ( (uint32_t) FSb[ ( RK[7] >> 16 ) & 0xFF ] << 8 ) ^ - ( (uint32_t) FSb[ ( RK[7] >> 24 ) & 0xFF ] << 16 ) ^ - ( (uint32_t) FSb[ ( RK[7] ) & 0xFF ] << 24 ); - - RK[9] = RK[1] ^ RK[8]; - RK[10] = RK[2] ^ RK[9]; - RK[11] = RK[3] ^ RK[10]; - - RK[12] = RK[4] ^ - ( (uint32_t) FSb[ ( RK[11] ) & 0xFF ] ) ^ - ( (uint32_t) FSb[ ( RK[11] >> 8 ) & 0xFF ] << 8 ) ^ - ( (uint32_t) FSb[ ( RK[11] >> 16 ) & 0xFF ] << 16 ) ^ - ( (uint32_t) FSb[ ( RK[11] >> 24 ) & 0xFF ] << 24 ); - - RK[13] = RK[5] ^ RK[12]; - RK[14] = RK[6] ^ RK[13]; - RK[15] = RK[7] ^ RK[14]; - } - break; - } -} - -static void aes_inverse_key(block_state* self, const uint8_t *fwdkey) -{ - int i, j; - uint32_t *RK; - uint32_t *SK; - -#if (defined(__clang__) && ( (__clang_major__ > 3) || ((__clang_major__ == 3) && ((__clang_minor__ > 5) || !(defined(__aarch32__)||defined(__arm__)||defined(__aarch64__)) )))) || __GNUC__ > 4 || (__GNUC__ == 4 && (__GNUC_MINOR__ > 6)) - fwdkey = (const uint8_t *) __builtin_assume_aligned (fwdkey, 16); -#endif - RK = (uint32_t *) self->rk; - SK = ((uint32_t *) fwdkey) + self->Nr * 4; - - *RK++ = *SK++; - *RK++ = *SK++; - *RK++ = *SK++; - *RK++ = *SK++; - - for( i = self->Nr - 1, SK -= 8; i > 0; i--, SK -= 8 ) { - for( j = 0; j < 4; j++, SK++ ) { - *RK++ = RT0[ FSb[ ( *SK ) & 0xFF ] ] ^ - RT1[ FSb[ ( *SK >> 8 ) & 0xFF ] ] ^ - RT2[ FSb[ ( *SK >> 16 ) & 0xFF ] ] ^ - RT3[ FSb[ ( *SK >> 24 ) & 0xFF ] ]; - } - } - - *RK++ = *SK++; - *RK++ = *SK++; - *RK++ = *SK++; - *RK++ = *SK++; -} - -/* - * ARMv8a AES-ECB block en(de)cryption - */ -static void aes_crypt_ecb( block_state* self, - int mode, - const unsigned char input[16], - unsigned char output[16] ) -{ - int i; - uint8x16_t state_vec, roundkey_vec; - uint8_t *RK = (uint8_t *) self->rk; - - // Load input and round key into into their vectors - state_vec = vld1q_u8( input ); - - if ( mode == AES_ENCRYPT ) { - // Initial AddRoundKey is in the loop due to AES instruction always doing AddRoundKey first - for( i = 0; i < self->Nr - 1; i++ ) { - // Load Round Key - roundkey_vec = vld1q_u8( RK ); - // Forward (AESE) round (AddRoundKey, SubBytes and ShiftRows) - state_vec = vaeseq_u8( state_vec, roundkey_vec ); - // Mix Columns (AESMC) - state_vec = vaesmcq_u8( state_vec ); - // Move pointer ready to load next round key - RK += 16; - } - - // Final Forward (AESE) round (AddRoundKey, SubBytes and ShiftRows). No Mix columns - roundkey_vec = vld1q_u8( RK ); /* RK already moved in loop */ - state_vec = vaeseq_u8( state_vec, roundkey_vec ); - } else { - // Initial AddRoundKey is in the loop due to AES instruction always doing AddRoundKey first - for( i = 0; i < self->Nr - 1; i++ ) { - // Load Round Key - roundkey_vec = vld1q_u8( RK ); - // Reverse (AESD) round (AddRoundKey, SubBytes and ShiftRows) - state_vec = vaesdq_u8( state_vec, roundkey_vec ); - // Inverse Mix Columns (AESIMC) - state_vec = vaesimcq_u8( state_vec ); - // Move pointer ready to load next round key - RK += 16; - } - - // Final Reverse (AESD) round (AddRoundKey, SubBytes and ShiftRows). No Mix columns - roundkey_vec = vld1q_u8( RK ); /* RK already moved in loop */ - state_vec = vaesdq_u8( state_vec, roundkey_vec ); - } - - // Manually apply final Add RoundKey step (EOR) - RK += 16; - roundkey_vec = vld1q_u8( RK ); - state_vec = veorq_u8( state_vec, roundkey_vec ); - - // Write results back to output array - vst1q_u8( output, state_vec ); -} - -#define block_init aes_setkey_enc -#define block_encrypt(self, input, output) aes_crypt_ecb(self, AES_ENCRYPT, input, output) -#define block_decrypt(self, input, output) aes_crypt_ecb(self, AES_DECRYPT, input, output) - -static void block_finalize(block_state* self) -{ -} - -/* - mode - 0 ECB - 1 CBC - 2 CTR - */ -// iv must be 16-byte wide -// out buffer of n bytes and n must be 16-byte block -// out buffer will be overwritten -int aes_arm(I decrypt,I mode,UC *key,I keyn,UC* ivec,UC* out,I len) -{ - block_state self; - uint8_t __attribute__ ((aligned (16))) rk_tmp[AES_RKSIZE]; - uint8_t *str=out; - I i; - - switch(mode) { - case 0: - block_init(&self, key, (int)keyn); - if(decrypt) { - memcpy(rk_tmp, self.rk, AES_RKSIZE); - aes_inverse_key(&self, rk_tmp); - for(i=0; i= 0; --bi) { - /* inc will overflow */ - if (iv[bi] == 255) { - iv[bi] = 0; - continue; - } - iv[bi] += 1; - break; - } - bi = 0; - } - out[i] = (out[i] ^ buffer[bi]); - } - } - block_finalize(&self); - break; - - default: - return 1; - - } - - return 0; // success -} - -#endif - diff --git a/jsrc/aes-arm_table.h b/jsrc/aes-arm_table.h deleted file mode 100644 index 0acefe91..00000000 --- a/jsrc/aes-arm_table.h +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Forward S-box - */ -static const unsigned char FSb[256] = { - 0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, - 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76, - 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, - 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, - 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, - 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15, - 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A, - 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75, - 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, - 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, - 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, - 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF, - 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, - 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8, - 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, - 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, - 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, - 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73, - 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, - 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB, - 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, - 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, - 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, - 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08, - 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, - 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A, - 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, - 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, - 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, - 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, - 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, - 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16 -}; - -/* - * Round constants - */ -static const uint32_t RCON[10] = { - 0x00000001, 0x00000002, 0x00000004, 0x00000008, - 0x00000010, 0x00000020, 0x00000040, 0x00000080, - 0x0000001B, 0x00000036 -}; - -/* - * Reverse tables - */ -#define RT \ -\ - V(50,A7,F4,51), V(53,65,41,7E), V(C3,A4,17,1A), V(96,5E,27,3A), \ - V(CB,6B,AB,3B), V(F1,45,9D,1F), V(AB,58,FA,AC), V(93,03,E3,4B), \ - V(55,FA,30,20), V(F6,6D,76,AD), V(91,76,CC,88), V(25,4C,02,F5), \ - V(FC,D7,E5,4F), V(D7,CB,2A,C5), V(80,44,35,26), V(8F,A3,62,B5), \ - V(49,5A,B1,DE), V(67,1B,BA,25), V(98,0E,EA,45), V(E1,C0,FE,5D), \ - V(02,75,2F,C3), V(12,F0,4C,81), V(A3,97,46,8D), V(C6,F9,D3,6B), \ - V(E7,5F,8F,03), V(95,9C,92,15), V(EB,7A,6D,BF), V(DA,59,52,95), \ - V(2D,83,BE,D4), V(D3,21,74,58), V(29,69,E0,49), V(44,C8,C9,8E), \ - V(6A,89,C2,75), V(78,79,8E,F4), V(6B,3E,58,99), V(DD,71,B9,27), \ - V(B6,4F,E1,BE), V(17,AD,88,F0), V(66,AC,20,C9), V(B4,3A,CE,7D), \ - V(18,4A,DF,63), V(82,31,1A,E5), V(60,33,51,97), V(45,7F,53,62), \ - V(E0,77,64,B1), V(84,AE,6B,BB), V(1C,A0,81,FE), V(94,2B,08,F9), \ - V(58,68,48,70), V(19,FD,45,8F), V(87,6C,DE,94), V(B7,F8,7B,52), \ - V(23,D3,73,AB), V(E2,02,4B,72), V(57,8F,1F,E3), V(2A,AB,55,66), \ - V(07,28,EB,B2), V(03,C2,B5,2F), V(9A,7B,C5,86), V(A5,08,37,D3), \ - V(F2,87,28,30), V(B2,A5,BF,23), V(BA,6A,03,02), V(5C,82,16,ED), \ - V(2B,1C,CF,8A), V(92,B4,79,A7), V(F0,F2,07,F3), V(A1,E2,69,4E), \ - V(CD,F4,DA,65), V(D5,BE,05,06), V(1F,62,34,D1), V(8A,FE,A6,C4), \ - V(9D,53,2E,34), V(A0,55,F3,A2), V(32,E1,8A,05), V(75,EB,F6,A4), \ - V(39,EC,83,0B), V(AA,EF,60,40), V(06,9F,71,5E), V(51,10,6E,BD), \ - V(F9,8A,21,3E), V(3D,06,DD,96), V(AE,05,3E,DD), V(46,BD,E6,4D), \ - V(B5,8D,54,91), V(05,5D,C4,71), V(6F,D4,06,04), V(FF,15,50,60), \ - V(24,FB,98,19), V(97,E9,BD,D6), V(CC,43,40,89), V(77,9E,D9,67), \ - V(BD,42,E8,B0), V(88,8B,89,07), V(38,5B,19,E7), V(DB,EE,C8,79), \ - V(47,0A,7C,A1), V(E9,0F,42,7C), V(C9,1E,84,F8), V(00,00,00,00), \ - V(83,86,80,09), V(48,ED,2B,32), V(AC,70,11,1E), V(4E,72,5A,6C), \ - V(FB,FF,0E,FD), V(56,38,85,0F), V(1E,D5,AE,3D), V(27,39,2D,36), \ - V(64,D9,0F,0A), V(21,A6,5C,68), V(D1,54,5B,9B), V(3A,2E,36,24), \ - V(B1,67,0A,0C), V(0F,E7,57,93), V(D2,96,EE,B4), V(9E,91,9B,1B), \ - V(4F,C5,C0,80), V(A2,20,DC,61), V(69,4B,77,5A), V(16,1A,12,1C), \ - V(0A,BA,93,E2), V(E5,2A,A0,C0), V(43,E0,22,3C), V(1D,17,1B,12), \ - V(0B,0D,09,0E), V(AD,C7,8B,F2), V(B9,A8,B6,2D), V(C8,A9,1E,14), \ - V(85,19,F1,57), V(4C,07,75,AF), V(BB,DD,99,EE), V(FD,60,7F,A3), \ - V(9F,26,01,F7), V(BC,F5,72,5C), V(C5,3B,66,44), V(34,7E,FB,5B), \ - V(76,29,43,8B), V(DC,C6,23,CB), V(68,FC,ED,B6), V(63,F1,E4,B8), \ - V(CA,DC,31,D7), V(10,85,63,42), V(40,22,97,13), V(20,11,C6,84), \ - V(7D,24,4A,85), V(F8,3D,BB,D2), V(11,32,F9,AE), V(6D,A1,29,C7), \ - V(4B,2F,9E,1D), V(F3,30,B2,DC), V(EC,52,86,0D), V(D0,E3,C1,77), \ - V(6C,16,B3,2B), V(99,B9,70,A9), V(FA,48,94,11), V(22,64,E9,47), \ - V(C4,8C,FC,A8), V(1A,3F,F0,A0), V(D8,2C,7D,56), V(EF,90,33,22), \ - V(C7,4E,49,87), V(C1,D1,38,D9), V(FE,A2,CA,8C), V(36,0B,D4,98), \ - V(CF,81,F5,A6), V(28,DE,7A,A5), V(26,8E,B7,DA), V(A4,BF,AD,3F), \ - V(E4,9D,3A,2C), V(0D,92,78,50), V(9B,CC,5F,6A), V(62,46,7E,54), \ - V(C2,13,8D,F6), V(E8,B8,D8,90), V(5E,F7,39,2E), V(F5,AF,C3,82), \ - V(BE,80,5D,9F), V(7C,93,D0,69), V(A9,2D,D5,6F), V(B3,12,25,CF), \ - V(3B,99,AC,C8), V(A7,7D,18,10), V(6E,63,9C,E8), V(7B,BB,3B,DB), \ - V(09,78,26,CD), V(F4,18,59,6E), V(01,B7,9A,EC), V(A8,9A,4F,83), \ - V(65,6E,95,E6), V(7E,E6,FF,AA), V(08,CF,BC,21), V(E6,E8,15,EF), \ - V(D9,9B,E7,BA), V(CE,36,6F,4A), V(D4,09,9F,EA), V(D6,7C,B0,29), \ - V(AF,B2,A4,31), V(31,23,3F,2A), V(30,94,A5,C6), V(C0,66,A2,35), \ - V(37,BC,4E,74), V(A6,CA,82,FC), V(B0,D0,90,E0), V(15,D8,A7,33), \ - V(4A,98,04,F1), V(F7,DA,EC,41), V(0E,50,CD,7F), V(2F,F6,91,17), \ - V(8D,D6,4D,76), V(4D,B0,EF,43), V(54,4D,AA,CC), V(DF,04,96,E4), \ - V(E3,B5,D1,9E), V(1B,88,6A,4C), V(B8,1F,2C,C1), V(7F,51,65,46), \ - V(04,EA,5E,9D), V(5D,35,8C,01), V(73,74,87,FA), V(2E,41,0B,FB), \ - V(5A,1D,67,B3), V(52,D2,DB,92), V(33,56,10,E9), V(13,47,D6,6D), \ - V(8C,61,D7,9A), V(7A,0C,A1,37), V(8E,14,F8,59), V(89,3C,13,EB), \ - V(EE,27,A9,CE), V(35,C9,61,B7), V(ED,E5,1C,E1), V(3C,B1,47,7A), \ - V(59,DF,D2,9C), V(3F,73,F2,55), V(79,CE,14,18), V(BF,37,C7,73), \ - V(EA,CD,F7,53), V(5B,AA,FD,5F), V(14,6F,3D,DF), V(86,DB,44,78), \ - V(81,F3,AF,CA), V(3E,C4,68,B9), V(2C,34,24,38), V(5F,40,A3,C2), \ - V(72,C3,1D,16), V(0C,25,E2,BC), V(8B,49,3C,28), V(41,95,0D,FF), \ - V(71,01,A8,39), V(DE,B3,0C,08), V(9C,E4,B4,D8), V(90,C1,56,64), \ - V(61,84,CB,7B), V(70,B6,32,D5), V(74,5C,6C,48), V(42,57,B8,D0) - -#define V(a,b,c,d) 0x##a##b##c##d -static const uint32_t RT0[256] = { RT }; -#undef V - -#define V(a,b,c,d) 0x##b##c##d##a -static const uint32_t RT1[256] = { RT }; -#undef V - -#define V(a,b,c,d) 0x##c##d##a##b -static const uint32_t RT2[256] = { RT }; -#undef V - -#define V(a,b,c,d) 0x##d##a##b##c -static const uint32_t RT3[256] = { RT }; -#undef V - -#undef RT diff --git a/jsrc/aes-c.c b/jsrc/aes-c.c deleted file mode 100644 index edc28b06..00000000 --- a/jsrc/aes-c.c +++ /dev/null @@ -1,463 +0,0 @@ -#include "aes-c.h" - -/*****************************************************************************/ -/* Defines: */ -/*****************************************************************************/ -// The number of columns comprising a state in AES. This is a constant in AES. Value=4 -#define Nb 4u - -/*****************************************************************************/ -/* Private variables: */ -/*****************************************************************************/ -// state - array holding the intermediate results during decryption. -typedef uint8_t state_t[4][4]; - -// The lookup-tables are marked const so they can be placed in read-only storage instead of RAM -// The numbers below can be computed dynamically trading ROM for RAM - -// This can be useful in (embedded) bootloader applications, where ROM is often limited. -static const uint8_t sbox[256] = { - //0 1 2 3 4 5 6 7 8 9 A B C D E F - 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, - 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, - 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, - 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, - 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, - 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, - 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, - 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, - 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, - 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, - 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, - 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, - 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, - 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, - 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, - 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 -}; - -static const uint8_t rsbox[256] = { - 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, - 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, - 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, - 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, - 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, - 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, - 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, - 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, - 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, - 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, - 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, - 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, - 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, - 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, - 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, - 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d -}; - -// The round constant word array, Rcon[i], contains the values given by -// x to the power (i-1) being powers of x (x is denoted as {02}) in the field GF(2^8) -static const uint8_t Rcon[11] = { - 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36 -}; - -/*****************************************************************************/ -/* Private functions: */ -/*****************************************************************************/ -/* -static uint8_t getSBoxValue(uint8_t num) -{ - return sbox[num]; -} -*/ -#define getSBoxValue(num) (sbox[(num)]) -/* -static uint8_t getSBoxInvert(uint8_t num) -{ - return rsbox[num]; -} -*/ -#define getSBoxInvert(num) (rsbox[(num)]) - -// This function produces Nb(Nr+1) round keys. The round keys are used in each round to decrypt the states. -static void KeyExpansion(AES_ctx* ctx, const uint8_t* Key, int keyn) -{ - unsigned int i, j, k; - uint8_t tempa[4]; // Used for the column/row operations - uint8_t* RoundKey = ctx->RoundKey; - if(keyn==32) { - ctx->Nk = 8; - ctx->Nr = 14; - } else if(keyn==24) { - ctx->Nk = 6; - ctx->Nr = 12; - } else { /* keyn==16 */ - ctx->Nk = 4; // The number of 32 bit words in a key. - ctx->Nr = 10; // The number of rounds in AES Cipher. - } - - // The first round key is the key itself. - for (i = 0; i < ctx->Nk; ++i) { - RoundKey[(i * 4) + 0] = Key[(i * 4) + 0]; - RoundKey[(i * 4) + 1] = Key[(i * 4) + 1]; - RoundKey[(i * 4) + 2] = Key[(i * 4) + 2]; - RoundKey[(i * 4) + 3] = Key[(i * 4) + 3]; - } - - // All other round keys are found from the previous round keys. - for (i = ctx->Nk; i < Nb * (ctx->Nr + 1); ++i) { - { - k = (i - 1) * 4; - tempa[0]=RoundKey[k + 0]; - tempa[1]=RoundKey[k + 1]; - tempa[2]=RoundKey[k + 2]; - tempa[3]=RoundKey[k + 3]; - - } - - if (i % ctx->Nk == 0) { - // This function shifts the 4 bytes in a word to the left once. - // [a0,a1,a2,a3] becomes [a1,a2,a3,a0] - - // Function RotWord() - { - const uint8_t u8tmp = tempa[0]; - tempa[0] = tempa[1]; - tempa[1] = tempa[2]; - tempa[2] = tempa[3]; - tempa[3] = u8tmp; - } - - // SubWord() is a function that takes a four-byte input word and - // applies the S-box to each of the four bytes to produce an output word. - - // Function Subword() - { - tempa[0] = getSBoxValue(tempa[0]); - tempa[1] = getSBoxValue(tempa[1]); - tempa[2] = getSBoxValue(tempa[2]); - tempa[3] = getSBoxValue(tempa[3]); - } - - tempa[0] = tempa[0] ^ Rcon[i/ctx->Nk]; - } - if (ctx->Nk == 8) { - if (i % ctx->Nk == 4) { - // Function Subword() - { - tempa[0] = getSBoxValue(tempa[0]); - tempa[1] = getSBoxValue(tempa[1]); - tempa[2] = getSBoxValue(tempa[2]); - tempa[3] = getSBoxValue(tempa[3]); - } - } - } - j = i * 4; - k=(i - ctx->Nk) * 4; - RoundKey[j + 0] = RoundKey[k + 0] ^ tempa[0]; - RoundKey[j + 1] = RoundKey[k + 1] ^ tempa[1]; - RoundKey[j + 2] = RoundKey[k + 2] ^ tempa[2]; - RoundKey[j + 3] = RoundKey[k + 3] ^ tempa[3]; - } -} - -void AES_init_ctx(AES_ctx* ctx, const uint8_t* key, int keyn) -{ - KeyExpansion(ctx, key, keyn); -} - -void AES_init_ctx_iv(AES_ctx* ctx, const uint8_t* key, int keyn, const uint8_t* iv) -{ - KeyExpansion(ctx, key, keyn); - memcpy (ctx->Iv, iv, AES_BLOCKLEN); -} -void AES_ctx_set_iv(AES_ctx* ctx, const uint8_t* iv) -{ - memcpy (ctx->Iv, iv, AES_BLOCKLEN); -} - -// This function adds the round key to state. -// The round key is added to the state by an XOR function. -static void AddRoundKey(uint8_t round, state_t* state, const uint8_t* RoundKey) -{ - uint8_t i,j; - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) { - (*state)[i][j] ^= RoundKey[(round * Nb * 4) + (i * Nb) + j]; - } - } -} - -// The SubBytes Function Substitutes the values in the -// state matrix with values in an S-box. -static void SubBytes(state_t* state) -{ - uint8_t i, j; - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) { - (*state)[j][i] = getSBoxValue((*state)[j][i]); - } - } -} - -// The ShiftRows() function shifts the rows in the state to the left. -// Each row is shifted with different offset. -// Offset = Row number. So the first row is not shifted. -static void ShiftRows(state_t* state) -{ - uint8_t temp; - - // Rotate first row 1 columns to left - temp = (*state)[0][1]; - (*state)[0][1] = (*state)[1][1]; - (*state)[1][1] = (*state)[2][1]; - (*state)[2][1] = (*state)[3][1]; - (*state)[3][1] = temp; - - // Rotate second row 2 columns to left - temp = (*state)[0][2]; - (*state)[0][2] = (*state)[2][2]; - (*state)[2][2] = temp; - - temp = (*state)[1][2]; - (*state)[1][2] = (*state)[3][2]; - (*state)[3][2] = temp; - - // Rotate third row 3 columns to left - temp = (*state)[0][3]; - (*state)[0][3] = (*state)[3][3]; - (*state)[3][3] = (*state)[2][3]; - (*state)[2][3] = (*state)[1][3]; - (*state)[1][3] = temp; -} - -static uint8_t xtime(uint8_t x) -{ - return ((x<<1) ^ (((x>>7) & 1) * 0x1b)); -} - -// MixColumns function mixes the columns of the state matrix -static void MixColumns(state_t* state) -{ - uint8_t i; - uint8_t Tmp, Tm, t; - for (i = 0; i < 4; ++i) { - t = (*state)[i][0]; - Tmp = (*state)[i][0] ^ (*state)[i][1] ^ (*state)[i][2] ^ (*state)[i][3] ; - Tm = (*state)[i][0] ^ (*state)[i][1] ; - Tm = xtime(Tm); - (*state)[i][0] ^= Tm ^ Tmp ; - Tm = (*state)[i][1] ^ (*state)[i][2] ; - Tm = xtime(Tm); - (*state)[i][1] ^= Tm ^ Tmp ; - Tm = (*state)[i][2] ^ (*state)[i][3] ; - Tm = xtime(Tm); - (*state)[i][2] ^= Tm ^ Tmp ; - Tm = (*state)[i][3] ^ t ; - Tm = xtime(Tm); - (*state)[i][3] ^= Tm ^ Tmp ; - } -} - -#define Multiply(x, y) \ - ( ((y & 1) * x) ^ \ - ((y>>1 & 1) * xtime(x)) ^ \ - ((y>>2 & 1) * xtime(xtime(x))) ^ \ - ((y>>3 & 1) * xtime(xtime(xtime(x)))) ^ \ - ((y>>4 & 1) * xtime(xtime(xtime(xtime(x)))))) \ - - -// MixColumns function mixes the columns of the state matrix. -// The method used to multiply may be difficult to understand for the inexperienced. -// Please use the references to gain more information. -static void InvMixColumns(state_t* state) -{ - int i; - uint8_t a, b, c, d; - for (i = 0; i < 4; ++i) { - a = (*state)[i][0]; - b = (*state)[i][1]; - c = (*state)[i][2]; - d = (*state)[i][3]; - - (*state)[i][0] = Multiply(a, 0x0e) ^ Multiply(b, 0x0b) ^ Multiply(c, 0x0d) ^ Multiply(d, 0x09); - (*state)[i][1] = Multiply(a, 0x09) ^ Multiply(b, 0x0e) ^ Multiply(c, 0x0b) ^ Multiply(d, 0x0d); - (*state)[i][2] = Multiply(a, 0x0d) ^ Multiply(b, 0x09) ^ Multiply(c, 0x0e) ^ Multiply(d, 0x0b); - (*state)[i][3] = Multiply(a, 0x0b) ^ Multiply(b, 0x0d) ^ Multiply(c, 0x09) ^ Multiply(d, 0x0e); - } -} - - -// The SubBytes Function Substitutes the values in the -// state matrix with values in an S-box. -static void InvSubBytes(state_t* state) -{ - uint8_t i, j; - for (i = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) { - (*state)[j][i] = getSBoxInvert((*state)[j][i]); - } - } -} - -static void InvShiftRows(state_t* state) -{ - uint8_t temp; - - // Rotate first row 1 columns to right - temp = (*state)[3][1]; - (*state)[3][1] = (*state)[2][1]; - (*state)[2][1] = (*state)[1][1]; - (*state)[1][1] = (*state)[0][1]; - (*state)[0][1] = temp; - - // Rotate second row 2 columns to right - temp = (*state)[0][2]; - (*state)[0][2] = (*state)[2][2]; - (*state)[2][2] = temp; - - temp = (*state)[1][2]; - (*state)[1][2] = (*state)[3][2]; - (*state)[3][2] = temp; - - // Rotate third row 3 columns to right - temp = (*state)[0][3]; - (*state)[0][3] = (*state)[1][3]; - (*state)[1][3] = (*state)[2][3]; - (*state)[2][3] = (*state)[3][3]; - (*state)[3][3] = temp; -} - -// Cipher is the main function that encrypts the PlainText. -static void Cipher(AES_ctx* ctx, state_t* state) -{ - const uint8_t* RoundKey = ctx->RoundKey; - uint8_t round = 0; - - // Add the First round key to the state before starting the rounds. - AddRoundKey(0, state, RoundKey); - - // There will be Nr rounds. - // The first Nr-1 rounds are identical. - // These Nr-1 rounds are executed in the loop below. - for (round = 1; round < ctx->Nr; ++round) { - SubBytes(state); - ShiftRows(state); - MixColumns(state); - AddRoundKey(round, state, RoundKey); - } - - // The last round is given below. - // The MixColumns function is not here in the last round. - SubBytes(state); - ShiftRows(state); - AddRoundKey(ctx->Nr, state, RoundKey); -} - -static void InvCipher(AES_ctx* ctx, state_t* state) -{ - const uint8_t* RoundKey = ctx->RoundKey; - uint8_t round = 0; - - // Add the First round key to the state before starting the rounds. - AddRoundKey(ctx->Nr, state, RoundKey); - - // There will be Nr rounds. - // The first Nr-1 rounds are identical. - // These Nr-1 rounds are executed in the loop below. - for (round = (ctx->Nr - 1); round > 0; --round) { - InvShiftRows(state); - InvSubBytes(state); - AddRoundKey(round, state, RoundKey); - InvMixColumns(state); - } - - // The last round is given below. - // The MixColumns function is not here in the last round. - InvShiftRows(state); - InvSubBytes(state); - AddRoundKey(0, state, RoundKey); -} - -/*****************************************************************************/ -/* Public functions: */ -/*****************************************************************************/ - -void AES_ECB_encrypt(AES_ctx* ctx, uint8_t* buf) -{ - // The next function call encrypts the PlainText with the Key using AES algorithm. - Cipher(ctx, (state_t*)buf); -} - -void AES_ECB_decrypt(AES_ctx* ctx, uint8_t* buf) -{ - // The next function call decrypts the PlainText with the Key using AES algorithm. - InvCipher(ctx, (state_t*)buf); -} - -static void XorWithIv(uint8_t* buf, const uint8_t* Iv) -{ - uint8_t i; - for (i = 0; i < AES_BLOCKLEN; ++i) { // The block in AES is always 128bit no matter the key size - buf[i] ^= Iv[i]; - } -} - -void AES_CBC_encrypt_buffer(AES_ctx *ctx, uint8_t* buf, uintptr_t length) -{ - uintptr_t i; - uint8_t *Iv = ctx->Iv; - for (i = 0; i < length; i += AES_BLOCKLEN) { - XorWithIv(buf, Iv); - Cipher(ctx, (state_t*)buf); - Iv = buf; - buf += AES_BLOCKLEN; - //printf("Step %d - %d", i/16, i); - } - /* store Iv in ctx for next call */ - memcpy(ctx->Iv, Iv, AES_BLOCKLEN); -} - -void AES_CBC_decrypt_buffer(AES_ctx* ctx, uint8_t* buf, uintptr_t length) -{ - uintptr_t i; - uint8_t storeNextIv[AES_BLOCKLEN]; - for (i = 0; i < length; i += AES_BLOCKLEN) { - memcpy(storeNextIv, buf, AES_BLOCKLEN); - InvCipher(ctx, (state_t*)buf); - XorWithIv(buf, ctx->Iv); - memcpy(ctx->Iv, storeNextIv, AES_BLOCKLEN); - buf += AES_BLOCKLEN; - } - -} - -/* Symmetrical operation: same function for encrypting as for decrypting. Note any IV/nonce should never be reused with the same key */ -void AES_CTR_xcrypt_buffer(AES_ctx* ctx, uint8_t* buf, uintptr_t length) -{ - uint8_t buffer[AES_BLOCKLEN]; - - uintptr_t i; - int bi; - for (i = 0, bi = AES_BLOCKLEN; i < length; ++i, ++bi) { - if (bi == AES_BLOCKLEN) { /* we need to regen xor compliment in buffer */ - - memcpy(buffer, ctx->Iv, AES_BLOCKLEN); - Cipher(ctx, (state_t*)buffer); - - /* Increment Iv and handle overflow */ - for (bi = (AES_BLOCKLEN - 1); bi >= 0; --bi) { - /* inc will overflow */ - if (ctx->Iv[bi] == 255) { - ctx->Iv[bi] = 0; - continue; - } - ctx->Iv[bi] += 1; - break; - } - bi = 0; - } - - buf[i] = (buf[i] ^ buffer[bi]); - } -} - diff --git a/jsrc/aes-c.h b/jsrc/aes-c.h deleted file mode 100644 index 7d6bb60f..00000000 --- a/jsrc/aes-c.h +++ /dev/null @@ -1,32 +0,0 @@ -#include -#include // CBC mode, for memset - -#define AES_BLOCKLEN 16 //Block length in bytes AES is 128b block only - -#define AES_keyExpSize 240 - -typedef struct { - uint8_t RoundKey[AES_keyExpSize]; - uint8_t Iv[AES_BLOCKLEN]; - uint8_t Nk; - uint8_t Nr; -} AES_ctx; - -void AES_init_ctx(AES_ctx* ctx, const uint8_t* key, int keyn); -void AES_init_ctx_iv(AES_ctx* ctx, const uint8_t* key, int keyn, const uint8_t* iv); -void AES_ctx_set_iv(AES_ctx* ctx, const uint8_t* iv); - -// buffer size is exactly AES_BLOCKLEN bytes; -// you need only AES_init_ctx as IV is not used in ECB -// ECB is considered insecure for most uses -void AES_ECB_encrypt(AES_ctx* ctx, uint8_t* buf); -void AES_ECB_decrypt(AES_ctx* ctx, uint8_t* buf); - -// buffer size MUST be mutiple of AES_BLOCKLEN; -void AES_CBC_encrypt_buffer(AES_ctx* ctx, uint8_t* buf, uintptr_t length); -void AES_CBC_decrypt_buffer(AES_ctx* ctx, uint8_t* buf, uintptr_t length); - -// Same function for encrypting as for decrypting. -// IV is incremented for every block, and used after encryption as XOR-compliment for output -void AES_CTR_xcrypt_buffer(AES_ctx* ctx, uint8_t* buf, uintptr_t length); - diff --git a/jsrc/aes-ni.c b/jsrc/aes-ni.c deleted file mode 100644 index 93c953bd..00000000 --- a/jsrc/aes-ni.c +++ /dev/null @@ -1,347 +0,0 @@ -#include "j.h" - -#include -#include -#include - -#define ADD _mm_add_epi32 -#define XOR _mm_xor_si128 -#define AESENC _mm_aesenc_si128 -#define AESENCLAST _mm_aesenclast_si128 -#define AESDEC _mm_aesdec_si128 -#define AESDECLAST _mm_aesdeclast_si128 - -#define BLOCK_SIZE 16 - -typedef unsigned char u8; - -typedef struct { - __m128i* ek; - __m128i* dk; - int rounds; -} block_state; - -/* Wrapper functions for malloc and free with memory alignment */ -#if defined(HAVE_ALIGNED_ALLOC) /* aligned_alloc is defined by C11 */ -# define aligned_malloc_wrapper aligned_alloc -# define aligned_free_wrapper free -#elif defined(__LP64__) /* posix_memalign is defined by POSIX */ -static void* aligned_malloc_wrapper(size_t alignment, size_t size) -{ - void* tmp = NULL; - int err = posix_memalign(&tmp, alignment, size); - if (err != 0) { - /* posix_memalign does NOT set errno on failure; the error is returned */ - errno = err; - return NULL; - } - return tmp; -} -# define aligned_free_wrapper free -#else -static void* aligned_malloc_wrapper(size_t align, size_t size) -{ - void *result; - void *mem = malloc(size+(align-1)+sizeof(void*)); - if(mem) { - result = (void*)((uintptr_t)(mem+(align-1)+sizeof(void*)) & ~(align-1)); - ((void**)result)[-1] = mem; - } else result = 0; - return result; -} -static void aligned_free_wrapper(void* ptr) -{ - free(((void**)ptr)[-1]); -} -#endif - -/* Helper functions to expand keys */ - -static __m128i aes128_keyexpand(__m128i key) -{ - key = _mm_xor_si128(key, _mm_slli_si128(key, 4)); - key = _mm_xor_si128(key, _mm_slli_si128(key, 4)); - return _mm_xor_si128(key, _mm_slli_si128(key, 4)); -} - -static __m128i aes192_keyexpand_2(__m128i key, __m128i key2) -{ - key = _mm_shuffle_epi32(key, 0xff); - key2 = _mm_xor_si128(key2, _mm_slli_si128(key2, 4)); - return _mm_xor_si128(key, key2); -} - -#define KEYEXP128_H(K1, K2, I, S) _mm_xor_si128(aes128_keyexpand(K1), \ - _mm_shuffle_epi32(_mm_aeskeygenassist_si128(K2, I), S)) - -#define KEYEXP128(K, I) KEYEXP128_H(K, K, I, 0xff) -#define KEYEXP192(K1, K2, I) KEYEXP128_H(K1, K2, I, 0x55) -#define KEYEXP192_2(K1, K2) aes192_keyexpand_2(K1, K2) -#define KEYEXP256(K1, K2, I) KEYEXP128_H(K1, K2, I, 0xff) -#define KEYEXP256_2(K1, K2) KEYEXP128_H(K1, K2, 0x00, 0xaa) - -/* Encryption key setup */ -static void aes_key_setup_enc(__m128i* rk, const u8* cipherKey, int keylen) -{ - switch (keylen) { - case 16: { - /* 128 bit key setup */ - rk[0] = _mm_loadu_si128((const __m128i*) cipherKey); - rk[1] = KEYEXP128(rk[0], 0x01); - rk[2] = KEYEXP128(rk[1], 0x02); - rk[3] = KEYEXP128(rk[2], 0x04); - rk[4] = KEYEXP128(rk[3], 0x08); - rk[5] = KEYEXP128(rk[4], 0x10); - rk[6] = KEYEXP128(rk[5], 0x20); - rk[7] = KEYEXP128(rk[6], 0x40); - rk[8] = KEYEXP128(rk[7], 0x80); - rk[9] = KEYEXP128(rk[8], 0x1B); - rk[10] = KEYEXP128(rk[9], 0x36); - break; - } - case 24: { - /* 192 bit key setup */ - __m128i temp[2]; - rk[0] = _mm_loadu_si128((const __m128i*) cipherKey); - rk[1] = _mm_loadu_si128((const __m128i*) (cipherKey+16)); - temp[0] = KEYEXP192(rk[0], rk[1], 0x01); - temp[1] = KEYEXP192_2(temp[0], rk[1]); - rk[1] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(rk[1]), _mm_castsi128_pd(temp[0]), 0)); - rk[2] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(temp[0]), _mm_castsi128_pd(temp[1]), 1)); - rk[3] = KEYEXP192(temp[0], temp[1], 0x02); - rk[4] = KEYEXP192_2(rk[3], temp[1]); - temp[0] = KEYEXP192(rk[3], rk[4], 0x04); - temp[1] = KEYEXP192_2(temp[0], rk[4]); - rk[4] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(rk[4]), _mm_castsi128_pd(temp[0]), 0)); - rk[5] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(temp[0]), _mm_castsi128_pd(temp[1]), 1)); - rk[6] = KEYEXP192(temp[0], temp[1], 0x08); - rk[7] = KEYEXP192_2(rk[6], temp[1]); - temp[0] = KEYEXP192(rk[6], rk[7], 0x10); - temp[1] = KEYEXP192_2(temp[0], rk[7]); - rk[7] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(rk[7]), _mm_castsi128_pd(temp[0]), 0)); - rk[8] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(temp[0]), _mm_castsi128_pd(temp[1]), 1)); - rk[9] = KEYEXP192(temp[0], temp[1], 0x20); - rk[10] = KEYEXP192_2(rk[9], temp[1]); - temp[0] = KEYEXP192(rk[9], rk[10], 0x40); - temp[1] = KEYEXP192_2(temp[0], rk[10]); - rk[10] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(rk[10]), _mm_castsi128_pd(temp[0]), 0)); - rk[11] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(temp[0]),_mm_castsi128_pd(temp[1]), 1)); - rk[12] = KEYEXP192(temp[0], temp[1], 0x80); - break; - } - case 32: { - /* 256 bit key setup */ - rk[0] = _mm_loadu_si128((const __m128i*) cipherKey); - rk[1] = _mm_loadu_si128((const __m128i*) (cipherKey+16)); - rk[2] = KEYEXP256(rk[0], rk[1], 0x01); - rk[3] = KEYEXP256_2(rk[1], rk[2]); - rk[4] = KEYEXP256(rk[2], rk[3], 0x02); - rk[5] = KEYEXP256_2(rk[3], rk[4]); - rk[6] = KEYEXP256(rk[4], rk[5], 0x04); - rk[7] = KEYEXP256_2(rk[5], rk[6]); - rk[8] = KEYEXP256(rk[6], rk[7], 0x08); - rk[9] = KEYEXP256_2(rk[7], rk[8]); - rk[10] = KEYEXP256(rk[8], rk[9], 0x10); - rk[11] = KEYEXP256_2(rk[9], rk[10]); - rk[12] = KEYEXP256(rk[10], rk[11], 0x20); - rk[13] = KEYEXP256_2(rk[11], rk[12]); - rk[14] = KEYEXP256(rk[12], rk[13], 0x40); - break; - } - } -} - -/* Decryption key setup */ -static void aes_key_setup_dec(__m128i* dk, const __m128i* ek, int rounds) -{ - int i; - dk[rounds] = ek[0]; - for (i = 1; i < rounds; ++i) { - dk[rounds - i] = _mm_aesimc_si128(ek[i]); - } - dk[0] = ek[rounds]; -} - -static void block_init(block_state* self, u8* key, int keylen) -{ - int nr = 0; - switch (keylen) { - case 16: - nr = 10; - break; - case 24: - nr = 12; - break; - case 32: - nr = 14; - break; - } - - /* ensure that self->ek and self->dk are aligned to 16 byte boundaries */ - void* tek = aligned_malloc_wrapper(16, (nr + 1) * sizeof(__m128i)); - void* tdk = aligned_malloc_wrapper(16, (nr + 1) * sizeof(__m128i)); - - self->ek = tek; - self->dk = tdk; - - self->rounds = nr; - aes_key_setup_enc(self->ek, key, keylen); - aes_key_setup_dec(self->dk, self->ek, nr); -} - -static void block_finalize(block_state* self) -{ - /* overwrite contents of ek and dk */ - memset(self->ek, 0, (self->rounds + 1) * sizeof(__m128i)); - memset(self->dk, 0, (self->rounds + 1) * sizeof(__m128i)); - - aligned_free_wrapper(self->ek); - aligned_free_wrapper(self->dk); -} - -static void block_encrypt(block_state* self, const u8* in, u8* out) -{ - __m128i m = _mm_loadu_si128((const __m128i*) in); - /* first 9 rounds */ - m = _mm_xor_si128(m, self->ek[0]); - m = _mm_aesenc_si128(m, self->ek[1]); - m = _mm_aesenc_si128(m, self->ek[2]); - m = _mm_aesenc_si128(m, self->ek[3]); - m = _mm_aesenc_si128(m, self->ek[4]); - m = _mm_aesenc_si128(m, self->ek[5]); - m = _mm_aesenc_si128(m, self->ek[6]); - m = _mm_aesenc_si128(m, self->ek[7]); - m = _mm_aesenc_si128(m, self->ek[8]); - m = _mm_aesenc_si128(m, self->ek[9]); - if (self->rounds != 10) { - /* two additional rounds for AES-192/256 */ - m = _mm_aesenc_si128(m, self->ek[10]); - m = _mm_aesenc_si128(m, self->ek[11]); - if (self->rounds == 14) { - /* another two additional rounds for AES-256 */ - m = _mm_aesenc_si128(m, self->ek[12]); - m = _mm_aesenc_si128(m, self->ek[13]); - } - } - m = _mm_aesenclast_si128(m, self->ek[self->rounds]); - _mm_storeu_si128((__m128i*) out, m); -} - -static void block_decrypt(block_state* self, const u8* in, u8* out) -{ - __m128i m = _mm_loadu_si128((const __m128i*) in); - /* first 9 rounds */ - m = _mm_xor_si128(m, self->dk[0]); - m = _mm_aesdec_si128(m, self->dk[1]); - m = _mm_aesdec_si128(m, self->dk[2]); - m = _mm_aesdec_si128(m, self->dk[3]); - m = _mm_aesdec_si128(m, self->dk[4]); - m = _mm_aesdec_si128(m, self->dk[5]); - m = _mm_aesdec_si128(m, self->dk[6]); - m = _mm_aesdec_si128(m, self->dk[7]); - m = _mm_aesdec_si128(m, self->dk[8]); - m = _mm_aesdec_si128(m, self->dk[9]); - if (self->rounds != 10) { - /* two additional rounds for AES-192/256 */ - m = _mm_aesdec_si128(m, self->dk[10]); - m = _mm_aesdec_si128(m, self->dk[11]); - if (self->rounds == 14) { - /* another two additional rounds for AES-256 */ - m = _mm_aesdec_si128(m, self->dk[12]); - m = _mm_aesdec_si128(m, self->dk[13]); - } - } - m = _mm_aesdeclast_si128(m, self->dk[self->rounds]); - _mm_storeu_si128((__m128i*) out, m); -} - -/* - mode - 0 ECB - 1 CBC - 2 CTR - */ -// iv must be 16-byte wide -// out buffer of n bytes and n must be 16-byte block -// out buffer will be overwritten -int aes_ni(I decrypt,I mode,UC *key,I keyn,UC* ivec,UC* out,I len) -{ - block_state self; - u8 *str=out; - I i; - - switch(mode) { - case 0: - block_init(&self, key, (int)keyn); - if(decrypt) { - for(i=0; i= 0; --bi) { - /* inc will overflow */ - if (iv[bi] == 255) { - iv[bi] = 0; - continue; - } - iv[bi] += 1; - break; - } - bi = 0; - } - out[i] = (out[i] ^ buffer[bi]); - } - } - block_finalize(&self); - break; - - default: - return 1; - - } - - return 0; // success -} diff --git a/jsrc/i.c b/jsrc/i.c index 65d81cb0..6a401b23 100644 --- a/jsrc/i.c +++ b/jsrc/i.c @@ -86,9 +86,6 @@ B jtglobinit(J jt){A x,y;A *oldpushx=jt->tnextpushp; pinit(); cpuInit(); -#if defined(__aarch64__) - hwaes=(getCpuFeatures()&ARM_HWCAP_AES)?1:0; -#endif // take all the permanent blocks off the tpop stack so that we don't decrement their usecount. All blocks allocated here must be permanent jt->tnextpushp=oldpushx; DO(IOTAVECLEN, iotavec[i]=i+IOTAVECBEGIN;) // init our vector of ascending integers diff --git a/jsrc/j.c b/jsrc/j.c index 73f2a3d7..34ffee2f 100644 --- a/jsrc/j.c +++ b/jsrc/j.c @@ -64,7 +64,6 @@ Z zeroZ={0,0}; /* 0j0 */ A zpath=0; /* default locale search path */ I iotavec[IOTAVECLEN]; // return values for i. small uint64_t g_cpuFeatures; // blis -UC hwaes=0; // hardware aes support UC hwfma=0; // blis cpu tuning // globals end diff --git a/jsrc/je.h b/jsrc/je.h index ba767b0b..568c44e5 100644 --- a/jsrc/je.h +++ b/jsrc/je.h @@ -376,12 +376,9 @@ extern F1(jtxco1); extern F1(jtxepq); extern F1(jtxeps); -// extern F1(jttest1); - extern F2(jtabase2); extern F2(jtadot2); extern F2(jtadverse); -extern F2(jtaes2); extern F2(jtaflag2); extern F2(jtagenda); extern F2(jtagendai); @@ -954,7 +951,6 @@ extern I oneone[2]; extern A zpath; extern I iotavec[IOTAVECLEN]; // ascending integers, starting at IOTAVECBEGIN /* cpu feature */ -extern UC hwaes; extern UC hwfma; extern VARPSA rpsnull; extern PRIM sfn0overself; diff --git a/jsrc/x.c b/jsrc/x.c index 655f5ea1..5b33d891 100644 --- a/jsrc/x.c +++ b/jsrc/x.c @@ -269,11 +269,8 @@ F2(jtforeign){I p,q; case XC(128,3): return CDERIV(CIBEAM, jtcrc1, jtcrc2, VASGSAFE,RMAX,RMAX,RMAX); case XC(128,4): return CDERIV(CIBEAM, jtrngraw, 0, VASGSAFE,RMAX,RMAX,RMAX); case XC(128,5): return CDERIV(CIBEAM, jtisnan, 0, VASGSAFE,RMAX,RMAX,RMAX); - case XC(128,7): return CDERIV(CIBEAM, 0, jtaes2, VASGSAFE,RMAX,RMAX,RMAX); case XC(128,8): return CDERIV(CIBEAM, jtqhash12, jtqhash12, VASGSAFE|VJTFLGOK1|VJTFLGOK2,RMAX,RMAX,RMAX); -// case XC(128,110): return CDERIV(CIBEAM, jttest1, 0, VASGSAFE,RMAX,RMAX,RMAX); -// default: return foreignextra(a,w); default: ASSERT(0,EVDOMAIN); // any unknown combination is a domain error right away }} diff --git a/jsrc/xaes.c b/jsrc/xaes.c deleted file mode 100644 index fd1af495..00000000 --- a/jsrc/xaes.c +++ /dev/null @@ -1,153 +0,0 @@ -/* Copyright 1990-2006, Jsoftware Inc. All rights reserved. */ -/* Licensed use only. Any other use is in violation of copyright. */ -/* */ -/* Xenos: AES calculation */ - -#include "j.h" -#include "x.h" -#include "cpuinfo.h" - -#include "aes-c.h" - -#include - - -int aes_c(I decrypt,I mode,UC *key,I keyn,UC* iv,UC* out,I n); -#if defined(__i386__) || defined(_M_X64) || defined(__x86_64__) -int aes_ni(I decrypt,I mode,UC *key,I keyn,UC* iv,UC* out,I n); -#endif -#if defined(__aarch64__) -int aes_arm(I decrypt,I mode,UC *key,I keyn,UC* iv,UC* out,I n); -#endif - -/* - mode - 0 ECB - 1 CBC - 2 CTR - */ -F2(jtaes2) -{ - I n,decrypt,keyn,mode=1; - int n1,padding=1; - A z,*av,dec; - UC *out,*key,*iv; - F2RANK(1,1,jtaes2,UNUSED_VALUE); // do rank loop if necessary - ASSERT(AT(a)&BOX,EVDOMAIN); - ASSERT(1>=AR(a),EVRANK); - ASSERT(AN(a)>=3&&AN(a)<=4,EVLENGTH); - av=AAV(a); - ASSERT(1>=AR(av[0]),EVRANK); - RE(dec=vi(av[0])); - ASSERT(AN(dec)==1,EVDOMAIN); - decrypt=(AV(dec))[0]; - ASSERT(decrypt==0||decrypt==1,EVDOMAIN); - ASSERT(AT(av[1])&LIT,EVDOMAIN); - ASSERT(1>=AR(av[1]),EVRANK); - key=UAV(av[1]); - keyn=AN(av[1]); - ASSERT(keyn==16||keyn==24||keyn==32,EVDOMAIN); - ASSERT(AT(av[2])&LIT,EVDOMAIN); - ASSERT(1>=AR(av[2]),EVRANK); - iv=UAV(av[2]); - ASSERT(AN(av[2])==16,EVDOMAIN); - if(AN(a)>3) { - ASSERT(AT(av[3])&LIT,EVDOMAIN); - ASSERT(1>=AR(av[3]),EVRANK); - ASSERT(3==AN(av[3])||9==AN(av[3]),EVDOMAIN); - if(3==AN(av[3])) { - mode=(!strncasecmp(CAV(av[3]),"ECB",AN(av[3])))?0:(!strncasecmp(CAV(av[3]),"CBC",AN(av[3])))?1:(!strncasecmp(CAV(av[3]),"CTR",AN(av[3])))?2:-1; - } else { - padding=0; - mode=(!strncasecmp(CAV(av[3]),"ECB NOPAD",AN(av[3])))?0:(!strncasecmp(CAV(av[3]),"CBC NOPAD",AN(av[3])))?1:(!strncasecmp(CAV(av[3]),"CTR NOPAD",AN(av[3])))?2:-1; - } - ASSERT(mode!=-1,EVDOMAIN); - } - n=AN(w); - ASSERT(!n||AT(w)&LIT,EVDOMAIN); - ASSERT(!n||1>=AR(w),EVRANK); - if(decrypt) { - ASSERT(n||!padding,EVLENGTH); - ASSERT(!n||0==n%16,EVLENGTH); - } else { - if(!(n1=n%16)&&padding)n+=16; - if(n1)n+=16-n1; - } - ASSERT(0==(n%16),EVDOMAIN); - GATV0(z,LIT,n,1); - out=UAV(z); - if(!n)return z; - MC(out,CAV(w),AN(w)); - if(!decrypt) { - if(padding) { - if(n1)memset(out+n-(16-n1),16-n1,16-n1); - else memset(out+n-16,16,16); - } else if(n1)memset(out+n-(16-n1),0,16-n1); - } -#if (defined(__i386__) || defined(_M_X64) || defined(__x86_64__)) - if(hwaes) { - ASSERT(!aes_ni(decrypt,mode,key,keyn,iv,out,n),EVDOMAIN); - } else { - ASSERT(!aes_c(decrypt,mode,key,keyn,iv,out,n),EVDOMAIN); - } -#else -#if defined(__aarch64__) - if(hwaes) { - ASSERT(!aes_arm(decrypt,mode,key,keyn,iv,out,n),EVDOMAIN); - } else { - ASSERT(!aes_c(decrypt,mode,key,keyn,iv,out,n),EVDOMAIN); - } -#else - ASSERT(!aes_c(decrypt,mode,key,keyn,iv,out,n),EVDOMAIN); -#endif -#endif - if(decrypt&&padding) { - int i; - n1=out[n-1]; - ASSERT(n1&&n1<=16,EVDOMAIN); - for(i=n1; i>0; i--)ASSERT(n1==out[n-i],EVDOMAIN); - AS(z)[0]=AN(z)=n-n1; - memset(out+n-n1,0,n1); - } - return z; -} - -/* - mode - 0 ECB - 1 CBC - 2 CTR - */ -// iv must be 16-byte wide -// out buffer of n bytes and n must be 16-byte block -// out buffer will be overwritten -int aes_c(I decrypt,I mode,UC *key,I keyn,UC* iv,UC* out,I n) -{ - I i; - AES_ctx ctx; - switch(mode) { - case 0: - AES_init_ctx(&ctx, key, (int)keyn); - if(decrypt) { - for(i=0; i:16|->:#msg -txt=. msg,pad#pad{a. -assert. 0=16|#txt -j=. (0;key;iv;mode,' nopad') 128!:7 txt -s=. (0;key;iv;mode) 128!:7 msg -k=. (1;key;iv;mode,' nopad') 128!:7 j -t=. (1;key;iv;mode) 128!:7 s -(j-:s)*.(k-:txt)*.(t-:msg) -) - -'ecb' pkcs '' -'ecb' pkcs 'h' -'ecb' pkcs 'hello world' -'ecb' pkcs 'hello world1234' -'ecb' pkcs 'hello world12345' -'ecb' pkcs 'hello world123456' - -'cbc' pkcs '' -'cbc' pkcs 'h' -'cbc' pkcs 'hello world' -'cbc' pkcs 'hello world1234' -'cbc' pkcs 'hello world12345' -'cbc' pkcs 'hello world123456' - -'ctr' pkcs '' -'ctr' pkcs 'h' -'ctr' pkcs 'hello world' -'ctr' pkcs 'hello world1234' -'ctr' pkcs 'hello world12345' -'ctr' pkcs 'hello world123456' - -'domain error' -: f etx 2 3 4 -'domain error' -: f etx 2 3 4x -'domain error' -: f etx 2 3.4 -'domain error' -: f etx 2 3j4 -'domain error' -: f etx 2 3r4 -'domain error' -: f etx 2 3;4 -'domain error' -: f etx s:@<"0 'abc' -'domain error' -: f etx u: 'abc' -'domain error' -: f etx 10 u: 'abc' - -'domain error' -: 123 f etx 3 4 5 -'domain error' -: 123 f etx 3 4 5x -'domain error' -: 123 f etx 3.4 5 -'domain error' -: 123 f etx 3j4 5 -'domain error' -: 123 f etx 3r4 5 - -'domain error' -: 0 f etx 'xyz' -'domain error' -: 16 f etx 'xyz' -'domain error' -: _16 f etx 'xyz' -'domain error' -: '34' f etx 'xyz' -'domain error' -: (u:'34') f etx 'xyz' -'domain error' -: (10&u:'34') f etx 'xyz' -'domain error' -: (s:@<"0 '34') f etx 'xyz' -'domain error' -: 3.4 f etx 'xyz' -'domain error' -: 3j4 f etx 'xyz' -'length error' -: (<'abc') f etx 'xyz' -'length error' -: (