diff --git a/deps/openssl/asm/arm-void-gas/aes/aesv8-armx.S b/deps/openssl/asm/arm-void-gas/aes/aesv8-armx.S index 732ba3d9c88b94..fd979d078f46ba 100644 --- a/deps/openssl/asm/arm-void-gas/aes/aesv8-armx.S +++ b/deps/openssl/asm/arm-void-gas/aes/aesv8-armx.S @@ -230,17 +230,17 @@ aes_v8_encrypt: .Loop_enc: .byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0 - vld1.32 {q0},[r2]! .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 + vld1.32 {q0},[r2]! subs r3,r3,#2 .byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1 - vld1.32 {q1},[r2]! .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 + vld1.32 {q1},[r2]! bgt .Loop_enc .byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0 - vld1.32 {q0},[r2] .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 + vld1.32 {q0},[r2] .byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1 veor q2,q2,q0 @@ -259,17 +259,17 @@ aes_v8_decrypt: .Loop_dec: .byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0 - vld1.32 {q0},[r2]! .byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 + vld1.32 {q0},[r2]! subs r3,r3,#2 .byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1 - vld1.32 {q1},[r2]! .byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 + vld1.32 {q1},[r2]! bgt .Loop_dec .byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0 - vld1.32 {q0},[r2] .byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 + vld1.32 {q0},[r2] .byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1 veor q2,q2,q0 @@ -313,16 +313,42 @@ aes_v8_cbc_encrypt: veor q5,q8,q7 beq .Lcbc_enc128 + vld1.32 {q2-q3},[r7] + add r7,r3,#16 + add r6,r3,#16*4 + add r12,r3,#16*5 + .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + add r14,r3,#16*6 + add r3,r3,#16*7 + b .Lenter_cbc_enc + +.align 4 .Loop_cbc_enc: .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 - vld1.32 {q8},[r7]! .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - subs r6,r6,#2 + vst1.8 {q6},[r1]! +.Lenter_cbc_enc: .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 - vld1.32 {q9},[r7]! .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - bgt .Loop_cbc_enc + .byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.32 {q8},[r6] + cmp r5,#4 + .byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.32 {q9},[r12] + beq .Lcbc_enc192 + + .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.32 {q8},[r14] + .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.32 {q9},[r3] + nop +.Lcbc_enc192: .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 subs r2,r2,#16 @@ -331,7 +357,6 @@ aes_v8_cbc_encrypt: moveq r8,#0 .byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - add r7,r3,#16 .byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 vld1.8 {q8},[r0],r8 @@ -340,16 +365,14 @@ aes_v8_cbc_encrypt: veor q8,q8,q5 .byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - vld1.32 {q9},[r7]! @ re-pre-load rndkey[1] + vld1.32 {q9},[r7] @ re-pre-load rndkey[1] .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 .byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 - - mov r6,r5 veor q6,q0,q7 - vst1.8 {q6},[r1]! bhs .Loop_cbc_enc + vst1.8 {q6},[r1]! b .Lcbc_done .align 5 @@ -407,79 +430,78 @@ aes_v8_cbc_encrypt: .Loop3x_cbc_dec: .byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8 - .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 - .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 - vld1.32 {q8},[r7]! .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + vld1.32 {q8},[r7]! subs r6,r6,#2 .byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9 - .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 - .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 - vld1.32 {q9},[r7]! .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + vld1.32 {q9},[r7]! bgt .Loop3x_cbc_dec .byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8 - .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 - .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 - veor q4,q6,q7 .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + veor q4,q6,q7 + subs r2,r2,#0x30 veor q5,q2,q7 + movlo r6,r2 @ r6, r6, is zero at this point .byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9 - .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 - .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 - veor q9,q3,q7 - subs r2,r2,#0x30 .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 - vorr q6,q11,q11 - movlo r6,r2 @ r6, r6, is zero at this point - .byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12 - .byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12 - .byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12 + veor q9,q3,q7 add r0,r0,r6 @ r0 is adjusted in such way that @ at exit from the loop q1-q10 @ are loaded with last "words" + vorr q6,q11,q11 + mov r7,r3 + .byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12 .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 - mov r7,r3 - .byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13 - .byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13 - .byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13 vld1.8 {q2},[r0]! + .byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13 .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 vld1.8 {q3},[r0]! .byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14 - .byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14 - .byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14 - vld1.8 {q11},[r0]! .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 - vld1.32 {q8},[r7]! @ re-pre-load rndkey[0] + vld1.8 {q11},[r0]! .byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15 .byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15 .byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15 - + vld1.32 {q8},[r7]! @ re-pre-load rndkey[0] add r6,r5,#2 veor q4,q4,q0 veor q5,q5,q1 veor q10,q10,q9 vld1.32 {q9},[r7]! @ re-pre-load rndkey[1] - vorr q0,q2,q2 vst1.8 {q4},[r1]! - vorr q1,q3,q3 + vorr q0,q2,q2 vst1.8 {q5},[r1]! + vorr q1,q3,q3 vst1.8 {q10},[r1]! vorr q10,q11,q11 bhs .Loop3x_cbc_dec @@ -490,39 +512,39 @@ aes_v8_cbc_encrypt: .Lcbc_dec_tail: .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 - .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 - vld1.32 {q8},[r7]! .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + vld1.32 {q8},[r7]! subs r6,r6,#2 .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 - .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 - vld1.32 {q9},[r7]! .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + vld1.32 {q9},[r7]! bgt .Lcbc_dec_tail .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 - .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 - .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 .byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12 - .byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 cmn r2,#0x20 .byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13 - .byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 veor q5,q6,q7 .byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14 - .byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 veor q9,q3,q7 .byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15 @@ -590,70 +612,69 @@ aes_v8_ctr32_encrypt_blocks: .align 4 .Loop3x_ctr32: .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 - .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 - .byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8 - vld1.32 {q8},[r7]! .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + .byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8 .byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 + vld1.32 {q8},[r7]! subs r6,r6,#2 .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 - .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 - .byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9 - vld1.32 {q9},[r7]! .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + .byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9 .byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 + vld1.32 {q9},[r7]! bgt .Loop3x_ctr32 .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 - .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 - .byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8 - mov r7,r3 .byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0 - vld1.8 {q2},[r0]! + .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 .byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1 - .byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 + vld1.8 {q2},[r0]! vorr q0,q6,q6 - .byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9 + .byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8 + .byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 vld1.8 {q3},[r0]! - .byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9 - .byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9 vorr q1,q6,q6 + .byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9 .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 - vld1.8 {q11},[r0]! + .byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9 .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 + vld1.8 {q11},[r0]! + mov r7,r3 + .byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9 .byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10 vorr q10,q6,q6 add r9,r8,#1 .byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12 + .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 .byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12 - .byte 0x28,0x23,0xf0,0xf3 @ aese q9,q12 + .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 veor q2,q2,q7 add r10,r8,#2 - .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 - .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 + .byte 0x28,0x23,0xf0,0xf3 @ aese q9,q12 .byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9 veor q3,q3,q7 add r8,r8,#3 .byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13 + .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 .byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13 - .byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13 + .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 veor q11,q11,q7 rev r9,r9 - .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 - vld1.32 {q8},[r7]! @ re-pre-load rndkey[0] - .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 + .byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13 .byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9 vmov.32 d1[1], r9 rev r10,r10 .byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14 + .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 .byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14 - .byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14 + .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 vmov.32 d3[1], r10 rev r12,r8 - .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 - .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 + .byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14 .byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9 vmov.32 d21[1], r12 subs r2,r2,#3 @@ -661,13 +682,14 @@ aes_v8_ctr32_encrypt_blocks: .byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15 .byte 0x2e,0x23,0xf0,0xf3 @ aese q9,q15 - mov r6,r5 veor q2,q2,q4 + vld1.32 {q8},[r7]! @ re-pre-load rndkey[0] + vst1.8 {q2},[r1]! veor q3,q3,q5 + mov r6,r5 + vst1.8 {q3},[r1]! veor q11,q11,q9 vld1.32 {q9},[r7]! @ re-pre-load rndkey[1] - vst1.8 {q2},[r1]! - vst1.8 {q3},[r1]! vst1.8 {q11},[r1]! bhs .Loop3x_ctr32 @@ -679,40 +701,40 @@ aes_v8_ctr32_encrypt_blocks: .Lctr32_tail: .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 - .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 - vld1.32 {q8},[r7]! .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + vld1.32 {q8},[r7]! subs r6,r6,#2 .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 - .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 - vld1.32 {q9},[r7]! .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + vld1.32 {q9},[r7]! bgt .Lctr32_tail .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 - .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 - .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 vld1.8 {q2},[r0],r12 .byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 - .byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12 - vld1.8 {q3},[r0] .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + vld1.8 {q3},[r0] .byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 - .byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 - .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 - .byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14 veor q2,q2,q7 + .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 veor q3,q3,q7 .byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 diff --git a/deps/openssl/asm/arm-void-gas/modes/ghash-armv4.S b/deps/openssl/asm/arm-void-gas/modes/ghash-armv4.S index d321235f79bd83..c54f5149974c6f 100644 --- a/deps/openssl/asm/arm-void-gas/modes/ghash-armv4.S +++ b/deps/openssl/asm/arm-void-gas/modes/ghash-armv4.S @@ -495,7 +495,7 @@ gcm_ghash_neon: veor q10,q10,q9 @ vshl.i64 q9,q0,#63 veor q10, q10, q9 @ - veor d1,d1,d20 @ + veor d1,d1,d20 @ veor d4,d4,d21 vshr.u64 q10,q0,#1 @ 2nd phase diff --git a/deps/openssl/asm/arm-void-gas/modes/ghashv8-armx.S b/deps/openssl/asm/arm-void-gas/modes/ghashv8-armx.S index 570d9175c47605..269574945fa6a2 100644 --- a/deps/openssl/asm/arm-void-gas/modes/ghashv8-armx.S +++ b/deps/openssl/asm/arm-void-gas/modes/ghashv8-armx.S @@ -7,109 +7,223 @@ .type gcm_init_v8,%function .align 4 gcm_init_v8: - vld1.64 {q9},[r1] @ load H - vmov.i8 q8,#0xe1 + vld1.64 {q9},[r1] @ load input H + vmov.i8 q11,#0xe1 + vshl.i64 q11,q11,#57 @ 0xc2.0 vext.8 q3,q9,q9,#8 - vshl.i64 q8,q8,#57 - vshr.u64 q10,q8,#63 - vext.8 q8,q10,q8,#8 @ t0=0xc2....01 + vshr.u64 q10,q11,#63 vdup.32 q9,d18[1] - vshr.u64 q11,q3,#63 + vext.8 q8,q10,q11,#8 @ t0=0xc2....01 + vshr.u64 q10,q3,#63 vshr.s32 q9,q9,#31 @ broadcast carry bit - vand q11,q11,q8 + vand q10,q10,q8 vshl.i64 q3,q3,#1 - vext.8 q11,q11,q11,#8 + vext.8 q10,q10,q10,#8 vand q8,q8,q9 - vorr q3,q3,q11 @ H<<<=1 - veor q3,q3,q8 @ twisted H - vst1.64 {q3},[r0] + vorr q3,q3,q10 @ H<<<=1 + veor q12,q3,q8 @ twisted H + vst1.64 {q12},[r0]! @ store Htable[0] + + @ calculate H^2 + vext.8 q8,q12,q12,#8 @ Karatsuba pre-processing + .byte 0xa8,0x0e,0xa8,0xf2 @ pmull q0,q12,q12 + veor q8,q8,q12 + .byte 0xa9,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q12 + .byte 0xa0,0x2e,0xa0,0xf2 @ pmull q1,q8,q8 + + vext.8 q9,q0,q2,#8 @ Karatsuba post-processing + veor q10,q0,q2 + veor q1,q1,q9 + veor q1,q1,q10 + .byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase + + vmov d4,d3 @ Xh|Xm - 256-bit result + vmov d3,d0 @ Xm is rotated Xl + veor q0,q1,q10 + + vext.8 q10,q0,q0,#8 @ 2nd phase + .byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11 + veor q10,q10,q2 + veor q14,q0,q10 + + vext.8 q9,q14,q14,#8 @ Karatsuba pre-processing + veor q9,q9,q14 + vext.8 q13,q8,q9,#8 @ pack Karatsuba pre-processed + vst1.64 {q13-q14},[r0] @ store Htable[1..2] bx lr .size gcm_init_v8,.-gcm_init_v8 - .global gcm_gmult_v8 .type gcm_gmult_v8,%function .align 4 gcm_gmult_v8: vld1.64 {q9},[r0] @ load Xi vmov.i8 q11,#0xe1 - vld1.64 {q12},[r1] @ load twisted H + vld1.64 {q12-q13},[r1] @ load twisted H, ... vshl.u64 q11,q11,#57 #ifndef __ARMEB__ vrev64.8 q9,q9 #endif - vext.8 q13,q12,q12,#8 - mov r3,#0 vext.8 q3,q9,q9,#8 - mov r12,#0 - veor q13,q13,q12 @ Karatsuba pre-processing - mov r2,r0 - b .Lgmult_v8 -.size gcm_gmult_v8,.-gcm_gmult_v8 + .byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo + veor q9,q9,q3 @ Karatsuba pre-processing + .byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi + .byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi) + + vext.8 q9,q0,q2,#8 @ Karatsuba post-processing + veor q10,q0,q2 + veor q1,q1,q9 + veor q1,q1,q10 + .byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction + + vmov d4,d3 @ Xh|Xm - 256-bit result + vmov d3,d0 @ Xm is rotated Xl + veor q0,q1,q10 + + vext.8 q10,q0,q0,#8 @ 2nd phase of reduction + .byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11 + veor q10,q10,q2 + veor q0,q0,q10 + +#ifndef __ARMEB__ + vrev64.8 q0,q0 +#endif + vext.8 q0,q0,q0,#8 + vst1.64 {q0},[r0] @ write out Xi + + bx lr +.size gcm_gmult_v8,.-gcm_gmult_v8 .global gcm_ghash_v8 .type gcm_ghash_v8,%function .align 4 gcm_ghash_v8: + vstmdb sp!,{d8-d15} @ 32-bit ABI says so vld1.64 {q0},[r0] @ load [rotated] Xi - subs r3,r3,#16 + @ "[rotated]" means that + @ loaded value would have + @ to be rotated in order to + @ make it appear as in + @ alorithm specification + subs r3,r3,#32 @ see if r3 is 32 or larger + mov r12,#16 @ r12 is used as post- + @ increment for input pointer; + @ as loop is modulo-scheduled + @ r12 is zeroed just in time + @ to preclude oversteping + @ inp[len], which means that + @ last block[s] are actually + @ loaded twice, but last + @ copy is not processed + vld1.64 {q12-q13},[r1]! @ load twisted H, ..., H^2 vmov.i8 q11,#0xe1 - mov r12,#16 - vld1.64 {q12},[r1] @ load twisted H - moveq r12,#0 - vext.8 q0,q0,q0,#8 - vshl.u64 q11,q11,#57 - vld1.64 {q9},[r2],r12 @ load [rotated] inp - vext.8 q13,q12,q12,#8 + vld1.64 {q14},[r1] + moveq r12,#0 @ is it time to zero r12? + vext.8 q0,q0,q0,#8 @ rotate Xi + vld1.64 {q8},[r2]! @ load [rotated] I[0] + vshl.u64 q11,q11,#57 @ compose 0xc2.0 constant #ifndef __ARMEB__ + vrev64.8 q8,q8 vrev64.8 q0,q0 +#endif + vext.8 q3,q8,q8,#8 @ rotate I[0] + blo .Lodd_tail_v8 @ r3 was less than 32 + vld1.64 {q9},[r2],r12 @ load [rotated] I[1] +#ifndef __ARMEB__ vrev64.8 q9,q9 #endif - veor q13,q13,q12 @ Karatsuba pre-processing - vext.8 q3,q9,q9,#8 - b .Loop_v8 + vext.8 q7,q9,q9,#8 + veor q3,q3,q0 @ I[i]^=Xi + .byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1 + veor q9,q9,q7 @ Karatsuba pre-processing + .byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7 + b .Loop_mod2x_v8 .align 4 -.Loop_v8: +.Loop_mod2x_v8: + vext.8 q10,q3,q3,#8 + subs r3,r3,#32 @ is there more data? + .byte 0x86,0x0e,0xac,0xf2 @ pmull q0,q14,q3 @ H^2.lo·Xi.lo + movlo r12,#0 @ is it time to zero r12? + + .byte 0xa2,0xae,0xaa,0xf2 @ pmull q5,q13,q9 + veor q10,q10,q3 @ Karatsuba pre-processing + .byte 0x87,0x4e,0xad,0xf2 @ pmull2 q2,q14,q3 @ H^2.hi·Xi.hi + veor q0,q0,q4 @ accumulate + .byte 0xa5,0x2e,0xab,0xf2 @ pmull2 q1,q13,q10 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi) + vld1.64 {q8},[r2],r12 @ load [rotated] I[i+2] + + veor q2,q2,q6 + moveq r12,#0 @ is it time to zero r12? + veor q1,q1,q5 + + vext.8 q9,q0,q2,#8 @ Karatsuba post-processing + veor q10,q0,q2 + veor q1,q1,q9 + vld1.64 {q9},[r2],r12 @ load [rotated] I[i+3] +#ifndef __ARMEB__ + vrev64.8 q8,q8 +#endif + veor q1,q1,q10 + .byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction + +#ifndef __ARMEB__ + vrev64.8 q9,q9 +#endif + vmov d4,d3 @ Xh|Xm - 256-bit result + vmov d3,d0 @ Xm is rotated Xl + vext.8 q7,q9,q9,#8 + vext.8 q3,q8,q8,#8 + veor q0,q1,q10 + .byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1 + veor q3,q3,q2 @ accumulate q3 early + + vext.8 q10,q0,q0,#8 @ 2nd phase of reduction + .byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11 + veor q3,q3,q10 + veor q9,q9,q7 @ Karatsuba pre-processing + veor q3,q3,q0 + .byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7 + bhs .Loop_mod2x_v8 @ there was at least 32 more bytes + + veor q2,q2,q10 + vext.8 q3,q8,q8,#8 @ re-construct q3 + adds r3,r3,#32 @ re-construct r3 + veor q0,q0,q2 @ re-construct q0 + beq .Ldone_v8 @ is r3 zero? +.Lodd_tail_v8: vext.8 q10,q0,q0,#8 veor q3,q3,q0 @ inp^=Xi - veor q9,q9,q10 @ q9 is rotated inp^Xi + veor q9,q8,q10 @ q9 is rotated inp^Xi -.Lgmult_v8: .byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo veor q9,q9,q3 @ Karatsuba pre-processing .byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi - subs r3,r3,#16 .byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi) - moveq r12,#0 vext.8 q9,q0,q2,#8 @ Karatsuba post-processing veor q10,q0,q2 veor q1,q1,q9 - vld1.64 {q9},[r2],r12 @ load [rotated] inp veor q1,q1,q10 - .byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase + .byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction vmov d4,d3 @ Xh|Xm - 256-bit result vmov d3,d0 @ Xm is rotated Xl -#ifndef __ARMEB__ - vrev64.8 q9,q9 -#endif veor q0,q1,q10 - vext.8 q3,q9,q9,#8 - vext.8 q10,q0,q0,#8 @ 2nd phase + vext.8 q10,q0,q0,#8 @ 2nd phase of reduction .byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11 veor q10,q10,q2 veor q0,q0,q10 - bhs .Loop_v8 +.Ldone_v8: #ifndef __ARMEB__ vrev64.8 q0,q0 #endif vext.8 q0,q0,q0,#8 vst1.64 {q0},[r0] @ write out Xi + vldmia sp!,{d8-d15} @ 32-bit ABI says so bx lr .size gcm_ghash_v8,.-gcm_ghash_v8 .asciz "GHASH for ARMv8, CRYPTOGAMS by " diff --git a/deps/openssl/asm/arm-void-gas/sha/sha256-armv4.S b/deps/openssl/asm/arm-void-gas/sha/sha256-armv4.S index bf1ce4f997e7b7..683f1cc0c874b5 100644 --- a/deps/openssl/asm/arm-void-gas/sha/sha256-armv4.S +++ b/deps/openssl/asm/arm-void-gas/sha/sha256-armv4.S @@ -1,7 +1,59 @@ -#include "arm_arch.h" + +@ ==================================================================== +@ Written by Andy Polyakov for the OpenSSL +@ project. The module is, however, dual licensed under OpenSSL and +@ CRYPTOGAMS licenses depending on where you obtain it. For further +@ details see http://www.openssl.org/~appro/cryptogams/. +@ +@ Permission to use under GPL terms is granted. +@ ==================================================================== + +@ SHA256 block procedure for ARMv4. May 2007. + +@ Performance is ~2x better than gcc 3.4 generated code and in "abso- +@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per +@ byte [on single-issue Xscale PXA250 core]. + +@ July 2010. +@ +@ Rescheduling for dual-issue pipeline resulted in 22% improvement on +@ Cortex A8 core and ~20 cycles per processed byte. + +@ February 2011. +@ +@ Profiler-assisted and platform-specific optimization resulted in 16% +@ improvement on Cortex A8 core and ~15.4 cycles per processed byte. + +@ September 2013. +@ +@ Add NEON implementation. On Cortex A8 it was measured to process one +@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon +@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only +@ code (meaning that latter performs sub-optimally, nothing was done +@ about it). + +@ May 2014. +@ +@ Add ARMv8 code path performing at 2.0 cpb on Apple A7. + +#ifndef __KERNEL__ +# include "arm_arch.h" +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ 7 +#endif .text +#if __ARM_ARCH__<7 .code 32 +#else +.syntax unified +# ifdef __thumb2__ +.thumb +# else +.code 32 +# endif +#endif .type K256,%object .align 5 @@ -24,7 +76,7 @@ K256: .word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .size K256,.-K256 .word 0 @ terminator -#if __ARM_MAX_ARCH__>=7 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) .LOPENSSL_armcap: .word OPENSSL_armcap_P-sha256_block_data_order #endif @@ -33,9 +85,12 @@ K256: .global sha256_block_data_order .type sha256_block_data_order,%function sha256_block_data_order: +#if __ARM_ARCH__<7 sub r3,pc,#8 @ sha256_block_data_order - add r2,r1,r2,lsl#6 @ len to point at the end of inp -#if __ARM_MAX_ARCH__>=7 +#else + adr r3,sha256_block_data_order +#endif +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) ldr r12,.LOPENSSL_armcap ldr r12,[r3,r12] @ OPENSSL_armcap_P tst r12,#ARMV8_SHA256 @@ -43,6 +98,7 @@ sha256_block_data_order: tst r12,#ARMV7_NEON bne .LNEON #endif + add r2,r1,r2,lsl#6 @ len to point at the end of inp stmdb sp!,{r0,r1,r2,r4-r11,lr} ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11} sub r14,r3,#256+32 @ K256 @@ -1736,6 +1792,9 @@ sha256_block_data_order: eor r12,r12,r6 @ Maj(a,b,c) add r4,r4,r0,ror#2 @ h+=Sigma0(a) @ add r4,r4,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + ite eq @ Thumb2 thing, sanity check in ARM +#endif ldreq r3,[sp,#16*4] @ pull ctx bne .Lrounds_16_xx @@ -1777,16 +1836,19 @@ sha256_block_data_order: .arch armv7-a .fpu neon +.global sha256_block_data_order_neon .type sha256_block_data_order_neon,%function .align 4 sha256_block_data_order_neon: .LNEON: stmdb sp!,{r4-r12,lr} + sub r11,sp,#16*4+16 + adr r14,K256 + bic r11,r11,#15 @ align for 128-bit stores mov r12,sp - sub sp,sp,#16*4+16 @ alloca - sub r14,r3,#256+32 @ K256 - bic sp,sp,#15 @ align for 128-bit stores + mov sp,r11 @ alloca + add r2,r1,r2,lsl#6 @ len to point at the end of inp vld1.8 {q0},[r1]! vld1.8 {q1},[r1]! @@ -2224,11 +2286,13 @@ sha256_block_data_order_neon: ldr r0,[sp,#72] sub r14,r14,#256 @ rewind r14 teq r1,r0 + it eq subeq r1,r1,#64 @ avoid SEGV vld1.8 {q0},[r1]! @ load next input block vld1.8 {q1},[r1]! vld1.8 {q2},[r1]! vld1.8 {q3},[r1]! + it ne strne r1,[sp,#68] mov r1,sp add r11,r11,r2 @@ -2542,23 +2606,38 @@ sha256_block_data_order_neon: str r7,[r2],#4 stmia r2,{r8-r11} + ittte ne movne r1,sp ldrne r2,[sp,#0] eorne r12,r12,r12 ldreq sp,[sp,#76] @ restore original sp + itt ne eorne r3,r5,r6 bne .L_00_48 ldmia sp!,{r4-r12,pc} .size sha256_block_data_order_neon,.-sha256_block_data_order_neon #endif -#if __ARM_MAX_ARCH__>=7 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + +# ifdef __thumb2__ +# define INST(a,b,c,d) .byte c,d|0xc,a,b +# else +# define INST(a,b,c,d) .byte a,b,c,d +# endif + .type sha256_block_data_order_armv8,%function .align 5 sha256_block_data_order_armv8: .LARMv8: vld1.32 {q0,q1},[r0] - sub r3,r3,#sha256_block_data_order-K256 +# ifdef __thumb2__ + adr r3,.LARMv8 + sub r3,r3,#.LARMv8-K256 +# else + adrl r3,K256 +# endif + add r2,r1,r2,lsl#6 @ len to point at the end of inp .Loop_v8: vld1.8 {q8-q9},[r1]! @@ -2573,114 +2652,115 @@ sha256_block_data_order_armv8: teq r1,r2 vld1.32 {q13},[r3]! vadd.i32 q12,q12,q8 - .byte 0xe2,0x03,0xfa,0xf3 @ sha256su0 q8,q9 + INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 vmov q2,q0 - .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 - .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 - .byte 0xe6,0x0c,0x64,0xf3 @ sha256su1 q8,q10,q11 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 vld1.32 {q12},[r3]! vadd.i32 q13,q13,q9 - .byte 0xe4,0x23,0xfa,0xf3 @ sha256su0 q9,q10 + INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 vmov q2,q0 - .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 - .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 - .byte 0xe0,0x2c,0x66,0xf3 @ sha256su1 q9,q11,q8 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 vld1.32 {q13},[r3]! vadd.i32 q12,q12,q10 - .byte 0xe6,0x43,0xfa,0xf3 @ sha256su0 q10,q11 + INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 vmov q2,q0 - .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 - .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 - .byte 0xe2,0x4c,0x60,0xf3 @ sha256su1 q10,q8,q9 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 vld1.32 {q12},[r3]! vadd.i32 q13,q13,q11 - .byte 0xe0,0x63,0xfa,0xf3 @ sha256su0 q11,q8 + INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 vmov q2,q0 - .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 - .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 - .byte 0xe4,0x6c,0x62,0xf3 @ sha256su1 q11,q9,q10 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 vld1.32 {q13},[r3]! vadd.i32 q12,q12,q8 - .byte 0xe2,0x03,0xfa,0xf3 @ sha256su0 q8,q9 + INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 vmov q2,q0 - .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 - .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 - .byte 0xe6,0x0c,0x64,0xf3 @ sha256su1 q8,q10,q11 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 vld1.32 {q12},[r3]! vadd.i32 q13,q13,q9 - .byte 0xe4,0x23,0xfa,0xf3 @ sha256su0 q9,q10 + INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 vmov q2,q0 - .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 - .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 - .byte 0xe0,0x2c,0x66,0xf3 @ sha256su1 q9,q11,q8 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 vld1.32 {q13},[r3]! vadd.i32 q12,q12,q10 - .byte 0xe6,0x43,0xfa,0xf3 @ sha256su0 q10,q11 + INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 vmov q2,q0 - .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 - .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 - .byte 0xe2,0x4c,0x60,0xf3 @ sha256su1 q10,q8,q9 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 vld1.32 {q12},[r3]! vadd.i32 q13,q13,q11 - .byte 0xe0,0x63,0xfa,0xf3 @ sha256su0 q11,q8 + INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 vmov q2,q0 - .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 - .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 - .byte 0xe4,0x6c,0x62,0xf3 @ sha256su1 q11,q9,q10 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 vld1.32 {q13},[r3]! vadd.i32 q12,q12,q8 - .byte 0xe2,0x03,0xfa,0xf3 @ sha256su0 q8,q9 + INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 vmov q2,q0 - .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 - .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 - .byte 0xe6,0x0c,0x64,0xf3 @ sha256su1 q8,q10,q11 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 vld1.32 {q12},[r3]! vadd.i32 q13,q13,q9 - .byte 0xe4,0x23,0xfa,0xf3 @ sha256su0 q9,q10 + INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 vmov q2,q0 - .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 - .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 - .byte 0xe0,0x2c,0x66,0xf3 @ sha256su1 q9,q11,q8 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 vld1.32 {q13},[r3]! vadd.i32 q12,q12,q10 - .byte 0xe6,0x43,0xfa,0xf3 @ sha256su0 q10,q11 + INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 vmov q2,q0 - .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 - .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 - .byte 0xe2,0x4c,0x60,0xf3 @ sha256su1 q10,q8,q9 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 vld1.32 {q12},[r3]! vadd.i32 q13,q13,q11 - .byte 0xe0,0x63,0xfa,0xf3 @ sha256su0 q11,q8 + INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 vmov q2,q0 - .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 - .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 - .byte 0xe4,0x6c,0x62,0xf3 @ sha256su1 q11,q9,q10 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 vld1.32 {q13},[r3]! vadd.i32 q12,q12,q8 vmov q2,q0 - .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 - .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 vld1.32 {q12},[r3]! vadd.i32 q13,q13,q9 vmov q2,q0 - .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 - .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 vld1.32 {q13},[r3] vadd.i32 q12,q12,q10 sub r3,r3,#256-16 @ rewind vmov q2,q0 - .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 - .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 vadd.i32 q13,q13,q11 vmov q2,q0 - .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 - .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 vadd.i32 q0,q0,q14 vadd.i32 q1,q1,q15 + it ne bne .Loop_v8 vst1.32 {q0,q1},[r0] @@ -2690,6 +2770,6 @@ sha256_block_data_order_armv8: #endif .asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by " .align 2 -#if __ARM_MAX_ARCH__>=7 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) .comm OPENSSL_armcap_P,4,4 #endif diff --git a/deps/openssl/asm/arm64-linux64-gas/aes/aesv8-armx.S b/deps/openssl/asm/arm64-linux64-gas/aes/aesv8-armx.S index 0a4b1ac4c40082..f5dd6cbb86234e 100644 --- a/deps/openssl/asm/arm64-linux64-gas/aes/aesv8-armx.S +++ b/deps/openssl/asm/arm64-linux64-gas/aes/aesv8-armx.S @@ -227,17 +227,17 @@ aes_v8_encrypt: .Loop_enc: aese v2.16b,v0.16b - ld1 {v0.4s},[x2],#16 aesmc v2.16b,v2.16b + ld1 {v0.4s},[x2],#16 subs w3,w3,#2 aese v2.16b,v1.16b - ld1 {v1.4s},[x2],#16 aesmc v2.16b,v2.16b + ld1 {v1.4s},[x2],#16 b.gt .Loop_enc aese v2.16b,v0.16b - ld1 {v0.4s},[x2] aesmc v2.16b,v2.16b + ld1 {v0.4s},[x2] aese v2.16b,v1.16b eor v2.16b,v2.16b,v0.16b @@ -256,17 +256,17 @@ aes_v8_decrypt: .Loop_dec: aesd v2.16b,v0.16b - ld1 {v0.4s},[x2],#16 aesimc v2.16b,v2.16b + ld1 {v0.4s},[x2],#16 subs w3,w3,#2 aesd v2.16b,v1.16b - ld1 {v1.4s},[x2],#16 aesimc v2.16b,v2.16b + ld1 {v1.4s},[x2],#16 b.gt .Loop_dec aesd v2.16b,v0.16b - ld1 {v0.4s},[x2] aesimc v2.16b,v2.16b + ld1 {v0.4s},[x2] aesd v2.16b,v1.16b eor v2.16b,v2.16b,v0.16b @@ -308,16 +308,42 @@ aes_v8_cbc_encrypt: eor v5.16b,v16.16b,v7.16b b.eq .Lcbc_enc128 + ld1 {v2.4s-v3.4s},[x7] + add x7,x3,#16 + add x6,x3,#16*4 + add x12,x3,#16*5 + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + add x14,x3,#16*6 + add x3,x3,#16*7 + b .Lenter_cbc_enc + +.align 4 .Loop_cbc_enc: aese v0.16b,v16.16b - ld1 {v16.4s},[x7],#16 aesmc v0.16b,v0.16b - subs w6,w6,#2 + st1 {v6.16b},[x1],#16 +.Lenter_cbc_enc: aese v0.16b,v17.16b - ld1 {v17.4s},[x7],#16 aesmc v0.16b,v0.16b - b.gt .Loop_cbc_enc + aese v0.16b,v2.16b + aesmc v0.16b,v0.16b + ld1 {v16.4s},[x6] + cmp w5,#4 + aese v0.16b,v3.16b + aesmc v0.16b,v0.16b + ld1 {v17.4s},[x12] + b.eq .Lcbc_enc192 + + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + ld1 {v16.4s},[x14] + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + ld1 {v17.4s},[x3] + nop +.Lcbc_enc192: aese v0.16b,v16.16b aesmc v0.16b,v0.16b subs x2,x2,#16 @@ -326,7 +352,6 @@ aes_v8_cbc_encrypt: csel x8,xzr,x8,eq aese v0.16b,v18.16b aesmc v0.16b,v0.16b - add x7,x3,#16 aese v0.16b,v19.16b aesmc v0.16b,v0.16b ld1 {v16.16b},[x0],x8 @@ -335,16 +360,14 @@ aes_v8_cbc_encrypt: eor v16.16b,v16.16b,v5.16b aese v0.16b,v21.16b aesmc v0.16b,v0.16b - ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + ld1 {v17.4s},[x7] // re-pre-load rndkey[1] aese v0.16b,v22.16b aesmc v0.16b,v0.16b aese v0.16b,v23.16b - - mov w6,w5 eor v6.16b,v0.16b,v7.16b - st1 {v6.16b},[x1],#16 b.hs .Loop_cbc_enc + st1 {v6.16b},[x1],#16 b .Lcbc_done .align 5 @@ -402,79 +425,78 @@ aes_v8_cbc_encrypt: .Loop3x_cbc_dec: aesd v0.16b,v16.16b - aesd v1.16b,v16.16b - aesd v18.16b,v16.16b - ld1 {v16.4s},[x7],#16 aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b aesimc v1.16b,v1.16b + aesd v18.16b,v16.16b aesimc v18.16b,v18.16b + ld1 {v16.4s},[x7],#16 subs w6,w6,#2 aesd v0.16b,v17.16b - aesd v1.16b,v17.16b - aesd v18.16b,v17.16b - ld1 {v17.4s},[x7],#16 aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b aesimc v1.16b,v1.16b + aesd v18.16b,v17.16b aesimc v18.16b,v18.16b + ld1 {v17.4s},[x7],#16 b.gt .Loop3x_cbc_dec aesd v0.16b,v16.16b - aesd v1.16b,v16.16b - aesd v18.16b,v16.16b - eor v4.16b,v6.16b,v7.16b aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b aesimc v1.16b,v1.16b + aesd v18.16b,v16.16b aesimc v18.16b,v18.16b + eor v4.16b,v6.16b,v7.16b + subs x2,x2,#0x30 eor v5.16b,v2.16b,v7.16b + csel x6,x2,x6,lo // x6, w6, is zero at this point aesd v0.16b,v17.16b - aesd v1.16b,v17.16b - aesd v18.16b,v17.16b - eor v17.16b,v3.16b,v7.16b - subs x2,x2,#0x30 aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b aesimc v1.16b,v1.16b + aesd v18.16b,v17.16b aesimc v18.16b,v18.16b - orr v6.16b,v19.16b,v19.16b - csel x6,x2,x6,lo // x6, w6, is zero at this point - aesd v0.16b,v20.16b - aesd v1.16b,v20.16b - aesd v18.16b,v20.16b + eor v17.16b,v3.16b,v7.16b add x0,x0,x6 // x0 is adjusted in such way that // at exit from the loop v1.16b-v18.16b // are loaded with last "words" + orr v6.16b,v19.16b,v19.16b + mov x7,x3 + aesd v0.16b,v20.16b aesimc v0.16b,v0.16b + aesd v1.16b,v20.16b aesimc v1.16b,v1.16b + aesd v18.16b,v20.16b aesimc v18.16b,v18.16b - mov x7,x3 - aesd v0.16b,v21.16b - aesd v1.16b,v21.16b - aesd v18.16b,v21.16b ld1 {v2.16b},[x0],#16 + aesd v0.16b,v21.16b aesimc v0.16b,v0.16b + aesd v1.16b,v21.16b aesimc v1.16b,v1.16b + aesd v18.16b,v21.16b aesimc v18.16b,v18.16b ld1 {v3.16b},[x0],#16 aesd v0.16b,v22.16b - aesd v1.16b,v22.16b - aesd v18.16b,v22.16b - ld1 {v19.16b},[x0],#16 aesimc v0.16b,v0.16b + aesd v1.16b,v22.16b aesimc v1.16b,v1.16b + aesd v18.16b,v22.16b aesimc v18.16b,v18.16b - ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + ld1 {v19.16b},[x0],#16 aesd v0.16b,v23.16b aesd v1.16b,v23.16b aesd v18.16b,v23.16b - + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] add w6,w5,#2 eor v4.16b,v4.16b,v0.16b eor v5.16b,v5.16b,v1.16b eor v18.16b,v18.16b,v17.16b ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] - orr v0.16b,v2.16b,v2.16b st1 {v4.16b},[x1],#16 - orr v1.16b,v3.16b,v3.16b + orr v0.16b,v2.16b,v2.16b st1 {v5.16b},[x1],#16 + orr v1.16b,v3.16b,v3.16b st1 {v18.16b},[x1],#16 orr v18.16b,v19.16b,v19.16b b.hs .Loop3x_cbc_dec @@ -485,39 +507,39 @@ aes_v8_cbc_encrypt: .Lcbc_dec_tail: aesd v1.16b,v16.16b - aesd v18.16b,v16.16b - ld1 {v16.4s},[x7],#16 aesimc v1.16b,v1.16b + aesd v18.16b,v16.16b aesimc v18.16b,v18.16b + ld1 {v16.4s},[x7],#16 subs w6,w6,#2 aesd v1.16b,v17.16b - aesd v18.16b,v17.16b - ld1 {v17.4s},[x7],#16 aesimc v1.16b,v1.16b + aesd v18.16b,v17.16b aesimc v18.16b,v18.16b + ld1 {v17.4s},[x7],#16 b.gt .Lcbc_dec_tail aesd v1.16b,v16.16b - aesd v18.16b,v16.16b aesimc v1.16b,v1.16b + aesd v18.16b,v16.16b aesimc v18.16b,v18.16b aesd v1.16b,v17.16b - aesd v18.16b,v17.16b aesimc v1.16b,v1.16b + aesd v18.16b,v17.16b aesimc v18.16b,v18.16b aesd v1.16b,v20.16b - aesd v18.16b,v20.16b aesimc v1.16b,v1.16b + aesd v18.16b,v20.16b aesimc v18.16b,v18.16b cmn x2,#0x20 aesd v1.16b,v21.16b - aesd v18.16b,v21.16b aesimc v1.16b,v1.16b + aesd v18.16b,v21.16b aesimc v18.16b,v18.16b eor v5.16b,v6.16b,v7.16b aesd v1.16b,v22.16b - aesd v18.16b,v22.16b aesimc v1.16b,v1.16b + aesd v18.16b,v22.16b aesimc v18.16b,v18.16b eor v17.16b,v3.16b,v7.16b aesd v1.16b,v23.16b @@ -583,70 +605,69 @@ aes_v8_ctr32_encrypt_blocks: .align 4 .Loop3x_ctr32: aese v0.16b,v16.16b - aese v1.16b,v16.16b - aese v18.16b,v16.16b - ld1 {v16.4s},[x7],#16 aesmc v0.16b,v0.16b + aese v1.16b,v16.16b aesmc v1.16b,v1.16b + aese v18.16b,v16.16b aesmc v18.16b,v18.16b + ld1 {v16.4s},[x7],#16 subs w6,w6,#2 aese v0.16b,v17.16b - aese v1.16b,v17.16b - aese v18.16b,v17.16b - ld1 {v17.4s},[x7],#16 aesmc v0.16b,v0.16b + aese v1.16b,v17.16b aesmc v1.16b,v1.16b + aese v18.16b,v17.16b aesmc v18.16b,v18.16b + ld1 {v17.4s},[x7],#16 b.gt .Loop3x_ctr32 aese v0.16b,v16.16b - aese v1.16b,v16.16b - aese v18.16b,v16.16b - mov x7,x3 aesmc v4.16b,v0.16b - ld1 {v2.16b},[x0],#16 + aese v1.16b,v16.16b aesmc v5.16b,v1.16b - aesmc v18.16b,v18.16b + ld1 {v2.16b},[x0],#16 orr v0.16b,v6.16b,v6.16b - aese v4.16b,v17.16b + aese v18.16b,v16.16b + aesmc v18.16b,v18.16b ld1 {v3.16b},[x0],#16 - aese v5.16b,v17.16b - aese v18.16b,v17.16b orr v1.16b,v6.16b,v6.16b + aese v4.16b,v17.16b aesmc v4.16b,v4.16b - ld1 {v19.16b},[x0],#16 + aese v5.16b,v17.16b aesmc v5.16b,v5.16b + ld1 {v19.16b},[x0],#16 + mov x7,x3 + aese v18.16b,v17.16b aesmc v17.16b,v18.16b orr v18.16b,v6.16b,v6.16b add w9,w8,#1 aese v4.16b,v20.16b + aesmc v4.16b,v4.16b aese v5.16b,v20.16b - aese v17.16b,v20.16b + aesmc v5.16b,v5.16b eor v2.16b,v2.16b,v7.16b add w10,w8,#2 - aesmc v4.16b,v4.16b - aesmc v5.16b,v5.16b + aese v17.16b,v20.16b aesmc v17.16b,v17.16b eor v3.16b,v3.16b,v7.16b add w8,w8,#3 aese v4.16b,v21.16b + aesmc v4.16b,v4.16b aese v5.16b,v21.16b - aese v17.16b,v21.16b + aesmc v5.16b,v5.16b eor v19.16b,v19.16b,v7.16b rev w9,w9 - aesmc v4.16b,v4.16b - ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] - aesmc v5.16b,v5.16b + aese v17.16b,v21.16b aesmc v17.16b,v17.16b mov v0.s[3], w9 rev w10,w10 aese v4.16b,v22.16b + aesmc v4.16b,v4.16b aese v5.16b,v22.16b - aese v17.16b,v22.16b + aesmc v5.16b,v5.16b mov v1.s[3], w10 rev w12,w8 - aesmc v4.16b,v4.16b - aesmc v5.16b,v5.16b + aese v17.16b,v22.16b aesmc v17.16b,v17.16b mov v18.s[3], w12 subs x2,x2,#3 @@ -654,13 +675,14 @@ aes_v8_ctr32_encrypt_blocks: aese v5.16b,v23.16b aese v17.16b,v23.16b - mov w6,w5 eor v2.16b,v2.16b,v4.16b + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + st1 {v2.16b},[x1],#16 eor v3.16b,v3.16b,v5.16b + mov w6,w5 + st1 {v3.16b},[x1],#16 eor v19.16b,v19.16b,v17.16b ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] - st1 {v2.16b},[x1],#16 - st1 {v3.16b},[x1],#16 st1 {v19.16b},[x1],#16 b.hs .Loop3x_ctr32 @@ -672,40 +694,40 @@ aes_v8_ctr32_encrypt_blocks: .Lctr32_tail: aese v0.16b,v16.16b - aese v1.16b,v16.16b - ld1 {v16.4s},[x7],#16 aesmc v0.16b,v0.16b + aese v1.16b,v16.16b aesmc v1.16b,v1.16b + ld1 {v16.4s},[x7],#16 subs w6,w6,#2 aese v0.16b,v17.16b - aese v1.16b,v17.16b - ld1 {v17.4s},[x7],#16 aesmc v0.16b,v0.16b + aese v1.16b,v17.16b aesmc v1.16b,v1.16b + ld1 {v17.4s},[x7],#16 b.gt .Lctr32_tail aese v0.16b,v16.16b - aese v1.16b,v16.16b aesmc v0.16b,v0.16b + aese v1.16b,v16.16b aesmc v1.16b,v1.16b aese v0.16b,v17.16b - aese v1.16b,v17.16b aesmc v0.16b,v0.16b + aese v1.16b,v17.16b aesmc v1.16b,v1.16b ld1 {v2.16b},[x0],x12 aese v0.16b,v20.16b - aese v1.16b,v20.16b - ld1 {v3.16b},[x0] aesmc v0.16b,v0.16b + aese v1.16b,v20.16b aesmc v1.16b,v1.16b + ld1 {v3.16b},[x0] aese v0.16b,v21.16b - aese v1.16b,v21.16b aesmc v0.16b,v0.16b + aese v1.16b,v21.16b aesmc v1.16b,v1.16b - aese v0.16b,v22.16b - aese v1.16b,v22.16b eor v2.16b,v2.16b,v7.16b + aese v0.16b,v22.16b aesmc v0.16b,v0.16b + aese v1.16b,v22.16b aesmc v1.16b,v1.16b eor v3.16b,v3.16b,v7.16b aese v0.16b,v23.16b diff --git a/deps/openssl/asm/arm64-linux64-gas/modes/ghashv8-armx.S b/deps/openssl/asm/arm64-linux64-gas/modes/ghashv8-armx.S index 1bfb26340a6e9e..479007dc54dfab 100644 --- a/deps/openssl/asm/arm64-linux64-gas/modes/ghashv8-armx.S +++ b/deps/openssl/asm/arm64-linux64-gas/modes/ghashv8-armx.S @@ -6,103 +6,215 @@ .type gcm_init_v8,%function .align 4 gcm_init_v8: - ld1 {v17.2d},[x1] //load H - movi v16.16b,#0xe1 + ld1 {v17.2d},[x1] //load input H + movi v19.16b,#0xe1 + shl v19.2d,v19.2d,#57 //0xc2.0 ext v3.16b,v17.16b,v17.16b,#8 - shl v16.2d,v16.2d,#57 - ushr v18.2d,v16.2d,#63 - ext v16.16b,v18.16b,v16.16b,#8 //t0=0xc2....01 + ushr v18.2d,v19.2d,#63 dup v17.4s,v17.s[1] - ushr v19.2d,v3.2d,#63 + ext v16.16b,v18.16b,v19.16b,#8 //t0=0xc2....01 + ushr v18.2d,v3.2d,#63 sshr v17.4s,v17.4s,#31 //broadcast carry bit - and v19.16b,v19.16b,v16.16b + and v18.16b,v18.16b,v16.16b shl v3.2d,v3.2d,#1 - ext v19.16b,v19.16b,v19.16b,#8 + ext v18.16b,v18.16b,v18.16b,#8 and v16.16b,v16.16b,v17.16b - orr v3.16b,v3.16b,v19.16b //H<<<=1 - eor v3.16b,v3.16b,v16.16b //twisted H - st1 {v3.2d},[x0] + orr v3.16b,v3.16b,v18.16b //H<<<=1 + eor v20.16b,v3.16b,v16.16b //twisted H + st1 {v20.2d},[x0],#16 //store Htable[0] + + //calculate H^2 + ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing + pmull v0.1q,v20.1d,v20.1d + eor v16.16b,v16.16b,v20.16b + pmull2 v2.1q,v20.2d,v20.2d + pmull v1.1q,v16.1d,v16.1d + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase + + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + eor v0.16b,v1.16b,v18.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v22.16b,v0.16b,v18.16b + + ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing + eor v17.16b,v17.16b,v22.16b + ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed + st1 {v21.2d-v22.2d},[x0] //store Htable[1..2] ret .size gcm_init_v8,.-gcm_init_v8 - .global gcm_gmult_v8 .type gcm_gmult_v8,%function .align 4 gcm_gmult_v8: ld1 {v17.2d},[x0] //load Xi movi v19.16b,#0xe1 - ld1 {v20.2d},[x1] //load twisted H + ld1 {v20.2d-v21.2d},[x1] //load twisted H, ... shl v19.2d,v19.2d,#57 #ifndef __ARMEB__ rev64 v17.16b,v17.16b #endif - ext v21.16b,v20.16b,v20.16b,#8 - mov x3,#0 ext v3.16b,v17.16b,v17.16b,#8 - mov x12,#0 - eor v21.16b,v21.16b,v20.16b //Karatsuba pre-processing - mov x2,x0 - b .Lgmult_v8 -.size gcm_gmult_v8,.-gcm_gmult_v8 + pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo + eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing + pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi + pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi) + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + eor v0.16b,v1.16b,v18.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v0.16b,v0.16b,v18.16b + +#ifndef __ARMEB__ + rev64 v0.16b,v0.16b +#endif + ext v0.16b,v0.16b,v0.16b,#8 + st1 {v0.2d},[x0] //write out Xi + + ret +.size gcm_gmult_v8,.-gcm_gmult_v8 .global gcm_ghash_v8 .type gcm_ghash_v8,%function .align 4 gcm_ghash_v8: ld1 {v0.2d},[x0] //load [rotated] Xi - subs x3,x3,#16 + //"[rotated]" means that + //loaded value would have + //to be rotated in order to + //make it appear as in + //alorithm specification + subs x3,x3,#32 //see if x3 is 32 or larger + mov x12,#16 //x12 is used as post- + //increment for input pointer; + //as loop is modulo-scheduled + //x12 is zeroed just in time + //to preclude oversteping + //inp[len], which means that + //last block[s] are actually + //loaded twice, but last + //copy is not processed + ld1 {v20.2d-v21.2d},[x1],#32 //load twisted H, ..., H^2 movi v19.16b,#0xe1 - mov x12,#16 - ld1 {v20.2d},[x1] //load twisted H - csel x12,xzr,x12,eq - ext v0.16b,v0.16b,v0.16b,#8 - shl v19.2d,v19.2d,#57 - ld1 {v17.2d},[x2],x12 //load [rotated] inp - ext v21.16b,v20.16b,v20.16b,#8 + ld1 {v22.2d},[x1] + csel x12,xzr,x12,eq //is it time to zero x12? + ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi + ld1 {v16.2d},[x2],#16 //load [rotated] I[0] + shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant #ifndef __ARMEB__ + rev64 v16.16b,v16.16b rev64 v0.16b,v0.16b +#endif + ext v3.16b,v16.16b,v16.16b,#8 //rotate I[0] + b.lo .Lodd_tail_v8 //x3 was less than 32 + ld1 {v17.2d},[x2],x12 //load [rotated] I[1] +#ifndef __ARMEB__ rev64 v17.16b,v17.16b #endif - eor v21.16b,v21.16b,v20.16b //Karatsuba pre-processing - ext v3.16b,v17.16b,v17.16b,#8 - b .Loop_v8 + ext v7.16b,v17.16b,v17.16b,#8 + eor v3.16b,v3.16b,v0.16b //I[i]^=Xi + pmull v4.1q,v20.1d,v7.1d //H·Ii+1 + eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing + pmull2 v6.1q,v20.2d,v7.2d + b .Loop_mod2x_v8 .align 4 -.Loop_v8: +.Loop_mod2x_v8: + ext v18.16b,v3.16b,v3.16b,#8 + subs x3,x3,#32 //is there more data? + pmull v0.1q,v22.1d,v3.1d //H^2.lo·Xi.lo + csel x12,xzr,x12,lo //is it time to zero x12? + + pmull v5.1q,v21.1d,v17.1d + eor v18.16b,v18.16b,v3.16b //Karatsuba pre-processing + pmull2 v2.1q,v22.2d,v3.2d //H^2.hi·Xi.hi + eor v0.16b,v0.16b,v4.16b //accumulate + pmull2 v1.1q,v21.2d,v18.2d //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi) + ld1 {v16.2d},[x2],x12 //load [rotated] I[i+2] + + eor v2.16b,v2.16b,v6.16b + csel x12,xzr,x12,eq //is it time to zero x12? + eor v1.16b,v1.16b,v5.16b + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + ld1 {v17.2d},[x2],x12 //load [rotated] I[i+3] +#ifndef __ARMEB__ + rev64 v16.16b,v16.16b +#endif + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + +#ifndef __ARMEB__ + rev64 v17.16b,v17.16b +#endif + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + ext v7.16b,v17.16b,v17.16b,#8 + ext v3.16b,v16.16b,v16.16b,#8 + eor v0.16b,v1.16b,v18.16b + pmull v4.1q,v20.1d,v7.1d //H·Ii+1 + eor v3.16b,v3.16b,v2.16b //accumulate v3.16b early + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v3.16b,v3.16b,v18.16b + eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing + eor v3.16b,v3.16b,v0.16b + pmull2 v6.1q,v20.2d,v7.2d + b.hs .Loop_mod2x_v8 //there was at least 32 more bytes + + eor v2.16b,v2.16b,v18.16b + ext v3.16b,v16.16b,v16.16b,#8 //re-construct v3.16b + adds x3,x3,#32 //re-construct x3 + eor v0.16b,v0.16b,v2.16b //re-construct v0.16b + b.eq .Ldone_v8 //is x3 zero? +.Lodd_tail_v8: ext v18.16b,v0.16b,v0.16b,#8 eor v3.16b,v3.16b,v0.16b //inp^=Xi - eor v17.16b,v17.16b,v18.16b //v17.16b is rotated inp^Xi + eor v17.16b,v16.16b,v18.16b //v17.16b is rotated inp^Xi -.Lgmult_v8: pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi - subs x3,x3,#16 pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi) - csel x12,xzr,x12,eq ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing eor v18.16b,v0.16b,v2.16b eor v1.16b,v1.16b,v17.16b - ld1 {v17.2d},[x2],x12 //load [rotated] inp eor v1.16b,v1.16b,v18.16b - pmull v18.1q,v0.1d,v19.1d //1st phase + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction ins v2.d[0],v1.d[1] ins v1.d[1],v0.d[0] -#ifndef __ARMEB__ - rev64 v17.16b,v17.16b -#endif eor v0.16b,v1.16b,v18.16b - ext v3.16b,v17.16b,v17.16b,#8 - ext v18.16b,v0.16b,v0.16b,#8 //2nd phase + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction pmull v0.1q,v0.1d,v19.1d eor v18.16b,v18.16b,v2.16b eor v0.16b,v0.16b,v18.16b - b.hs .Loop_v8 +.Ldone_v8: #ifndef __ARMEB__ rev64 v0.16b,v0.16b #endif diff --git a/deps/openssl/asm/x64-elf-gas/aes/aesni-x86_64.s b/deps/openssl/asm/x64-elf-gas/aes/aesni-x86_64.s index 84708afbbb352f..6573fe4be3494d 100644 --- a/deps/openssl/asm/x64-elf-gas/aes/aesni-x86_64.s +++ b/deps/openssl/asm/x64-elf-gas/aes/aesni-x86_64.s @@ -17,7 +17,10 @@ aesni_encrypt: leaq 16(%rdx),%rdx jnz .Loop_enc1_1 .byte 102,15,56,221,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 .byte 0xf3,0xc3 .size aesni_encrypt,.-aesni_encrypt @@ -38,7 +41,10 @@ aesni_decrypt: leaq 16(%rdx),%rdx jnz .Loop_dec1_2 .byte 102,15,56,223,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 .byte 0xf3,0xc3 .size aesni_decrypt, .-aesni_decrypt .type _aesni_encrypt2,@function @@ -264,21 +270,18 @@ _aesni_encrypt6: pxor %xmm0,%xmm6 .byte 102,15,56,220,225 pxor %xmm0,%xmm7 + movups (%rcx,%rax,1),%xmm0 addq $16,%rax -.byte 102,15,56,220,233 -.byte 102,15,56,220,241 -.byte 102,15,56,220,249 - movups -16(%rcx,%rax,1),%xmm0 jmp .Lenc_loop6_enter .align 16 .Lenc_loop6: .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 +.Lenc_loop6_enter: .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 -.Lenc_loop6_enter: movups (%rcx,%rax,1),%xmm1 addq $32,%rax .byte 102,15,56,220,208 @@ -321,21 +324,18 @@ _aesni_decrypt6: pxor %xmm0,%xmm6 .byte 102,15,56,222,225 pxor %xmm0,%xmm7 + movups (%rcx,%rax,1),%xmm0 addq $16,%rax -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 - movups -16(%rcx,%rax,1),%xmm0 jmp .Ldec_loop6_enter .align 16 .Ldec_loop6: .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 +.Ldec_loop6_enter: .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 -.Ldec_loop6_enter: movups (%rcx,%rax,1),%xmm1 addq $32,%rax .byte 102,15,56,222,208 @@ -375,23 +375,18 @@ _aesni_encrypt8: leaq 32(%rcx,%rax,1),%rcx negq %rax .byte 102,15,56,220,209 - addq $16,%rax pxor %xmm0,%xmm7 -.byte 102,15,56,220,217 pxor %xmm0,%xmm8 +.byte 102,15,56,220,217 pxor %xmm0,%xmm9 -.byte 102,15,56,220,225 -.byte 102,15,56,220,233 -.byte 102,15,56,220,241 -.byte 102,15,56,220,249 -.byte 102,68,15,56,220,193 -.byte 102,68,15,56,220,201 - movups -16(%rcx,%rax,1),%xmm0 - jmp .Lenc_loop8_enter + movups (%rcx,%rax,1),%xmm0 + addq $16,%rax + jmp .Lenc_loop8_inner .align 16 .Lenc_loop8: .byte 102,15,56,220,209 .byte 102,15,56,220,217 +.Lenc_loop8_inner: .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 @@ -444,23 +439,18 @@ _aesni_decrypt8: leaq 32(%rcx,%rax,1),%rcx negq %rax .byte 102,15,56,222,209 - addq $16,%rax pxor %xmm0,%xmm7 -.byte 102,15,56,222,217 pxor %xmm0,%xmm8 +.byte 102,15,56,222,217 pxor %xmm0,%xmm9 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 -.byte 102,68,15,56,222,193 -.byte 102,68,15,56,222,201 - movups -16(%rcx,%rax,1),%xmm0 - jmp .Ldec_loop8_enter + movups (%rcx,%rax,1),%xmm0 + addq $16,%rax + jmp .Ldec_loop8_inner .align 16 .Ldec_loop8: .byte 102,15,56,222,209 .byte 102,15,56,222,217 +.Ldec_loop8_inner: .byte 102,15,56,222,225 .byte 102,15,56,222,233 .byte 102,15,56,222,241 @@ -587,6 +577,7 @@ aesni_ecb_encrypt: movups 80(%rdi),%xmm7 je .Lecb_enc_six movdqu 96(%rdi),%xmm8 + xorps %xmm9,%xmm9 call _aesni_encrypt8 movups %xmm2,(%rsi) movups %xmm3,16(%rsi) @@ -700,15 +691,23 @@ aesni_ecb_encrypt: jnc .Lecb_dec_loop8 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 movq %r11,%rcx movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 movl %r10d,%eax movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 movups %xmm7,80(%rsi) + pxor %xmm7,%xmm7 movups %xmm8,96(%rsi) + pxor %xmm8,%xmm8 movups %xmm9,112(%rsi) + pxor %xmm9,%xmm9 leaq 128(%rsi),%rsi addq $128,%rdx jz .Lecb_ret @@ -731,14 +730,23 @@ aesni_ecb_encrypt: je .Lecb_dec_six movups 96(%rdi),%xmm8 movups (%rcx),%xmm0 + xorps %xmm9,%xmm9 call _aesni_decrypt8 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 movups %xmm7,80(%rsi) + pxor %xmm7,%xmm7 movups %xmm8,96(%rsi) + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 jmp .Lecb_ret .align 16 .Lecb_dec_one: @@ -754,49 +762,73 @@ aesni_ecb_encrypt: jnz .Loop_dec1_4 .byte 102,15,56,223,209 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 jmp .Lecb_ret .align 16 .Lecb_dec_two: call _aesni_decrypt2 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 jmp .Lecb_ret .align 16 .Lecb_dec_three: call _aesni_decrypt3 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 jmp .Lecb_ret .align 16 .Lecb_dec_four: call _aesni_decrypt4 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 jmp .Lecb_ret .align 16 .Lecb_dec_five: xorps %xmm7,%xmm7 call _aesni_decrypt6 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 jmp .Lecb_ret .align 16 .Lecb_dec_six: call _aesni_decrypt6 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 movups %xmm7,80(%rsi) + pxor %xmm7,%xmm7 .Lecb_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 .byte 0xf3,0xc3 .size aesni_ecb_encrypt,.-aesni_ecb_encrypt .globl aesni_ccm64_encrypt_blocks @@ -853,7 +885,13 @@ aesni_ccm64_encrypt_blocks: leaq 16(%rsi),%rsi jnz .Lccm64_enc_outer + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 movups %xmm3,(%r9) + pxor %xmm3,%xmm3 + pxor %xmm8,%xmm8 + pxor %xmm6,%xmm6 .byte 0xf3,0xc3 .size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks .globl aesni_ccm64_decrypt_blocks @@ -944,21 +982,56 @@ aesni_ccm64_decrypt_blocks: leaq 16(%r11),%r11 jnz .Loop_enc1_6 .byte 102,15,56,221,217 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 movups %xmm3,(%r9) + pxor %xmm3,%xmm3 + pxor %xmm8,%xmm8 + pxor %xmm6,%xmm6 .byte 0xf3,0xc3 .size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks .globl aesni_ctr32_encrypt_blocks .type aesni_ctr32_encrypt_blocks,@function .align 16 aesni_ctr32_encrypt_blocks: + cmpq $1,%rdx + jne .Lctr32_bulk + + + + movups (%r8),%xmm2 + movups (%rdi),%xmm3 + movl 240(%rcx),%edx + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_7: +.byte 102,15,56,220,209 + decl %edx + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_7 +.byte 102,15,56,221,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + xorps %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movups %xmm2,(%rsi) + xorps %xmm2,%xmm2 + jmp .Lctr32_epilogue + +.align 16 +.Lctr32_bulk: leaq (%rsp),%rax pushq %rbp subq $128,%rsp andq $-16,%rsp leaq -8(%rax),%rbp - cmpq $1,%rdx - je .Lctr32_one_shortcut + + movdqu (%r8),%xmm2 movdqu (%rcx),%xmm0 @@ -1349,11 +1422,14 @@ aesni_ctr32_encrypt_blocks: leaq -128(%rcx),%rcx .Lctr32_tail: + + leaq 16(%rcx),%rcx cmpq $4,%rdx jb .Lctr32_loop3 je .Lctr32_loop4 + shll $4,%eax movdqa 96(%rsp),%xmm8 pxor %xmm9,%xmm9 @@ -1456,30 +1532,33 @@ aesni_ctr32_encrypt_blocks: movups 32(%rdi),%xmm12 xorps %xmm12,%xmm4 movups %xmm4,32(%rsi) - jmp .Lctr32_done -.align 16 -.Lctr32_one_shortcut: - movups (%r8),%xmm2 - movups (%rdi),%xmm10 - movl 240(%rcx),%eax - movups (%rcx),%xmm0 - movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx - xorps %xmm0,%xmm2 -.Loop_enc1_7: -.byte 102,15,56,220,209 - decl %eax - movups (%rcx),%xmm1 - leaq 16(%rcx),%rcx - jnz .Loop_enc1_7 -.byte 102,15,56,221,209 - xorps %xmm10,%xmm2 - movups %xmm2,(%rsi) - jmp .Lctr32_done - -.align 16 .Lctr32_done: + xorps %xmm0,%xmm0 + xorl %r11d,%r11d + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + movaps %xmm0,0(%rsp) + pxor %xmm8,%xmm8 + movaps %xmm0,16(%rsp) + pxor %xmm9,%xmm9 + movaps %xmm0,32(%rsp) + pxor %xmm10,%xmm10 + movaps %xmm0,48(%rsp) + pxor %xmm11,%xmm11 + movaps %xmm0,64(%rsp) + pxor %xmm12,%xmm12 + movaps %xmm0,80(%rsp) + pxor %xmm13,%xmm13 + movaps %xmm0,96(%rsp) + pxor %xmm14,%xmm14 + movaps %xmm0,112(%rsp) + pxor %xmm15,%xmm15 leaq (%rbp),%rsp popq %rbp .Lctr32_epilogue: @@ -1750,6 +1829,7 @@ aesni_xts_encrypt: shrl $4,%eax .Lxts_enc_short: + movl %eax,%r10d pxor %xmm0,%xmm10 addq $96,%rdx @@ -1778,6 +1858,7 @@ aesni_xts_encrypt: pxor %xmm12,%xmm4 pxor %xmm13,%xmm5 pxor %xmm14,%xmm6 + pxor %xmm7,%xmm7 call _aesni_encrypt6 @@ -1920,6 +2001,29 @@ aesni_xts_encrypt: movups %xmm2,-16(%rsi) .Lxts_enc_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + movaps %xmm0,0(%rsp) + pxor %xmm8,%xmm8 + movaps %xmm0,16(%rsp) + pxor %xmm9,%xmm9 + movaps %xmm0,32(%rsp) + pxor %xmm10,%xmm10 + movaps %xmm0,48(%rsp) + pxor %xmm11,%xmm11 + movaps %xmm0,64(%rsp) + pxor %xmm12,%xmm12 + movaps %xmm0,80(%rsp) + pxor %xmm13,%xmm13 + movaps %xmm0,96(%rsp) + pxor %xmm14,%xmm14 + pxor %xmm15,%xmm15 leaq (%rbp),%rsp popq %rbp .Lxts_enc_epilogue: @@ -2196,6 +2300,7 @@ aesni_xts_decrypt: shrl $4,%eax .Lxts_dec_short: + movl %eax,%r10d pxor %xmm0,%xmm10 pxor %xmm0,%xmm11 @@ -2398,6 +2503,29 @@ aesni_xts_decrypt: movups %xmm2,(%rsi) .Lxts_dec_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + movaps %xmm0,0(%rsp) + pxor %xmm8,%xmm8 + movaps %xmm0,16(%rsp) + pxor %xmm9,%xmm9 + movaps %xmm0,32(%rsp) + pxor %xmm10,%xmm10 + movaps %xmm0,48(%rsp) + pxor %xmm11,%xmm11 + movaps %xmm0,64(%rsp) + pxor %xmm12,%xmm12 + movaps %xmm0,80(%rsp) + pxor %xmm13,%xmm13 + movaps %xmm0,96(%rsp) + pxor %xmm14,%xmm14 + pxor %xmm15,%xmm15 leaq (%rbp),%rsp popq %rbp .Lxts_dec_epilogue: @@ -2446,7 +2574,11 @@ aesni_cbc_encrypt: jnc .Lcbc_enc_loop addq $16,%rdx jnz .Lcbc_enc_tail + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 movups %xmm2,(%r8) + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 jmp .Lcbc_ret .Lcbc_enc_tail: @@ -2466,6 +2598,35 @@ aesni_cbc_encrypt: .align 16 .Lcbc_decrypt: + cmpq $16,%rdx + jne .Lcbc_decrypt_bulk + + + + movdqu (%rdi),%xmm2 + movdqu (%r8),%xmm3 + movdqa %xmm2,%xmm4 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_16: +.byte 102,15,56,222,209 + decl %r10d + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_16 +.byte 102,15,56,223,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movdqu %xmm4,(%r8) + xorps %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + jmp .Lcbc_ret +.align 16 +.Lcbc_decrypt_bulk: leaq (%rsp),%rax pushq %rbp subq $16,%rsp @@ -2702,7 +2863,7 @@ aesni_cbc_encrypt: movaps %xmm9,%xmm2 leaq -112(%rcx),%rcx addq $112,%rdx - jle .Lcbc_dec_tail_collected + jle .Lcbc_dec_clear_tail_collected movups %xmm9,(%rsi) leaq 16(%rsi),%rsi cmpq $80,%rdx @@ -2721,14 +2882,19 @@ aesni_cbc_encrypt: movdqu %xmm2,(%rsi) pxor %xmm12,%xmm4 movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 pxor %xmm13,%xmm5 movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 pxor %xmm14,%xmm6 movdqu %xmm5,48(%rsi) + pxor %xmm5,%xmm5 pxor %xmm15,%xmm7 movdqu %xmm6,64(%rsi) + pxor %xmm6,%xmm6 leaq 80(%rsi),%rsi movdqa %xmm7,%xmm2 + pxor %xmm7,%xmm7 jmp .Lcbc_dec_tail_collected .align 16 @@ -2743,16 +2909,23 @@ aesni_cbc_encrypt: movdqu %xmm2,(%rsi) pxor %xmm12,%xmm4 movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 pxor %xmm13,%xmm5 movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 pxor %xmm14,%xmm6 movdqu %xmm5,48(%rsi) + pxor %xmm5,%xmm5 pxor %xmm15,%xmm7 movdqu %xmm6,64(%rsi) + pxor %xmm6,%xmm6 pxor %xmm9,%xmm8 movdqu %xmm7,80(%rsi) + pxor %xmm7,%xmm7 leaq 96(%rsi),%rsi movdqa %xmm8,%xmm2 + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 jmp .Lcbc_dec_tail_collected .align 16 @@ -2796,7 +2969,7 @@ aesni_cbc_encrypt: movdqa %xmm7,%xmm2 addq $80,%rdx - jle .Lcbc_dec_tail_collected + jle .Lcbc_dec_clear_tail_collected movups %xmm7,(%rsi) leaq 16(%rsi),%rsi @@ -2831,12 +3004,17 @@ aesni_cbc_encrypt: movdqu %xmm2,(%rsi) pxor %xmm12,%xmm4 movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 pxor %xmm13,%xmm5 movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 pxor %xmm14,%xmm6 movdqu %xmm5,48(%rsi) + pxor %xmm5,%xmm5 leaq 64(%rsi),%rsi movdqa %xmm6,%xmm2 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 subq $16,%rdx jmp .Lcbc_dec_tail_collected @@ -2847,12 +3025,12 @@ aesni_cbc_encrypt: movups 16(%rcx),%xmm1 leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 -.Loop_dec1_16: +.Loop_dec1_17: .byte 102,15,56,222,209 decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz .Loop_dec1_16 + jnz .Loop_dec1_17 .byte 102,15,56,223,209 xorps %xmm10,%xmm2 movaps %xmm11,%xmm10 @@ -2866,6 +3044,7 @@ aesni_cbc_encrypt: pxor %xmm11,%xmm3 movdqu %xmm2,(%rsi) movdqa %xmm3,%xmm2 + pxor %xmm3,%xmm3 leaq 16(%rsi),%rsi jmp .Lcbc_dec_tail_collected .align 16 @@ -2878,7 +3057,9 @@ aesni_cbc_encrypt: movdqu %xmm2,(%rsi) pxor %xmm12,%xmm4 movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 movdqa %xmm4,%xmm2 + pxor %xmm4,%xmm4 leaq 32(%rsi),%rsi jmp .Lcbc_dec_tail_collected .align 16 @@ -2891,29 +3072,45 @@ aesni_cbc_encrypt: movdqu %xmm2,(%rsi) pxor %xmm12,%xmm4 movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 pxor %xmm13,%xmm5 movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 movdqa %xmm5,%xmm2 + pxor %xmm5,%xmm5 leaq 48(%rsi),%rsi jmp .Lcbc_dec_tail_collected .align 16 +.Lcbc_dec_clear_tail_collected: + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 .Lcbc_dec_tail_collected: movups %xmm10,(%r8) andq $15,%rdx jnz .Lcbc_dec_tail_partial movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 jmp .Lcbc_dec_ret .align 16 .Lcbc_dec_tail_partial: movaps %xmm2,(%rsp) + pxor %xmm2,%xmm2 movq $16,%rcx movq %rsi,%rdi subq %rdx,%rcx leaq (%rsp),%rsi .long 0x9066A4F3 + movdqa %xmm2,(%rsp) .Lcbc_dec_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 leaq (%rbp),%rsp popq %rbp .Lcbc_ret: @@ -2951,7 +3148,9 @@ aesni_set_decrypt_key: movups (%rdx),%xmm0 .byte 102,15,56,219,192 + pxor %xmm1,%xmm1 movups %xmm0,(%rdi) + pxor %xmm0,%xmm0 .Ldec_key_ret: addq $8,%rsp .byte 0xf3,0xc3 @@ -2969,8 +3168,10 @@ __aesni_set_encrypt_key: testq %rdx,%rdx jz .Lenc_key_ret + movl $268437504,%r10d movups (%rdi),%xmm0 xorps %xmm4,%xmm4 + andl OPENSSL_ia32cap_P+4(%rip),%r10d leaq 16(%rdx),%rax cmpl $256,%esi je .L14rounds @@ -2981,6 +3182,9 @@ __aesni_set_encrypt_key: .L10rounds: movl $9,%esi + cmpl $268435456,%r10d + je .L10rounds_alt + movups %xmm0,(%rdx) .byte 102,15,58,223,200,1 call .Lkey_expansion_128_cold @@ -3007,10 +3211,80 @@ __aesni_set_encrypt_key: xorl %eax,%eax jmp .Lenc_key_ret +.align 16 +.L10rounds_alt: + movdqa .Lkey_rotate(%rip),%xmm5 + movl $8,%r10d + movdqa .Lkey_rcon1(%rip),%xmm4 + movdqa %xmm0,%xmm2 + movdqu %xmm0,(%rdx) + jmp .Loop_key128 + +.align 16 +.Loop_key128: +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + leaq 16(%rax),%rax + + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + + pxor %xmm2,%xmm0 + movdqu %xmm0,-16(%rax) + movdqa %xmm0,%xmm2 + + decl %r10d + jnz .Loop_key128 + + movdqa .Lkey_rcon1b(%rip),%xmm4 + +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + + pxor %xmm2,%xmm0 + movdqu %xmm0,(%rax) + + movdqa %xmm0,%xmm2 +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + + pxor %xmm2,%xmm0 + movdqu %xmm0,16(%rax) + + movl %esi,96(%rax) + xorl %eax,%eax + jmp .Lenc_key_ret + .align 16 .L12rounds: movq 16(%rdi),%xmm2 movl $11,%esi + cmpl $268435456,%r10d + je .L12rounds_alt + movups %xmm0,(%rdx) .byte 102,15,58,223,202,1 call .Lkey_expansion_192a_cold @@ -3033,11 +3307,55 @@ __aesni_set_encrypt_key: xorq %rax,%rax jmp .Lenc_key_ret +.align 16 +.L12rounds_alt: + movdqa .Lkey_rotate192(%rip),%xmm5 + movdqa .Lkey_rcon1(%rip),%xmm4 + movl $8,%r10d + movdqu %xmm0,(%rdx) + jmp .Loop_key192 + +.align 16 +.Loop_key192: + movq %xmm2,0(%rax) + movdqa %xmm2,%xmm1 +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + pslld $1,%xmm4 + leaq 24(%rax),%rax + + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + + pshufd $255,%xmm0,%xmm3 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + + pxor %xmm2,%xmm0 + pxor %xmm3,%xmm2 + movdqu %xmm0,-16(%rax) + + decl %r10d + jnz .Loop_key192 + + movl %esi,32(%rax) + xorl %eax,%eax + jmp .Lenc_key_ret + .align 16 .L14rounds: movups 16(%rdi),%xmm2 movl $13,%esi leaq 16(%rax),%rax + cmpl $268435456,%r10d + je .L14rounds_alt + movups %xmm0,(%rdx) movups %xmm2,16(%rdx) .byte 102,15,58,223,202,1 @@ -3071,10 +3389,70 @@ __aesni_set_encrypt_key: xorq %rax,%rax jmp .Lenc_key_ret +.align 16 +.L14rounds_alt: + movdqa .Lkey_rotate(%rip),%xmm5 + movdqa .Lkey_rcon1(%rip),%xmm4 + movl $7,%r10d + movdqu %xmm0,0(%rdx) + movdqa %xmm2,%xmm1 + movdqu %xmm2,16(%rdx) + jmp .Loop_key256 + +.align 16 +.Loop_key256: +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + pslld $1,%xmm4 + + pxor %xmm2,%xmm0 + movdqu %xmm0,(%rax) + + decl %r10d + jz .Ldone_key256 + + pshufd $255,%xmm0,%xmm2 + pxor %xmm3,%xmm3 +.byte 102,15,56,221,211 + + movdqa %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm3,%xmm1 + + pxor %xmm1,%xmm2 + movdqu %xmm2,16(%rax) + leaq 32(%rax),%rax + movdqa %xmm2,%xmm1 + + jmp .Loop_key256 + +.Ldone_key256: + movl %esi,16(%rax) + xorl %eax,%eax + jmp .Lenc_key_ret + .align 16 .Lbad_keybits: movq $-2,%rax .Lenc_key_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 addq $8,%rsp .byte 0xf3,0xc3 .LSEH_end_set_encrypt_key: @@ -3160,6 +3538,14 @@ __aesni_set_encrypt_key: .long 0x87,0,1,0 .Lincrement1: .byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +.Lkey_rotate: +.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d +.Lkey_rotate192: +.long 0x04070605,0x04070605,0x04070605,0x04070605 +.Lkey_rcon1: +.long 1,1,1,1 +.Lkey_rcon1b: +.long 0x1b,0x1b,0x1b,0x1b .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 64 diff --git a/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s b/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s index 84dd72075d3007..db3fe399abd495 100644 --- a/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s +++ b/deps/openssl/asm/x64-elf-gas/bn/x86_64-mont5.s @@ -2884,11 +2884,16 @@ sqrx8x_reduction: .type bn_get_bits5,@function .align 16 bn_get_bits5: - movq %rdi,%r10 + leaq 0(%rdi),%r10 + leaq 1(%rdi),%r11 movl %esi,%ecx - shrl $3,%esi - movzwl (%r10,%rsi,1),%eax - andl $7,%ecx + shrl $4,%esi + andl $15,%ecx + leal -8(%rcx),%eax + cmpl $11,%ecx + cmovaq %r11,%r10 + cmoval %eax,%ecx + movzwl (%r10,%rsi,2),%eax shrl %cl,%eax andl $31,%eax .byte 0xf3,0xc3 diff --git a/deps/openssl/asm/x64-macosx-gas/aes/aesni-x86_64.s b/deps/openssl/asm/x64-macosx-gas/aes/aesni-x86_64.s index 57509ae7196c08..41ad80eebd1f89 100644 --- a/deps/openssl/asm/x64-macosx-gas/aes/aesni-x86_64.s +++ b/deps/openssl/asm/x64-macosx-gas/aes/aesni-x86_64.s @@ -17,7 +17,10 @@ L$oop_enc1_1: leaq 16(%rdx),%rdx jnz L$oop_enc1_1 .byte 102,15,56,221,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 .byte 0xf3,0xc3 @@ -38,7 +41,10 @@ L$oop_dec1_2: leaq 16(%rdx),%rdx jnz L$oop_dec1_2 .byte 102,15,56,223,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 .byte 0xf3,0xc3 @@ -264,21 +270,18 @@ _aesni_encrypt6: pxor %xmm0,%xmm6 .byte 102,15,56,220,225 pxor %xmm0,%xmm7 + movups (%rcx,%rax,1),%xmm0 addq $16,%rax -.byte 102,15,56,220,233 -.byte 102,15,56,220,241 -.byte 102,15,56,220,249 - movups -16(%rcx,%rax,1),%xmm0 jmp L$enc_loop6_enter .p2align 4 L$enc_loop6: .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 +L$enc_loop6_enter: .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 -L$enc_loop6_enter: movups (%rcx,%rax,1),%xmm1 addq $32,%rax .byte 102,15,56,220,208 @@ -321,21 +324,18 @@ _aesni_decrypt6: pxor %xmm0,%xmm6 .byte 102,15,56,222,225 pxor %xmm0,%xmm7 + movups (%rcx,%rax,1),%xmm0 addq $16,%rax -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 - movups -16(%rcx,%rax,1),%xmm0 jmp L$dec_loop6_enter .p2align 4 L$dec_loop6: .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 +L$dec_loop6_enter: .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 -L$dec_loop6_enter: movups (%rcx,%rax,1),%xmm1 addq $32,%rax .byte 102,15,56,222,208 @@ -375,23 +375,18 @@ _aesni_encrypt8: leaq 32(%rcx,%rax,1),%rcx negq %rax .byte 102,15,56,220,209 - addq $16,%rax pxor %xmm0,%xmm7 -.byte 102,15,56,220,217 pxor %xmm0,%xmm8 +.byte 102,15,56,220,217 pxor %xmm0,%xmm9 -.byte 102,15,56,220,225 -.byte 102,15,56,220,233 -.byte 102,15,56,220,241 -.byte 102,15,56,220,249 -.byte 102,68,15,56,220,193 -.byte 102,68,15,56,220,201 - movups -16(%rcx,%rax,1),%xmm0 - jmp L$enc_loop8_enter + movups (%rcx,%rax,1),%xmm0 + addq $16,%rax + jmp L$enc_loop8_inner .p2align 4 L$enc_loop8: .byte 102,15,56,220,209 .byte 102,15,56,220,217 +L$enc_loop8_inner: .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 @@ -444,23 +439,18 @@ _aesni_decrypt8: leaq 32(%rcx,%rax,1),%rcx negq %rax .byte 102,15,56,222,209 - addq $16,%rax pxor %xmm0,%xmm7 -.byte 102,15,56,222,217 pxor %xmm0,%xmm8 +.byte 102,15,56,222,217 pxor %xmm0,%xmm9 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 -.byte 102,68,15,56,222,193 -.byte 102,68,15,56,222,201 - movups -16(%rcx,%rax,1),%xmm0 - jmp L$dec_loop8_enter + movups (%rcx,%rax,1),%xmm0 + addq $16,%rax + jmp L$dec_loop8_inner .p2align 4 L$dec_loop8: .byte 102,15,56,222,209 .byte 102,15,56,222,217 +L$dec_loop8_inner: .byte 102,15,56,222,225 .byte 102,15,56,222,233 .byte 102,15,56,222,241 @@ -587,6 +577,7 @@ L$ecb_enc_tail: movups 80(%rdi),%xmm7 je L$ecb_enc_six movdqu 96(%rdi),%xmm8 + xorps %xmm9,%xmm9 call _aesni_encrypt8 movups %xmm2,(%rsi) movups %xmm3,16(%rsi) @@ -700,15 +691,23 @@ L$ecb_dec_loop8_enter: jnc L$ecb_dec_loop8 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 movq %r11,%rcx movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 movl %r10d,%eax movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 movups %xmm7,80(%rsi) + pxor %xmm7,%xmm7 movups %xmm8,96(%rsi) + pxor %xmm8,%xmm8 movups %xmm9,112(%rsi) + pxor %xmm9,%xmm9 leaq 128(%rsi),%rsi addq $128,%rdx jz L$ecb_ret @@ -731,14 +730,23 @@ L$ecb_dec_tail: je L$ecb_dec_six movups 96(%rdi),%xmm8 movups (%rcx),%xmm0 + xorps %xmm9,%xmm9 call _aesni_decrypt8 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 movups %xmm7,80(%rsi) + pxor %xmm7,%xmm7 movups %xmm8,96(%rsi) + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 jmp L$ecb_ret .p2align 4 L$ecb_dec_one: @@ -754,49 +762,73 @@ L$oop_dec1_4: jnz L$oop_dec1_4 .byte 102,15,56,223,209 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 jmp L$ecb_ret .p2align 4 L$ecb_dec_two: call _aesni_decrypt2 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 jmp L$ecb_ret .p2align 4 L$ecb_dec_three: call _aesni_decrypt3 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 jmp L$ecb_ret .p2align 4 L$ecb_dec_four: call _aesni_decrypt4 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 jmp L$ecb_ret .p2align 4 L$ecb_dec_five: xorps %xmm7,%xmm7 call _aesni_decrypt6 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 jmp L$ecb_ret .p2align 4 L$ecb_dec_six: call _aesni_decrypt6 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 movups %xmm7,80(%rsi) + pxor %xmm7,%xmm7 L$ecb_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 .byte 0xf3,0xc3 .globl _aesni_ccm64_encrypt_blocks @@ -853,7 +885,13 @@ L$ccm64_enc2_loop: leaq 16(%rsi),%rsi jnz L$ccm64_enc_outer + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 movups %xmm3,(%r9) + pxor %xmm3,%xmm3 + pxor %xmm8,%xmm8 + pxor %xmm6,%xmm6 .byte 0xf3,0xc3 .globl _aesni_ccm64_decrypt_blocks @@ -944,21 +982,56 @@ L$oop_enc1_6: leaq 16(%r11),%r11 jnz L$oop_enc1_6 .byte 102,15,56,221,217 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 movups %xmm3,(%r9) + pxor %xmm3,%xmm3 + pxor %xmm8,%xmm8 + pxor %xmm6,%xmm6 .byte 0xf3,0xc3 .globl _aesni_ctr32_encrypt_blocks .p2align 4 _aesni_ctr32_encrypt_blocks: + cmpq $1,%rdx + jne L$ctr32_bulk + + + + movups (%r8),%xmm2 + movups (%rdi),%xmm3 + movl 240(%rcx),%edx + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_enc1_7: +.byte 102,15,56,220,209 + decl %edx + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_enc1_7 +.byte 102,15,56,221,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + xorps %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movups %xmm2,(%rsi) + xorps %xmm2,%xmm2 + jmp L$ctr32_epilogue + +.p2align 4 +L$ctr32_bulk: leaq (%rsp),%rax pushq %rbp subq $128,%rsp andq $-16,%rsp leaq -8(%rax),%rbp - cmpq $1,%rdx - je L$ctr32_one_shortcut + + movdqu (%r8),%xmm2 movdqu (%rcx),%xmm0 @@ -1349,11 +1422,14 @@ L$ctr32_enc_done: leaq -128(%rcx),%rcx L$ctr32_tail: + + leaq 16(%rcx),%rcx cmpq $4,%rdx jb L$ctr32_loop3 je L$ctr32_loop4 + shll $4,%eax movdqa 96(%rsp),%xmm8 pxor %xmm9,%xmm9 @@ -1456,30 +1532,33 @@ L$ctr32_loop3: movups 32(%rdi),%xmm12 xorps %xmm12,%xmm4 movups %xmm4,32(%rsi) - jmp L$ctr32_done -.p2align 4 -L$ctr32_one_shortcut: - movups (%r8),%xmm2 - movups (%rdi),%xmm10 - movl 240(%rcx),%eax - movups (%rcx),%xmm0 - movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx - xorps %xmm0,%xmm2 -L$oop_enc1_7: -.byte 102,15,56,220,209 - decl %eax - movups (%rcx),%xmm1 - leaq 16(%rcx),%rcx - jnz L$oop_enc1_7 -.byte 102,15,56,221,209 - xorps %xmm10,%xmm2 - movups %xmm2,(%rsi) - jmp L$ctr32_done - -.p2align 4 L$ctr32_done: + xorps %xmm0,%xmm0 + xorl %r11d,%r11d + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + movaps %xmm0,0(%rsp) + pxor %xmm8,%xmm8 + movaps %xmm0,16(%rsp) + pxor %xmm9,%xmm9 + movaps %xmm0,32(%rsp) + pxor %xmm10,%xmm10 + movaps %xmm0,48(%rsp) + pxor %xmm11,%xmm11 + movaps %xmm0,64(%rsp) + pxor %xmm12,%xmm12 + movaps %xmm0,80(%rsp) + pxor %xmm13,%xmm13 + movaps %xmm0,96(%rsp) + pxor %xmm14,%xmm14 + movaps %xmm0,112(%rsp) + pxor %xmm15,%xmm15 leaq (%rbp),%rsp popq %rbp L$ctr32_epilogue: @@ -1750,6 +1829,7 @@ L$xts_enc_loop6: shrl $4,%eax L$xts_enc_short: + movl %eax,%r10d pxor %xmm0,%xmm10 addq $96,%rdx @@ -1778,6 +1858,7 @@ L$xts_enc_short: pxor %xmm12,%xmm4 pxor %xmm13,%xmm5 pxor %xmm14,%xmm6 + pxor %xmm7,%xmm7 call _aesni_encrypt6 @@ -1920,6 +2001,29 @@ L$oop_enc1_10: movups %xmm2,-16(%rsi) L$xts_enc_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + movaps %xmm0,0(%rsp) + pxor %xmm8,%xmm8 + movaps %xmm0,16(%rsp) + pxor %xmm9,%xmm9 + movaps %xmm0,32(%rsp) + pxor %xmm10,%xmm10 + movaps %xmm0,48(%rsp) + pxor %xmm11,%xmm11 + movaps %xmm0,64(%rsp) + pxor %xmm12,%xmm12 + movaps %xmm0,80(%rsp) + pxor %xmm13,%xmm13 + movaps %xmm0,96(%rsp) + pxor %xmm14,%xmm14 + pxor %xmm15,%xmm15 leaq (%rbp),%rsp popq %rbp L$xts_enc_epilogue: @@ -2196,6 +2300,7 @@ L$xts_dec_loop6: shrl $4,%eax L$xts_dec_short: + movl %eax,%r10d pxor %xmm0,%xmm10 pxor %xmm0,%xmm11 @@ -2398,6 +2503,29 @@ L$oop_dec1_14: movups %xmm2,(%rsi) L$xts_dec_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + movaps %xmm0,0(%rsp) + pxor %xmm8,%xmm8 + movaps %xmm0,16(%rsp) + pxor %xmm9,%xmm9 + movaps %xmm0,32(%rsp) + pxor %xmm10,%xmm10 + movaps %xmm0,48(%rsp) + pxor %xmm11,%xmm11 + movaps %xmm0,64(%rsp) + pxor %xmm12,%xmm12 + movaps %xmm0,80(%rsp) + pxor %xmm13,%xmm13 + movaps %xmm0,96(%rsp) + pxor %xmm14,%xmm14 + pxor %xmm15,%xmm15 leaq (%rbp),%rsp popq %rbp L$xts_dec_epilogue: @@ -2446,7 +2574,11 @@ L$oop_enc1_15: jnc L$cbc_enc_loop addq $16,%rdx jnz L$cbc_enc_tail + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 movups %xmm2,(%r8) + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 jmp L$cbc_ret L$cbc_enc_tail: @@ -2466,6 +2598,35 @@ L$cbc_enc_tail: .p2align 4 L$cbc_decrypt: + cmpq $16,%rdx + jne L$cbc_decrypt_bulk + + + + movdqu (%rdi),%xmm2 + movdqu (%r8),%xmm3 + movdqa %xmm2,%xmm4 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_dec1_16: +.byte 102,15,56,222,209 + decl %r10d + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_dec1_16 +.byte 102,15,56,223,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movdqu %xmm4,(%r8) + xorps %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + jmp L$cbc_ret +.p2align 4 +L$cbc_decrypt_bulk: leaq (%rsp),%rax pushq %rbp subq $16,%rsp @@ -2702,7 +2863,7 @@ L$cbc_dec_done: movaps %xmm9,%xmm2 leaq -112(%rcx),%rcx addq $112,%rdx - jle L$cbc_dec_tail_collected + jle L$cbc_dec_clear_tail_collected movups %xmm9,(%rsi) leaq 16(%rsi),%rsi cmpq $80,%rdx @@ -2721,14 +2882,19 @@ L$cbc_dec_six_or_seven: movdqu %xmm2,(%rsi) pxor %xmm12,%xmm4 movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 pxor %xmm13,%xmm5 movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 pxor %xmm14,%xmm6 movdqu %xmm5,48(%rsi) + pxor %xmm5,%xmm5 pxor %xmm15,%xmm7 movdqu %xmm6,64(%rsi) + pxor %xmm6,%xmm6 leaq 80(%rsi),%rsi movdqa %xmm7,%xmm2 + pxor %xmm7,%xmm7 jmp L$cbc_dec_tail_collected .p2align 4 @@ -2743,16 +2909,23 @@ L$cbc_dec_seven: movdqu %xmm2,(%rsi) pxor %xmm12,%xmm4 movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 pxor %xmm13,%xmm5 movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 pxor %xmm14,%xmm6 movdqu %xmm5,48(%rsi) + pxor %xmm5,%xmm5 pxor %xmm15,%xmm7 movdqu %xmm6,64(%rsi) + pxor %xmm6,%xmm6 pxor %xmm9,%xmm8 movdqu %xmm7,80(%rsi) + pxor %xmm7,%xmm7 leaq 96(%rsi),%rsi movdqa %xmm8,%xmm2 + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 jmp L$cbc_dec_tail_collected .p2align 4 @@ -2796,7 +2969,7 @@ L$cbc_dec_loop6_enter: movdqa %xmm7,%xmm2 addq $80,%rdx - jle L$cbc_dec_tail_collected + jle L$cbc_dec_clear_tail_collected movups %xmm7,(%rsi) leaq 16(%rsi),%rsi @@ -2831,12 +3004,17 @@ L$cbc_dec_tail: movdqu %xmm2,(%rsi) pxor %xmm12,%xmm4 movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 pxor %xmm13,%xmm5 movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 pxor %xmm14,%xmm6 movdqu %xmm5,48(%rsi) + pxor %xmm5,%xmm5 leaq 64(%rsi),%rsi movdqa %xmm6,%xmm2 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 subq $16,%rdx jmp L$cbc_dec_tail_collected @@ -2847,12 +3025,12 @@ L$cbc_dec_one: movups 16(%rcx),%xmm1 leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 -L$oop_dec1_16: +L$oop_dec1_17: .byte 102,15,56,222,209 decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz L$oop_dec1_16 + jnz L$oop_dec1_17 .byte 102,15,56,223,209 xorps %xmm10,%xmm2 movaps %xmm11,%xmm10 @@ -2866,6 +3044,7 @@ L$cbc_dec_two: pxor %xmm11,%xmm3 movdqu %xmm2,(%rsi) movdqa %xmm3,%xmm2 + pxor %xmm3,%xmm3 leaq 16(%rsi),%rsi jmp L$cbc_dec_tail_collected .p2align 4 @@ -2878,7 +3057,9 @@ L$cbc_dec_three: movdqu %xmm2,(%rsi) pxor %xmm12,%xmm4 movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 movdqa %xmm4,%xmm2 + pxor %xmm4,%xmm4 leaq 32(%rsi),%rsi jmp L$cbc_dec_tail_collected .p2align 4 @@ -2891,29 +3072,45 @@ L$cbc_dec_four: movdqu %xmm2,(%rsi) pxor %xmm12,%xmm4 movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 pxor %xmm13,%xmm5 movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 movdqa %xmm5,%xmm2 + pxor %xmm5,%xmm5 leaq 48(%rsi),%rsi jmp L$cbc_dec_tail_collected .p2align 4 +L$cbc_dec_clear_tail_collected: + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 L$cbc_dec_tail_collected: movups %xmm10,(%r8) andq $15,%rdx jnz L$cbc_dec_tail_partial movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 jmp L$cbc_dec_ret .p2align 4 L$cbc_dec_tail_partial: movaps %xmm2,(%rsp) + pxor %xmm2,%xmm2 movq $16,%rcx movq %rsi,%rdi subq %rdx,%rcx leaq (%rsp),%rsi .long 0x9066A4F3 + movdqa %xmm2,(%rsp) L$cbc_dec_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 leaq (%rbp),%rsp popq %rbp L$cbc_ret: @@ -2951,7 +3148,9 @@ L$dec_key_inverse: movups (%rdx),%xmm0 .byte 102,15,56,219,192 + pxor %xmm1,%xmm1 movups %xmm0,(%rdi) + pxor %xmm0,%xmm0 L$dec_key_ret: addq $8,%rsp .byte 0xf3,0xc3 @@ -2969,8 +3168,10 @@ __aesni_set_encrypt_key: testq %rdx,%rdx jz L$enc_key_ret + movl $268437504,%r10d movups (%rdi),%xmm0 xorps %xmm4,%xmm4 + andl _OPENSSL_ia32cap_P+4(%rip),%r10d leaq 16(%rdx),%rax cmpl $256,%esi je L$14rounds @@ -2981,6 +3182,9 @@ __aesni_set_encrypt_key: L$10rounds: movl $9,%esi + cmpl $268435456,%r10d + je L$10rounds_alt + movups %xmm0,(%rdx) .byte 102,15,58,223,200,1 call L$key_expansion_128_cold @@ -3007,10 +3211,80 @@ L$10rounds: xorl %eax,%eax jmp L$enc_key_ret +.p2align 4 +L$10rounds_alt: + movdqa L$key_rotate(%rip),%xmm5 + movl $8,%r10d + movdqa L$key_rcon1(%rip),%xmm4 + movdqa %xmm0,%xmm2 + movdqu %xmm0,(%rdx) + jmp L$oop_key128 + +.p2align 4 +L$oop_key128: +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + leaq 16(%rax),%rax + + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + + pxor %xmm2,%xmm0 + movdqu %xmm0,-16(%rax) + movdqa %xmm0,%xmm2 + + decl %r10d + jnz L$oop_key128 + + movdqa L$key_rcon1b(%rip),%xmm4 + +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + + pxor %xmm2,%xmm0 + movdqu %xmm0,(%rax) + + movdqa %xmm0,%xmm2 +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + + pxor %xmm2,%xmm0 + movdqu %xmm0,16(%rax) + + movl %esi,96(%rax) + xorl %eax,%eax + jmp L$enc_key_ret + .p2align 4 L$12rounds: movq 16(%rdi),%xmm2 movl $11,%esi + cmpl $268435456,%r10d + je L$12rounds_alt + movups %xmm0,(%rdx) .byte 102,15,58,223,202,1 call L$key_expansion_192a_cold @@ -3033,11 +3307,55 @@ L$12rounds: xorq %rax,%rax jmp L$enc_key_ret +.p2align 4 +L$12rounds_alt: + movdqa L$key_rotate192(%rip),%xmm5 + movdqa L$key_rcon1(%rip),%xmm4 + movl $8,%r10d + movdqu %xmm0,(%rdx) + jmp L$oop_key192 + +.p2align 4 +L$oop_key192: + movq %xmm2,0(%rax) + movdqa %xmm2,%xmm1 +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + pslld $1,%xmm4 + leaq 24(%rax),%rax + + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + + pshufd $255,%xmm0,%xmm3 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + + pxor %xmm2,%xmm0 + pxor %xmm3,%xmm2 + movdqu %xmm0,-16(%rax) + + decl %r10d + jnz L$oop_key192 + + movl %esi,32(%rax) + xorl %eax,%eax + jmp L$enc_key_ret + .p2align 4 L$14rounds: movups 16(%rdi),%xmm2 movl $13,%esi leaq 16(%rax),%rax + cmpl $268435456,%r10d + je L$14rounds_alt + movups %xmm0,(%rdx) movups %xmm2,16(%rdx) .byte 102,15,58,223,202,1 @@ -3071,10 +3389,70 @@ L$14rounds: xorq %rax,%rax jmp L$enc_key_ret +.p2align 4 +L$14rounds_alt: + movdqa L$key_rotate(%rip),%xmm5 + movdqa L$key_rcon1(%rip),%xmm4 + movl $7,%r10d + movdqu %xmm0,0(%rdx) + movdqa %xmm2,%xmm1 + movdqu %xmm2,16(%rdx) + jmp L$oop_key256 + +.p2align 4 +L$oop_key256: +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + pslld $1,%xmm4 + + pxor %xmm2,%xmm0 + movdqu %xmm0,(%rax) + + decl %r10d + jz L$done_key256 + + pshufd $255,%xmm0,%xmm2 + pxor %xmm3,%xmm3 +.byte 102,15,56,221,211 + + movdqa %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm3,%xmm1 + + pxor %xmm1,%xmm2 + movdqu %xmm2,16(%rax) + leaq 32(%rax),%rax + movdqa %xmm2,%xmm1 + + jmp L$oop_key256 + +L$done_key256: + movl %esi,16(%rax) + xorl %eax,%eax + jmp L$enc_key_ret + .p2align 4 L$bad_keybits: movq $-2,%rax L$enc_key_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 addq $8,%rsp .byte 0xf3,0xc3 L$SEH_end_set_encrypt_key: @@ -3160,6 +3538,14 @@ L$xts_magic: .long 0x87,0,1,0 L$increment1: .byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +L$key_rotate: +.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d +L$key_rotate192: +.long 0x04070605,0x04070605,0x04070605,0x04070605 +L$key_rcon1: +.long 1,1,1,1 +L$key_rcon1b: +.long 0x1b,0x1b,0x1b,0x1b .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .p2align 6 diff --git a/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s b/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s index 65cf9993d80396..5470fb0336182a 100644 --- a/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s +++ b/deps/openssl/asm/x64-macosx-gas/bn/x86_64-mont5.s @@ -2884,11 +2884,16 @@ L$sqrx4x_sub: .p2align 4 _bn_get_bits5: - movq %rdi,%r10 + leaq 0(%rdi),%r10 + leaq 1(%rdi),%r11 movl %esi,%ecx - shrl $3,%esi - movzwl (%r10,%rsi,1),%eax - andl $7,%ecx + shrl $4,%esi + andl $15,%ecx + leal -8(%rcx),%eax + cmpl $11,%ecx + cmovaq %r11,%r10 + cmoval %eax,%ecx + movzwl (%r10,%rsi,2),%eax shrl %cl,%eax andl $31,%eax .byte 0xf3,0xc3 diff --git a/deps/openssl/asm/x64-win32-masm/aes/aesni-x86_64.asm b/deps/openssl/asm/x64-win32-masm/aes/aesni-x86_64.asm index 53d8afc950f174..5e848125d679fe 100644 --- a/deps/openssl/asm/x64-win32-masm/aes/aesni-x86_64.asm +++ b/deps/openssl/asm/x64-win32-masm/aes/aesni-x86_64.asm @@ -18,7 +18,10 @@ DB 102,15,56,220,209 lea r8,QWORD PTR[16+r8] jnz $L$oop_enc1_1 DB 102,15,56,221,209 + pxor xmm0,xmm0 + pxor xmm1,xmm1 movups XMMWORD PTR[rdx],xmm2 + pxor xmm2,xmm2 DB 0F3h,0C3h ;repret aesni_encrypt ENDP @@ -39,7 +42,10 @@ DB 102,15,56,222,209 lea r8,QWORD PTR[16+r8] jnz $L$oop_dec1_2 DB 102,15,56,223,209 + pxor xmm0,xmm0 + pxor xmm1,xmm1 movups XMMWORD PTR[rdx],xmm2 + pxor xmm2,xmm2 DB 0F3h,0C3h ;repret aesni_decrypt ENDP @@ -265,21 +271,18 @@ DB 102,15,56,220,217 pxor xmm6,xmm0 DB 102,15,56,220,225 pxor xmm7,xmm0 + movups xmm0,XMMWORD PTR[rax*1+rcx] add rax,16 -DB 102,15,56,220,233 -DB 102,15,56,220,241 -DB 102,15,56,220,249 - movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] jmp $L$enc_loop6_enter ALIGN 16 $L$enc_loop6:: DB 102,15,56,220,209 DB 102,15,56,220,217 DB 102,15,56,220,225 +$L$enc_loop6_enter:: DB 102,15,56,220,233 DB 102,15,56,220,241 DB 102,15,56,220,249 -$L$enc_loop6_enter:: movups xmm1,XMMWORD PTR[rax*1+rcx] add rax,32 DB 102,15,56,220,208 @@ -322,21 +325,18 @@ DB 102,15,56,222,217 pxor xmm6,xmm0 DB 102,15,56,222,225 pxor xmm7,xmm0 + movups xmm0,XMMWORD PTR[rax*1+rcx] add rax,16 -DB 102,15,56,222,233 -DB 102,15,56,222,241 -DB 102,15,56,222,249 - movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] jmp $L$dec_loop6_enter ALIGN 16 $L$dec_loop6:: DB 102,15,56,222,209 DB 102,15,56,222,217 DB 102,15,56,222,225 +$L$dec_loop6_enter:: DB 102,15,56,222,233 DB 102,15,56,222,241 DB 102,15,56,222,249 -$L$dec_loop6_enter:: movups xmm1,XMMWORD PTR[rax*1+rcx] add rax,32 DB 102,15,56,222,208 @@ -376,23 +376,18 @@ _aesni_encrypt8 PROC PRIVATE lea rcx,QWORD PTR[32+rax*1+rcx] neg rax DB 102,15,56,220,209 - add rax,16 pxor xmm7,xmm0 -DB 102,15,56,220,217 pxor xmm8,xmm0 +DB 102,15,56,220,217 pxor xmm9,xmm0 -DB 102,15,56,220,225 -DB 102,15,56,220,233 -DB 102,15,56,220,241 -DB 102,15,56,220,249 -DB 102,68,15,56,220,193 -DB 102,68,15,56,220,201 - movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] - jmp $L$enc_loop8_enter + movups xmm0,XMMWORD PTR[rax*1+rcx] + add rax,16 + jmp $L$enc_loop8_inner ALIGN 16 $L$enc_loop8:: DB 102,15,56,220,209 DB 102,15,56,220,217 +$L$enc_loop8_inner:: DB 102,15,56,220,225 DB 102,15,56,220,233 DB 102,15,56,220,241 @@ -445,23 +440,18 @@ _aesni_decrypt8 PROC PRIVATE lea rcx,QWORD PTR[32+rax*1+rcx] neg rax DB 102,15,56,222,209 - add rax,16 pxor xmm7,xmm0 -DB 102,15,56,222,217 pxor xmm8,xmm0 +DB 102,15,56,222,217 pxor xmm9,xmm0 -DB 102,15,56,222,225 -DB 102,15,56,222,233 -DB 102,15,56,222,241 -DB 102,15,56,222,249 -DB 102,68,15,56,222,193 -DB 102,68,15,56,222,201 - movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] - jmp $L$dec_loop8_enter + movups xmm0,XMMWORD PTR[rax*1+rcx] + add rax,16 + jmp $L$dec_loop8_inner ALIGN 16 $L$dec_loop8:: DB 102,15,56,222,209 DB 102,15,56,222,217 +$L$dec_loop8_inner:: DB 102,15,56,222,225 DB 102,15,56,222,233 DB 102,15,56,222,241 @@ -605,6 +595,7 @@ $L$ecb_enc_tail:: movups xmm7,XMMWORD PTR[80+rdi] je $L$ecb_enc_six movdqu xmm8,XMMWORD PTR[96+rdi] + xorps xmm9,xmm9 call _aesni_encrypt8 movups XMMWORD PTR[rsi],xmm2 movups XMMWORD PTR[16+rsi],xmm3 @@ -718,15 +709,23 @@ $L$ecb_dec_loop8_enter:: jnc $L$ecb_dec_loop8 movups XMMWORD PTR[rsi],xmm2 + pxor xmm2,xmm2 mov rcx,r11 movups XMMWORD PTR[16+rsi],xmm3 + pxor xmm3,xmm3 mov eax,r10d movups XMMWORD PTR[32+rsi],xmm4 + pxor xmm4,xmm4 movups XMMWORD PTR[48+rsi],xmm5 + pxor xmm5,xmm5 movups XMMWORD PTR[64+rsi],xmm6 + pxor xmm6,xmm6 movups XMMWORD PTR[80+rsi],xmm7 + pxor xmm7,xmm7 movups XMMWORD PTR[96+rsi],xmm8 + pxor xmm8,xmm8 movups XMMWORD PTR[112+rsi],xmm9 + pxor xmm9,xmm9 lea rsi,QWORD PTR[128+rsi] add rdx,080h jz $L$ecb_ret @@ -749,14 +748,23 @@ $L$ecb_dec_tail:: je $L$ecb_dec_six movups xmm8,XMMWORD PTR[96+rdi] movups xmm0,XMMWORD PTR[rcx] + xorps xmm9,xmm9 call _aesni_decrypt8 movups XMMWORD PTR[rsi],xmm2 + pxor xmm2,xmm2 movups XMMWORD PTR[16+rsi],xmm3 + pxor xmm3,xmm3 movups XMMWORD PTR[32+rsi],xmm4 + pxor xmm4,xmm4 movups XMMWORD PTR[48+rsi],xmm5 + pxor xmm5,xmm5 movups XMMWORD PTR[64+rsi],xmm6 + pxor xmm6,xmm6 movups XMMWORD PTR[80+rsi],xmm7 + pxor xmm7,xmm7 movups XMMWORD PTR[96+rsi],xmm8 + pxor xmm8,xmm8 + pxor xmm9,xmm9 jmp $L$ecb_ret ALIGN 16 $L$ecb_dec_one:: @@ -772,53 +780,81 @@ DB 102,15,56,222,209 jnz $L$oop_dec1_4 DB 102,15,56,223,209 movups XMMWORD PTR[rsi],xmm2 + pxor xmm2,xmm2 jmp $L$ecb_ret ALIGN 16 $L$ecb_dec_two:: call _aesni_decrypt2 movups XMMWORD PTR[rsi],xmm2 + pxor xmm2,xmm2 movups XMMWORD PTR[16+rsi],xmm3 + pxor xmm3,xmm3 jmp $L$ecb_ret ALIGN 16 $L$ecb_dec_three:: call _aesni_decrypt3 movups XMMWORD PTR[rsi],xmm2 + pxor xmm2,xmm2 movups XMMWORD PTR[16+rsi],xmm3 + pxor xmm3,xmm3 movups XMMWORD PTR[32+rsi],xmm4 + pxor xmm4,xmm4 jmp $L$ecb_ret ALIGN 16 $L$ecb_dec_four:: call _aesni_decrypt4 movups XMMWORD PTR[rsi],xmm2 + pxor xmm2,xmm2 movups XMMWORD PTR[16+rsi],xmm3 + pxor xmm3,xmm3 movups XMMWORD PTR[32+rsi],xmm4 + pxor xmm4,xmm4 movups XMMWORD PTR[48+rsi],xmm5 + pxor xmm5,xmm5 jmp $L$ecb_ret ALIGN 16 $L$ecb_dec_five:: xorps xmm7,xmm7 call _aesni_decrypt6 movups XMMWORD PTR[rsi],xmm2 + pxor xmm2,xmm2 movups XMMWORD PTR[16+rsi],xmm3 + pxor xmm3,xmm3 movups XMMWORD PTR[32+rsi],xmm4 + pxor xmm4,xmm4 movups XMMWORD PTR[48+rsi],xmm5 + pxor xmm5,xmm5 movups XMMWORD PTR[64+rsi],xmm6 + pxor xmm6,xmm6 + pxor xmm7,xmm7 jmp $L$ecb_ret ALIGN 16 $L$ecb_dec_six:: call _aesni_decrypt6 movups XMMWORD PTR[rsi],xmm2 + pxor xmm2,xmm2 movups XMMWORD PTR[16+rsi],xmm3 + pxor xmm3,xmm3 movups XMMWORD PTR[32+rsi],xmm4 + pxor xmm4,xmm4 movups XMMWORD PTR[48+rsi],xmm5 + pxor xmm5,xmm5 movups XMMWORD PTR[64+rsi],xmm6 + pxor xmm6,xmm6 movups XMMWORD PTR[80+rsi],xmm7 + pxor xmm7,xmm7 $L$ecb_ret:: + xorps xmm0,xmm0 + pxor xmm1,xmm1 movaps xmm6,XMMWORD PTR[rsp] + movaps XMMWORD PTR[rsp],xmm0 movaps xmm7,XMMWORD PTR[16+rsp] + movaps XMMWORD PTR[16+rsp],xmm0 movaps xmm8,XMMWORD PTR[32+rsp] + movaps XMMWORD PTR[32+rsp],xmm0 movaps xmm9,XMMWORD PTR[48+rsp] + movaps XMMWORD PTR[48+rsp],xmm0 lea rsp,QWORD PTR[88+rsp] $L$ecb_enc_ret:: mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue @@ -898,11 +934,21 @@ DB 102,15,56,0,215 lea rsi,QWORD PTR[16+rsi] jnz $L$ccm64_enc_outer + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 movups XMMWORD PTR[r9],xmm3 + pxor xmm3,xmm3 + pxor xmm8,xmm8 + pxor xmm6,xmm6 movaps xmm6,XMMWORD PTR[rsp] + movaps XMMWORD PTR[rsp],xmm0 movaps xmm7,XMMWORD PTR[16+rsp] + movaps XMMWORD PTR[16+rsp],xmm0 movaps xmm8,XMMWORD PTR[32+rsp] + movaps XMMWORD PTR[32+rsp],xmm0 movaps xmm9,XMMWORD PTR[48+rsp] + movaps XMMWORD PTR[48+rsp],xmm0 lea rsp,QWORD PTR[88+rsp] $L$ccm64_enc_ret:: mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue @@ -1016,11 +1062,21 @@ DB 102,15,56,220,217 lea r11,QWORD PTR[16+r11] jnz $L$oop_enc1_6 DB 102,15,56,221,217 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 movups XMMWORD PTR[r9],xmm3 + pxor xmm3,xmm3 + pxor xmm8,xmm8 + pxor xmm6,xmm6 movaps xmm6,XMMWORD PTR[rsp] + movaps XMMWORD PTR[rsp],xmm0 movaps xmm7,XMMWORD PTR[16+rsp] + movaps XMMWORD PTR[16+rsp],xmm0 movaps xmm8,XMMWORD PTR[32+rsp] + movaps XMMWORD PTR[32+rsp],xmm0 movaps xmm9,XMMWORD PTR[48+rsp] + movaps XMMWORD PTR[48+rsp],xmm0 lea rsp,QWORD PTR[88+rsp] $L$ccm64_dec_ret:: mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue @@ -1043,6 +1099,35 @@ $L$SEH_begin_aesni_ctr32_encrypt_blocks:: mov r8,QWORD PTR[40+rsp] + cmp rdx,1 + jne $L$ctr32_bulk + + + + movups xmm2,XMMWORD PTR[r8] + movups xmm3,XMMWORD PTR[rdi] + mov edx,DWORD PTR[240+rcx] + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_enc1_7:: +DB 102,15,56,220,209 + dec edx + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_enc1_7 +DB 102,15,56,221,209 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + xorps xmm2,xmm3 + pxor xmm3,xmm3 + movups XMMWORD PTR[rsi],xmm2 + xorps xmm2,xmm2 + jmp $L$ctr32_epilogue + +ALIGN 16 +$L$ctr32_bulk:: lea rax,QWORD PTR[rsp] push rbp sub rsp,288 @@ -1060,8 +1145,8 @@ $L$SEH_begin_aesni_ctr32_encrypt_blocks:: $L$ctr32_body:: lea rbp,QWORD PTR[((-8))+rax] - cmp rdx,1 - je $L$ctr32_one_shortcut + + movdqu xmm2,XMMWORD PTR[r8] movdqu xmm0,XMMWORD PTR[rcx] @@ -1452,11 +1537,14 @@ DB 102,69,15,56,221,202 lea rcx,QWORD PTR[((-128))+rcx] $L$ctr32_tail:: + + lea rcx,QWORD PTR[16+rcx] cmp rdx,4 jb $L$ctr32_loop3 je $L$ctr32_loop4 + shl eax,4 movdqa xmm8,XMMWORD PTR[96+rsp] pxor xmm9,xmm9 @@ -1559,40 +1647,43 @@ DB 102,15,56,221,225 movups xmm12,XMMWORD PTR[32+rdi] xorps xmm4,xmm12 movups XMMWORD PTR[32+rsi],xmm4 - jmp $L$ctr32_done -ALIGN 16 -$L$ctr32_one_shortcut:: - movups xmm2,XMMWORD PTR[r8] - movups xmm10,XMMWORD PTR[rdi] - mov eax,DWORD PTR[240+rcx] - movups xmm0,XMMWORD PTR[rcx] - movups xmm1,XMMWORD PTR[16+rcx] - lea rcx,QWORD PTR[32+rcx] - xorps xmm2,xmm0 -$L$oop_enc1_7:: -DB 102,15,56,220,209 - dec eax - movups xmm1,XMMWORD PTR[rcx] - lea rcx,QWORD PTR[16+rcx] - jnz $L$oop_enc1_7 -DB 102,15,56,221,209 - xorps xmm2,xmm10 - movups XMMWORD PTR[rsi],xmm2 - jmp $L$ctr32_done - -ALIGN 16 $L$ctr32_done:: + xorps xmm0,xmm0 + xor r11d,r11d + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 movaps xmm6,XMMWORD PTR[((-160))+rbp] + movaps XMMWORD PTR[(-160)+rbp],xmm0 movaps xmm7,XMMWORD PTR[((-144))+rbp] + movaps XMMWORD PTR[(-144)+rbp],xmm0 movaps xmm8,XMMWORD PTR[((-128))+rbp] + movaps XMMWORD PTR[(-128)+rbp],xmm0 movaps xmm9,XMMWORD PTR[((-112))+rbp] + movaps XMMWORD PTR[(-112)+rbp],xmm0 movaps xmm10,XMMWORD PTR[((-96))+rbp] + movaps XMMWORD PTR[(-96)+rbp],xmm0 movaps xmm11,XMMWORD PTR[((-80))+rbp] + movaps XMMWORD PTR[(-80)+rbp],xmm0 movaps xmm12,XMMWORD PTR[((-64))+rbp] + movaps XMMWORD PTR[(-64)+rbp],xmm0 movaps xmm13,XMMWORD PTR[((-48))+rbp] + movaps XMMWORD PTR[(-48)+rbp],xmm0 movaps xmm14,XMMWORD PTR[((-32))+rbp] + movaps XMMWORD PTR[(-32)+rbp],xmm0 movaps xmm15,XMMWORD PTR[((-16))+rbp] + movaps XMMWORD PTR[(-16)+rbp],xmm0 + movaps XMMWORD PTR[rsp],xmm0 + movaps XMMWORD PTR[16+rsp],xmm0 + movaps XMMWORD PTR[32+rsp],xmm0 + movaps XMMWORD PTR[48+rsp],xmm0 + movaps XMMWORD PTR[64+rsp],xmm0 + movaps XMMWORD PTR[80+rsp],xmm0 + movaps XMMWORD PTR[96+rsp],xmm0 + movaps XMMWORD PTR[112+rsp],xmm0 lea rsp,QWORD PTR[rbp] pop rbp $L$ctr32_epilogue:: @@ -1889,6 +1980,7 @@ DB 102,15,56,221,124,36,80 shr eax,4 $L$xts_enc_short:: + mov r10d,eax pxor xmm10,xmm0 add rdx,16*6 @@ -1917,6 +2009,7 @@ $L$xts_enc_short:: pxor xmm4,xmm12 pxor xmm5,xmm13 pxor xmm6,xmm14 + pxor xmm7,xmm7 call _aesni_encrypt6 @@ -2059,16 +2152,39 @@ DB 102,15,56,221,209 movups XMMWORD PTR[(-16)+rsi],xmm2 $L$xts_enc_ret:: + xorps xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 movaps xmm6,XMMWORD PTR[((-160))+rbp] + movaps XMMWORD PTR[(-160)+rbp],xmm0 movaps xmm7,XMMWORD PTR[((-144))+rbp] + movaps XMMWORD PTR[(-144)+rbp],xmm0 movaps xmm8,XMMWORD PTR[((-128))+rbp] + movaps XMMWORD PTR[(-128)+rbp],xmm0 movaps xmm9,XMMWORD PTR[((-112))+rbp] + movaps XMMWORD PTR[(-112)+rbp],xmm0 movaps xmm10,XMMWORD PTR[((-96))+rbp] + movaps XMMWORD PTR[(-96)+rbp],xmm0 movaps xmm11,XMMWORD PTR[((-80))+rbp] + movaps XMMWORD PTR[(-80)+rbp],xmm0 movaps xmm12,XMMWORD PTR[((-64))+rbp] + movaps XMMWORD PTR[(-64)+rbp],xmm0 movaps xmm13,XMMWORD PTR[((-48))+rbp] + movaps XMMWORD PTR[(-48)+rbp],xmm0 movaps xmm14,XMMWORD PTR[((-32))+rbp] + movaps XMMWORD PTR[(-32)+rbp],xmm0 movaps xmm15,XMMWORD PTR[((-16))+rbp] + movaps XMMWORD PTR[(-16)+rbp],xmm0 + movaps XMMWORD PTR[rsp],xmm0 + movaps XMMWORD PTR[16+rsp],xmm0 + movaps XMMWORD PTR[32+rsp],xmm0 + movaps XMMWORD PTR[48+rsp],xmm0 + movaps XMMWORD PTR[64+rsp],xmm0 + movaps XMMWORD PTR[80+rsp],xmm0 + movaps XMMWORD PTR[96+rsp],xmm0 lea rsp,QWORD PTR[rbp] pop rbp $L$xts_enc_epilogue:: @@ -2371,6 +2487,7 @@ DB 102,15,56,223,124,36,80 shr eax,4 $L$xts_dec_short:: + mov r10d,eax pxor xmm10,xmm0 pxor xmm11,xmm0 @@ -2573,16 +2690,39 @@ DB 102,15,56,223,209 movups XMMWORD PTR[rsi],xmm2 $L$xts_dec_ret:: + xorps xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 movaps xmm6,XMMWORD PTR[((-160))+rbp] + movaps XMMWORD PTR[(-160)+rbp],xmm0 movaps xmm7,XMMWORD PTR[((-144))+rbp] + movaps XMMWORD PTR[(-144)+rbp],xmm0 movaps xmm8,XMMWORD PTR[((-128))+rbp] + movaps XMMWORD PTR[(-128)+rbp],xmm0 movaps xmm9,XMMWORD PTR[((-112))+rbp] + movaps XMMWORD PTR[(-112)+rbp],xmm0 movaps xmm10,XMMWORD PTR[((-96))+rbp] + movaps XMMWORD PTR[(-96)+rbp],xmm0 movaps xmm11,XMMWORD PTR[((-80))+rbp] + movaps XMMWORD PTR[(-80)+rbp],xmm0 movaps xmm12,XMMWORD PTR[((-64))+rbp] + movaps XMMWORD PTR[(-64)+rbp],xmm0 movaps xmm13,XMMWORD PTR[((-48))+rbp] + movaps XMMWORD PTR[(-48)+rbp],xmm0 movaps xmm14,XMMWORD PTR[((-32))+rbp] + movaps XMMWORD PTR[(-32)+rbp],xmm0 movaps xmm15,XMMWORD PTR[((-16))+rbp] + movaps XMMWORD PTR[(-16)+rbp],xmm0 + movaps XMMWORD PTR[rsp],xmm0 + movaps XMMWORD PTR[16+rsp],xmm0 + movaps XMMWORD PTR[32+rsp],xmm0 + movaps XMMWORD PTR[48+rsp],xmm0 + movaps XMMWORD PTR[64+rsp],xmm0 + movaps XMMWORD PTR[80+rsp],xmm0 + movaps XMMWORD PTR[96+rsp],xmm0 lea rsp,QWORD PTR[rbp] pop rbp $L$xts_dec_epilogue:: @@ -2646,7 +2786,11 @@ DB 102,15,56,221,209 jnc $L$cbc_enc_loop add rdx,16 jnz $L$cbc_enc_tail + pxor xmm0,xmm0 + pxor xmm1,xmm1 movups XMMWORD PTR[r8],xmm2 + pxor xmm2,xmm2 + pxor xmm3,xmm3 jmp $L$cbc_ret $L$cbc_enc_tail:: @@ -2666,6 +2810,35 @@ $L$cbc_enc_tail:: ALIGN 16 $L$cbc_decrypt:: + cmp rdx,16 + jne $L$cbc_decrypt_bulk + + + + movdqu xmm2,XMMWORD PTR[rdi] + movdqu xmm3,XMMWORD PTR[r8] + movdqa xmm4,xmm2 + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_dec1_16:: +DB 102,15,56,222,209 + dec r10d + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_dec1_16 +DB 102,15,56,223,209 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + movdqu XMMWORD PTR[r8],xmm4 + xorps xmm2,xmm3 + pxor xmm3,xmm3 + movups XMMWORD PTR[rsi],xmm2 + pxor xmm2,xmm2 + jmp $L$cbc_ret +ALIGN 16 +$L$cbc_decrypt_bulk:: lea rax,QWORD PTR[rsp] push rbp sub rsp,176 @@ -2913,7 +3086,7 @@ DB 102,69,15,56,223,202 movaps xmm2,xmm9 lea rcx,QWORD PTR[((-112))+rcx] add rdx,070h - jle $L$cbc_dec_tail_collected + jle $L$cbc_dec_clear_tail_collected movups XMMWORD PTR[rsi],xmm9 lea rsi,QWORD PTR[16+rsi] cmp rdx,050h @@ -2932,14 +3105,19 @@ $L$cbc_dec_six_or_seven:: movdqu XMMWORD PTR[rsi],xmm2 pxor xmm4,xmm12 movdqu XMMWORD PTR[16+rsi],xmm3 + pxor xmm3,xmm3 pxor xmm5,xmm13 movdqu XMMWORD PTR[32+rsi],xmm4 + pxor xmm4,xmm4 pxor xmm6,xmm14 movdqu XMMWORD PTR[48+rsi],xmm5 + pxor xmm5,xmm5 pxor xmm7,xmm15 movdqu XMMWORD PTR[64+rsi],xmm6 + pxor xmm6,xmm6 lea rsi,QWORD PTR[80+rsi] movdqa xmm2,xmm7 + pxor xmm7,xmm7 jmp $L$cbc_dec_tail_collected ALIGN 16 @@ -2954,16 +3132,23 @@ $L$cbc_dec_seven:: movdqu XMMWORD PTR[rsi],xmm2 pxor xmm4,xmm12 movdqu XMMWORD PTR[16+rsi],xmm3 + pxor xmm3,xmm3 pxor xmm5,xmm13 movdqu XMMWORD PTR[32+rsi],xmm4 + pxor xmm4,xmm4 pxor xmm6,xmm14 movdqu XMMWORD PTR[48+rsi],xmm5 + pxor xmm5,xmm5 pxor xmm7,xmm15 movdqu XMMWORD PTR[64+rsi],xmm6 + pxor xmm6,xmm6 pxor xmm8,xmm9 movdqu XMMWORD PTR[80+rsi],xmm7 + pxor xmm7,xmm7 lea rsi,QWORD PTR[96+rsi] movdqa xmm2,xmm8 + pxor xmm8,xmm8 + pxor xmm9,xmm9 jmp $L$cbc_dec_tail_collected ALIGN 16 @@ -3007,7 +3192,7 @@ $L$cbc_dec_loop6_enter:: movdqa xmm2,xmm7 add rdx,050h - jle $L$cbc_dec_tail_collected + jle $L$cbc_dec_clear_tail_collected movups XMMWORD PTR[rsi],xmm7 lea rsi,QWORD PTR[16+rsi] @@ -3042,12 +3227,17 @@ $L$cbc_dec_tail:: movdqu XMMWORD PTR[rsi],xmm2 pxor xmm4,xmm12 movdqu XMMWORD PTR[16+rsi],xmm3 + pxor xmm3,xmm3 pxor xmm5,xmm13 movdqu XMMWORD PTR[32+rsi],xmm4 + pxor xmm4,xmm4 pxor xmm6,xmm14 movdqu XMMWORD PTR[48+rsi],xmm5 + pxor xmm5,xmm5 lea rsi,QWORD PTR[64+rsi] movdqa xmm2,xmm6 + pxor xmm6,xmm6 + pxor xmm7,xmm7 sub rdx,010h jmp $L$cbc_dec_tail_collected @@ -3058,12 +3248,12 @@ $L$cbc_dec_one:: movups xmm1,XMMWORD PTR[16+rcx] lea rcx,QWORD PTR[32+rcx] xorps xmm2,xmm0 -$L$oop_dec1_16:: +$L$oop_dec1_17:: DB 102,15,56,222,209 dec eax movups xmm1,XMMWORD PTR[rcx] lea rcx,QWORD PTR[16+rcx] - jnz $L$oop_dec1_16 + jnz $L$oop_dec1_17 DB 102,15,56,223,209 xorps xmm2,xmm10 movaps xmm10,xmm11 @@ -3077,6 +3267,7 @@ $L$cbc_dec_two:: pxor xmm3,xmm11 movdqu XMMWORD PTR[rsi],xmm2 movdqa xmm2,xmm3 + pxor xmm3,xmm3 lea rsi,QWORD PTR[16+rsi] jmp $L$cbc_dec_tail_collected ALIGN 16 @@ -3089,7 +3280,9 @@ $L$cbc_dec_three:: movdqu XMMWORD PTR[rsi],xmm2 pxor xmm4,xmm12 movdqu XMMWORD PTR[16+rsi],xmm3 + pxor xmm3,xmm3 movdqa xmm2,xmm4 + pxor xmm4,xmm4 lea rsi,QWORD PTR[32+rsi] jmp $L$cbc_dec_tail_collected ALIGN 16 @@ -3102,39 +3295,61 @@ $L$cbc_dec_four:: movdqu XMMWORD PTR[rsi],xmm2 pxor xmm4,xmm12 movdqu XMMWORD PTR[16+rsi],xmm3 + pxor xmm3,xmm3 pxor xmm5,xmm13 movdqu XMMWORD PTR[32+rsi],xmm4 + pxor xmm4,xmm4 movdqa xmm2,xmm5 + pxor xmm5,xmm5 lea rsi,QWORD PTR[48+rsi] jmp $L$cbc_dec_tail_collected ALIGN 16 +$L$cbc_dec_clear_tail_collected:: + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 $L$cbc_dec_tail_collected:: movups XMMWORD PTR[r8],xmm10 and rdx,15 jnz $L$cbc_dec_tail_partial movups XMMWORD PTR[rsi],xmm2 + pxor xmm2,xmm2 jmp $L$cbc_dec_ret ALIGN 16 $L$cbc_dec_tail_partial:: movaps XMMWORD PTR[rsp],xmm2 + pxor xmm2,xmm2 mov rcx,16 mov rdi,rsi sub rcx,rdx lea rsi,QWORD PTR[rsp] DD 09066A4F3h + movdqa XMMWORD PTR[rsp],xmm2 $L$cbc_dec_ret:: + xorps xmm0,xmm0 + pxor xmm1,xmm1 movaps xmm6,XMMWORD PTR[16+rsp] + movaps XMMWORD PTR[16+rsp],xmm0 movaps xmm7,XMMWORD PTR[32+rsp] + movaps XMMWORD PTR[32+rsp],xmm0 movaps xmm8,XMMWORD PTR[48+rsp] + movaps XMMWORD PTR[48+rsp],xmm0 movaps xmm9,XMMWORD PTR[64+rsp] + movaps XMMWORD PTR[64+rsp],xmm0 movaps xmm10,XMMWORD PTR[80+rsp] + movaps XMMWORD PTR[80+rsp],xmm0 movaps xmm11,XMMWORD PTR[96+rsp] + movaps XMMWORD PTR[96+rsp],xmm0 movaps xmm12,XMMWORD PTR[112+rsp] + movaps XMMWORD PTR[112+rsp],xmm0 movaps xmm13,XMMWORD PTR[128+rsp] + movaps XMMWORD PTR[128+rsp],xmm0 movaps xmm14,XMMWORD PTR[144+rsp] + movaps XMMWORD PTR[144+rsp],xmm0 movaps xmm15,XMMWORD PTR[160+rsp] + movaps XMMWORD PTR[160+rsp],xmm0 lea rsp,QWORD PTR[rbp] pop rbp $L$cbc_ret:: @@ -3175,7 +3390,9 @@ DB 102,15,56,219,201 movups xmm0,XMMWORD PTR[r8] DB 102,15,56,219,192 + pxor xmm1,xmm1 movups XMMWORD PTR[rcx],xmm0 + pxor xmm0,xmm0 $L$dec_key_ret:: add rsp,8 DB 0F3h,0C3h ;repret @@ -3193,8 +3410,10 @@ DB 048h,083h,0ECh,008h test r8,r8 jz $L$enc_key_ret + mov r10d,268437504 movups xmm0,XMMWORD PTR[rcx] xorps xmm4,xmm4 + and r10d,DWORD PTR[((OPENSSL_ia32cap_P+4))] lea rax,QWORD PTR[16+r8] cmp edx,256 je $L$14rounds @@ -3205,6 +3424,9 @@ DB 048h,083h,0ECh,008h $L$10rounds:: mov edx,9 + cmp r10d,268435456 + je $L$10rounds_alt + movups XMMWORD PTR[r8],xmm0 DB 102,15,58,223,200,1 call $L$key_expansion_128_cold @@ -3231,10 +3453,80 @@ DB 102,15,58,223,200,54 xor eax,eax jmp $L$enc_key_ret +ALIGN 16 +$L$10rounds_alt:: + movdqa xmm5,XMMWORD PTR[$L$key_rotate] + mov r10d,8 + movdqa xmm4,XMMWORD PTR[$L$key_rcon1] + movdqa xmm2,xmm0 + movdqu XMMWORD PTR[r8],xmm0 + jmp $L$oop_key128 + +ALIGN 16 +$L$oop_key128:: +DB 102,15,56,0,197 +DB 102,15,56,221,196 + pslld xmm4,1 + lea rax,QWORD PTR[16+rax] + + movdqa xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm2,xmm3 + + pxor xmm0,xmm2 + movdqu XMMWORD PTR[(-16)+rax],xmm0 + movdqa xmm2,xmm0 + + dec r10d + jnz $L$oop_key128 + + movdqa xmm4,XMMWORD PTR[$L$key_rcon1b] + +DB 102,15,56,0,197 +DB 102,15,56,221,196 + pslld xmm4,1 + + movdqa xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm2,xmm3 + + pxor xmm0,xmm2 + movdqu XMMWORD PTR[rax],xmm0 + + movdqa xmm2,xmm0 +DB 102,15,56,0,197 +DB 102,15,56,221,196 + + movdqa xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm2,xmm3 + + pxor xmm0,xmm2 + movdqu XMMWORD PTR[16+rax],xmm0 + + mov DWORD PTR[96+rax],edx + xor eax,eax + jmp $L$enc_key_ret + ALIGN 16 $L$12rounds:: movq xmm2,QWORD PTR[16+rcx] mov edx,11 + cmp r10d,268435456 + je $L$12rounds_alt + movups XMMWORD PTR[r8],xmm0 DB 102,15,58,223,202,1 call $L$key_expansion_192a_cold @@ -3257,11 +3549,55 @@ DB 102,15,58,223,202,128 xor rax,rax jmp $L$enc_key_ret +ALIGN 16 +$L$12rounds_alt:: + movdqa xmm5,XMMWORD PTR[$L$key_rotate192] + movdqa xmm4,XMMWORD PTR[$L$key_rcon1] + mov r10d,8 + movdqu XMMWORD PTR[r8],xmm0 + jmp $L$oop_key192 + +ALIGN 16 +$L$oop_key192:: + movq QWORD PTR[rax],xmm2 + movdqa xmm1,xmm2 +DB 102,15,56,0,213 +DB 102,15,56,221,212 + pslld xmm4,1 + lea rax,QWORD PTR[24+rax] + + movdqa xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm0,xmm3 + + pshufd xmm3,xmm0,0ffh + pxor xmm3,xmm1 + pslldq xmm1,4 + pxor xmm3,xmm1 + + pxor xmm0,xmm2 + pxor xmm2,xmm3 + movdqu XMMWORD PTR[(-16)+rax],xmm0 + + dec r10d + jnz $L$oop_key192 + + mov DWORD PTR[32+rax],edx + xor eax,eax + jmp $L$enc_key_ret + ALIGN 16 $L$14rounds:: movups xmm2,XMMWORD PTR[16+rcx] mov edx,13 lea rax,QWORD PTR[16+rax] + cmp r10d,268435456 + je $L$14rounds_alt + movups XMMWORD PTR[r8],xmm0 movups XMMWORD PTR[16+r8],xmm2 DB 102,15,58,223,202,1 @@ -3295,10 +3631,70 @@ DB 102,15,58,223,202,64 xor rax,rax jmp $L$enc_key_ret +ALIGN 16 +$L$14rounds_alt:: + movdqa xmm5,XMMWORD PTR[$L$key_rotate] + movdqa xmm4,XMMWORD PTR[$L$key_rcon1] + mov r10d,7 + movdqu XMMWORD PTR[r8],xmm0 + movdqa xmm1,xmm2 + movdqu XMMWORD PTR[16+r8],xmm2 + jmp $L$oop_key256 + +ALIGN 16 +$L$oop_key256:: +DB 102,15,56,0,213 +DB 102,15,56,221,212 + + movdqa xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm0,xmm3 + pslld xmm4,1 + + pxor xmm0,xmm2 + movdqu XMMWORD PTR[rax],xmm0 + + dec r10d + jz $L$done_key256 + + pshufd xmm2,xmm0,0ffh + pxor xmm3,xmm3 +DB 102,15,56,221,211 + + movdqa xmm3,xmm1 + pslldq xmm1,4 + pxor xmm3,xmm1 + pslldq xmm1,4 + pxor xmm3,xmm1 + pslldq xmm1,4 + pxor xmm1,xmm3 + + pxor xmm2,xmm1 + movdqu XMMWORD PTR[16+rax],xmm2 + lea rax,QWORD PTR[32+rax] + movdqa xmm1,xmm2 + + jmp $L$oop_key256 + +$L$done_key256:: + mov DWORD PTR[16+rax],edx + xor eax,eax + jmp $L$enc_key_ret + ALIGN 16 $L$bad_keybits:: mov rax,-2 $L$enc_key_ret:: + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 add rsp,8 DB 0F3h,0C3h ;repret $L$SEH_end_set_encrypt_key:: @@ -3384,6 +3780,14 @@ $L$xts_magic:: DD 087h,0,1,0 $L$increment1:: DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +$L$key_rotate:: + DD 00c0f0e0dh,00c0f0e0dh,00c0f0e0dh,00c0f0e0dh +$L$key_rotate192:: + DD 004070605h,004070605h,004070605h,004070605h +$L$key_rcon1:: + DD 1,1,1,1 +$L$key_rcon1b:: + DD 01bh,01bh,01bh,01bh DB 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 DB 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 @@ -3489,7 +3893,7 @@ cbc_se_handler PROC PRIVATE mov rax,QWORD PTR[152+r8] mov rbx,QWORD PTR[248+r8] - lea r10,QWORD PTR[$L$cbc_decrypt] + lea r10,QWORD PTR[$L$cbc_decrypt_bulk] cmp rbx,r10 jb $L$common_seh_tail diff --git a/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm b/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm index 64a1b42cfeedcb..9fdd91d0165574 100644 --- a/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm +++ b/deps/openssl/asm/x64-win32-masm/bn/x86_64-mont5.asm @@ -3001,11 +3001,16 @@ PUBLIC bn_get_bits5 ALIGN 16 bn_get_bits5 PROC PUBLIC - mov r10,rcx + lea r10,QWORD PTR[rcx] + lea r11,QWORD PTR[1+rcx] mov ecx,edx - shr edx,3 - movzx eax,WORD PTR[rdx*1+r10] - and ecx,7 + shr edx,4 + and ecx,15 + lea eax,DWORD PTR[((-8))+rcx] + cmp ecx,11 + cmova r10,r11 + cmova ecx,eax + movzx eax,WORD PTR[rdx*2+r10] shr eax,cl and eax,31 DB 0F3h,0C3h ;repret diff --git a/deps/openssl/asm/x86-elf-gas/aes/aesni-x86.s b/deps/openssl/asm/x86-elf-gas/aes/aesni-x86.s index a68f7cdbe9cbe6..3bbc4e47d612a8 100644 --- a/deps/openssl/asm/x86-elf-gas/aes/aesni-x86.s +++ b/deps/openssl/asm/x86-elf-gas/aes/aesni-x86.s @@ -21,7 +21,10 @@ aesni_encrypt: leal 16(%edx),%edx jnz .L000enc1_loop_1 .byte 102,15,56,221,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 movups %xmm2,(%eax) + pxor %xmm2,%xmm2 ret .size aesni_encrypt,.-.L_aesni_encrypt_begin .globl aesni_decrypt @@ -45,7 +48,10 @@ aesni_decrypt: leal 16(%edx),%edx jnz .L001dec1_loop_2 .byte 102,15,56,223,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 movups %xmm2,(%eax) + pxor %xmm2,%xmm2 ret .size aesni_decrypt,.-.L_aesni_decrypt_begin .type _aesni_encrypt2,@function @@ -259,17 +265,15 @@ _aesni_encrypt6: negl %ecx .byte 102,15,56,220,225 pxor %xmm0,%xmm7 + movups (%edx,%ecx,1),%xmm0 addl $16,%ecx -.byte 102,15,56,220,233 -.byte 102,15,56,220,241 -.byte 102,15,56,220,249 - movups -16(%edx,%ecx,1),%xmm0 - jmp .L_aesni_encrypt6_enter + jmp .L008_aesni_encrypt6_inner .align 16 -.L008enc6_loop: +.L009enc6_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 +.L008_aesni_encrypt6_inner: .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 @@ -283,7 +287,7 @@ _aesni_encrypt6: .byte 102,15,56,220,240 .byte 102,15,56,220,248 movups -16(%edx,%ecx,1),%xmm0 - jnz .L008enc6_loop + jnz .L009enc6_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 @@ -315,17 +319,15 @@ _aesni_decrypt6: negl %ecx .byte 102,15,56,222,225 pxor %xmm0,%xmm7 + movups (%edx,%ecx,1),%xmm0 addl $16,%ecx -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 - movups -16(%edx,%ecx,1),%xmm0 - jmp .L_aesni_decrypt6_enter + jmp .L010_aesni_decrypt6_inner .align 16 -.L009dec6_loop: +.L011dec6_loop: .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 +.L010_aesni_decrypt6_inner: .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 @@ -339,7 +341,7 @@ _aesni_decrypt6: .byte 102,15,56,222,240 .byte 102,15,56,222,248 movups -16(%edx,%ecx,1),%xmm0 - jnz .L009dec6_loop + jnz .L011dec6_loop .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 @@ -369,14 +371,14 @@ aesni_ecb_encrypt: movl 32(%esp),%edx movl 36(%esp),%ebx andl $-16,%eax - jz .L010ecb_ret + jz .L012ecb_ret movl 240(%edx),%ecx testl %ebx,%ebx - jz .L011ecb_decrypt + jz .L013ecb_decrypt movl %edx,%ebp movl %ecx,%ebx cmpl $96,%eax - jb .L012ecb_enc_tail + jb .L014ecb_enc_tail movdqu (%esi),%xmm2 movdqu 16(%esi),%xmm3 movdqu 32(%esi),%xmm4 @@ -385,9 +387,9 @@ aesni_ecb_encrypt: movdqu 80(%esi),%xmm7 leal 96(%esi),%esi subl $96,%eax - jmp .L013ecb_enc_loop6_enter + jmp .L015ecb_enc_loop6_enter .align 16 -.L014ecb_enc_loop6: +.L016ecb_enc_loop6: movups %xmm2,(%edi) movdqu (%esi),%xmm2 movups %xmm3,16(%edi) @@ -402,12 +404,12 @@ aesni_ecb_encrypt: leal 96(%edi),%edi movdqu 80(%esi),%xmm7 leal 96(%esi),%esi -.L013ecb_enc_loop6_enter: +.L015ecb_enc_loop6_enter: call _aesni_encrypt6 movl %ebp,%edx movl %ebx,%ecx subl $96,%eax - jnc .L014ecb_enc_loop6 + jnc .L016ecb_enc_loop6 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) @@ -416,18 +418,18 @@ aesni_ecb_encrypt: movups %xmm7,80(%edi) leal 96(%edi),%edi addl $96,%eax - jz .L010ecb_ret -.L012ecb_enc_tail: + jz .L012ecb_ret +.L014ecb_enc_tail: movups (%esi),%xmm2 cmpl $32,%eax - jb .L015ecb_enc_one + jb .L017ecb_enc_one movups 16(%esi),%xmm3 - je .L016ecb_enc_two + je .L018ecb_enc_two movups 32(%esi),%xmm4 cmpl $64,%eax - jb .L017ecb_enc_three + jb .L019ecb_enc_three movups 48(%esi),%xmm5 - je .L018ecb_enc_four + je .L020ecb_enc_four movups 64(%esi),%xmm6 xorps %xmm7,%xmm7 call _aesni_encrypt6 @@ -436,49 +438,49 @@ aesni_ecb_encrypt: movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) - jmp .L010ecb_ret + jmp .L012ecb_ret .align 16 -.L015ecb_enc_one: +.L017ecb_enc_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L019enc1_loop_3: +.L021enc1_loop_3: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L019enc1_loop_3 + jnz .L021enc1_loop_3 .byte 102,15,56,221,209 movups %xmm2,(%edi) - jmp .L010ecb_ret + jmp .L012ecb_ret .align 16 -.L016ecb_enc_two: +.L018ecb_enc_two: call _aesni_encrypt2 movups %xmm2,(%edi) movups %xmm3,16(%edi) - jmp .L010ecb_ret + jmp .L012ecb_ret .align 16 -.L017ecb_enc_three: +.L019ecb_enc_three: call _aesni_encrypt3 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) - jmp .L010ecb_ret + jmp .L012ecb_ret .align 16 -.L018ecb_enc_four: +.L020ecb_enc_four: call _aesni_encrypt4 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) movups %xmm5,48(%edi) - jmp .L010ecb_ret + jmp .L012ecb_ret .align 16 -.L011ecb_decrypt: +.L013ecb_decrypt: movl %edx,%ebp movl %ecx,%ebx cmpl $96,%eax - jb .L020ecb_dec_tail + jb .L022ecb_dec_tail movdqu (%esi),%xmm2 movdqu 16(%esi),%xmm3 movdqu 32(%esi),%xmm4 @@ -487,9 +489,9 @@ aesni_ecb_encrypt: movdqu 80(%esi),%xmm7 leal 96(%esi),%esi subl $96,%eax - jmp .L021ecb_dec_loop6_enter + jmp .L023ecb_dec_loop6_enter .align 16 -.L022ecb_dec_loop6: +.L024ecb_dec_loop6: movups %xmm2,(%edi) movdqu (%esi),%xmm2 movups %xmm3,16(%edi) @@ -504,12 +506,12 @@ aesni_ecb_encrypt: leal 96(%edi),%edi movdqu 80(%esi),%xmm7 leal 96(%esi),%esi -.L021ecb_dec_loop6_enter: +.L023ecb_dec_loop6_enter: call _aesni_decrypt6 movl %ebp,%edx movl %ebx,%ecx subl $96,%eax - jnc .L022ecb_dec_loop6 + jnc .L024ecb_dec_loop6 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) @@ -518,18 +520,18 @@ aesni_ecb_encrypt: movups %xmm7,80(%edi) leal 96(%edi),%edi addl $96,%eax - jz .L010ecb_ret -.L020ecb_dec_tail: + jz .L012ecb_ret +.L022ecb_dec_tail: movups (%esi),%xmm2 cmpl $32,%eax - jb .L023ecb_dec_one + jb .L025ecb_dec_one movups 16(%esi),%xmm3 - je .L024ecb_dec_two + je .L026ecb_dec_two movups 32(%esi),%xmm4 cmpl $64,%eax - jb .L025ecb_dec_three + jb .L027ecb_dec_three movups 48(%esi),%xmm5 - je .L026ecb_dec_four + je .L028ecb_dec_four movups 64(%esi),%xmm6 xorps %xmm7,%xmm7 call _aesni_decrypt6 @@ -538,43 +540,51 @@ aesni_ecb_encrypt: movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) - jmp .L010ecb_ret + jmp .L012ecb_ret .align 16 -.L023ecb_dec_one: +.L025ecb_dec_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L027dec1_loop_4: +.L029dec1_loop_4: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L027dec1_loop_4 + jnz .L029dec1_loop_4 .byte 102,15,56,223,209 movups %xmm2,(%edi) - jmp .L010ecb_ret + jmp .L012ecb_ret .align 16 -.L024ecb_dec_two: +.L026ecb_dec_two: call _aesni_decrypt2 movups %xmm2,(%edi) movups %xmm3,16(%edi) - jmp .L010ecb_ret + jmp .L012ecb_ret .align 16 -.L025ecb_dec_three: +.L027ecb_dec_three: call _aesni_decrypt3 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) - jmp .L010ecb_ret + jmp .L012ecb_ret .align 16 -.L026ecb_dec_four: +.L028ecb_dec_four: call _aesni_decrypt4 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) movups %xmm5,48(%edi) -.L010ecb_ret: +.L012ecb_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 popl %edi popl %esi popl %ebx @@ -621,7 +631,7 @@ aesni_ccm64_encrypt_blocks: leal 32(%edx,%ecx,1),%edx subl %ecx,%ebx .byte 102,15,56,0,253 -.L028ccm64_enc_outer: +.L030ccm64_enc_outer: movups (%ebp),%xmm0 movl %ebx,%ecx movups (%esi),%xmm6 @@ -630,7 +640,7 @@ aesni_ccm64_encrypt_blocks: xorps %xmm6,%xmm0 xorps %xmm0,%xmm3 movups 32(%ebp),%xmm0 -.L029ccm64_enc2_loop: +.L031ccm64_enc2_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 movups (%edx,%ecx,1),%xmm1 @@ -638,7 +648,7 @@ aesni_ccm64_encrypt_blocks: .byte 102,15,56,220,208 .byte 102,15,56,220,216 movups -16(%edx,%ecx,1),%xmm0 - jnz .L029ccm64_enc2_loop + jnz .L031ccm64_enc2_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 paddq 16(%esp),%xmm7 @@ -651,10 +661,18 @@ aesni_ccm64_encrypt_blocks: movups %xmm6,(%edi) .byte 102,15,56,0,213 leal 16(%edi),%edi - jnz .L028ccm64_enc_outer + jnz .L030ccm64_enc_outer movl 48(%esp),%esp movl 40(%esp),%edi movups %xmm3,(%edi) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 popl %edi popl %esi popl %ebx @@ -702,12 +720,12 @@ aesni_ccm64_decrypt_blocks: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L030enc1_loop_5: +.L032enc1_loop_5: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L030enc1_loop_5 + jnz .L032enc1_loop_5 .byte 102,15,56,221,209 shll $4,%ebx movl $16,%ecx @@ -717,16 +735,16 @@ aesni_ccm64_decrypt_blocks: subl %ebx,%ecx leal 32(%ebp,%ebx,1),%edx movl %ecx,%ebx - jmp .L031ccm64_dec_outer + jmp .L033ccm64_dec_outer .align 16 -.L031ccm64_dec_outer: +.L033ccm64_dec_outer: xorps %xmm2,%xmm6 movdqa %xmm7,%xmm2 movups %xmm6,(%edi) leal 16(%edi),%edi .byte 102,15,56,0,213 subl $1,%eax - jz .L032ccm64_dec_break + jz .L034ccm64_dec_break movups (%ebp),%xmm0 movl %ebx,%ecx movups 16(%ebp),%xmm1 @@ -734,7 +752,7 @@ aesni_ccm64_decrypt_blocks: xorps %xmm0,%xmm2 xorps %xmm6,%xmm3 movups 32(%ebp),%xmm0 -.L033ccm64_dec2_loop: +.L035ccm64_dec2_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 movups (%edx,%ecx,1),%xmm1 @@ -742,7 +760,7 @@ aesni_ccm64_decrypt_blocks: .byte 102,15,56,220,208 .byte 102,15,56,220,216 movups -16(%edx,%ecx,1),%xmm0 - jnz .L033ccm64_dec2_loop + jnz .L035ccm64_dec2_loop movups (%esi),%xmm6 paddq 16(%esp),%xmm7 .byte 102,15,56,220,209 @@ -750,9 +768,9 @@ aesni_ccm64_decrypt_blocks: .byte 102,15,56,221,208 .byte 102,15,56,221,216 leal 16(%esi),%esi - jmp .L031ccm64_dec_outer + jmp .L033ccm64_dec_outer .align 16 -.L032ccm64_dec_break: +.L034ccm64_dec_break: movl 240(%ebp),%ecx movl %ebp,%edx movups (%edx),%xmm0 @@ -760,16 +778,24 @@ aesni_ccm64_decrypt_blocks: xorps %xmm0,%xmm6 leal 32(%edx),%edx xorps %xmm6,%xmm3 -.L034enc1_loop_6: +.L036enc1_loop_6: .byte 102,15,56,220,217 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L034enc1_loop_6 + jnz .L036enc1_loop_6 .byte 102,15,56,221,217 movl 48(%esp),%esp movl 40(%esp),%edi movups %xmm3,(%edi) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 popl %edi popl %esi popl %ebx @@ -795,7 +821,7 @@ aesni_ctr32_encrypt_blocks: andl $-16,%esp movl %ebp,80(%esp) cmpl $1,%eax - je .L035ctr32_one_shortcut + je .L037ctr32_one_shortcut movdqu (%ebx),%xmm7 movl $202182159,(%esp) movl $134810123,4(%esp) @@ -833,7 +859,7 @@ aesni_ctr32_encrypt_blocks: pshufd $192,%xmm0,%xmm2 pshufd $128,%xmm0,%xmm3 cmpl $6,%eax - jb .L036ctr32_tail + jb .L038ctr32_tail pxor %xmm6,%xmm7 shll $4,%ecx movl $16,%ebx @@ -842,9 +868,9 @@ aesni_ctr32_encrypt_blocks: subl %ecx,%ebx leal 32(%edx,%ecx,1),%edx subl $6,%eax - jmp .L037ctr32_loop6 + jmp .L039ctr32_loop6 .align 16 -.L037ctr32_loop6: +.L039ctr32_loop6: pshufd $64,%xmm0,%xmm4 movdqa 32(%esp),%xmm0 pshufd $192,%xmm1,%xmm5 @@ -898,27 +924,27 @@ aesni_ctr32_encrypt_blocks: leal 96(%edi),%edi pshufd $128,%xmm0,%xmm3 subl $6,%eax - jnc .L037ctr32_loop6 + jnc .L039ctr32_loop6 addl $6,%eax - jz .L038ctr32_ret + jz .L040ctr32_ret movdqu (%ebp),%xmm7 movl %ebp,%edx pxor 32(%esp),%xmm7 movl 240(%ebp),%ecx -.L036ctr32_tail: +.L038ctr32_tail: por %xmm7,%xmm2 cmpl $2,%eax - jb .L039ctr32_one + jb .L041ctr32_one pshufd $64,%xmm0,%xmm4 por %xmm7,%xmm3 - je .L040ctr32_two + je .L042ctr32_two pshufd $192,%xmm1,%xmm5 por %xmm7,%xmm4 cmpl $4,%eax - jb .L041ctr32_three + jb .L043ctr32_three pshufd $128,%xmm1,%xmm6 por %xmm7,%xmm5 - je .L042ctr32_four + je .L044ctr32_four por %xmm7,%xmm6 call _aesni_encrypt6 movups (%esi),%xmm1 @@ -936,29 +962,29 @@ aesni_ctr32_encrypt_blocks: movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) - jmp .L038ctr32_ret + jmp .L040ctr32_ret .align 16 -.L035ctr32_one_shortcut: +.L037ctr32_one_shortcut: movups (%ebx),%xmm2 movl 240(%edx),%ecx -.L039ctr32_one: +.L041ctr32_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L043enc1_loop_7: +.L045enc1_loop_7: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L043enc1_loop_7 + jnz .L045enc1_loop_7 .byte 102,15,56,221,209 movups (%esi),%xmm6 xorps %xmm2,%xmm6 movups %xmm6,(%edi) - jmp .L038ctr32_ret + jmp .L040ctr32_ret .align 16 -.L040ctr32_two: +.L042ctr32_two: call _aesni_encrypt2 movups (%esi),%xmm5 movups 16(%esi),%xmm6 @@ -966,9 +992,9 @@ aesni_ctr32_encrypt_blocks: xorps %xmm6,%xmm3 movups %xmm2,(%edi) movups %xmm3,16(%edi) - jmp .L038ctr32_ret + jmp .L040ctr32_ret .align 16 -.L041ctr32_three: +.L043ctr32_three: call _aesni_encrypt3 movups (%esi),%xmm5 movups 16(%esi),%xmm6 @@ -979,9 +1005,9 @@ aesni_ctr32_encrypt_blocks: xorps %xmm7,%xmm4 movups %xmm3,16(%edi) movups %xmm4,32(%edi) - jmp .L038ctr32_ret + jmp .L040ctr32_ret .align 16 -.L042ctr32_four: +.L044ctr32_four: call _aesni_encrypt4 movups (%esi),%xmm6 movups 16(%esi),%xmm7 @@ -995,7 +1021,18 @@ aesni_ctr32_encrypt_blocks: xorps %xmm0,%xmm5 movups %xmm4,32(%edi) movups %xmm5,48(%edi) -.L038ctr32_ret: +.L040ctr32_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 movl 80(%esp),%esp popl %edi popl %esi @@ -1020,12 +1057,12 @@ aesni_xts_encrypt: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L044enc1_loop_8: +.L046enc1_loop_8: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L044enc1_loop_8 + jnz .L046enc1_loop_8 .byte 102,15,56,221,209 movl 20(%esp),%esi movl 24(%esp),%edi @@ -1049,14 +1086,14 @@ aesni_xts_encrypt: movl %edx,%ebp movl %ecx,%ebx subl $96,%eax - jc .L045xts_enc_short + jc .L047xts_enc_short shll $4,%ecx movl $16,%ebx subl %ecx,%ebx leal 32(%edx,%ecx,1),%edx - jmp .L046xts_enc_loop6 + jmp .L048xts_enc_loop6 .align 16 -.L046xts_enc_loop6: +.L048xts_enc_loop6: pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,(%esp) @@ -1145,23 +1182,23 @@ aesni_xts_encrypt: pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 subl $96,%eax - jnc .L046xts_enc_loop6 + jnc .L048xts_enc_loop6 movl 240(%ebp),%ecx movl %ebp,%edx movl %ecx,%ebx -.L045xts_enc_short: +.L047xts_enc_short: addl $96,%eax - jz .L047xts_enc_done6x + jz .L049xts_enc_done6x movdqa %xmm1,%xmm5 cmpl $32,%eax - jb .L048xts_enc_one + jb .L050xts_enc_one pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 - je .L049xts_enc_two + je .L051xts_enc_two pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm6 @@ -1170,7 +1207,7 @@ aesni_xts_encrypt: pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 cmpl $64,%eax - jb .L050xts_enc_three + jb .L052xts_enc_three pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm7 @@ -1180,7 +1217,7 @@ aesni_xts_encrypt: pxor %xmm2,%xmm1 movdqa %xmm5,(%esp) movdqa %xmm6,16(%esp) - je .L051xts_enc_four + je .L053xts_enc_four movdqa %xmm7,32(%esp) pshufd $19,%xmm0,%xmm7 movdqa %xmm1,48(%esp) @@ -1212,9 +1249,9 @@ aesni_xts_encrypt: movups %xmm5,48(%edi) movups %xmm6,64(%edi) leal 80(%edi),%edi - jmp .L052xts_enc_done + jmp .L054xts_enc_done .align 16 -.L048xts_enc_one: +.L050xts_enc_one: movups (%esi),%xmm2 leal 16(%esi),%esi xorps %xmm5,%xmm2 @@ -1222,20 +1259,20 @@ aesni_xts_encrypt: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L053enc1_loop_9: +.L055enc1_loop_9: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L053enc1_loop_9 + jnz .L055enc1_loop_9 .byte 102,15,56,221,209 xorps %xmm5,%xmm2 movups %xmm2,(%edi) leal 16(%edi),%edi movdqa %xmm5,%xmm1 - jmp .L052xts_enc_done + jmp .L054xts_enc_done .align 16 -.L049xts_enc_two: +.L051xts_enc_two: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1249,9 +1286,9 @@ aesni_xts_encrypt: movups %xmm3,16(%edi) leal 32(%edi),%edi movdqa %xmm6,%xmm1 - jmp .L052xts_enc_done + jmp .L054xts_enc_done .align 16 -.L050xts_enc_three: +.L052xts_enc_three: movaps %xmm1,%xmm7 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1269,9 +1306,9 @@ aesni_xts_encrypt: movups %xmm4,32(%edi) leal 48(%edi),%edi movdqa %xmm7,%xmm1 - jmp .L052xts_enc_done + jmp .L054xts_enc_done .align 16 -.L051xts_enc_four: +.L053xts_enc_four: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1293,28 +1330,28 @@ aesni_xts_encrypt: movups %xmm5,48(%edi) leal 64(%edi),%edi movdqa %xmm6,%xmm1 - jmp .L052xts_enc_done + jmp .L054xts_enc_done .align 16 -.L047xts_enc_done6x: +.L049xts_enc_done6x: movl 112(%esp),%eax andl $15,%eax - jz .L054xts_enc_ret + jz .L056xts_enc_ret movdqa %xmm1,%xmm5 movl %eax,112(%esp) - jmp .L055xts_enc_steal + jmp .L057xts_enc_steal .align 16 -.L052xts_enc_done: +.L054xts_enc_done: movl 112(%esp),%eax pxor %xmm0,%xmm0 andl $15,%eax - jz .L054xts_enc_ret + jz .L056xts_enc_ret pcmpgtd %xmm1,%xmm0 movl %eax,112(%esp) pshufd $19,%xmm0,%xmm5 paddq %xmm1,%xmm1 pand 96(%esp),%xmm5 pxor %xmm1,%xmm5 -.L055xts_enc_steal: +.L057xts_enc_steal: movzbl (%esi),%ecx movzbl -16(%edi),%edx leal 1(%esi),%esi @@ -1322,7 +1359,7 @@ aesni_xts_encrypt: movb %dl,(%edi) leal 1(%edi),%edi subl $1,%eax - jnz .L055xts_enc_steal + jnz .L057xts_enc_steal subl 112(%esp),%edi movl %ebp,%edx movl %ebx,%ecx @@ -1332,16 +1369,30 @@ aesni_xts_encrypt: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L056enc1_loop_10: +.L058enc1_loop_10: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L056enc1_loop_10 + jnz .L058enc1_loop_10 .byte 102,15,56,221,209 xorps %xmm5,%xmm2 movups %xmm2,-16(%edi) -.L054xts_enc_ret: +.L056xts_enc_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + movdqa %xmm0,(%esp) + pxor %xmm3,%xmm3 + movdqa %xmm0,16(%esp) + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 + movdqa %xmm0,80(%esp) movl 116(%esp),%esp popl %edi popl %esi @@ -1366,12 +1417,12 @@ aesni_xts_decrypt: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L057enc1_loop_11: +.L059enc1_loop_11: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L057enc1_loop_11 + jnz .L059enc1_loop_11 .byte 102,15,56,221,209 movl 20(%esp),%esi movl 24(%esp),%edi @@ -1400,14 +1451,14 @@ aesni_xts_decrypt: pcmpgtd %xmm1,%xmm0 andl $-16,%eax subl $96,%eax - jc .L058xts_dec_short + jc .L060xts_dec_short shll $4,%ecx movl $16,%ebx subl %ecx,%ebx leal 32(%edx,%ecx,1),%edx - jmp .L059xts_dec_loop6 + jmp .L061xts_dec_loop6 .align 16 -.L059xts_dec_loop6: +.L061xts_dec_loop6: pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,(%esp) @@ -1496,23 +1547,23 @@ aesni_xts_decrypt: pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 subl $96,%eax - jnc .L059xts_dec_loop6 + jnc .L061xts_dec_loop6 movl 240(%ebp),%ecx movl %ebp,%edx movl %ecx,%ebx -.L058xts_dec_short: +.L060xts_dec_short: addl $96,%eax - jz .L060xts_dec_done6x + jz .L062xts_dec_done6x movdqa %xmm1,%xmm5 cmpl $32,%eax - jb .L061xts_dec_one + jb .L063xts_dec_one pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 - je .L062xts_dec_two + je .L064xts_dec_two pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm6 @@ -1521,7 +1572,7 @@ aesni_xts_decrypt: pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 cmpl $64,%eax - jb .L063xts_dec_three + jb .L065xts_dec_three pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm7 @@ -1531,7 +1582,7 @@ aesni_xts_decrypt: pxor %xmm2,%xmm1 movdqa %xmm5,(%esp) movdqa %xmm6,16(%esp) - je .L064xts_dec_four + je .L066xts_dec_four movdqa %xmm7,32(%esp) pshufd $19,%xmm0,%xmm7 movdqa %xmm1,48(%esp) @@ -1563,9 +1614,9 @@ aesni_xts_decrypt: movups %xmm5,48(%edi) movups %xmm6,64(%edi) leal 80(%edi),%edi - jmp .L065xts_dec_done + jmp .L067xts_dec_done .align 16 -.L061xts_dec_one: +.L063xts_dec_one: movups (%esi),%xmm2 leal 16(%esi),%esi xorps %xmm5,%xmm2 @@ -1573,20 +1624,20 @@ aesni_xts_decrypt: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L066dec1_loop_12: +.L068dec1_loop_12: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L066dec1_loop_12 + jnz .L068dec1_loop_12 .byte 102,15,56,223,209 xorps %xmm5,%xmm2 movups %xmm2,(%edi) leal 16(%edi),%edi movdqa %xmm5,%xmm1 - jmp .L065xts_dec_done + jmp .L067xts_dec_done .align 16 -.L062xts_dec_two: +.L064xts_dec_two: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1600,9 +1651,9 @@ aesni_xts_decrypt: movups %xmm3,16(%edi) leal 32(%edi),%edi movdqa %xmm6,%xmm1 - jmp .L065xts_dec_done + jmp .L067xts_dec_done .align 16 -.L063xts_dec_three: +.L065xts_dec_three: movaps %xmm1,%xmm7 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1620,9 +1671,9 @@ aesni_xts_decrypt: movups %xmm4,32(%edi) leal 48(%edi),%edi movdqa %xmm7,%xmm1 - jmp .L065xts_dec_done + jmp .L067xts_dec_done .align 16 -.L064xts_dec_four: +.L066xts_dec_four: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1644,20 +1695,20 @@ aesni_xts_decrypt: movups %xmm5,48(%edi) leal 64(%edi),%edi movdqa %xmm6,%xmm1 - jmp .L065xts_dec_done + jmp .L067xts_dec_done .align 16 -.L060xts_dec_done6x: +.L062xts_dec_done6x: movl 112(%esp),%eax andl $15,%eax - jz .L067xts_dec_ret + jz .L069xts_dec_ret movl %eax,112(%esp) - jmp .L068xts_dec_only_one_more + jmp .L070xts_dec_only_one_more .align 16 -.L065xts_dec_done: +.L067xts_dec_done: movl 112(%esp),%eax pxor %xmm0,%xmm0 andl $15,%eax - jz .L067xts_dec_ret + jz .L069xts_dec_ret pcmpgtd %xmm1,%xmm0 movl %eax,112(%esp) pshufd $19,%xmm0,%xmm2 @@ -1667,7 +1718,7 @@ aesni_xts_decrypt: pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 -.L068xts_dec_only_one_more: +.L070xts_dec_only_one_more: pshufd $19,%xmm0,%xmm5 movdqa %xmm1,%xmm6 paddq %xmm1,%xmm1 @@ -1681,16 +1732,16 @@ aesni_xts_decrypt: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L069dec1_loop_13: +.L071dec1_loop_13: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L069dec1_loop_13 + jnz .L071dec1_loop_13 .byte 102,15,56,223,209 xorps %xmm5,%xmm2 movups %xmm2,(%edi) -.L070xts_dec_steal: +.L072xts_dec_steal: movzbl 16(%esi),%ecx movzbl (%edi),%edx leal 1(%esi),%esi @@ -1698,7 +1749,7 @@ aesni_xts_decrypt: movb %dl,16(%edi) leal 1(%edi),%edi subl $1,%eax - jnz .L070xts_dec_steal + jnz .L072xts_dec_steal subl 112(%esp),%edi movl %ebp,%edx movl %ebx,%ecx @@ -1708,16 +1759,30 @@ aesni_xts_decrypt: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L071dec1_loop_14: +.L073dec1_loop_14: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L071dec1_loop_14 + jnz .L073dec1_loop_14 .byte 102,15,56,223,209 xorps %xmm6,%xmm2 movups %xmm2,(%edi) -.L067xts_dec_ret: +.L069xts_dec_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + movdqa %xmm0,(%esp) + pxor %xmm3,%xmm3 + movdqa %xmm0,16(%esp) + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 + movdqa %xmm0,80(%esp) movl 116(%esp),%esp popl %edi popl %esi @@ -1743,7 +1808,7 @@ aesni_cbc_encrypt: movl 32(%esp),%edx movl 36(%esp),%ebp testl %eax,%eax - jz .L072cbc_abort + jz .L074cbc_abort cmpl $0,40(%esp) xchgl %esp,%ebx movups (%ebp),%xmm7 @@ -1751,14 +1816,14 @@ aesni_cbc_encrypt: movl %edx,%ebp movl %ebx,16(%esp) movl %ecx,%ebx - je .L073cbc_decrypt + je .L075cbc_decrypt movaps %xmm7,%xmm2 cmpl $16,%eax - jb .L074cbc_enc_tail + jb .L076cbc_enc_tail subl $16,%eax - jmp .L075cbc_enc_loop + jmp .L077cbc_enc_loop .align 16 -.L075cbc_enc_loop: +.L077cbc_enc_loop: movups (%esi),%xmm7 leal 16(%esi),%esi movups (%edx),%xmm0 @@ -1766,24 +1831,25 @@ aesni_cbc_encrypt: xorps %xmm0,%xmm7 leal 32(%edx),%edx xorps %xmm7,%xmm2 -.L076enc1_loop_15: +.L078enc1_loop_15: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L076enc1_loop_15 + jnz .L078enc1_loop_15 .byte 102,15,56,221,209 movl %ebx,%ecx movl %ebp,%edx movups %xmm2,(%edi) leal 16(%edi),%edi subl $16,%eax - jnc .L075cbc_enc_loop + jnc .L077cbc_enc_loop addl $16,%eax - jnz .L074cbc_enc_tail + jnz .L076cbc_enc_tail movaps %xmm2,%xmm7 - jmp .L077cbc_ret -.L074cbc_enc_tail: + pxor %xmm2,%xmm2 + jmp .L079cbc_ret +.L076cbc_enc_tail: movl %eax,%ecx .long 2767451785 movl $16,%ecx @@ -1794,20 +1860,20 @@ aesni_cbc_encrypt: movl %ebx,%ecx movl %edi,%esi movl %ebp,%edx - jmp .L075cbc_enc_loop + jmp .L077cbc_enc_loop .align 16 -.L073cbc_decrypt: +.L075cbc_decrypt: cmpl $80,%eax - jbe .L078cbc_dec_tail + jbe .L080cbc_dec_tail movaps %xmm7,(%esp) subl $80,%eax - jmp .L079cbc_dec_loop6_enter + jmp .L081cbc_dec_loop6_enter .align 16 -.L080cbc_dec_loop6: +.L082cbc_dec_loop6: movaps %xmm0,(%esp) movups %xmm7,(%edi) leal 16(%edi),%edi -.L079cbc_dec_loop6_enter: +.L081cbc_dec_loop6_enter: movdqu (%esi),%xmm2 movdqu 16(%esi),%xmm3 movdqu 32(%esi),%xmm4 @@ -1837,28 +1903,28 @@ aesni_cbc_encrypt: movups %xmm6,64(%edi) leal 80(%edi),%edi subl $96,%eax - ja .L080cbc_dec_loop6 + ja .L082cbc_dec_loop6 movaps %xmm7,%xmm2 movaps %xmm0,%xmm7 addl $80,%eax - jle .L081cbc_dec_tail_collected + jle .L083cbc_dec_clear_tail_collected movups %xmm2,(%edi) leal 16(%edi),%edi -.L078cbc_dec_tail: +.L080cbc_dec_tail: movups (%esi),%xmm2 movaps %xmm2,%xmm6 cmpl $16,%eax - jbe .L082cbc_dec_one + jbe .L084cbc_dec_one movups 16(%esi),%xmm3 movaps %xmm3,%xmm5 cmpl $32,%eax - jbe .L083cbc_dec_two + jbe .L085cbc_dec_two movups 32(%esi),%xmm4 cmpl $48,%eax - jbe .L084cbc_dec_three + jbe .L086cbc_dec_three movups 48(%esi),%xmm5 cmpl $64,%eax - jbe .L085cbc_dec_four + jbe .L087cbc_dec_four movups 64(%esi),%xmm6 movaps %xmm7,(%esp) movups (%esi),%xmm2 @@ -1876,55 +1942,62 @@ aesni_cbc_encrypt: xorps %xmm0,%xmm6 movups %xmm2,(%edi) movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 movups %xmm4,32(%edi) + pxor %xmm4,%xmm4 movups %xmm5,48(%edi) + pxor %xmm5,%xmm5 leal 64(%edi),%edi movaps %xmm6,%xmm2 + pxor %xmm6,%xmm6 subl $80,%eax - jmp .L081cbc_dec_tail_collected + jmp .L088cbc_dec_tail_collected .align 16 -.L082cbc_dec_one: +.L084cbc_dec_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L086dec1_loop_16: +.L089dec1_loop_16: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L086dec1_loop_16 + jnz .L089dec1_loop_16 .byte 102,15,56,223,209 xorps %xmm7,%xmm2 movaps %xmm6,%xmm7 subl $16,%eax - jmp .L081cbc_dec_tail_collected + jmp .L088cbc_dec_tail_collected .align 16 -.L083cbc_dec_two: +.L085cbc_dec_two: call _aesni_decrypt2 xorps %xmm7,%xmm2 xorps %xmm6,%xmm3 movups %xmm2,(%edi) movaps %xmm3,%xmm2 + pxor %xmm3,%xmm3 leal 16(%edi),%edi movaps %xmm5,%xmm7 subl $32,%eax - jmp .L081cbc_dec_tail_collected + jmp .L088cbc_dec_tail_collected .align 16 -.L084cbc_dec_three: +.L086cbc_dec_three: call _aesni_decrypt3 xorps %xmm7,%xmm2 xorps %xmm6,%xmm3 xorps %xmm5,%xmm4 movups %xmm2,(%edi) movaps %xmm4,%xmm2 + pxor %xmm4,%xmm4 movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 leal 32(%edi),%edi movups 32(%esi),%xmm7 subl $48,%eax - jmp .L081cbc_dec_tail_collected + jmp .L088cbc_dec_tail_collected .align 16 -.L085cbc_dec_four: +.L087cbc_dec_four: call _aesni_decrypt4 movups 16(%esi),%xmm1 movups 32(%esi),%xmm0 @@ -1934,28 +2007,44 @@ aesni_cbc_encrypt: movups %xmm2,(%edi) xorps %xmm1,%xmm4 movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 xorps %xmm0,%xmm5 movups %xmm4,32(%edi) + pxor %xmm4,%xmm4 leal 48(%edi),%edi movaps %xmm5,%xmm2 + pxor %xmm5,%xmm5 subl $64,%eax -.L081cbc_dec_tail_collected: + jmp .L088cbc_dec_tail_collected +.align 16 +.L083cbc_dec_clear_tail_collected: + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 +.L088cbc_dec_tail_collected: andl $15,%eax - jnz .L087cbc_dec_tail_partial + jnz .L090cbc_dec_tail_partial movups %xmm2,(%edi) - jmp .L077cbc_ret + pxor %xmm0,%xmm0 + jmp .L079cbc_ret .align 16 -.L087cbc_dec_tail_partial: +.L090cbc_dec_tail_partial: movaps %xmm2,(%esp) + pxor %xmm0,%xmm0 movl $16,%ecx movl %esp,%esi subl %eax,%ecx .long 2767451785 -.L077cbc_ret: + movdqa %xmm2,(%esp) +.L079cbc_ret: movl 16(%esp),%esp movl 36(%esp),%ebp + pxor %xmm2,%xmm2 + pxor %xmm1,%xmm1 movups %xmm7,(%ebp) -.L072cbc_abort: + pxor %xmm7,%xmm7 +.L074cbc_abort: popl %edi popl %esi popl %ebx @@ -1965,52 +2054,62 @@ aesni_cbc_encrypt: .type _aesni_set_encrypt_key,@function .align 16 _aesni_set_encrypt_key: + pushl %ebp + pushl %ebx testl %eax,%eax - jz .L088bad_pointer + jz .L091bad_pointer testl %edx,%edx - jz .L088bad_pointer + jz .L091bad_pointer + call .L092pic +.L092pic: + popl %ebx + leal .Lkey_const-.L092pic(%ebx),%ebx + leal OPENSSL_ia32cap_P,%ebp movups (%eax),%xmm0 xorps %xmm4,%xmm4 + movl 4(%ebp),%ebp leal 16(%edx),%edx + andl $268437504,%ebp cmpl $256,%ecx - je .L08914rounds + je .L09314rounds cmpl $192,%ecx - je .L09012rounds + je .L09412rounds cmpl $128,%ecx - jne .L091bad_keybits + jne .L095bad_keybits .align 16 -.L09210rounds: +.L09610rounds: + cmpl $268435456,%ebp + je .L09710rounds_alt movl $9,%ecx movups %xmm0,-16(%edx) .byte 102,15,58,223,200,1 - call .L093key_128_cold + call .L098key_128_cold .byte 102,15,58,223,200,2 - call .L094key_128 + call .L099key_128 .byte 102,15,58,223,200,4 - call .L094key_128 + call .L099key_128 .byte 102,15,58,223,200,8 - call .L094key_128 + call .L099key_128 .byte 102,15,58,223,200,16 - call .L094key_128 + call .L099key_128 .byte 102,15,58,223,200,32 - call .L094key_128 + call .L099key_128 .byte 102,15,58,223,200,64 - call .L094key_128 + call .L099key_128 .byte 102,15,58,223,200,128 - call .L094key_128 + call .L099key_128 .byte 102,15,58,223,200,27 - call .L094key_128 + call .L099key_128 .byte 102,15,58,223,200,54 - call .L094key_128 + call .L099key_128 movups %xmm0,(%edx) movl %ecx,80(%edx) - xorl %eax,%eax - ret + jmp .L100good_key .align 16 -.L094key_128: +.L099key_128: movups %xmm0,(%edx) leal 16(%edx),%edx -.L093key_128_cold: +.L098key_128_cold: shufps $16,%xmm0,%xmm4 xorps %xmm4,%xmm0 shufps $140,%xmm0,%xmm4 @@ -2019,38 +2118,91 @@ _aesni_set_encrypt_key: xorps %xmm1,%xmm0 ret .align 16 -.L09012rounds: +.L09710rounds_alt: + movdqa (%ebx),%xmm5 + movl $8,%ecx + movdqa 32(%ebx),%xmm4 + movdqa %xmm0,%xmm2 + movdqu %xmm0,-16(%edx) +.L101loop_key128: +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + leal 16(%edx),%edx + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,-16(%edx) + movdqa %xmm0,%xmm2 + decl %ecx + jnz .L101loop_key128 + movdqa 48(%ebx),%xmm4 +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,(%edx) + movdqa %xmm0,%xmm2 +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,16(%edx) + movl $9,%ecx + movl %ecx,96(%edx) + jmp .L100good_key +.align 16 +.L09412rounds: movq 16(%eax),%xmm2 + cmpl $268435456,%ebp + je .L10212rounds_alt movl $11,%ecx movups %xmm0,-16(%edx) .byte 102,15,58,223,202,1 - call .L095key_192a_cold + call .L103key_192a_cold .byte 102,15,58,223,202,2 - call .L096key_192b + call .L104key_192b .byte 102,15,58,223,202,4 - call .L097key_192a + call .L105key_192a .byte 102,15,58,223,202,8 - call .L096key_192b + call .L104key_192b .byte 102,15,58,223,202,16 - call .L097key_192a + call .L105key_192a .byte 102,15,58,223,202,32 - call .L096key_192b + call .L104key_192b .byte 102,15,58,223,202,64 - call .L097key_192a + call .L105key_192a .byte 102,15,58,223,202,128 - call .L096key_192b + call .L104key_192b movups %xmm0,(%edx) movl %ecx,48(%edx) - xorl %eax,%eax - ret + jmp .L100good_key .align 16 -.L097key_192a: +.L105key_192a: movups %xmm0,(%edx) leal 16(%edx),%edx .align 16 -.L095key_192a_cold: +.L103key_192a_cold: movaps %xmm2,%xmm5 -.L098key_192b_warm: +.L106key_192b_warm: shufps $16,%xmm0,%xmm4 movdqa %xmm2,%xmm3 xorps %xmm4,%xmm0 @@ -2064,56 +2216,90 @@ _aesni_set_encrypt_key: pxor %xmm3,%xmm2 ret .align 16 -.L096key_192b: +.L104key_192b: movaps %xmm0,%xmm3 shufps $68,%xmm0,%xmm5 movups %xmm5,(%edx) shufps $78,%xmm2,%xmm3 movups %xmm3,16(%edx) leal 32(%edx),%edx - jmp .L098key_192b_warm + jmp .L106key_192b_warm +.align 16 +.L10212rounds_alt: + movdqa 16(%ebx),%xmm5 + movdqa 32(%ebx),%xmm4 + movl $8,%ecx + movdqu %xmm0,-16(%edx) +.L107loop_key192: + movq %xmm2,(%edx) + movdqa %xmm2,%xmm1 +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + pslld $1,%xmm4 + leal 24(%edx),%edx + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pxor %xmm2,%xmm0 + pxor %xmm3,%xmm2 + movdqu %xmm0,-16(%edx) + decl %ecx + jnz .L107loop_key192 + movl $11,%ecx + movl %ecx,32(%edx) + jmp .L100good_key .align 16 -.L08914rounds: +.L09314rounds: movups 16(%eax),%xmm2 - movl $13,%ecx leal 16(%edx),%edx + cmpl $268435456,%ebp + je .L10814rounds_alt + movl $13,%ecx movups %xmm0,-32(%edx) movups %xmm2,-16(%edx) .byte 102,15,58,223,202,1 - call .L099key_256a_cold + call .L109key_256a_cold .byte 102,15,58,223,200,1 - call .L100key_256b + call .L110key_256b .byte 102,15,58,223,202,2 - call .L101key_256a + call .L111key_256a .byte 102,15,58,223,200,2 - call .L100key_256b + call .L110key_256b .byte 102,15,58,223,202,4 - call .L101key_256a + call .L111key_256a .byte 102,15,58,223,200,4 - call .L100key_256b + call .L110key_256b .byte 102,15,58,223,202,8 - call .L101key_256a + call .L111key_256a .byte 102,15,58,223,200,8 - call .L100key_256b + call .L110key_256b .byte 102,15,58,223,202,16 - call .L101key_256a + call .L111key_256a .byte 102,15,58,223,200,16 - call .L100key_256b + call .L110key_256b .byte 102,15,58,223,202,32 - call .L101key_256a + call .L111key_256a .byte 102,15,58,223,200,32 - call .L100key_256b + call .L110key_256b .byte 102,15,58,223,202,64 - call .L101key_256a + call .L111key_256a movups %xmm0,(%edx) movl %ecx,16(%edx) xorl %eax,%eax - ret + jmp .L100good_key .align 16 -.L101key_256a: +.L111key_256a: movups %xmm2,(%edx) leal 16(%edx),%edx -.L099key_256a_cold: +.L109key_256a_cold: shufps $16,%xmm0,%xmm4 xorps %xmm4,%xmm0 shufps $140,%xmm0,%xmm4 @@ -2122,7 +2308,7 @@ _aesni_set_encrypt_key: xorps %xmm1,%xmm0 ret .align 16 -.L100key_256b: +.L110key_256b: movups %xmm0,(%edx) leal 16(%edx),%edx shufps $16,%xmm2,%xmm4 @@ -2132,13 +2318,70 @@ _aesni_set_encrypt_key: shufps $170,%xmm1,%xmm1 xorps %xmm1,%xmm2 ret +.align 16 +.L10814rounds_alt: + movdqa (%ebx),%xmm5 + movdqa 32(%ebx),%xmm4 + movl $7,%ecx + movdqu %xmm0,-32(%edx) + movdqa %xmm2,%xmm1 + movdqu %xmm2,-16(%edx) +.L112loop_key256: +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + pslld $1,%xmm4 + pxor %xmm2,%xmm0 + movdqu %xmm0,(%edx) + decl %ecx + jz .L113done_key256 + pshufd $255,%xmm0,%xmm2 + pxor %xmm3,%xmm3 +.byte 102,15,56,221,211 + movdqa %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm3,%xmm1 + pxor %xmm1,%xmm2 + movdqu %xmm2,16(%edx) + leal 32(%edx),%edx + movdqa %xmm2,%xmm1 + jmp .L112loop_key256 +.L113done_key256: + movl $13,%ecx + movl %ecx,16(%edx) +.L100good_key: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + xorl %eax,%eax + popl %ebx + popl %ebp + ret .align 4 -.L088bad_pointer: +.L091bad_pointer: movl $-1,%eax + popl %ebx + popl %ebp ret .align 4 -.L091bad_keybits: +.L095bad_keybits: + pxor %xmm0,%xmm0 movl $-2,%eax + popl %ebx + popl %ebp ret .size _aesni_set_encrypt_key,.-_aesni_set_encrypt_key .globl aesni_set_encrypt_key @@ -2164,7 +2407,7 @@ aesni_set_decrypt_key: movl 12(%esp),%edx shll $4,%ecx testl %eax,%eax - jnz .L102dec_key_ret + jnz .L114dec_key_ret leal 16(%edx,%ecx,1),%eax movups (%edx),%xmm0 movups (%eax),%xmm1 @@ -2172,7 +2415,7 @@ aesni_set_decrypt_key: movups %xmm1,(%edx) leal 16(%edx),%edx leal -16(%eax),%eax -.L103dec_key_inverse: +.L115dec_key_inverse: movups (%edx),%xmm0 movups (%eax),%xmm1 .byte 102,15,56,219,192 @@ -2182,15 +2425,24 @@ aesni_set_decrypt_key: movups %xmm0,16(%eax) movups %xmm1,-16(%edx) cmpl %edx,%eax - ja .L103dec_key_inverse + ja .L115dec_key_inverse movups (%edx),%xmm0 .byte 102,15,56,219,192 movups %xmm0,(%edx) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 xorl %eax,%eax -.L102dec_key_ret: +.L114dec_key_ret: ret .size aesni_set_decrypt_key,.-.L_aesni_set_decrypt_key_begin +.align 64 +.Lkey_const: +.long 202313229,202313229,202313229,202313229 +.long 67569157,67569157,67569157,67569157 +.long 1,1,1,1 +.long 27,27,27,27 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 .byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 .byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 .byte 115,108,46,111,114,103,62,0 +.comm OPENSSL_ia32cap_P,16,4 diff --git a/deps/openssl/asm/x86-macosx-gas/aes/aesni-x86.s b/deps/openssl/asm/x86-macosx-gas/aes/aesni-x86.s index cecd5f83f71e6d..c1f5aec62ce4e3 100644 --- a/deps/openssl/asm/x86-macosx-gas/aes/aesni-x86.s +++ b/deps/openssl/asm/x86-macosx-gas/aes/aesni-x86.s @@ -20,7 +20,10 @@ L000enc1_loop_1: leal 16(%edx),%edx jnz L000enc1_loop_1 .byte 102,15,56,221,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 movups %xmm2,(%eax) + pxor %xmm2,%xmm2 ret .globl _aesni_decrypt .align 4 @@ -42,7 +45,10 @@ L001dec1_loop_2: leal 16(%edx),%edx jnz L001dec1_loop_2 .byte 102,15,56,223,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 movups %xmm2,(%eax) + pxor %xmm2,%xmm2 ret .align 4 __aesni_encrypt2: @@ -242,17 +248,15 @@ __aesni_encrypt6: negl %ecx .byte 102,15,56,220,225 pxor %xmm0,%xmm7 + movups (%edx,%ecx,1),%xmm0 addl $16,%ecx -.byte 102,15,56,220,233 -.byte 102,15,56,220,241 -.byte 102,15,56,220,249 - movups -16(%edx,%ecx,1),%xmm0 - jmp L_aesni_encrypt6_enter + jmp L008_aesni_encrypt6_inner .align 4,0x90 -L008enc6_loop: +L009enc6_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 +L008_aesni_encrypt6_inner: .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 @@ -266,7 +270,7 @@ L_aesni_encrypt6_enter: .byte 102,15,56,220,240 .byte 102,15,56,220,248 movups -16(%edx,%ecx,1),%xmm0 - jnz L008enc6_loop + jnz L009enc6_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 @@ -296,17 +300,15 @@ __aesni_decrypt6: negl %ecx .byte 102,15,56,222,225 pxor %xmm0,%xmm7 + movups (%edx,%ecx,1),%xmm0 addl $16,%ecx -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 - movups -16(%edx,%ecx,1),%xmm0 - jmp L_aesni_decrypt6_enter + jmp L010_aesni_decrypt6_inner .align 4,0x90 -L009dec6_loop: +L011dec6_loop: .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 +L010_aesni_decrypt6_inner: .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 @@ -320,7 +322,7 @@ L_aesni_decrypt6_enter: .byte 102,15,56,222,240 .byte 102,15,56,222,248 movups -16(%edx,%ecx,1),%xmm0 - jnz L009dec6_loop + jnz L011dec6_loop .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 @@ -348,14 +350,14 @@ L_aesni_ecb_encrypt_begin: movl 32(%esp),%edx movl 36(%esp),%ebx andl $-16,%eax - jz L010ecb_ret + jz L012ecb_ret movl 240(%edx),%ecx testl %ebx,%ebx - jz L011ecb_decrypt + jz L013ecb_decrypt movl %edx,%ebp movl %ecx,%ebx cmpl $96,%eax - jb L012ecb_enc_tail + jb L014ecb_enc_tail movdqu (%esi),%xmm2 movdqu 16(%esi),%xmm3 movdqu 32(%esi),%xmm4 @@ -364,9 +366,9 @@ L_aesni_ecb_encrypt_begin: movdqu 80(%esi),%xmm7 leal 96(%esi),%esi subl $96,%eax - jmp L013ecb_enc_loop6_enter + jmp L015ecb_enc_loop6_enter .align 4,0x90 -L014ecb_enc_loop6: +L016ecb_enc_loop6: movups %xmm2,(%edi) movdqu (%esi),%xmm2 movups %xmm3,16(%edi) @@ -381,12 +383,12 @@ L014ecb_enc_loop6: leal 96(%edi),%edi movdqu 80(%esi),%xmm7 leal 96(%esi),%esi -L013ecb_enc_loop6_enter: +L015ecb_enc_loop6_enter: call __aesni_encrypt6 movl %ebp,%edx movl %ebx,%ecx subl $96,%eax - jnc L014ecb_enc_loop6 + jnc L016ecb_enc_loop6 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) @@ -395,18 +397,18 @@ L013ecb_enc_loop6_enter: movups %xmm7,80(%edi) leal 96(%edi),%edi addl $96,%eax - jz L010ecb_ret -L012ecb_enc_tail: + jz L012ecb_ret +L014ecb_enc_tail: movups (%esi),%xmm2 cmpl $32,%eax - jb L015ecb_enc_one + jb L017ecb_enc_one movups 16(%esi),%xmm3 - je L016ecb_enc_two + je L018ecb_enc_two movups 32(%esi),%xmm4 cmpl $64,%eax - jb L017ecb_enc_three + jb L019ecb_enc_three movups 48(%esi),%xmm5 - je L018ecb_enc_four + je L020ecb_enc_four movups 64(%esi),%xmm6 xorps %xmm7,%xmm7 call __aesni_encrypt6 @@ -415,49 +417,49 @@ L012ecb_enc_tail: movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) - jmp L010ecb_ret + jmp L012ecb_ret .align 4,0x90 -L015ecb_enc_one: +L017ecb_enc_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L019enc1_loop_3: +L021enc1_loop_3: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L019enc1_loop_3 + jnz L021enc1_loop_3 .byte 102,15,56,221,209 movups %xmm2,(%edi) - jmp L010ecb_ret + jmp L012ecb_ret .align 4,0x90 -L016ecb_enc_two: +L018ecb_enc_two: call __aesni_encrypt2 movups %xmm2,(%edi) movups %xmm3,16(%edi) - jmp L010ecb_ret + jmp L012ecb_ret .align 4,0x90 -L017ecb_enc_three: +L019ecb_enc_three: call __aesni_encrypt3 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) - jmp L010ecb_ret + jmp L012ecb_ret .align 4,0x90 -L018ecb_enc_four: +L020ecb_enc_four: call __aesni_encrypt4 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) movups %xmm5,48(%edi) - jmp L010ecb_ret + jmp L012ecb_ret .align 4,0x90 -L011ecb_decrypt: +L013ecb_decrypt: movl %edx,%ebp movl %ecx,%ebx cmpl $96,%eax - jb L020ecb_dec_tail + jb L022ecb_dec_tail movdqu (%esi),%xmm2 movdqu 16(%esi),%xmm3 movdqu 32(%esi),%xmm4 @@ -466,9 +468,9 @@ L011ecb_decrypt: movdqu 80(%esi),%xmm7 leal 96(%esi),%esi subl $96,%eax - jmp L021ecb_dec_loop6_enter + jmp L023ecb_dec_loop6_enter .align 4,0x90 -L022ecb_dec_loop6: +L024ecb_dec_loop6: movups %xmm2,(%edi) movdqu (%esi),%xmm2 movups %xmm3,16(%edi) @@ -483,12 +485,12 @@ L022ecb_dec_loop6: leal 96(%edi),%edi movdqu 80(%esi),%xmm7 leal 96(%esi),%esi -L021ecb_dec_loop6_enter: +L023ecb_dec_loop6_enter: call __aesni_decrypt6 movl %ebp,%edx movl %ebx,%ecx subl $96,%eax - jnc L022ecb_dec_loop6 + jnc L024ecb_dec_loop6 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) @@ -497,18 +499,18 @@ L021ecb_dec_loop6_enter: movups %xmm7,80(%edi) leal 96(%edi),%edi addl $96,%eax - jz L010ecb_ret -L020ecb_dec_tail: + jz L012ecb_ret +L022ecb_dec_tail: movups (%esi),%xmm2 cmpl $32,%eax - jb L023ecb_dec_one + jb L025ecb_dec_one movups 16(%esi),%xmm3 - je L024ecb_dec_two + je L026ecb_dec_two movups 32(%esi),%xmm4 cmpl $64,%eax - jb L025ecb_dec_three + jb L027ecb_dec_three movups 48(%esi),%xmm5 - je L026ecb_dec_four + je L028ecb_dec_four movups 64(%esi),%xmm6 xorps %xmm7,%xmm7 call __aesni_decrypt6 @@ -517,43 +519,51 @@ L020ecb_dec_tail: movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) - jmp L010ecb_ret + jmp L012ecb_ret .align 4,0x90 -L023ecb_dec_one: +L025ecb_dec_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L027dec1_loop_4: +L029dec1_loop_4: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L027dec1_loop_4 + jnz L029dec1_loop_4 .byte 102,15,56,223,209 movups %xmm2,(%edi) - jmp L010ecb_ret + jmp L012ecb_ret .align 4,0x90 -L024ecb_dec_two: +L026ecb_dec_two: call __aesni_decrypt2 movups %xmm2,(%edi) movups %xmm3,16(%edi) - jmp L010ecb_ret + jmp L012ecb_ret .align 4,0x90 -L025ecb_dec_three: +L027ecb_dec_three: call __aesni_decrypt3 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) - jmp L010ecb_ret + jmp L012ecb_ret .align 4,0x90 -L026ecb_dec_four: +L028ecb_dec_four: call __aesni_decrypt4 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) movups %xmm5,48(%edi) -L010ecb_ret: +L012ecb_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 popl %edi popl %esi popl %ebx @@ -598,7 +608,7 @@ L_aesni_ccm64_encrypt_blocks_begin: leal 32(%edx,%ecx,1),%edx subl %ecx,%ebx .byte 102,15,56,0,253 -L028ccm64_enc_outer: +L030ccm64_enc_outer: movups (%ebp),%xmm0 movl %ebx,%ecx movups (%esi),%xmm6 @@ -607,7 +617,7 @@ L028ccm64_enc_outer: xorps %xmm6,%xmm0 xorps %xmm0,%xmm3 movups 32(%ebp),%xmm0 -L029ccm64_enc2_loop: +L031ccm64_enc2_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 movups (%edx,%ecx,1),%xmm1 @@ -615,7 +625,7 @@ L029ccm64_enc2_loop: .byte 102,15,56,220,208 .byte 102,15,56,220,216 movups -16(%edx,%ecx,1),%xmm0 - jnz L029ccm64_enc2_loop + jnz L031ccm64_enc2_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 paddq 16(%esp),%xmm7 @@ -628,10 +638,18 @@ L029ccm64_enc2_loop: movups %xmm6,(%edi) .byte 102,15,56,0,213 leal 16(%edi),%edi - jnz L028ccm64_enc_outer + jnz L030ccm64_enc_outer movl 48(%esp),%esp movl 40(%esp),%edi movups %xmm3,(%edi) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 popl %edi popl %esi popl %ebx @@ -677,12 +695,12 @@ L_aesni_ccm64_decrypt_blocks_begin: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L030enc1_loop_5: +L032enc1_loop_5: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L030enc1_loop_5 + jnz L032enc1_loop_5 .byte 102,15,56,221,209 shll $4,%ebx movl $16,%ecx @@ -692,16 +710,16 @@ L030enc1_loop_5: subl %ebx,%ecx leal 32(%ebp,%ebx,1),%edx movl %ecx,%ebx - jmp L031ccm64_dec_outer + jmp L033ccm64_dec_outer .align 4,0x90 -L031ccm64_dec_outer: +L033ccm64_dec_outer: xorps %xmm2,%xmm6 movdqa %xmm7,%xmm2 movups %xmm6,(%edi) leal 16(%edi),%edi .byte 102,15,56,0,213 subl $1,%eax - jz L032ccm64_dec_break + jz L034ccm64_dec_break movups (%ebp),%xmm0 movl %ebx,%ecx movups 16(%ebp),%xmm1 @@ -709,7 +727,7 @@ L031ccm64_dec_outer: xorps %xmm0,%xmm2 xorps %xmm6,%xmm3 movups 32(%ebp),%xmm0 -L033ccm64_dec2_loop: +L035ccm64_dec2_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 movups (%edx,%ecx,1),%xmm1 @@ -717,7 +735,7 @@ L033ccm64_dec2_loop: .byte 102,15,56,220,208 .byte 102,15,56,220,216 movups -16(%edx,%ecx,1),%xmm0 - jnz L033ccm64_dec2_loop + jnz L035ccm64_dec2_loop movups (%esi),%xmm6 paddq 16(%esp),%xmm7 .byte 102,15,56,220,209 @@ -725,9 +743,9 @@ L033ccm64_dec2_loop: .byte 102,15,56,221,208 .byte 102,15,56,221,216 leal 16(%esi),%esi - jmp L031ccm64_dec_outer + jmp L033ccm64_dec_outer .align 4,0x90 -L032ccm64_dec_break: +L034ccm64_dec_break: movl 240(%ebp),%ecx movl %ebp,%edx movups (%edx),%xmm0 @@ -735,16 +753,24 @@ L032ccm64_dec_break: xorps %xmm0,%xmm6 leal 32(%edx),%edx xorps %xmm6,%xmm3 -L034enc1_loop_6: +L036enc1_loop_6: .byte 102,15,56,220,217 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L034enc1_loop_6 + jnz L036enc1_loop_6 .byte 102,15,56,221,217 movl 48(%esp),%esp movl 40(%esp),%edi movups %xmm3,(%edi) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 popl %edi popl %esi popl %ebx @@ -768,7 +794,7 @@ L_aesni_ctr32_encrypt_blocks_begin: andl $-16,%esp movl %ebp,80(%esp) cmpl $1,%eax - je L035ctr32_one_shortcut + je L037ctr32_one_shortcut movdqu (%ebx),%xmm7 movl $202182159,(%esp) movl $134810123,4(%esp) @@ -806,7 +832,7 @@ L_aesni_ctr32_encrypt_blocks_begin: pshufd $192,%xmm0,%xmm2 pshufd $128,%xmm0,%xmm3 cmpl $6,%eax - jb L036ctr32_tail + jb L038ctr32_tail pxor %xmm6,%xmm7 shll $4,%ecx movl $16,%ebx @@ -815,9 +841,9 @@ L_aesni_ctr32_encrypt_blocks_begin: subl %ecx,%ebx leal 32(%edx,%ecx,1),%edx subl $6,%eax - jmp L037ctr32_loop6 + jmp L039ctr32_loop6 .align 4,0x90 -L037ctr32_loop6: +L039ctr32_loop6: pshufd $64,%xmm0,%xmm4 movdqa 32(%esp),%xmm0 pshufd $192,%xmm1,%xmm5 @@ -871,27 +897,27 @@ L037ctr32_loop6: leal 96(%edi),%edi pshufd $128,%xmm0,%xmm3 subl $6,%eax - jnc L037ctr32_loop6 + jnc L039ctr32_loop6 addl $6,%eax - jz L038ctr32_ret + jz L040ctr32_ret movdqu (%ebp),%xmm7 movl %ebp,%edx pxor 32(%esp),%xmm7 movl 240(%ebp),%ecx -L036ctr32_tail: +L038ctr32_tail: por %xmm7,%xmm2 cmpl $2,%eax - jb L039ctr32_one + jb L041ctr32_one pshufd $64,%xmm0,%xmm4 por %xmm7,%xmm3 - je L040ctr32_two + je L042ctr32_two pshufd $192,%xmm1,%xmm5 por %xmm7,%xmm4 cmpl $4,%eax - jb L041ctr32_three + jb L043ctr32_three pshufd $128,%xmm1,%xmm6 por %xmm7,%xmm5 - je L042ctr32_four + je L044ctr32_four por %xmm7,%xmm6 call __aesni_encrypt6 movups (%esi),%xmm1 @@ -909,29 +935,29 @@ L036ctr32_tail: movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) - jmp L038ctr32_ret + jmp L040ctr32_ret .align 4,0x90 -L035ctr32_one_shortcut: +L037ctr32_one_shortcut: movups (%ebx),%xmm2 movl 240(%edx),%ecx -L039ctr32_one: +L041ctr32_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L043enc1_loop_7: +L045enc1_loop_7: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L043enc1_loop_7 + jnz L045enc1_loop_7 .byte 102,15,56,221,209 movups (%esi),%xmm6 xorps %xmm2,%xmm6 movups %xmm6,(%edi) - jmp L038ctr32_ret + jmp L040ctr32_ret .align 4,0x90 -L040ctr32_two: +L042ctr32_two: call __aesni_encrypt2 movups (%esi),%xmm5 movups 16(%esi),%xmm6 @@ -939,9 +965,9 @@ L040ctr32_two: xorps %xmm6,%xmm3 movups %xmm2,(%edi) movups %xmm3,16(%edi) - jmp L038ctr32_ret + jmp L040ctr32_ret .align 4,0x90 -L041ctr32_three: +L043ctr32_three: call __aesni_encrypt3 movups (%esi),%xmm5 movups 16(%esi),%xmm6 @@ -952,9 +978,9 @@ L041ctr32_three: xorps %xmm7,%xmm4 movups %xmm3,16(%edi) movups %xmm4,32(%edi) - jmp L038ctr32_ret + jmp L040ctr32_ret .align 4,0x90 -L042ctr32_four: +L044ctr32_four: call __aesni_encrypt4 movups (%esi),%xmm6 movups 16(%esi),%xmm7 @@ -968,7 +994,18 @@ L042ctr32_four: xorps %xmm0,%xmm5 movups %xmm4,32(%edi) movups %xmm5,48(%edi) -L038ctr32_ret: +L040ctr32_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 movl 80(%esp),%esp popl %edi popl %esi @@ -991,12 +1028,12 @@ L_aesni_xts_encrypt_begin: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L044enc1_loop_8: +L046enc1_loop_8: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L044enc1_loop_8 + jnz L046enc1_loop_8 .byte 102,15,56,221,209 movl 20(%esp),%esi movl 24(%esp),%edi @@ -1020,14 +1057,14 @@ L044enc1_loop_8: movl %edx,%ebp movl %ecx,%ebx subl $96,%eax - jc L045xts_enc_short + jc L047xts_enc_short shll $4,%ecx movl $16,%ebx subl %ecx,%ebx leal 32(%edx,%ecx,1),%edx - jmp L046xts_enc_loop6 + jmp L048xts_enc_loop6 .align 4,0x90 -L046xts_enc_loop6: +L048xts_enc_loop6: pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,(%esp) @@ -1116,23 +1153,23 @@ L046xts_enc_loop6: pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 subl $96,%eax - jnc L046xts_enc_loop6 + jnc L048xts_enc_loop6 movl 240(%ebp),%ecx movl %ebp,%edx movl %ecx,%ebx -L045xts_enc_short: +L047xts_enc_short: addl $96,%eax - jz L047xts_enc_done6x + jz L049xts_enc_done6x movdqa %xmm1,%xmm5 cmpl $32,%eax - jb L048xts_enc_one + jb L050xts_enc_one pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 - je L049xts_enc_two + je L051xts_enc_two pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm6 @@ -1141,7 +1178,7 @@ L045xts_enc_short: pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 cmpl $64,%eax - jb L050xts_enc_three + jb L052xts_enc_three pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm7 @@ -1151,7 +1188,7 @@ L045xts_enc_short: pxor %xmm2,%xmm1 movdqa %xmm5,(%esp) movdqa %xmm6,16(%esp) - je L051xts_enc_four + je L053xts_enc_four movdqa %xmm7,32(%esp) pshufd $19,%xmm0,%xmm7 movdqa %xmm1,48(%esp) @@ -1183,9 +1220,9 @@ L045xts_enc_short: movups %xmm5,48(%edi) movups %xmm6,64(%edi) leal 80(%edi),%edi - jmp L052xts_enc_done + jmp L054xts_enc_done .align 4,0x90 -L048xts_enc_one: +L050xts_enc_one: movups (%esi),%xmm2 leal 16(%esi),%esi xorps %xmm5,%xmm2 @@ -1193,20 +1230,20 @@ L048xts_enc_one: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L053enc1_loop_9: +L055enc1_loop_9: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L053enc1_loop_9 + jnz L055enc1_loop_9 .byte 102,15,56,221,209 xorps %xmm5,%xmm2 movups %xmm2,(%edi) leal 16(%edi),%edi movdqa %xmm5,%xmm1 - jmp L052xts_enc_done + jmp L054xts_enc_done .align 4,0x90 -L049xts_enc_two: +L051xts_enc_two: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1220,9 +1257,9 @@ L049xts_enc_two: movups %xmm3,16(%edi) leal 32(%edi),%edi movdqa %xmm6,%xmm1 - jmp L052xts_enc_done + jmp L054xts_enc_done .align 4,0x90 -L050xts_enc_three: +L052xts_enc_three: movaps %xmm1,%xmm7 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1240,9 +1277,9 @@ L050xts_enc_three: movups %xmm4,32(%edi) leal 48(%edi),%edi movdqa %xmm7,%xmm1 - jmp L052xts_enc_done + jmp L054xts_enc_done .align 4,0x90 -L051xts_enc_four: +L053xts_enc_four: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1264,28 +1301,28 @@ L051xts_enc_four: movups %xmm5,48(%edi) leal 64(%edi),%edi movdqa %xmm6,%xmm1 - jmp L052xts_enc_done + jmp L054xts_enc_done .align 4,0x90 -L047xts_enc_done6x: +L049xts_enc_done6x: movl 112(%esp),%eax andl $15,%eax - jz L054xts_enc_ret + jz L056xts_enc_ret movdqa %xmm1,%xmm5 movl %eax,112(%esp) - jmp L055xts_enc_steal + jmp L057xts_enc_steal .align 4,0x90 -L052xts_enc_done: +L054xts_enc_done: movl 112(%esp),%eax pxor %xmm0,%xmm0 andl $15,%eax - jz L054xts_enc_ret + jz L056xts_enc_ret pcmpgtd %xmm1,%xmm0 movl %eax,112(%esp) pshufd $19,%xmm0,%xmm5 paddq %xmm1,%xmm1 pand 96(%esp),%xmm5 pxor %xmm1,%xmm5 -L055xts_enc_steal: +L057xts_enc_steal: movzbl (%esi),%ecx movzbl -16(%edi),%edx leal 1(%esi),%esi @@ -1293,7 +1330,7 @@ L055xts_enc_steal: movb %dl,(%edi) leal 1(%edi),%edi subl $1,%eax - jnz L055xts_enc_steal + jnz L057xts_enc_steal subl 112(%esp),%edi movl %ebp,%edx movl %ebx,%ecx @@ -1303,16 +1340,30 @@ L055xts_enc_steal: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L056enc1_loop_10: +L058enc1_loop_10: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L056enc1_loop_10 + jnz L058enc1_loop_10 .byte 102,15,56,221,209 xorps %xmm5,%xmm2 movups %xmm2,-16(%edi) -L054xts_enc_ret: +L056xts_enc_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + movdqa %xmm0,(%esp) + pxor %xmm3,%xmm3 + movdqa %xmm0,16(%esp) + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 + movdqa %xmm0,80(%esp) movl 116(%esp),%esp popl %edi popl %esi @@ -1335,12 +1386,12 @@ L_aesni_xts_decrypt_begin: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L057enc1_loop_11: +L059enc1_loop_11: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L057enc1_loop_11 + jnz L059enc1_loop_11 .byte 102,15,56,221,209 movl 20(%esp),%esi movl 24(%esp),%edi @@ -1369,14 +1420,14 @@ L057enc1_loop_11: pcmpgtd %xmm1,%xmm0 andl $-16,%eax subl $96,%eax - jc L058xts_dec_short + jc L060xts_dec_short shll $4,%ecx movl $16,%ebx subl %ecx,%ebx leal 32(%edx,%ecx,1),%edx - jmp L059xts_dec_loop6 + jmp L061xts_dec_loop6 .align 4,0x90 -L059xts_dec_loop6: +L061xts_dec_loop6: pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,(%esp) @@ -1465,23 +1516,23 @@ L059xts_dec_loop6: pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 subl $96,%eax - jnc L059xts_dec_loop6 + jnc L061xts_dec_loop6 movl 240(%ebp),%ecx movl %ebp,%edx movl %ecx,%ebx -L058xts_dec_short: +L060xts_dec_short: addl $96,%eax - jz L060xts_dec_done6x + jz L062xts_dec_done6x movdqa %xmm1,%xmm5 cmpl $32,%eax - jb L061xts_dec_one + jb L063xts_dec_one pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 - je L062xts_dec_two + je L064xts_dec_two pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm6 @@ -1490,7 +1541,7 @@ L058xts_dec_short: pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 cmpl $64,%eax - jb L063xts_dec_three + jb L065xts_dec_three pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm7 @@ -1500,7 +1551,7 @@ L058xts_dec_short: pxor %xmm2,%xmm1 movdqa %xmm5,(%esp) movdqa %xmm6,16(%esp) - je L064xts_dec_four + je L066xts_dec_four movdqa %xmm7,32(%esp) pshufd $19,%xmm0,%xmm7 movdqa %xmm1,48(%esp) @@ -1532,9 +1583,9 @@ L058xts_dec_short: movups %xmm5,48(%edi) movups %xmm6,64(%edi) leal 80(%edi),%edi - jmp L065xts_dec_done + jmp L067xts_dec_done .align 4,0x90 -L061xts_dec_one: +L063xts_dec_one: movups (%esi),%xmm2 leal 16(%esi),%esi xorps %xmm5,%xmm2 @@ -1542,20 +1593,20 @@ L061xts_dec_one: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L066dec1_loop_12: +L068dec1_loop_12: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L066dec1_loop_12 + jnz L068dec1_loop_12 .byte 102,15,56,223,209 xorps %xmm5,%xmm2 movups %xmm2,(%edi) leal 16(%edi),%edi movdqa %xmm5,%xmm1 - jmp L065xts_dec_done + jmp L067xts_dec_done .align 4,0x90 -L062xts_dec_two: +L064xts_dec_two: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1569,9 +1620,9 @@ L062xts_dec_two: movups %xmm3,16(%edi) leal 32(%edi),%edi movdqa %xmm6,%xmm1 - jmp L065xts_dec_done + jmp L067xts_dec_done .align 4,0x90 -L063xts_dec_three: +L065xts_dec_three: movaps %xmm1,%xmm7 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1589,9 +1640,9 @@ L063xts_dec_three: movups %xmm4,32(%edi) leal 48(%edi),%edi movdqa %xmm7,%xmm1 - jmp L065xts_dec_done + jmp L067xts_dec_done .align 4,0x90 -L064xts_dec_four: +L066xts_dec_four: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1613,20 +1664,20 @@ L064xts_dec_four: movups %xmm5,48(%edi) leal 64(%edi),%edi movdqa %xmm6,%xmm1 - jmp L065xts_dec_done + jmp L067xts_dec_done .align 4,0x90 -L060xts_dec_done6x: +L062xts_dec_done6x: movl 112(%esp),%eax andl $15,%eax - jz L067xts_dec_ret + jz L069xts_dec_ret movl %eax,112(%esp) - jmp L068xts_dec_only_one_more + jmp L070xts_dec_only_one_more .align 4,0x90 -L065xts_dec_done: +L067xts_dec_done: movl 112(%esp),%eax pxor %xmm0,%xmm0 andl $15,%eax - jz L067xts_dec_ret + jz L069xts_dec_ret pcmpgtd %xmm1,%xmm0 movl %eax,112(%esp) pshufd $19,%xmm0,%xmm2 @@ -1636,7 +1687,7 @@ L065xts_dec_done: pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 -L068xts_dec_only_one_more: +L070xts_dec_only_one_more: pshufd $19,%xmm0,%xmm5 movdqa %xmm1,%xmm6 paddq %xmm1,%xmm1 @@ -1650,16 +1701,16 @@ L068xts_dec_only_one_more: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L069dec1_loop_13: +L071dec1_loop_13: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L069dec1_loop_13 + jnz L071dec1_loop_13 .byte 102,15,56,223,209 xorps %xmm5,%xmm2 movups %xmm2,(%edi) -L070xts_dec_steal: +L072xts_dec_steal: movzbl 16(%esi),%ecx movzbl (%edi),%edx leal 1(%esi),%esi @@ -1667,7 +1718,7 @@ L070xts_dec_steal: movb %dl,16(%edi) leal 1(%edi),%edi subl $1,%eax - jnz L070xts_dec_steal + jnz L072xts_dec_steal subl 112(%esp),%edi movl %ebp,%edx movl %ebx,%ecx @@ -1677,16 +1728,30 @@ L070xts_dec_steal: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L071dec1_loop_14: +L073dec1_loop_14: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L071dec1_loop_14 + jnz L073dec1_loop_14 .byte 102,15,56,223,209 xorps %xmm6,%xmm2 movups %xmm2,(%edi) -L067xts_dec_ret: +L069xts_dec_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + movdqa %xmm0,(%esp) + pxor %xmm3,%xmm3 + movdqa %xmm0,16(%esp) + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 + movdqa %xmm0,80(%esp) movl 116(%esp),%esp popl %edi popl %esi @@ -1710,7 +1775,7 @@ L_aesni_cbc_encrypt_begin: movl 32(%esp),%edx movl 36(%esp),%ebp testl %eax,%eax - jz L072cbc_abort + jz L074cbc_abort cmpl $0,40(%esp) xchgl %esp,%ebx movups (%ebp),%xmm7 @@ -1718,14 +1783,14 @@ L_aesni_cbc_encrypt_begin: movl %edx,%ebp movl %ebx,16(%esp) movl %ecx,%ebx - je L073cbc_decrypt + je L075cbc_decrypt movaps %xmm7,%xmm2 cmpl $16,%eax - jb L074cbc_enc_tail + jb L076cbc_enc_tail subl $16,%eax - jmp L075cbc_enc_loop + jmp L077cbc_enc_loop .align 4,0x90 -L075cbc_enc_loop: +L077cbc_enc_loop: movups (%esi),%xmm7 leal 16(%esi),%esi movups (%edx),%xmm0 @@ -1733,24 +1798,25 @@ L075cbc_enc_loop: xorps %xmm0,%xmm7 leal 32(%edx),%edx xorps %xmm7,%xmm2 -L076enc1_loop_15: +L078enc1_loop_15: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L076enc1_loop_15 + jnz L078enc1_loop_15 .byte 102,15,56,221,209 movl %ebx,%ecx movl %ebp,%edx movups %xmm2,(%edi) leal 16(%edi),%edi subl $16,%eax - jnc L075cbc_enc_loop + jnc L077cbc_enc_loop addl $16,%eax - jnz L074cbc_enc_tail + jnz L076cbc_enc_tail movaps %xmm2,%xmm7 - jmp L077cbc_ret -L074cbc_enc_tail: + pxor %xmm2,%xmm2 + jmp L079cbc_ret +L076cbc_enc_tail: movl %eax,%ecx .long 2767451785 movl $16,%ecx @@ -1761,20 +1827,20 @@ L074cbc_enc_tail: movl %ebx,%ecx movl %edi,%esi movl %ebp,%edx - jmp L075cbc_enc_loop + jmp L077cbc_enc_loop .align 4,0x90 -L073cbc_decrypt: +L075cbc_decrypt: cmpl $80,%eax - jbe L078cbc_dec_tail + jbe L080cbc_dec_tail movaps %xmm7,(%esp) subl $80,%eax - jmp L079cbc_dec_loop6_enter + jmp L081cbc_dec_loop6_enter .align 4,0x90 -L080cbc_dec_loop6: +L082cbc_dec_loop6: movaps %xmm0,(%esp) movups %xmm7,(%edi) leal 16(%edi),%edi -L079cbc_dec_loop6_enter: +L081cbc_dec_loop6_enter: movdqu (%esi),%xmm2 movdqu 16(%esi),%xmm3 movdqu 32(%esi),%xmm4 @@ -1804,28 +1870,28 @@ L079cbc_dec_loop6_enter: movups %xmm6,64(%edi) leal 80(%edi),%edi subl $96,%eax - ja L080cbc_dec_loop6 + ja L082cbc_dec_loop6 movaps %xmm7,%xmm2 movaps %xmm0,%xmm7 addl $80,%eax - jle L081cbc_dec_tail_collected + jle L083cbc_dec_clear_tail_collected movups %xmm2,(%edi) leal 16(%edi),%edi -L078cbc_dec_tail: +L080cbc_dec_tail: movups (%esi),%xmm2 movaps %xmm2,%xmm6 cmpl $16,%eax - jbe L082cbc_dec_one + jbe L084cbc_dec_one movups 16(%esi),%xmm3 movaps %xmm3,%xmm5 cmpl $32,%eax - jbe L083cbc_dec_two + jbe L085cbc_dec_two movups 32(%esi),%xmm4 cmpl $48,%eax - jbe L084cbc_dec_three + jbe L086cbc_dec_three movups 48(%esi),%xmm5 cmpl $64,%eax - jbe L085cbc_dec_four + jbe L087cbc_dec_four movups 64(%esi),%xmm6 movaps %xmm7,(%esp) movups (%esi),%xmm2 @@ -1843,55 +1909,62 @@ L078cbc_dec_tail: xorps %xmm0,%xmm6 movups %xmm2,(%edi) movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 movups %xmm4,32(%edi) + pxor %xmm4,%xmm4 movups %xmm5,48(%edi) + pxor %xmm5,%xmm5 leal 64(%edi),%edi movaps %xmm6,%xmm2 + pxor %xmm6,%xmm6 subl $80,%eax - jmp L081cbc_dec_tail_collected + jmp L088cbc_dec_tail_collected .align 4,0x90 -L082cbc_dec_one: +L084cbc_dec_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L086dec1_loop_16: +L089dec1_loop_16: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L086dec1_loop_16 + jnz L089dec1_loop_16 .byte 102,15,56,223,209 xorps %xmm7,%xmm2 movaps %xmm6,%xmm7 subl $16,%eax - jmp L081cbc_dec_tail_collected + jmp L088cbc_dec_tail_collected .align 4,0x90 -L083cbc_dec_two: +L085cbc_dec_two: call __aesni_decrypt2 xorps %xmm7,%xmm2 xorps %xmm6,%xmm3 movups %xmm2,(%edi) movaps %xmm3,%xmm2 + pxor %xmm3,%xmm3 leal 16(%edi),%edi movaps %xmm5,%xmm7 subl $32,%eax - jmp L081cbc_dec_tail_collected + jmp L088cbc_dec_tail_collected .align 4,0x90 -L084cbc_dec_three: +L086cbc_dec_three: call __aesni_decrypt3 xorps %xmm7,%xmm2 xorps %xmm6,%xmm3 xorps %xmm5,%xmm4 movups %xmm2,(%edi) movaps %xmm4,%xmm2 + pxor %xmm4,%xmm4 movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 leal 32(%edi),%edi movups 32(%esi),%xmm7 subl $48,%eax - jmp L081cbc_dec_tail_collected + jmp L088cbc_dec_tail_collected .align 4,0x90 -L085cbc_dec_four: +L087cbc_dec_four: call __aesni_decrypt4 movups 16(%esi),%xmm1 movups 32(%esi),%xmm0 @@ -1901,28 +1974,44 @@ L085cbc_dec_four: movups %xmm2,(%edi) xorps %xmm1,%xmm4 movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 xorps %xmm0,%xmm5 movups %xmm4,32(%edi) + pxor %xmm4,%xmm4 leal 48(%edi),%edi movaps %xmm5,%xmm2 + pxor %xmm5,%xmm5 subl $64,%eax -L081cbc_dec_tail_collected: + jmp L088cbc_dec_tail_collected +.align 4,0x90 +L083cbc_dec_clear_tail_collected: + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 +L088cbc_dec_tail_collected: andl $15,%eax - jnz L087cbc_dec_tail_partial + jnz L090cbc_dec_tail_partial movups %xmm2,(%edi) - jmp L077cbc_ret + pxor %xmm0,%xmm0 + jmp L079cbc_ret .align 4,0x90 -L087cbc_dec_tail_partial: +L090cbc_dec_tail_partial: movaps %xmm2,(%esp) + pxor %xmm0,%xmm0 movl $16,%ecx movl %esp,%esi subl %eax,%ecx .long 2767451785 -L077cbc_ret: + movdqa %xmm2,(%esp) +L079cbc_ret: movl 16(%esp),%esp movl 36(%esp),%ebp + pxor %xmm2,%xmm2 + pxor %xmm1,%xmm1 movups %xmm7,(%ebp) -L072cbc_abort: + pxor %xmm7,%xmm7 +L074cbc_abort: popl %edi popl %esi popl %ebx @@ -1930,52 +2019,62 @@ L072cbc_abort: ret .align 4 __aesni_set_encrypt_key: + pushl %ebp + pushl %ebx testl %eax,%eax - jz L088bad_pointer + jz L091bad_pointer testl %edx,%edx - jz L088bad_pointer + jz L091bad_pointer + call L092pic +L092pic: + popl %ebx + leal Lkey_const-L092pic(%ebx),%ebx + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-Lkey_const(%ebx),%ebp movups (%eax),%xmm0 xorps %xmm4,%xmm4 + movl 4(%ebp),%ebp leal 16(%edx),%edx + andl $268437504,%ebp cmpl $256,%ecx - je L08914rounds + je L09314rounds cmpl $192,%ecx - je L09012rounds + je L09412rounds cmpl $128,%ecx - jne L091bad_keybits + jne L095bad_keybits .align 4,0x90 -L09210rounds: +L09610rounds: + cmpl $268435456,%ebp + je L09710rounds_alt movl $9,%ecx movups %xmm0,-16(%edx) .byte 102,15,58,223,200,1 - call L093key_128_cold + call L098key_128_cold .byte 102,15,58,223,200,2 - call L094key_128 + call L099key_128 .byte 102,15,58,223,200,4 - call L094key_128 + call L099key_128 .byte 102,15,58,223,200,8 - call L094key_128 + call L099key_128 .byte 102,15,58,223,200,16 - call L094key_128 + call L099key_128 .byte 102,15,58,223,200,32 - call L094key_128 + call L099key_128 .byte 102,15,58,223,200,64 - call L094key_128 + call L099key_128 .byte 102,15,58,223,200,128 - call L094key_128 + call L099key_128 .byte 102,15,58,223,200,27 - call L094key_128 + call L099key_128 .byte 102,15,58,223,200,54 - call L094key_128 + call L099key_128 movups %xmm0,(%edx) movl %ecx,80(%edx) - xorl %eax,%eax - ret + jmp L100good_key .align 4,0x90 -L094key_128: +L099key_128: movups %xmm0,(%edx) leal 16(%edx),%edx -L093key_128_cold: +L098key_128_cold: shufps $16,%xmm0,%xmm4 xorps %xmm4,%xmm0 shufps $140,%xmm0,%xmm4 @@ -1984,38 +2083,91 @@ L093key_128_cold: xorps %xmm1,%xmm0 ret .align 4,0x90 -L09012rounds: +L09710rounds_alt: + movdqa (%ebx),%xmm5 + movl $8,%ecx + movdqa 32(%ebx),%xmm4 + movdqa %xmm0,%xmm2 + movdqu %xmm0,-16(%edx) +L101loop_key128: +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + leal 16(%edx),%edx + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,-16(%edx) + movdqa %xmm0,%xmm2 + decl %ecx + jnz L101loop_key128 + movdqa 48(%ebx),%xmm4 +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,(%edx) + movdqa %xmm0,%xmm2 +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,16(%edx) + movl $9,%ecx + movl %ecx,96(%edx) + jmp L100good_key +.align 4,0x90 +L09412rounds: movq 16(%eax),%xmm2 + cmpl $268435456,%ebp + je L10212rounds_alt movl $11,%ecx movups %xmm0,-16(%edx) .byte 102,15,58,223,202,1 - call L095key_192a_cold + call L103key_192a_cold .byte 102,15,58,223,202,2 - call L096key_192b + call L104key_192b .byte 102,15,58,223,202,4 - call L097key_192a + call L105key_192a .byte 102,15,58,223,202,8 - call L096key_192b + call L104key_192b .byte 102,15,58,223,202,16 - call L097key_192a + call L105key_192a .byte 102,15,58,223,202,32 - call L096key_192b + call L104key_192b .byte 102,15,58,223,202,64 - call L097key_192a + call L105key_192a .byte 102,15,58,223,202,128 - call L096key_192b + call L104key_192b movups %xmm0,(%edx) movl %ecx,48(%edx) - xorl %eax,%eax - ret + jmp L100good_key .align 4,0x90 -L097key_192a: +L105key_192a: movups %xmm0,(%edx) leal 16(%edx),%edx .align 4,0x90 -L095key_192a_cold: +L103key_192a_cold: movaps %xmm2,%xmm5 -L098key_192b_warm: +L106key_192b_warm: shufps $16,%xmm0,%xmm4 movdqa %xmm2,%xmm3 xorps %xmm4,%xmm0 @@ -2029,56 +2181,90 @@ L098key_192b_warm: pxor %xmm3,%xmm2 ret .align 4,0x90 -L096key_192b: +L104key_192b: movaps %xmm0,%xmm3 shufps $68,%xmm0,%xmm5 movups %xmm5,(%edx) shufps $78,%xmm2,%xmm3 movups %xmm3,16(%edx) leal 32(%edx),%edx - jmp L098key_192b_warm + jmp L106key_192b_warm .align 4,0x90 -L08914rounds: +L10212rounds_alt: + movdqa 16(%ebx),%xmm5 + movdqa 32(%ebx),%xmm4 + movl $8,%ecx + movdqu %xmm0,-16(%edx) +L107loop_key192: + movq %xmm2,(%edx) + movdqa %xmm2,%xmm1 +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + pslld $1,%xmm4 + leal 24(%edx),%edx + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pxor %xmm2,%xmm0 + pxor %xmm3,%xmm2 + movdqu %xmm0,-16(%edx) + decl %ecx + jnz L107loop_key192 + movl $11,%ecx + movl %ecx,32(%edx) + jmp L100good_key +.align 4,0x90 +L09314rounds: movups 16(%eax),%xmm2 - movl $13,%ecx leal 16(%edx),%edx + cmpl $268435456,%ebp + je L10814rounds_alt + movl $13,%ecx movups %xmm0,-32(%edx) movups %xmm2,-16(%edx) .byte 102,15,58,223,202,1 - call L099key_256a_cold + call L109key_256a_cold .byte 102,15,58,223,200,1 - call L100key_256b + call L110key_256b .byte 102,15,58,223,202,2 - call L101key_256a + call L111key_256a .byte 102,15,58,223,200,2 - call L100key_256b + call L110key_256b .byte 102,15,58,223,202,4 - call L101key_256a + call L111key_256a .byte 102,15,58,223,200,4 - call L100key_256b + call L110key_256b .byte 102,15,58,223,202,8 - call L101key_256a + call L111key_256a .byte 102,15,58,223,200,8 - call L100key_256b + call L110key_256b .byte 102,15,58,223,202,16 - call L101key_256a + call L111key_256a .byte 102,15,58,223,200,16 - call L100key_256b + call L110key_256b .byte 102,15,58,223,202,32 - call L101key_256a + call L111key_256a .byte 102,15,58,223,200,32 - call L100key_256b + call L110key_256b .byte 102,15,58,223,202,64 - call L101key_256a + call L111key_256a movups %xmm0,(%edx) movl %ecx,16(%edx) xorl %eax,%eax - ret + jmp L100good_key .align 4,0x90 -L101key_256a: +L111key_256a: movups %xmm2,(%edx) leal 16(%edx),%edx -L099key_256a_cold: +L109key_256a_cold: shufps $16,%xmm0,%xmm4 xorps %xmm4,%xmm0 shufps $140,%xmm0,%xmm4 @@ -2087,7 +2273,7 @@ L099key_256a_cold: xorps %xmm1,%xmm0 ret .align 4,0x90 -L100key_256b: +L110key_256b: movups %xmm0,(%edx) leal 16(%edx),%edx shufps $16,%xmm2,%xmm4 @@ -2097,13 +2283,70 @@ L100key_256b: shufps $170,%xmm1,%xmm1 xorps %xmm1,%xmm2 ret +.align 4,0x90 +L10814rounds_alt: + movdqa (%ebx),%xmm5 + movdqa 32(%ebx),%xmm4 + movl $7,%ecx + movdqu %xmm0,-32(%edx) + movdqa %xmm2,%xmm1 + movdqu %xmm2,-16(%edx) +L112loop_key256: +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + pslld $1,%xmm4 + pxor %xmm2,%xmm0 + movdqu %xmm0,(%edx) + decl %ecx + jz L113done_key256 + pshufd $255,%xmm0,%xmm2 + pxor %xmm3,%xmm3 +.byte 102,15,56,221,211 + movdqa %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm3,%xmm1 + pxor %xmm1,%xmm2 + movdqu %xmm2,16(%edx) + leal 32(%edx),%edx + movdqa %xmm2,%xmm1 + jmp L112loop_key256 +L113done_key256: + movl $13,%ecx + movl %ecx,16(%edx) +L100good_key: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + xorl %eax,%eax + popl %ebx + popl %ebp + ret .align 2,0x90 -L088bad_pointer: +L091bad_pointer: movl $-1,%eax + popl %ebx + popl %ebp ret .align 2,0x90 -L091bad_keybits: +L095bad_keybits: + pxor %xmm0,%xmm0 movl $-2,%eax + popl %ebx + popl %ebp ret .globl _aesni_set_encrypt_key .align 4 @@ -2125,7 +2368,7 @@ L_aesni_set_decrypt_key_begin: movl 12(%esp),%edx shll $4,%ecx testl %eax,%eax - jnz L102dec_key_ret + jnz L114dec_key_ret leal 16(%edx,%ecx,1),%eax movups (%edx),%xmm0 movups (%eax),%xmm1 @@ -2133,7 +2376,7 @@ L_aesni_set_decrypt_key_begin: movups %xmm1,(%edx) leal 16(%edx),%edx leal -16(%eax),%eax -L103dec_key_inverse: +L115dec_key_inverse: movups (%edx),%xmm0 movups (%eax),%xmm1 .byte 102,15,56,219,192 @@ -2143,14 +2386,27 @@ L103dec_key_inverse: movups %xmm0,16(%eax) movups %xmm1,-16(%edx) cmpl %edx,%eax - ja L103dec_key_inverse + ja L115dec_key_inverse movups (%edx),%xmm0 .byte 102,15,56,219,192 movups %xmm0,(%edx) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 xorl %eax,%eax -L102dec_key_ret: +L114dec_key_ret: ret +.align 6,0x90 +Lkey_const: +.long 202313229,202313229,202313229,202313229 +.long 67569157,67569157,67569157,67569157 +.long 1,1,1,1 +.long 27,27,27,27 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 .byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 .byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 .byte 115,108,46,111,114,103,62,0 +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +.comm _OPENSSL_ia32cap_P,16,2 diff --git a/deps/openssl/asm/x86-win32-masm/aes/aesni-x86.asm b/deps/openssl/asm/x86-win32-masm/aes/aesni-x86.asm index 43fdb5a0345e93..6511c21bcf87ca 100644 --- a/deps/openssl/asm/x86-win32-masm/aes/aesni-x86.asm +++ b/deps/openssl/asm/x86-win32-masm/aes/aesni-x86.asm @@ -17,6 +17,7 @@ IF @Version LT 800 ELSE .text$ SEGMENT ALIGN(64) 'CODE' ENDIF +;EXTERN _OPENSSL_ia32cap_P:NEAR ALIGN 16 _aesni_encrypt PROC PUBLIC $L_aesni_encrypt_begin:: @@ -36,7 +37,10 @@ DB 102,15,56,220,209 lea edx,DWORD PTR 16[edx] jnz $L000enc1_loop_1 DB 102,15,56,221,209 + pxor xmm0,xmm0 + pxor xmm1,xmm1 movups XMMWORD PTR [eax],xmm2 + pxor xmm2,xmm2 ret _aesni_encrypt ENDP ALIGN 16 @@ -58,7 +62,10 @@ DB 102,15,56,222,209 lea edx,DWORD PTR 16[edx] jnz $L001dec1_loop_2 DB 102,15,56,223,209 + pxor xmm0,xmm0 + pxor xmm1,xmm1 movups XMMWORD PTR [eax],xmm2 + pxor xmm2,xmm2 ret _aesni_decrypt ENDP ALIGN 16 @@ -265,17 +272,15 @@ DB 102,15,56,220,217 neg ecx DB 102,15,56,220,225 pxor xmm7,xmm0 + movups xmm0,XMMWORD PTR [ecx*1+edx] add ecx,16 -DB 102,15,56,220,233 -DB 102,15,56,220,241 -DB 102,15,56,220,249 - movups xmm0,XMMWORD PTR [ecx*1+edx-16] - jmp $L_aesni_encrypt6_enter + jmp $L008_aesni_encrypt6_inner ALIGN 16 -$L008enc6_loop: +$L009enc6_loop: DB 102,15,56,220,209 DB 102,15,56,220,217 DB 102,15,56,220,225 +$L008_aesni_encrypt6_inner: DB 102,15,56,220,233 DB 102,15,56,220,241 DB 102,15,56,220,249 @@ -289,7 +294,7 @@ DB 102,15,56,220,232 DB 102,15,56,220,240 DB 102,15,56,220,248 movups xmm0,XMMWORD PTR [ecx*1+edx-16] - jnz $L008enc6_loop + jnz $L009enc6_loop DB 102,15,56,220,209 DB 102,15,56,220,217 DB 102,15,56,220,225 @@ -320,17 +325,15 @@ DB 102,15,56,222,217 neg ecx DB 102,15,56,222,225 pxor xmm7,xmm0 + movups xmm0,XMMWORD PTR [ecx*1+edx] add ecx,16 -DB 102,15,56,222,233 -DB 102,15,56,222,241 -DB 102,15,56,222,249 - movups xmm0,XMMWORD PTR [ecx*1+edx-16] - jmp $L_aesni_decrypt6_enter + jmp $L010_aesni_decrypt6_inner ALIGN 16 -$L009dec6_loop: +$L011dec6_loop: DB 102,15,56,222,209 DB 102,15,56,222,217 DB 102,15,56,222,225 +$L010_aesni_decrypt6_inner: DB 102,15,56,222,233 DB 102,15,56,222,241 DB 102,15,56,222,249 @@ -344,7 +347,7 @@ DB 102,15,56,222,232 DB 102,15,56,222,240 DB 102,15,56,222,248 movups xmm0,XMMWORD PTR [ecx*1+edx-16] - jnz $L009dec6_loop + jnz $L011dec6_loop DB 102,15,56,222,209 DB 102,15,56,222,217 DB 102,15,56,222,225 @@ -372,14 +375,14 @@ $L_aesni_ecb_encrypt_begin:: mov edx,DWORD PTR 32[esp] mov ebx,DWORD PTR 36[esp] and eax,-16 - jz $L010ecb_ret + jz $L012ecb_ret mov ecx,DWORD PTR 240[edx] test ebx,ebx - jz $L011ecb_decrypt + jz $L013ecb_decrypt mov ebp,edx mov ebx,ecx cmp eax,96 - jb $L012ecb_enc_tail + jb $L014ecb_enc_tail movdqu xmm2,XMMWORD PTR [esi] movdqu xmm3,XMMWORD PTR 16[esi] movdqu xmm4,XMMWORD PTR 32[esi] @@ -388,9 +391,9 @@ $L_aesni_ecb_encrypt_begin:: movdqu xmm7,XMMWORD PTR 80[esi] lea esi,DWORD PTR 96[esi] sub eax,96 - jmp $L013ecb_enc_loop6_enter + jmp $L015ecb_enc_loop6_enter ALIGN 16 -$L014ecb_enc_loop6: +$L016ecb_enc_loop6: movups XMMWORD PTR [edi],xmm2 movdqu xmm2,XMMWORD PTR [esi] movups XMMWORD PTR 16[edi],xmm3 @@ -405,12 +408,12 @@ $L014ecb_enc_loop6: lea edi,DWORD PTR 96[edi] movdqu xmm7,XMMWORD PTR 80[esi] lea esi,DWORD PTR 96[esi] -$L013ecb_enc_loop6_enter: +$L015ecb_enc_loop6_enter: call __aesni_encrypt6 mov edx,ebp mov ecx,ebx sub eax,96 - jnc $L014ecb_enc_loop6 + jnc $L016ecb_enc_loop6 movups XMMWORD PTR [edi],xmm2 movups XMMWORD PTR 16[edi],xmm3 movups XMMWORD PTR 32[edi],xmm4 @@ -419,18 +422,18 @@ $L013ecb_enc_loop6_enter: movups XMMWORD PTR 80[edi],xmm7 lea edi,DWORD PTR 96[edi] add eax,96 - jz $L010ecb_ret -$L012ecb_enc_tail: + jz $L012ecb_ret +$L014ecb_enc_tail: movups xmm2,XMMWORD PTR [esi] cmp eax,32 - jb $L015ecb_enc_one + jb $L017ecb_enc_one movups xmm3,XMMWORD PTR 16[esi] - je $L016ecb_enc_two + je $L018ecb_enc_two movups xmm4,XMMWORD PTR 32[esi] cmp eax,64 - jb $L017ecb_enc_three + jb $L019ecb_enc_three movups xmm5,XMMWORD PTR 48[esi] - je $L018ecb_enc_four + je $L020ecb_enc_four movups xmm6,XMMWORD PTR 64[esi] xorps xmm7,xmm7 call __aesni_encrypt6 @@ -439,49 +442,49 @@ $L012ecb_enc_tail: movups XMMWORD PTR 32[edi],xmm4 movups XMMWORD PTR 48[edi],xmm5 movups XMMWORD PTR 64[edi],xmm6 - jmp $L010ecb_ret + jmp $L012ecb_ret ALIGN 16 -$L015ecb_enc_one: +$L017ecb_enc_one: movups xmm0,XMMWORD PTR [edx] movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L019enc1_loop_3: +$L021enc1_loop_3: DB 102,15,56,220,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L019enc1_loop_3 + jnz $L021enc1_loop_3 DB 102,15,56,221,209 movups XMMWORD PTR [edi],xmm2 - jmp $L010ecb_ret + jmp $L012ecb_ret ALIGN 16 -$L016ecb_enc_two: +$L018ecb_enc_two: call __aesni_encrypt2 movups XMMWORD PTR [edi],xmm2 movups XMMWORD PTR 16[edi],xmm3 - jmp $L010ecb_ret + jmp $L012ecb_ret ALIGN 16 -$L017ecb_enc_three: +$L019ecb_enc_three: call __aesni_encrypt3 movups XMMWORD PTR [edi],xmm2 movups XMMWORD PTR 16[edi],xmm3 movups XMMWORD PTR 32[edi],xmm4 - jmp $L010ecb_ret + jmp $L012ecb_ret ALIGN 16 -$L018ecb_enc_four: +$L020ecb_enc_four: call __aesni_encrypt4 movups XMMWORD PTR [edi],xmm2 movups XMMWORD PTR 16[edi],xmm3 movups XMMWORD PTR 32[edi],xmm4 movups XMMWORD PTR 48[edi],xmm5 - jmp $L010ecb_ret + jmp $L012ecb_ret ALIGN 16 -$L011ecb_decrypt: +$L013ecb_decrypt: mov ebp,edx mov ebx,ecx cmp eax,96 - jb $L020ecb_dec_tail + jb $L022ecb_dec_tail movdqu xmm2,XMMWORD PTR [esi] movdqu xmm3,XMMWORD PTR 16[esi] movdqu xmm4,XMMWORD PTR 32[esi] @@ -490,9 +493,9 @@ $L011ecb_decrypt: movdqu xmm7,XMMWORD PTR 80[esi] lea esi,DWORD PTR 96[esi] sub eax,96 - jmp $L021ecb_dec_loop6_enter + jmp $L023ecb_dec_loop6_enter ALIGN 16 -$L022ecb_dec_loop6: +$L024ecb_dec_loop6: movups XMMWORD PTR [edi],xmm2 movdqu xmm2,XMMWORD PTR [esi] movups XMMWORD PTR 16[edi],xmm3 @@ -507,12 +510,12 @@ $L022ecb_dec_loop6: lea edi,DWORD PTR 96[edi] movdqu xmm7,XMMWORD PTR 80[esi] lea esi,DWORD PTR 96[esi] -$L021ecb_dec_loop6_enter: +$L023ecb_dec_loop6_enter: call __aesni_decrypt6 mov edx,ebp mov ecx,ebx sub eax,96 - jnc $L022ecb_dec_loop6 + jnc $L024ecb_dec_loop6 movups XMMWORD PTR [edi],xmm2 movups XMMWORD PTR 16[edi],xmm3 movups XMMWORD PTR 32[edi],xmm4 @@ -521,18 +524,18 @@ $L021ecb_dec_loop6_enter: movups XMMWORD PTR 80[edi],xmm7 lea edi,DWORD PTR 96[edi] add eax,96 - jz $L010ecb_ret -$L020ecb_dec_tail: + jz $L012ecb_ret +$L022ecb_dec_tail: movups xmm2,XMMWORD PTR [esi] cmp eax,32 - jb $L023ecb_dec_one + jb $L025ecb_dec_one movups xmm3,XMMWORD PTR 16[esi] - je $L024ecb_dec_two + je $L026ecb_dec_two movups xmm4,XMMWORD PTR 32[esi] cmp eax,64 - jb $L025ecb_dec_three + jb $L027ecb_dec_three movups xmm5,XMMWORD PTR 48[esi] - je $L026ecb_dec_four + je $L028ecb_dec_four movups xmm6,XMMWORD PTR 64[esi] xorps xmm7,xmm7 call __aesni_decrypt6 @@ -541,43 +544,51 @@ $L020ecb_dec_tail: movups XMMWORD PTR 32[edi],xmm4 movups XMMWORD PTR 48[edi],xmm5 movups XMMWORD PTR 64[edi],xmm6 - jmp $L010ecb_ret + jmp $L012ecb_ret ALIGN 16 -$L023ecb_dec_one: +$L025ecb_dec_one: movups xmm0,XMMWORD PTR [edx] movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L027dec1_loop_4: +$L029dec1_loop_4: DB 102,15,56,222,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L027dec1_loop_4 + jnz $L029dec1_loop_4 DB 102,15,56,223,209 movups XMMWORD PTR [edi],xmm2 - jmp $L010ecb_ret + jmp $L012ecb_ret ALIGN 16 -$L024ecb_dec_two: +$L026ecb_dec_two: call __aesni_decrypt2 movups XMMWORD PTR [edi],xmm2 movups XMMWORD PTR 16[edi],xmm3 - jmp $L010ecb_ret + jmp $L012ecb_ret ALIGN 16 -$L025ecb_dec_three: +$L027ecb_dec_three: call __aesni_decrypt3 movups XMMWORD PTR [edi],xmm2 movups XMMWORD PTR 16[edi],xmm3 movups XMMWORD PTR 32[edi],xmm4 - jmp $L010ecb_ret + jmp $L012ecb_ret ALIGN 16 -$L026ecb_dec_four: +$L028ecb_dec_four: call __aesni_decrypt4 movups XMMWORD PTR [edi],xmm2 movups XMMWORD PTR 16[edi],xmm3 movups XMMWORD PTR 32[edi],xmm4 movups XMMWORD PTR 48[edi],xmm5 -$L010ecb_ret: +$L012ecb_ret: + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 + pxor xmm7,xmm7 pop edi pop esi pop ebx @@ -622,7 +633,7 @@ $L_aesni_ccm64_encrypt_blocks_begin:: lea edx,DWORD PTR 32[ecx*1+edx] sub ebx,ecx DB 102,15,56,0,253 -$L028ccm64_enc_outer: +$L030ccm64_enc_outer: movups xmm0,XMMWORD PTR [ebp] mov ecx,ebx movups xmm6,XMMWORD PTR [esi] @@ -631,7 +642,7 @@ $L028ccm64_enc_outer: xorps xmm0,xmm6 xorps xmm3,xmm0 movups xmm0,XMMWORD PTR 32[ebp] -$L029ccm64_enc2_loop: +$L031ccm64_enc2_loop: DB 102,15,56,220,209 DB 102,15,56,220,217 movups xmm1,XMMWORD PTR [ecx*1+edx] @@ -639,7 +650,7 @@ DB 102,15,56,220,217 DB 102,15,56,220,208 DB 102,15,56,220,216 movups xmm0,XMMWORD PTR [ecx*1+edx-16] - jnz $L029ccm64_enc2_loop + jnz $L031ccm64_enc2_loop DB 102,15,56,220,209 DB 102,15,56,220,217 paddq xmm7,XMMWORD PTR 16[esp] @@ -652,10 +663,18 @@ DB 102,15,56,221,216 movups XMMWORD PTR [edi],xmm6 DB 102,15,56,0,213 lea edi,DWORD PTR 16[edi] - jnz $L028ccm64_enc_outer + jnz $L030ccm64_enc_outer mov esp,DWORD PTR 48[esp] mov edi,DWORD PTR 40[esp] movups XMMWORD PTR [edi],xmm3 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 + pxor xmm7,xmm7 pop edi pop esi pop ebx @@ -701,12 +720,12 @@ DB 102,15,56,0,253 movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L030enc1_loop_5: +$L032enc1_loop_5: DB 102,15,56,220,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L030enc1_loop_5 + jnz $L032enc1_loop_5 DB 102,15,56,221,209 shl ebx,4 mov ecx,16 @@ -716,16 +735,16 @@ DB 102,15,56,221,209 sub ecx,ebx lea edx,DWORD PTR 32[ebx*1+ebp] mov ebx,ecx - jmp $L031ccm64_dec_outer + jmp $L033ccm64_dec_outer ALIGN 16 -$L031ccm64_dec_outer: +$L033ccm64_dec_outer: xorps xmm6,xmm2 movdqa xmm2,xmm7 movups XMMWORD PTR [edi],xmm6 lea edi,DWORD PTR 16[edi] DB 102,15,56,0,213 sub eax,1 - jz $L032ccm64_dec_break + jz $L034ccm64_dec_break movups xmm0,XMMWORD PTR [ebp] mov ecx,ebx movups xmm1,XMMWORD PTR 16[ebp] @@ -733,7 +752,7 @@ DB 102,15,56,0,213 xorps xmm2,xmm0 xorps xmm3,xmm6 movups xmm0,XMMWORD PTR 32[ebp] -$L033ccm64_dec2_loop: +$L035ccm64_dec2_loop: DB 102,15,56,220,209 DB 102,15,56,220,217 movups xmm1,XMMWORD PTR [ecx*1+edx] @@ -741,7 +760,7 @@ DB 102,15,56,220,217 DB 102,15,56,220,208 DB 102,15,56,220,216 movups xmm0,XMMWORD PTR [ecx*1+edx-16] - jnz $L033ccm64_dec2_loop + jnz $L035ccm64_dec2_loop movups xmm6,XMMWORD PTR [esi] paddq xmm7,XMMWORD PTR 16[esp] DB 102,15,56,220,209 @@ -749,9 +768,9 @@ DB 102,15,56,220,217 DB 102,15,56,221,208 DB 102,15,56,221,216 lea esi,QWORD PTR 16[esi] - jmp $L031ccm64_dec_outer + jmp $L033ccm64_dec_outer ALIGN 16 -$L032ccm64_dec_break: +$L034ccm64_dec_break: mov ecx,DWORD PTR 240[ebp] mov edx,ebp movups xmm0,XMMWORD PTR [edx] @@ -759,16 +778,24 @@ $L032ccm64_dec_break: xorps xmm6,xmm0 lea edx,DWORD PTR 32[edx] xorps xmm3,xmm6 -$L034enc1_loop_6: +$L036enc1_loop_6: DB 102,15,56,220,217 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L034enc1_loop_6 + jnz $L036enc1_loop_6 DB 102,15,56,221,217 mov esp,DWORD PTR 48[esp] mov edi,DWORD PTR 40[esp] movups XMMWORD PTR [edi],xmm3 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 + pxor xmm7,xmm7 pop edi pop esi pop ebx @@ -792,7 +819,7 @@ $L_aesni_ctr32_encrypt_blocks_begin:: and esp,-16 mov DWORD PTR 80[esp],ebp cmp eax,1 - je $L035ctr32_one_shortcut + je $L037ctr32_one_shortcut movdqu xmm7,XMMWORD PTR [ebx] mov DWORD PTR [esp],202182159 mov DWORD PTR 4[esp],134810123 @@ -830,7 +857,7 @@ DB 102,15,56,0,202 pshufd xmm2,xmm0,192 pshufd xmm3,xmm0,128 cmp eax,6 - jb $L036ctr32_tail + jb $L038ctr32_tail pxor xmm7,xmm6 shl ecx,4 mov ebx,16 @@ -839,9 +866,9 @@ DB 102,15,56,0,202 sub ebx,ecx lea edx,DWORD PTR 32[ecx*1+edx] sub eax,6 - jmp $L037ctr32_loop6 + jmp $L039ctr32_loop6 ALIGN 16 -$L037ctr32_loop6: +$L039ctr32_loop6: pshufd xmm4,xmm0,64 movdqa xmm0,XMMWORD PTR 32[esp] pshufd xmm5,xmm1,192 @@ -895,27 +922,27 @@ DB 102,15,56,0,202 lea edi,DWORD PTR 96[edi] pshufd xmm3,xmm0,128 sub eax,6 - jnc $L037ctr32_loop6 + jnc $L039ctr32_loop6 add eax,6 - jz $L038ctr32_ret + jz $L040ctr32_ret movdqu xmm7,XMMWORD PTR [ebp] mov edx,ebp pxor xmm7,XMMWORD PTR 32[esp] mov ecx,DWORD PTR 240[ebp] -$L036ctr32_tail: +$L038ctr32_tail: por xmm2,xmm7 cmp eax,2 - jb $L039ctr32_one + jb $L041ctr32_one pshufd xmm4,xmm0,64 por xmm3,xmm7 - je $L040ctr32_two + je $L042ctr32_two pshufd xmm5,xmm1,192 por xmm4,xmm7 cmp eax,4 - jb $L041ctr32_three + jb $L043ctr32_three pshufd xmm6,xmm1,128 por xmm5,xmm7 - je $L042ctr32_four + je $L044ctr32_four por xmm6,xmm7 call __aesni_encrypt6 movups xmm1,XMMWORD PTR [esi] @@ -933,29 +960,29 @@ $L036ctr32_tail: movups XMMWORD PTR 32[edi],xmm4 movups XMMWORD PTR 48[edi],xmm5 movups XMMWORD PTR 64[edi],xmm6 - jmp $L038ctr32_ret + jmp $L040ctr32_ret ALIGN 16 -$L035ctr32_one_shortcut: +$L037ctr32_one_shortcut: movups xmm2,XMMWORD PTR [ebx] mov ecx,DWORD PTR 240[edx] -$L039ctr32_one: +$L041ctr32_one: movups xmm0,XMMWORD PTR [edx] movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L043enc1_loop_7: +$L045enc1_loop_7: DB 102,15,56,220,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L043enc1_loop_7 + jnz $L045enc1_loop_7 DB 102,15,56,221,209 movups xmm6,XMMWORD PTR [esi] xorps xmm6,xmm2 movups XMMWORD PTR [edi],xmm6 - jmp $L038ctr32_ret + jmp $L040ctr32_ret ALIGN 16 -$L040ctr32_two: +$L042ctr32_two: call __aesni_encrypt2 movups xmm5,XMMWORD PTR [esi] movups xmm6,XMMWORD PTR 16[esi] @@ -963,9 +990,9 @@ $L040ctr32_two: xorps xmm3,xmm6 movups XMMWORD PTR [edi],xmm2 movups XMMWORD PTR 16[edi],xmm3 - jmp $L038ctr32_ret + jmp $L040ctr32_ret ALIGN 16 -$L041ctr32_three: +$L043ctr32_three: call __aesni_encrypt3 movups xmm5,XMMWORD PTR [esi] movups xmm6,XMMWORD PTR 16[esi] @@ -976,9 +1003,9 @@ $L041ctr32_three: xorps xmm4,xmm7 movups XMMWORD PTR 16[edi],xmm3 movups XMMWORD PTR 32[edi],xmm4 - jmp $L038ctr32_ret + jmp $L040ctr32_ret ALIGN 16 -$L042ctr32_four: +$L044ctr32_four: call __aesni_encrypt4 movups xmm6,XMMWORD PTR [esi] movups xmm7,XMMWORD PTR 16[esi] @@ -992,7 +1019,18 @@ $L042ctr32_four: xorps xmm5,xmm0 movups XMMWORD PTR 32[edi],xmm4 movups XMMWORD PTR 48[edi],xmm5 -$L038ctr32_ret: +$L040ctr32_ret: + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + movdqa XMMWORD PTR 32[esp],xmm0 + pxor xmm5,xmm5 + movdqa XMMWORD PTR 48[esp],xmm0 + pxor xmm6,xmm6 + movdqa XMMWORD PTR 64[esp],xmm0 + pxor xmm7,xmm7 mov esp,DWORD PTR 80[esp] pop edi pop esi @@ -1015,12 +1053,12 @@ $L_aesni_xts_encrypt_begin:: movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L044enc1_loop_8: +$L046enc1_loop_8: DB 102,15,56,220,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L044enc1_loop_8 + jnz $L046enc1_loop_8 DB 102,15,56,221,209 mov esi,DWORD PTR 20[esp] mov edi,DWORD PTR 24[esp] @@ -1044,14 +1082,14 @@ DB 102,15,56,221,209 mov ebp,edx mov ebx,ecx sub eax,96 - jc $L045xts_enc_short + jc $L047xts_enc_short shl ecx,4 mov ebx,16 sub ebx,ecx lea edx,DWORD PTR 32[ecx*1+edx] - jmp $L046xts_enc_loop6 + jmp $L048xts_enc_loop6 ALIGN 16 -$L046xts_enc_loop6: +$L048xts_enc_loop6: pshufd xmm2,xmm0,19 pxor xmm0,xmm0 movdqa XMMWORD PTR [esp],xmm1 @@ -1140,23 +1178,23 @@ DB 102,15,56,220,249 pcmpgtd xmm0,xmm1 pxor xmm1,xmm2 sub eax,96 - jnc $L046xts_enc_loop6 + jnc $L048xts_enc_loop6 mov ecx,DWORD PTR 240[ebp] mov edx,ebp mov ebx,ecx -$L045xts_enc_short: +$L047xts_enc_short: add eax,96 - jz $L047xts_enc_done6x + jz $L049xts_enc_done6x movdqa xmm5,xmm1 cmp eax,32 - jb $L048xts_enc_one + jb $L050xts_enc_one pshufd xmm2,xmm0,19 pxor xmm0,xmm0 paddq xmm1,xmm1 pand xmm2,xmm3 pcmpgtd xmm0,xmm1 pxor xmm1,xmm2 - je $L049xts_enc_two + je $L051xts_enc_two pshufd xmm2,xmm0,19 pxor xmm0,xmm0 movdqa xmm6,xmm1 @@ -1165,7 +1203,7 @@ $L045xts_enc_short: pcmpgtd xmm0,xmm1 pxor xmm1,xmm2 cmp eax,64 - jb $L050xts_enc_three + jb $L052xts_enc_three pshufd xmm2,xmm0,19 pxor xmm0,xmm0 movdqa xmm7,xmm1 @@ -1175,7 +1213,7 @@ $L045xts_enc_short: pxor xmm1,xmm2 movdqa XMMWORD PTR [esp],xmm5 movdqa XMMWORD PTR 16[esp],xmm6 - je $L051xts_enc_four + je $L053xts_enc_four movdqa XMMWORD PTR 32[esp],xmm7 pshufd xmm7,xmm0,19 movdqa XMMWORD PTR 48[esp],xmm1 @@ -1207,9 +1245,9 @@ $L045xts_enc_short: movups XMMWORD PTR 48[edi],xmm5 movups XMMWORD PTR 64[edi],xmm6 lea edi,DWORD PTR 80[edi] - jmp $L052xts_enc_done + jmp $L054xts_enc_done ALIGN 16 -$L048xts_enc_one: +$L050xts_enc_one: movups xmm2,XMMWORD PTR [esi] lea esi,DWORD PTR 16[esi] xorps xmm2,xmm5 @@ -1217,20 +1255,20 @@ $L048xts_enc_one: movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L053enc1_loop_9: +$L055enc1_loop_9: DB 102,15,56,220,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L053enc1_loop_9 + jnz $L055enc1_loop_9 DB 102,15,56,221,209 xorps xmm2,xmm5 movups XMMWORD PTR [edi],xmm2 lea edi,DWORD PTR 16[edi] movdqa xmm1,xmm5 - jmp $L052xts_enc_done + jmp $L054xts_enc_done ALIGN 16 -$L049xts_enc_two: +$L051xts_enc_two: movaps xmm6,xmm1 movups xmm2,XMMWORD PTR [esi] movups xmm3,XMMWORD PTR 16[esi] @@ -1244,9 +1282,9 @@ $L049xts_enc_two: movups XMMWORD PTR 16[edi],xmm3 lea edi,DWORD PTR 32[edi] movdqa xmm1,xmm6 - jmp $L052xts_enc_done + jmp $L054xts_enc_done ALIGN 16 -$L050xts_enc_three: +$L052xts_enc_three: movaps xmm7,xmm1 movups xmm2,XMMWORD PTR [esi] movups xmm3,XMMWORD PTR 16[esi] @@ -1264,9 +1302,9 @@ $L050xts_enc_three: movups XMMWORD PTR 32[edi],xmm4 lea edi,DWORD PTR 48[edi] movdqa xmm1,xmm7 - jmp $L052xts_enc_done + jmp $L054xts_enc_done ALIGN 16 -$L051xts_enc_four: +$L053xts_enc_four: movaps xmm6,xmm1 movups xmm2,XMMWORD PTR [esi] movups xmm3,XMMWORD PTR 16[esi] @@ -1288,28 +1326,28 @@ $L051xts_enc_four: movups XMMWORD PTR 48[edi],xmm5 lea edi,DWORD PTR 64[edi] movdqa xmm1,xmm6 - jmp $L052xts_enc_done + jmp $L054xts_enc_done ALIGN 16 -$L047xts_enc_done6x: +$L049xts_enc_done6x: mov eax,DWORD PTR 112[esp] and eax,15 - jz $L054xts_enc_ret + jz $L056xts_enc_ret movdqa xmm5,xmm1 mov DWORD PTR 112[esp],eax - jmp $L055xts_enc_steal + jmp $L057xts_enc_steal ALIGN 16 -$L052xts_enc_done: +$L054xts_enc_done: mov eax,DWORD PTR 112[esp] pxor xmm0,xmm0 and eax,15 - jz $L054xts_enc_ret + jz $L056xts_enc_ret pcmpgtd xmm0,xmm1 mov DWORD PTR 112[esp],eax pshufd xmm5,xmm0,19 paddq xmm1,xmm1 pand xmm5,XMMWORD PTR 96[esp] pxor xmm5,xmm1 -$L055xts_enc_steal: +$L057xts_enc_steal: movzx ecx,BYTE PTR [esi] movzx edx,BYTE PTR [edi-16] lea esi,DWORD PTR 1[esi] @@ -1317,7 +1355,7 @@ $L055xts_enc_steal: mov BYTE PTR [edi],dl lea edi,DWORD PTR 1[edi] sub eax,1 - jnz $L055xts_enc_steal + jnz $L057xts_enc_steal sub edi,DWORD PTR 112[esp] mov edx,ebp mov ecx,ebx @@ -1327,16 +1365,30 @@ $L055xts_enc_steal: movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L056enc1_loop_10: +$L058enc1_loop_10: DB 102,15,56,220,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L056enc1_loop_10 + jnz $L058enc1_loop_10 DB 102,15,56,221,209 xorps xmm2,xmm5 movups XMMWORD PTR [edi-16],xmm2 -$L054xts_enc_ret: +$L056xts_enc_ret: + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + movdqa XMMWORD PTR [esp],xmm0 + pxor xmm3,xmm3 + movdqa XMMWORD PTR 16[esp],xmm0 + pxor xmm4,xmm4 + movdqa XMMWORD PTR 32[esp],xmm0 + pxor xmm5,xmm5 + movdqa XMMWORD PTR 48[esp],xmm0 + pxor xmm6,xmm6 + movdqa XMMWORD PTR 64[esp],xmm0 + pxor xmm7,xmm7 + movdqa XMMWORD PTR 80[esp],xmm0 mov esp,DWORD PTR 116[esp] pop edi pop esi @@ -1359,12 +1411,12 @@ $L_aesni_xts_decrypt_begin:: movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L057enc1_loop_11: +$L059enc1_loop_11: DB 102,15,56,220,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L057enc1_loop_11 + jnz $L059enc1_loop_11 DB 102,15,56,221,209 mov esi,DWORD PTR 20[esp] mov edi,DWORD PTR 24[esp] @@ -1393,14 +1445,14 @@ DB 102,15,56,221,209 pcmpgtd xmm0,xmm1 and eax,-16 sub eax,96 - jc $L058xts_dec_short + jc $L060xts_dec_short shl ecx,4 mov ebx,16 sub ebx,ecx lea edx,DWORD PTR 32[ecx*1+edx] - jmp $L059xts_dec_loop6 + jmp $L061xts_dec_loop6 ALIGN 16 -$L059xts_dec_loop6: +$L061xts_dec_loop6: pshufd xmm2,xmm0,19 pxor xmm0,xmm0 movdqa XMMWORD PTR [esp],xmm1 @@ -1489,23 +1541,23 @@ DB 102,15,56,222,249 pcmpgtd xmm0,xmm1 pxor xmm1,xmm2 sub eax,96 - jnc $L059xts_dec_loop6 + jnc $L061xts_dec_loop6 mov ecx,DWORD PTR 240[ebp] mov edx,ebp mov ebx,ecx -$L058xts_dec_short: +$L060xts_dec_short: add eax,96 - jz $L060xts_dec_done6x + jz $L062xts_dec_done6x movdqa xmm5,xmm1 cmp eax,32 - jb $L061xts_dec_one + jb $L063xts_dec_one pshufd xmm2,xmm0,19 pxor xmm0,xmm0 paddq xmm1,xmm1 pand xmm2,xmm3 pcmpgtd xmm0,xmm1 pxor xmm1,xmm2 - je $L062xts_dec_two + je $L064xts_dec_two pshufd xmm2,xmm0,19 pxor xmm0,xmm0 movdqa xmm6,xmm1 @@ -1514,7 +1566,7 @@ $L058xts_dec_short: pcmpgtd xmm0,xmm1 pxor xmm1,xmm2 cmp eax,64 - jb $L063xts_dec_three + jb $L065xts_dec_three pshufd xmm2,xmm0,19 pxor xmm0,xmm0 movdqa xmm7,xmm1 @@ -1524,7 +1576,7 @@ $L058xts_dec_short: pxor xmm1,xmm2 movdqa XMMWORD PTR [esp],xmm5 movdqa XMMWORD PTR 16[esp],xmm6 - je $L064xts_dec_four + je $L066xts_dec_four movdqa XMMWORD PTR 32[esp],xmm7 pshufd xmm7,xmm0,19 movdqa XMMWORD PTR 48[esp],xmm1 @@ -1556,9 +1608,9 @@ $L058xts_dec_short: movups XMMWORD PTR 48[edi],xmm5 movups XMMWORD PTR 64[edi],xmm6 lea edi,DWORD PTR 80[edi] - jmp $L065xts_dec_done + jmp $L067xts_dec_done ALIGN 16 -$L061xts_dec_one: +$L063xts_dec_one: movups xmm2,XMMWORD PTR [esi] lea esi,DWORD PTR 16[esi] xorps xmm2,xmm5 @@ -1566,20 +1618,20 @@ $L061xts_dec_one: movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L066dec1_loop_12: +$L068dec1_loop_12: DB 102,15,56,222,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L066dec1_loop_12 + jnz $L068dec1_loop_12 DB 102,15,56,223,209 xorps xmm2,xmm5 movups XMMWORD PTR [edi],xmm2 lea edi,DWORD PTR 16[edi] movdqa xmm1,xmm5 - jmp $L065xts_dec_done + jmp $L067xts_dec_done ALIGN 16 -$L062xts_dec_two: +$L064xts_dec_two: movaps xmm6,xmm1 movups xmm2,XMMWORD PTR [esi] movups xmm3,XMMWORD PTR 16[esi] @@ -1593,9 +1645,9 @@ $L062xts_dec_two: movups XMMWORD PTR 16[edi],xmm3 lea edi,DWORD PTR 32[edi] movdqa xmm1,xmm6 - jmp $L065xts_dec_done + jmp $L067xts_dec_done ALIGN 16 -$L063xts_dec_three: +$L065xts_dec_three: movaps xmm7,xmm1 movups xmm2,XMMWORD PTR [esi] movups xmm3,XMMWORD PTR 16[esi] @@ -1613,9 +1665,9 @@ $L063xts_dec_three: movups XMMWORD PTR 32[edi],xmm4 lea edi,DWORD PTR 48[edi] movdqa xmm1,xmm7 - jmp $L065xts_dec_done + jmp $L067xts_dec_done ALIGN 16 -$L064xts_dec_four: +$L066xts_dec_four: movaps xmm6,xmm1 movups xmm2,XMMWORD PTR [esi] movups xmm3,XMMWORD PTR 16[esi] @@ -1637,20 +1689,20 @@ $L064xts_dec_four: movups XMMWORD PTR 48[edi],xmm5 lea edi,DWORD PTR 64[edi] movdqa xmm1,xmm6 - jmp $L065xts_dec_done + jmp $L067xts_dec_done ALIGN 16 -$L060xts_dec_done6x: +$L062xts_dec_done6x: mov eax,DWORD PTR 112[esp] and eax,15 - jz $L067xts_dec_ret + jz $L069xts_dec_ret mov DWORD PTR 112[esp],eax - jmp $L068xts_dec_only_one_more + jmp $L070xts_dec_only_one_more ALIGN 16 -$L065xts_dec_done: +$L067xts_dec_done: mov eax,DWORD PTR 112[esp] pxor xmm0,xmm0 and eax,15 - jz $L067xts_dec_ret + jz $L069xts_dec_ret pcmpgtd xmm0,xmm1 mov DWORD PTR 112[esp],eax pshufd xmm2,xmm0,19 @@ -1660,7 +1712,7 @@ $L065xts_dec_done: pand xmm2,xmm3 pcmpgtd xmm0,xmm1 pxor xmm1,xmm2 -$L068xts_dec_only_one_more: +$L070xts_dec_only_one_more: pshufd xmm5,xmm0,19 movdqa xmm6,xmm1 paddq xmm1,xmm1 @@ -1674,16 +1726,16 @@ $L068xts_dec_only_one_more: movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L069dec1_loop_13: +$L071dec1_loop_13: DB 102,15,56,222,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L069dec1_loop_13 + jnz $L071dec1_loop_13 DB 102,15,56,223,209 xorps xmm2,xmm5 movups XMMWORD PTR [edi],xmm2 -$L070xts_dec_steal: +$L072xts_dec_steal: movzx ecx,BYTE PTR 16[esi] movzx edx,BYTE PTR [edi] lea esi,DWORD PTR 1[esi] @@ -1691,7 +1743,7 @@ $L070xts_dec_steal: mov BYTE PTR 16[edi],dl lea edi,DWORD PTR 1[edi] sub eax,1 - jnz $L070xts_dec_steal + jnz $L072xts_dec_steal sub edi,DWORD PTR 112[esp] mov edx,ebp mov ecx,ebx @@ -1701,16 +1753,30 @@ $L070xts_dec_steal: movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L071dec1_loop_14: +$L073dec1_loop_14: DB 102,15,56,222,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L071dec1_loop_14 + jnz $L073dec1_loop_14 DB 102,15,56,223,209 xorps xmm2,xmm6 movups XMMWORD PTR [edi],xmm2 -$L067xts_dec_ret: +$L069xts_dec_ret: + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + movdqa XMMWORD PTR [esp],xmm0 + pxor xmm3,xmm3 + movdqa XMMWORD PTR 16[esp],xmm0 + pxor xmm4,xmm4 + movdqa XMMWORD PTR 32[esp],xmm0 + pxor xmm5,xmm5 + movdqa XMMWORD PTR 48[esp],xmm0 + pxor xmm6,xmm6 + movdqa XMMWORD PTR 64[esp],xmm0 + pxor xmm7,xmm7 + movdqa XMMWORD PTR 80[esp],xmm0 mov esp,DWORD PTR 116[esp] pop edi pop esi @@ -1734,7 +1800,7 @@ $L_aesni_cbc_encrypt_begin:: mov edx,DWORD PTR 32[esp] mov ebp,DWORD PTR 36[esp] test eax,eax - jz $L072cbc_abort + jz $L074cbc_abort cmp DWORD PTR 40[esp],0 xchg ebx,esp movups xmm7,XMMWORD PTR [ebp] @@ -1742,14 +1808,14 @@ $L_aesni_cbc_encrypt_begin:: mov ebp,edx mov DWORD PTR 16[esp],ebx mov ebx,ecx - je $L073cbc_decrypt + je $L075cbc_decrypt movaps xmm2,xmm7 cmp eax,16 - jb $L074cbc_enc_tail + jb $L076cbc_enc_tail sub eax,16 - jmp $L075cbc_enc_loop + jmp $L077cbc_enc_loop ALIGN 16 -$L075cbc_enc_loop: +$L077cbc_enc_loop: movups xmm7,XMMWORD PTR [esi] lea esi,DWORD PTR 16[esi] movups xmm0,XMMWORD PTR [edx] @@ -1757,24 +1823,25 @@ $L075cbc_enc_loop: xorps xmm7,xmm0 lea edx,DWORD PTR 32[edx] xorps xmm2,xmm7 -$L076enc1_loop_15: +$L078enc1_loop_15: DB 102,15,56,220,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L076enc1_loop_15 + jnz $L078enc1_loop_15 DB 102,15,56,221,209 mov ecx,ebx mov edx,ebp movups XMMWORD PTR [edi],xmm2 lea edi,DWORD PTR 16[edi] sub eax,16 - jnc $L075cbc_enc_loop + jnc $L077cbc_enc_loop add eax,16 - jnz $L074cbc_enc_tail + jnz $L076cbc_enc_tail movaps xmm7,xmm2 - jmp $L077cbc_ret -$L074cbc_enc_tail: + pxor xmm2,xmm2 + jmp $L079cbc_ret +$L076cbc_enc_tail: mov ecx,eax DD 2767451785 mov ecx,16 @@ -1785,20 +1852,20 @@ DD 2868115081 mov ecx,ebx mov esi,edi mov edx,ebp - jmp $L075cbc_enc_loop + jmp $L077cbc_enc_loop ALIGN 16 -$L073cbc_decrypt: +$L075cbc_decrypt: cmp eax,80 - jbe $L078cbc_dec_tail + jbe $L080cbc_dec_tail movaps XMMWORD PTR [esp],xmm7 sub eax,80 - jmp $L079cbc_dec_loop6_enter + jmp $L081cbc_dec_loop6_enter ALIGN 16 -$L080cbc_dec_loop6: +$L082cbc_dec_loop6: movaps XMMWORD PTR [esp],xmm0 movups XMMWORD PTR [edi],xmm7 lea edi,DWORD PTR 16[edi] -$L079cbc_dec_loop6_enter: +$L081cbc_dec_loop6_enter: movdqu xmm2,XMMWORD PTR [esi] movdqu xmm3,XMMWORD PTR 16[esi] movdqu xmm4,XMMWORD PTR 32[esi] @@ -1828,28 +1895,28 @@ $L079cbc_dec_loop6_enter: movups XMMWORD PTR 64[edi],xmm6 lea edi,DWORD PTR 80[edi] sub eax,96 - ja $L080cbc_dec_loop6 + ja $L082cbc_dec_loop6 movaps xmm2,xmm7 movaps xmm7,xmm0 add eax,80 - jle $L081cbc_dec_tail_collected + jle $L083cbc_dec_clear_tail_collected movups XMMWORD PTR [edi],xmm2 lea edi,DWORD PTR 16[edi] -$L078cbc_dec_tail: +$L080cbc_dec_tail: movups xmm2,XMMWORD PTR [esi] movaps xmm6,xmm2 cmp eax,16 - jbe $L082cbc_dec_one + jbe $L084cbc_dec_one movups xmm3,XMMWORD PTR 16[esi] movaps xmm5,xmm3 cmp eax,32 - jbe $L083cbc_dec_two + jbe $L085cbc_dec_two movups xmm4,XMMWORD PTR 32[esi] cmp eax,48 - jbe $L084cbc_dec_three + jbe $L086cbc_dec_three movups xmm5,XMMWORD PTR 48[esi] cmp eax,64 - jbe $L085cbc_dec_four + jbe $L087cbc_dec_four movups xmm6,XMMWORD PTR 64[esi] movaps XMMWORD PTR [esp],xmm7 movups xmm2,XMMWORD PTR [esi] @@ -1867,55 +1934,62 @@ $L078cbc_dec_tail: xorps xmm6,xmm0 movups XMMWORD PTR [edi],xmm2 movups XMMWORD PTR 16[edi],xmm3 + pxor xmm3,xmm3 movups XMMWORD PTR 32[edi],xmm4 + pxor xmm4,xmm4 movups XMMWORD PTR 48[edi],xmm5 + pxor xmm5,xmm5 lea edi,DWORD PTR 64[edi] movaps xmm2,xmm6 + pxor xmm6,xmm6 sub eax,80 - jmp $L081cbc_dec_tail_collected + jmp $L088cbc_dec_tail_collected ALIGN 16 -$L082cbc_dec_one: +$L084cbc_dec_one: movups xmm0,XMMWORD PTR [edx] movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L086dec1_loop_16: +$L089dec1_loop_16: DB 102,15,56,222,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L086dec1_loop_16 + jnz $L089dec1_loop_16 DB 102,15,56,223,209 xorps xmm2,xmm7 movaps xmm7,xmm6 sub eax,16 - jmp $L081cbc_dec_tail_collected + jmp $L088cbc_dec_tail_collected ALIGN 16 -$L083cbc_dec_two: +$L085cbc_dec_two: call __aesni_decrypt2 xorps xmm2,xmm7 xorps xmm3,xmm6 movups XMMWORD PTR [edi],xmm2 movaps xmm2,xmm3 + pxor xmm3,xmm3 lea edi,DWORD PTR 16[edi] movaps xmm7,xmm5 sub eax,32 - jmp $L081cbc_dec_tail_collected + jmp $L088cbc_dec_tail_collected ALIGN 16 -$L084cbc_dec_three: +$L086cbc_dec_three: call __aesni_decrypt3 xorps xmm2,xmm7 xorps xmm3,xmm6 xorps xmm4,xmm5 movups XMMWORD PTR [edi],xmm2 movaps xmm2,xmm4 + pxor xmm4,xmm4 movups XMMWORD PTR 16[edi],xmm3 + pxor xmm3,xmm3 lea edi,DWORD PTR 32[edi] movups xmm7,XMMWORD PTR 32[esi] sub eax,48 - jmp $L081cbc_dec_tail_collected + jmp $L088cbc_dec_tail_collected ALIGN 16 -$L085cbc_dec_four: +$L087cbc_dec_four: call __aesni_decrypt4 movups xmm1,XMMWORD PTR 16[esi] movups xmm0,XMMWORD PTR 32[esi] @@ -1925,28 +1999,44 @@ $L085cbc_dec_four: movups XMMWORD PTR [edi],xmm2 xorps xmm4,xmm1 movups XMMWORD PTR 16[edi],xmm3 + pxor xmm3,xmm3 xorps xmm5,xmm0 movups XMMWORD PTR 32[edi],xmm4 + pxor xmm4,xmm4 lea edi,DWORD PTR 48[edi] movaps xmm2,xmm5 + pxor xmm5,xmm5 sub eax,64 -$L081cbc_dec_tail_collected: + jmp $L088cbc_dec_tail_collected +ALIGN 16 +$L083cbc_dec_clear_tail_collected: + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 +$L088cbc_dec_tail_collected: and eax,15 - jnz $L087cbc_dec_tail_partial + jnz $L090cbc_dec_tail_partial movups XMMWORD PTR [edi],xmm2 - jmp $L077cbc_ret + pxor xmm0,xmm0 + jmp $L079cbc_ret ALIGN 16 -$L087cbc_dec_tail_partial: +$L090cbc_dec_tail_partial: movaps XMMWORD PTR [esp],xmm2 + pxor xmm0,xmm0 mov ecx,16 mov esi,esp sub ecx,eax DD 2767451785 -$L077cbc_ret: + movdqa XMMWORD PTR [esp],xmm2 +$L079cbc_ret: mov esp,DWORD PTR 16[esp] mov ebp,DWORD PTR 36[esp] + pxor xmm2,xmm2 + pxor xmm1,xmm1 movups XMMWORD PTR [ebp],xmm7 -$L072cbc_abort: + pxor xmm7,xmm7 +$L074cbc_abort: pop edi pop esi pop ebx @@ -1955,52 +2045,62 @@ $L072cbc_abort: _aesni_cbc_encrypt ENDP ALIGN 16 __aesni_set_encrypt_key PROC PRIVATE + push ebp + push ebx test eax,eax - jz $L088bad_pointer + jz $L091bad_pointer test edx,edx - jz $L088bad_pointer + jz $L091bad_pointer + call $L092pic +$L092pic: + pop ebx + lea ebx,DWORD PTR ($Lkey_const-$L092pic)[ebx] + lea ebp,DWORD PTR _OPENSSL_ia32cap_P movups xmm0,XMMWORD PTR [eax] xorps xmm4,xmm4 + mov ebp,DWORD PTR 4[ebp] lea edx,DWORD PTR 16[edx] + and ebp,268437504 cmp ecx,256 - je $L08914rounds + je $L09314rounds cmp ecx,192 - je $L09012rounds + je $L09412rounds cmp ecx,128 - jne $L091bad_keybits + jne $L095bad_keybits ALIGN 16 -$L09210rounds: +$L09610rounds: + cmp ebp,268435456 + je $L09710rounds_alt mov ecx,9 movups XMMWORD PTR [edx-16],xmm0 DB 102,15,58,223,200,1 - call $L093key_128_cold + call $L098key_128_cold DB 102,15,58,223,200,2 - call $L094key_128 + call $L099key_128 DB 102,15,58,223,200,4 - call $L094key_128 + call $L099key_128 DB 102,15,58,223,200,8 - call $L094key_128 + call $L099key_128 DB 102,15,58,223,200,16 - call $L094key_128 + call $L099key_128 DB 102,15,58,223,200,32 - call $L094key_128 + call $L099key_128 DB 102,15,58,223,200,64 - call $L094key_128 + call $L099key_128 DB 102,15,58,223,200,128 - call $L094key_128 + call $L099key_128 DB 102,15,58,223,200,27 - call $L094key_128 + call $L099key_128 DB 102,15,58,223,200,54 - call $L094key_128 + call $L099key_128 movups XMMWORD PTR [edx],xmm0 mov DWORD PTR 80[edx],ecx - xor eax,eax - ret + jmp $L100good_key ALIGN 16 -$L094key_128: +$L099key_128: movups XMMWORD PTR [edx],xmm0 lea edx,DWORD PTR 16[edx] -$L093key_128_cold: +$L098key_128_cold: shufps xmm4,xmm0,16 xorps xmm0,xmm4 shufps xmm4,xmm0,140 @@ -2009,38 +2109,91 @@ $L093key_128_cold: xorps xmm0,xmm1 ret ALIGN 16 -$L09012rounds: +$L09710rounds_alt: + movdqa xmm5,XMMWORD PTR [ebx] + mov ecx,8 + movdqa xmm4,XMMWORD PTR 32[ebx] + movdqa xmm2,xmm0 + movdqu XMMWORD PTR [edx-16],xmm0 +$L101loop_key128: +DB 102,15,56,0,197 +DB 102,15,56,221,196 + pslld xmm4,1 + lea edx,DWORD PTR 16[edx] + movdqa xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm2,xmm3 + pxor xmm0,xmm2 + movdqu XMMWORD PTR [edx-16],xmm0 + movdqa xmm2,xmm0 + dec ecx + jnz $L101loop_key128 + movdqa xmm4,XMMWORD PTR 48[ebx] +DB 102,15,56,0,197 +DB 102,15,56,221,196 + pslld xmm4,1 + movdqa xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm2,xmm3 + pxor xmm0,xmm2 + movdqu XMMWORD PTR [edx],xmm0 + movdqa xmm2,xmm0 +DB 102,15,56,0,197 +DB 102,15,56,221,196 + movdqa xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm2,xmm3 + pxor xmm0,xmm2 + movdqu XMMWORD PTR 16[edx],xmm0 + mov ecx,9 + mov DWORD PTR 96[edx],ecx + jmp $L100good_key +ALIGN 16 +$L09412rounds: movq xmm2,QWORD PTR 16[eax] + cmp ebp,268435456 + je $L10212rounds_alt mov ecx,11 movups XMMWORD PTR [edx-16],xmm0 DB 102,15,58,223,202,1 - call $L095key_192a_cold + call $L103key_192a_cold DB 102,15,58,223,202,2 - call $L096key_192b + call $L104key_192b DB 102,15,58,223,202,4 - call $L097key_192a + call $L105key_192a DB 102,15,58,223,202,8 - call $L096key_192b + call $L104key_192b DB 102,15,58,223,202,16 - call $L097key_192a + call $L105key_192a DB 102,15,58,223,202,32 - call $L096key_192b + call $L104key_192b DB 102,15,58,223,202,64 - call $L097key_192a + call $L105key_192a DB 102,15,58,223,202,128 - call $L096key_192b + call $L104key_192b movups XMMWORD PTR [edx],xmm0 mov DWORD PTR 48[edx],ecx - xor eax,eax - ret + jmp $L100good_key ALIGN 16 -$L097key_192a: +$L105key_192a: movups XMMWORD PTR [edx],xmm0 lea edx,DWORD PTR 16[edx] ALIGN 16 -$L095key_192a_cold: +$L103key_192a_cold: movaps xmm5,xmm2 -$L098key_192b_warm: +$L106key_192b_warm: shufps xmm4,xmm0,16 movdqa xmm3,xmm2 xorps xmm0,xmm4 @@ -2054,56 +2207,90 @@ $L098key_192b_warm: pxor xmm2,xmm3 ret ALIGN 16 -$L096key_192b: +$L104key_192b: movaps xmm3,xmm0 shufps xmm5,xmm0,68 movups XMMWORD PTR [edx],xmm5 shufps xmm3,xmm2,78 movups XMMWORD PTR 16[edx],xmm3 lea edx,DWORD PTR 32[edx] - jmp $L098key_192b_warm + jmp $L106key_192b_warm +ALIGN 16 +$L10212rounds_alt: + movdqa xmm5,XMMWORD PTR 16[ebx] + movdqa xmm4,XMMWORD PTR 32[ebx] + mov ecx,8 + movdqu XMMWORD PTR [edx-16],xmm0 +$L107loop_key192: + movq QWORD PTR [edx],xmm2 + movdqa xmm1,xmm2 +DB 102,15,56,0,213 +DB 102,15,56,221,212 + pslld xmm4,1 + lea edx,DWORD PTR 24[edx] + movdqa xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm0,xmm3 + pshufd xmm3,xmm0,255 + pxor xmm3,xmm1 + pslldq xmm1,4 + pxor xmm3,xmm1 + pxor xmm0,xmm2 + pxor xmm2,xmm3 + movdqu XMMWORD PTR [edx-16],xmm0 + dec ecx + jnz $L107loop_key192 + mov ecx,11 + mov DWORD PTR 32[edx],ecx + jmp $L100good_key ALIGN 16 -$L08914rounds: +$L09314rounds: movups xmm2,XMMWORD PTR 16[eax] - mov ecx,13 lea edx,DWORD PTR 16[edx] + cmp ebp,268435456 + je $L10814rounds_alt + mov ecx,13 movups XMMWORD PTR [edx-32],xmm0 movups XMMWORD PTR [edx-16],xmm2 DB 102,15,58,223,202,1 - call $L099key_256a_cold + call $L109key_256a_cold DB 102,15,58,223,200,1 - call $L100key_256b + call $L110key_256b DB 102,15,58,223,202,2 - call $L101key_256a + call $L111key_256a DB 102,15,58,223,200,2 - call $L100key_256b + call $L110key_256b DB 102,15,58,223,202,4 - call $L101key_256a + call $L111key_256a DB 102,15,58,223,200,4 - call $L100key_256b + call $L110key_256b DB 102,15,58,223,202,8 - call $L101key_256a + call $L111key_256a DB 102,15,58,223,200,8 - call $L100key_256b + call $L110key_256b DB 102,15,58,223,202,16 - call $L101key_256a + call $L111key_256a DB 102,15,58,223,200,16 - call $L100key_256b + call $L110key_256b DB 102,15,58,223,202,32 - call $L101key_256a + call $L111key_256a DB 102,15,58,223,200,32 - call $L100key_256b + call $L110key_256b DB 102,15,58,223,202,64 - call $L101key_256a + call $L111key_256a movups XMMWORD PTR [edx],xmm0 mov DWORD PTR 16[edx],ecx xor eax,eax - ret + jmp $L100good_key ALIGN 16 -$L101key_256a: +$L111key_256a: movups XMMWORD PTR [edx],xmm2 lea edx,DWORD PTR 16[edx] -$L099key_256a_cold: +$L109key_256a_cold: shufps xmm4,xmm0,16 xorps xmm0,xmm4 shufps xmm4,xmm0,140 @@ -2112,7 +2299,7 @@ $L099key_256a_cold: xorps xmm0,xmm1 ret ALIGN 16 -$L100key_256b: +$L110key_256b: movups XMMWORD PTR [edx],xmm0 lea edx,DWORD PTR 16[edx] shufps xmm4,xmm2,16 @@ -2122,13 +2309,70 @@ $L100key_256b: shufps xmm1,xmm1,170 xorps xmm2,xmm1 ret +ALIGN 16 +$L10814rounds_alt: + movdqa xmm5,XMMWORD PTR [ebx] + movdqa xmm4,XMMWORD PTR 32[ebx] + mov ecx,7 + movdqu XMMWORD PTR [edx-32],xmm0 + movdqa xmm1,xmm2 + movdqu XMMWORD PTR [edx-16],xmm2 +$L112loop_key256: +DB 102,15,56,0,213 +DB 102,15,56,221,212 + movdqa xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm0,xmm3 + pslld xmm4,1 + pxor xmm0,xmm2 + movdqu XMMWORD PTR [edx],xmm0 + dec ecx + jz $L113done_key256 + pshufd xmm2,xmm0,255 + pxor xmm3,xmm3 +DB 102,15,56,221,211 + movdqa xmm3,xmm1 + pslldq xmm1,4 + pxor xmm3,xmm1 + pslldq xmm1,4 + pxor xmm3,xmm1 + pslldq xmm1,4 + pxor xmm1,xmm3 + pxor xmm2,xmm1 + movdqu XMMWORD PTR 16[edx],xmm2 + lea edx,DWORD PTR 32[edx] + movdqa xmm1,xmm2 + jmp $L112loop_key256 +$L113done_key256: + mov ecx,13 + mov DWORD PTR 16[edx],ecx +$L100good_key: + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + xor eax,eax + pop ebx + pop ebp + ret ALIGN 4 -$L088bad_pointer: +$L091bad_pointer: mov eax,-1 + pop ebx + pop ebp ret ALIGN 4 -$L091bad_keybits: +$L095bad_keybits: + pxor xmm0,xmm0 mov eax,-2 + pop ebx + pop ebp ret __aesni_set_encrypt_key ENDP ALIGN 16 @@ -2150,7 +2394,7 @@ $L_aesni_set_decrypt_key_begin:: mov edx,DWORD PTR 12[esp] shl ecx,4 test eax,eax - jnz $L102dec_key_ret + jnz $L114dec_key_ret lea eax,DWORD PTR 16[ecx*1+edx] movups xmm0,XMMWORD PTR [edx] movups xmm1,XMMWORD PTR [eax] @@ -2158,7 +2402,7 @@ $L_aesni_set_decrypt_key_begin:: movups XMMWORD PTR [edx],xmm1 lea edx,DWORD PTR 16[edx] lea eax,DWORD PTR [eax-16] -$L103dec_key_inverse: +$L115dec_key_inverse: movups xmm0,XMMWORD PTR [edx] movups xmm1,XMMWORD PTR [eax] DB 102,15,56,219,192 @@ -2168,17 +2412,28 @@ DB 102,15,56,219,201 movups XMMWORD PTR 16[eax],xmm0 movups XMMWORD PTR [edx-16],xmm1 cmp eax,edx - ja $L103dec_key_inverse + ja $L115dec_key_inverse movups xmm0,XMMWORD PTR [edx] DB 102,15,56,219,192 movups XMMWORD PTR [edx],xmm0 + pxor xmm0,xmm0 + pxor xmm1,xmm1 xor eax,eax -$L102dec_key_ret: +$L114dec_key_ret: ret _aesni_set_decrypt_key ENDP +ALIGN 64 +$Lkey_const:: +DD 202313229,202313229,202313229,202313229 +DD 67569157,67569157,67569157,67569157 +DD 1,1,1,1 +DD 27,27,27,27 DB 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 DB 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 DB 115,108,46,111,114,103,62,0 .text$ ENDS +.bss SEGMENT 'BSS' +COMM _OPENSSL_ia32cap_P:DWORD:4 +.bss ENDS END diff --git a/deps/openssl/asm_obsolete/arm-void-gas/aes/aesv8-armx.S b/deps/openssl/asm_obsolete/arm-void-gas/aes/aesv8-armx.S index 732ba3d9c88b94..fd979d078f46ba 100644 --- a/deps/openssl/asm_obsolete/arm-void-gas/aes/aesv8-armx.S +++ b/deps/openssl/asm_obsolete/arm-void-gas/aes/aesv8-armx.S @@ -230,17 +230,17 @@ aes_v8_encrypt: .Loop_enc: .byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0 - vld1.32 {q0},[r2]! .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 + vld1.32 {q0},[r2]! subs r3,r3,#2 .byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1 - vld1.32 {q1},[r2]! .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 + vld1.32 {q1},[r2]! bgt .Loop_enc .byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0 - vld1.32 {q0},[r2] .byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 + vld1.32 {q0},[r2] .byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1 veor q2,q2,q0 @@ -259,17 +259,17 @@ aes_v8_decrypt: .Loop_dec: .byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0 - vld1.32 {q0},[r2]! .byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 + vld1.32 {q0},[r2]! subs r3,r3,#2 .byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1 - vld1.32 {q1},[r2]! .byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 + vld1.32 {q1},[r2]! bgt .Loop_dec .byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0 - vld1.32 {q0},[r2] .byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 + vld1.32 {q0},[r2] .byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1 veor q2,q2,q0 @@ -313,16 +313,42 @@ aes_v8_cbc_encrypt: veor q5,q8,q7 beq .Lcbc_enc128 + vld1.32 {q2-q3},[r7] + add r7,r3,#16 + add r6,r3,#16*4 + add r12,r3,#16*5 + .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + add r14,r3,#16*6 + add r3,r3,#16*7 + b .Lenter_cbc_enc + +.align 4 .Loop_cbc_enc: .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 - vld1.32 {q8},[r7]! .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - subs r6,r6,#2 + vst1.8 {q6},[r1]! +.Lenter_cbc_enc: .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 - vld1.32 {q9},[r7]! .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - bgt .Loop_cbc_enc + .byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.32 {q8},[r6] + cmp r5,#4 + .byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.32 {q9},[r12] + beq .Lcbc_enc192 + + .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.32 {q8},[r14] + .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 + .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + vld1.32 {q9},[r3] + nop +.Lcbc_enc192: .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 subs r2,r2,#16 @@ -331,7 +357,6 @@ aes_v8_cbc_encrypt: moveq r8,#0 .byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - add r7,r3,#16 .byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 vld1.8 {q8},[r0],r8 @@ -340,16 +365,14 @@ aes_v8_cbc_encrypt: veor q8,q8,q5 .byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 - vld1.32 {q9},[r7]! @ re-pre-load rndkey[1] + vld1.32 {q9},[r7] @ re-pre-load rndkey[1] .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 .byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 - - mov r6,r5 veor q6,q0,q7 - vst1.8 {q6},[r1]! bhs .Loop_cbc_enc + vst1.8 {q6},[r1]! b .Lcbc_done .align 5 @@ -407,79 +430,78 @@ aes_v8_cbc_encrypt: .Loop3x_cbc_dec: .byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8 - .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 - .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 - vld1.32 {q8},[r7]! .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + vld1.32 {q8},[r7]! subs r6,r6,#2 .byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9 - .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 - .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 - vld1.32 {q9},[r7]! .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + vld1.32 {q9},[r7]! bgt .Loop3x_cbc_dec .byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8 - .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 - .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 - veor q4,q6,q7 .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + veor q4,q6,q7 + subs r2,r2,#0x30 veor q5,q2,q7 + movlo r6,r2 @ r6, r6, is zero at this point .byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9 - .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 - .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 - veor q9,q3,q7 - subs r2,r2,#0x30 .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 - vorr q6,q11,q11 - movlo r6,r2 @ r6, r6, is zero at this point - .byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12 - .byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12 - .byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12 + veor q9,q3,q7 add r0,r0,r6 @ r0 is adjusted in such way that @ at exit from the loop q1-q10 @ are loaded with last "words" + vorr q6,q11,q11 + mov r7,r3 + .byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12 .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 - mov r7,r3 - .byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13 - .byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13 - .byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13 vld1.8 {q2},[r0]! + .byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13 .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 vld1.8 {q3},[r0]! .byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14 - .byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14 - .byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14 - vld1.8 {q11},[r0]! .byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 + .byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 - vld1.32 {q8},[r7]! @ re-pre-load rndkey[0] + vld1.8 {q11},[r0]! .byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15 .byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15 .byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15 - + vld1.32 {q8},[r7]! @ re-pre-load rndkey[0] add r6,r5,#2 veor q4,q4,q0 veor q5,q5,q1 veor q10,q10,q9 vld1.32 {q9},[r7]! @ re-pre-load rndkey[1] - vorr q0,q2,q2 vst1.8 {q4},[r1]! - vorr q1,q3,q3 + vorr q0,q2,q2 vst1.8 {q5},[r1]! + vorr q1,q3,q3 vst1.8 {q10},[r1]! vorr q10,q11,q11 bhs .Loop3x_cbc_dec @@ -490,39 +512,39 @@ aes_v8_cbc_encrypt: .Lcbc_dec_tail: .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 - .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 - vld1.32 {q8},[r7]! .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + vld1.32 {q8},[r7]! subs r6,r6,#2 .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 - .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 - vld1.32 {q9},[r7]! .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 + vld1.32 {q9},[r7]! bgt .Lcbc_dec_tail .byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 - .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 .byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 - .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 .byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12 - .byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 cmn r2,#0x20 .byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13 - .byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 veor q5,q6,q7 .byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14 - .byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14 .byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 + .byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14 .byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 veor q9,q3,q7 .byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15 @@ -590,70 +612,69 @@ aes_v8_ctr32_encrypt_blocks: .align 4 .Loop3x_ctr32: .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 - .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 - .byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8 - vld1.32 {q8},[r7]! .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + .byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8 .byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 + vld1.32 {q8},[r7]! subs r6,r6,#2 .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 - .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 - .byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9 - vld1.32 {q9},[r7]! .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + .byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9 .byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 + vld1.32 {q9},[r7]! bgt .Loop3x_ctr32 .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 - .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 - .byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8 - mov r7,r3 .byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0 - vld1.8 {q2},[r0]! + .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 .byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1 - .byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 + vld1.8 {q2},[r0]! vorr q0,q6,q6 - .byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9 + .byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8 + .byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 vld1.8 {q3},[r0]! - .byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9 - .byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9 vorr q1,q6,q6 + .byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9 .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 - vld1.8 {q11},[r0]! + .byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9 .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 + vld1.8 {q11},[r0]! + mov r7,r3 + .byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9 .byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10 vorr q10,q6,q6 add r9,r8,#1 .byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12 + .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 .byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12 - .byte 0x28,0x23,0xf0,0xf3 @ aese q9,q12 + .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 veor q2,q2,q7 add r10,r8,#2 - .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 - .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 + .byte 0x28,0x23,0xf0,0xf3 @ aese q9,q12 .byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9 veor q3,q3,q7 add r8,r8,#3 .byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13 + .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 .byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13 - .byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13 + .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 veor q11,q11,q7 rev r9,r9 - .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 - vld1.32 {q8},[r7]! @ re-pre-load rndkey[0] - .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 + .byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13 .byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9 vmov.32 d1[1], r9 rev r10,r10 .byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14 + .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 .byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14 - .byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14 + .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 vmov.32 d3[1], r10 rev r12,r8 - .byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 - .byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 + .byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14 .byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9 vmov.32 d21[1], r12 subs r2,r2,#3 @@ -661,13 +682,14 @@ aes_v8_ctr32_encrypt_blocks: .byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15 .byte 0x2e,0x23,0xf0,0xf3 @ aese q9,q15 - mov r6,r5 veor q2,q2,q4 + vld1.32 {q8},[r7]! @ re-pre-load rndkey[0] + vst1.8 {q2},[r1]! veor q3,q3,q5 + mov r6,r5 + vst1.8 {q3},[r1]! veor q11,q11,q9 vld1.32 {q9},[r7]! @ re-pre-load rndkey[1] - vst1.8 {q2},[r1]! - vst1.8 {q3},[r1]! vst1.8 {q11},[r1]! bhs .Loop3x_ctr32 @@ -679,40 +701,40 @@ aes_v8_ctr32_encrypt_blocks: .Lctr32_tail: .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 - .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 - vld1.32 {q8},[r7]! .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + vld1.32 {q8},[r7]! subs r6,r6,#2 .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 - .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 - vld1.32 {q9},[r7]! .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + vld1.32 {q9},[r7]! bgt .Lctr32_tail .byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 - .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 .byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 - .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 vld1.8 {q2},[r0],r12 .byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 - .byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12 - vld1.8 {q3},[r0] .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 + vld1.8 {q3},[r0] .byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 - .byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 - .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 - .byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14 veor q2,q2,q7 + .byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 .byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 + .byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14 .byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 veor q3,q3,q7 .byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 diff --git a/deps/openssl/asm_obsolete/arm-void-gas/modes/ghash-armv4.S b/deps/openssl/asm_obsolete/arm-void-gas/modes/ghash-armv4.S index d321235f79bd83..c54f5149974c6f 100644 --- a/deps/openssl/asm_obsolete/arm-void-gas/modes/ghash-armv4.S +++ b/deps/openssl/asm_obsolete/arm-void-gas/modes/ghash-armv4.S @@ -495,7 +495,7 @@ gcm_ghash_neon: veor q10,q10,q9 @ vshl.i64 q9,q0,#63 veor q10, q10, q9 @ - veor d1,d1,d20 @ + veor d1,d1,d20 @ veor d4,d4,d21 vshr.u64 q10,q0,#1 @ 2nd phase diff --git a/deps/openssl/asm_obsolete/arm-void-gas/modes/ghashv8-armx.S b/deps/openssl/asm_obsolete/arm-void-gas/modes/ghashv8-armx.S index 570d9175c47605..269574945fa6a2 100644 --- a/deps/openssl/asm_obsolete/arm-void-gas/modes/ghashv8-armx.S +++ b/deps/openssl/asm_obsolete/arm-void-gas/modes/ghashv8-armx.S @@ -7,109 +7,223 @@ .type gcm_init_v8,%function .align 4 gcm_init_v8: - vld1.64 {q9},[r1] @ load H - vmov.i8 q8,#0xe1 + vld1.64 {q9},[r1] @ load input H + vmov.i8 q11,#0xe1 + vshl.i64 q11,q11,#57 @ 0xc2.0 vext.8 q3,q9,q9,#8 - vshl.i64 q8,q8,#57 - vshr.u64 q10,q8,#63 - vext.8 q8,q10,q8,#8 @ t0=0xc2....01 + vshr.u64 q10,q11,#63 vdup.32 q9,d18[1] - vshr.u64 q11,q3,#63 + vext.8 q8,q10,q11,#8 @ t0=0xc2....01 + vshr.u64 q10,q3,#63 vshr.s32 q9,q9,#31 @ broadcast carry bit - vand q11,q11,q8 + vand q10,q10,q8 vshl.i64 q3,q3,#1 - vext.8 q11,q11,q11,#8 + vext.8 q10,q10,q10,#8 vand q8,q8,q9 - vorr q3,q3,q11 @ H<<<=1 - veor q3,q3,q8 @ twisted H - vst1.64 {q3},[r0] + vorr q3,q3,q10 @ H<<<=1 + veor q12,q3,q8 @ twisted H + vst1.64 {q12},[r0]! @ store Htable[0] + + @ calculate H^2 + vext.8 q8,q12,q12,#8 @ Karatsuba pre-processing + .byte 0xa8,0x0e,0xa8,0xf2 @ pmull q0,q12,q12 + veor q8,q8,q12 + .byte 0xa9,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q12 + .byte 0xa0,0x2e,0xa0,0xf2 @ pmull q1,q8,q8 + + vext.8 q9,q0,q2,#8 @ Karatsuba post-processing + veor q10,q0,q2 + veor q1,q1,q9 + veor q1,q1,q10 + .byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase + + vmov d4,d3 @ Xh|Xm - 256-bit result + vmov d3,d0 @ Xm is rotated Xl + veor q0,q1,q10 + + vext.8 q10,q0,q0,#8 @ 2nd phase + .byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11 + veor q10,q10,q2 + veor q14,q0,q10 + + vext.8 q9,q14,q14,#8 @ Karatsuba pre-processing + veor q9,q9,q14 + vext.8 q13,q8,q9,#8 @ pack Karatsuba pre-processed + vst1.64 {q13-q14},[r0] @ store Htable[1..2] bx lr .size gcm_init_v8,.-gcm_init_v8 - .global gcm_gmult_v8 .type gcm_gmult_v8,%function .align 4 gcm_gmult_v8: vld1.64 {q9},[r0] @ load Xi vmov.i8 q11,#0xe1 - vld1.64 {q12},[r1] @ load twisted H + vld1.64 {q12-q13},[r1] @ load twisted H, ... vshl.u64 q11,q11,#57 #ifndef __ARMEB__ vrev64.8 q9,q9 #endif - vext.8 q13,q12,q12,#8 - mov r3,#0 vext.8 q3,q9,q9,#8 - mov r12,#0 - veor q13,q13,q12 @ Karatsuba pre-processing - mov r2,r0 - b .Lgmult_v8 -.size gcm_gmult_v8,.-gcm_gmult_v8 + .byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo + veor q9,q9,q3 @ Karatsuba pre-processing + .byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi + .byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi) + + vext.8 q9,q0,q2,#8 @ Karatsuba post-processing + veor q10,q0,q2 + veor q1,q1,q9 + veor q1,q1,q10 + .byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction + + vmov d4,d3 @ Xh|Xm - 256-bit result + vmov d3,d0 @ Xm is rotated Xl + veor q0,q1,q10 + + vext.8 q10,q0,q0,#8 @ 2nd phase of reduction + .byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11 + veor q10,q10,q2 + veor q0,q0,q10 + +#ifndef __ARMEB__ + vrev64.8 q0,q0 +#endif + vext.8 q0,q0,q0,#8 + vst1.64 {q0},[r0] @ write out Xi + + bx lr +.size gcm_gmult_v8,.-gcm_gmult_v8 .global gcm_ghash_v8 .type gcm_ghash_v8,%function .align 4 gcm_ghash_v8: + vstmdb sp!,{d8-d15} @ 32-bit ABI says so vld1.64 {q0},[r0] @ load [rotated] Xi - subs r3,r3,#16 + @ "[rotated]" means that + @ loaded value would have + @ to be rotated in order to + @ make it appear as in + @ alorithm specification + subs r3,r3,#32 @ see if r3 is 32 or larger + mov r12,#16 @ r12 is used as post- + @ increment for input pointer; + @ as loop is modulo-scheduled + @ r12 is zeroed just in time + @ to preclude oversteping + @ inp[len], which means that + @ last block[s] are actually + @ loaded twice, but last + @ copy is not processed + vld1.64 {q12-q13},[r1]! @ load twisted H, ..., H^2 vmov.i8 q11,#0xe1 - mov r12,#16 - vld1.64 {q12},[r1] @ load twisted H - moveq r12,#0 - vext.8 q0,q0,q0,#8 - vshl.u64 q11,q11,#57 - vld1.64 {q9},[r2],r12 @ load [rotated] inp - vext.8 q13,q12,q12,#8 + vld1.64 {q14},[r1] + moveq r12,#0 @ is it time to zero r12? + vext.8 q0,q0,q0,#8 @ rotate Xi + vld1.64 {q8},[r2]! @ load [rotated] I[0] + vshl.u64 q11,q11,#57 @ compose 0xc2.0 constant #ifndef __ARMEB__ + vrev64.8 q8,q8 vrev64.8 q0,q0 +#endif + vext.8 q3,q8,q8,#8 @ rotate I[0] + blo .Lodd_tail_v8 @ r3 was less than 32 + vld1.64 {q9},[r2],r12 @ load [rotated] I[1] +#ifndef __ARMEB__ vrev64.8 q9,q9 #endif - veor q13,q13,q12 @ Karatsuba pre-processing - vext.8 q3,q9,q9,#8 - b .Loop_v8 + vext.8 q7,q9,q9,#8 + veor q3,q3,q0 @ I[i]^=Xi + .byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1 + veor q9,q9,q7 @ Karatsuba pre-processing + .byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7 + b .Loop_mod2x_v8 .align 4 -.Loop_v8: +.Loop_mod2x_v8: + vext.8 q10,q3,q3,#8 + subs r3,r3,#32 @ is there more data? + .byte 0x86,0x0e,0xac,0xf2 @ pmull q0,q14,q3 @ H^2.lo·Xi.lo + movlo r12,#0 @ is it time to zero r12? + + .byte 0xa2,0xae,0xaa,0xf2 @ pmull q5,q13,q9 + veor q10,q10,q3 @ Karatsuba pre-processing + .byte 0x87,0x4e,0xad,0xf2 @ pmull2 q2,q14,q3 @ H^2.hi·Xi.hi + veor q0,q0,q4 @ accumulate + .byte 0xa5,0x2e,0xab,0xf2 @ pmull2 q1,q13,q10 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi) + vld1.64 {q8},[r2],r12 @ load [rotated] I[i+2] + + veor q2,q2,q6 + moveq r12,#0 @ is it time to zero r12? + veor q1,q1,q5 + + vext.8 q9,q0,q2,#8 @ Karatsuba post-processing + veor q10,q0,q2 + veor q1,q1,q9 + vld1.64 {q9},[r2],r12 @ load [rotated] I[i+3] +#ifndef __ARMEB__ + vrev64.8 q8,q8 +#endif + veor q1,q1,q10 + .byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction + +#ifndef __ARMEB__ + vrev64.8 q9,q9 +#endif + vmov d4,d3 @ Xh|Xm - 256-bit result + vmov d3,d0 @ Xm is rotated Xl + vext.8 q7,q9,q9,#8 + vext.8 q3,q8,q8,#8 + veor q0,q1,q10 + .byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1 + veor q3,q3,q2 @ accumulate q3 early + + vext.8 q10,q0,q0,#8 @ 2nd phase of reduction + .byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11 + veor q3,q3,q10 + veor q9,q9,q7 @ Karatsuba pre-processing + veor q3,q3,q0 + .byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7 + bhs .Loop_mod2x_v8 @ there was at least 32 more bytes + + veor q2,q2,q10 + vext.8 q3,q8,q8,#8 @ re-construct q3 + adds r3,r3,#32 @ re-construct r3 + veor q0,q0,q2 @ re-construct q0 + beq .Ldone_v8 @ is r3 zero? +.Lodd_tail_v8: vext.8 q10,q0,q0,#8 veor q3,q3,q0 @ inp^=Xi - veor q9,q9,q10 @ q9 is rotated inp^Xi + veor q9,q8,q10 @ q9 is rotated inp^Xi -.Lgmult_v8: .byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo veor q9,q9,q3 @ Karatsuba pre-processing .byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi - subs r3,r3,#16 .byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi) - moveq r12,#0 vext.8 q9,q0,q2,#8 @ Karatsuba post-processing veor q10,q0,q2 veor q1,q1,q9 - vld1.64 {q9},[r2],r12 @ load [rotated] inp veor q1,q1,q10 - .byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase + .byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction vmov d4,d3 @ Xh|Xm - 256-bit result vmov d3,d0 @ Xm is rotated Xl -#ifndef __ARMEB__ - vrev64.8 q9,q9 -#endif veor q0,q1,q10 - vext.8 q3,q9,q9,#8 - vext.8 q10,q0,q0,#8 @ 2nd phase + vext.8 q10,q0,q0,#8 @ 2nd phase of reduction .byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11 veor q10,q10,q2 veor q0,q0,q10 - bhs .Loop_v8 +.Ldone_v8: #ifndef __ARMEB__ vrev64.8 q0,q0 #endif vext.8 q0,q0,q0,#8 vst1.64 {q0},[r0] @ write out Xi + vldmia sp!,{d8-d15} @ 32-bit ABI says so bx lr .size gcm_ghash_v8,.-gcm_ghash_v8 .asciz "GHASH for ARMv8, CRYPTOGAMS by " diff --git a/deps/openssl/asm_obsolete/arm-void-gas/sha/sha256-armv4.S b/deps/openssl/asm_obsolete/arm-void-gas/sha/sha256-armv4.S index bf1ce4f997e7b7..683f1cc0c874b5 100644 --- a/deps/openssl/asm_obsolete/arm-void-gas/sha/sha256-armv4.S +++ b/deps/openssl/asm_obsolete/arm-void-gas/sha/sha256-armv4.S @@ -1,7 +1,59 @@ -#include "arm_arch.h" + +@ ==================================================================== +@ Written by Andy Polyakov for the OpenSSL +@ project. The module is, however, dual licensed under OpenSSL and +@ CRYPTOGAMS licenses depending on where you obtain it. For further +@ details see http://www.openssl.org/~appro/cryptogams/. +@ +@ Permission to use under GPL terms is granted. +@ ==================================================================== + +@ SHA256 block procedure for ARMv4. May 2007. + +@ Performance is ~2x better than gcc 3.4 generated code and in "abso- +@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per +@ byte [on single-issue Xscale PXA250 core]. + +@ July 2010. +@ +@ Rescheduling for dual-issue pipeline resulted in 22% improvement on +@ Cortex A8 core and ~20 cycles per processed byte. + +@ February 2011. +@ +@ Profiler-assisted and platform-specific optimization resulted in 16% +@ improvement on Cortex A8 core and ~15.4 cycles per processed byte. + +@ September 2013. +@ +@ Add NEON implementation. On Cortex A8 it was measured to process one +@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon +@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only +@ code (meaning that latter performs sub-optimally, nothing was done +@ about it). + +@ May 2014. +@ +@ Add ARMv8 code path performing at 2.0 cpb on Apple A7. + +#ifndef __KERNEL__ +# include "arm_arch.h" +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ 7 +#endif .text +#if __ARM_ARCH__<7 .code 32 +#else +.syntax unified +# ifdef __thumb2__ +.thumb +# else +.code 32 +# endif +#endif .type K256,%object .align 5 @@ -24,7 +76,7 @@ K256: .word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .size K256,.-K256 .word 0 @ terminator -#if __ARM_MAX_ARCH__>=7 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) .LOPENSSL_armcap: .word OPENSSL_armcap_P-sha256_block_data_order #endif @@ -33,9 +85,12 @@ K256: .global sha256_block_data_order .type sha256_block_data_order,%function sha256_block_data_order: +#if __ARM_ARCH__<7 sub r3,pc,#8 @ sha256_block_data_order - add r2,r1,r2,lsl#6 @ len to point at the end of inp -#if __ARM_MAX_ARCH__>=7 +#else + adr r3,sha256_block_data_order +#endif +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) ldr r12,.LOPENSSL_armcap ldr r12,[r3,r12] @ OPENSSL_armcap_P tst r12,#ARMV8_SHA256 @@ -43,6 +98,7 @@ sha256_block_data_order: tst r12,#ARMV7_NEON bne .LNEON #endif + add r2,r1,r2,lsl#6 @ len to point at the end of inp stmdb sp!,{r0,r1,r2,r4-r11,lr} ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11} sub r14,r3,#256+32 @ K256 @@ -1736,6 +1792,9 @@ sha256_block_data_order: eor r12,r12,r6 @ Maj(a,b,c) add r4,r4,r0,ror#2 @ h+=Sigma0(a) @ add r4,r4,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + ite eq @ Thumb2 thing, sanity check in ARM +#endif ldreq r3,[sp,#16*4] @ pull ctx bne .Lrounds_16_xx @@ -1777,16 +1836,19 @@ sha256_block_data_order: .arch armv7-a .fpu neon +.global sha256_block_data_order_neon .type sha256_block_data_order_neon,%function .align 4 sha256_block_data_order_neon: .LNEON: stmdb sp!,{r4-r12,lr} + sub r11,sp,#16*4+16 + adr r14,K256 + bic r11,r11,#15 @ align for 128-bit stores mov r12,sp - sub sp,sp,#16*4+16 @ alloca - sub r14,r3,#256+32 @ K256 - bic sp,sp,#15 @ align for 128-bit stores + mov sp,r11 @ alloca + add r2,r1,r2,lsl#6 @ len to point at the end of inp vld1.8 {q0},[r1]! vld1.8 {q1},[r1]! @@ -2224,11 +2286,13 @@ sha256_block_data_order_neon: ldr r0,[sp,#72] sub r14,r14,#256 @ rewind r14 teq r1,r0 + it eq subeq r1,r1,#64 @ avoid SEGV vld1.8 {q0},[r1]! @ load next input block vld1.8 {q1},[r1]! vld1.8 {q2},[r1]! vld1.8 {q3},[r1]! + it ne strne r1,[sp,#68] mov r1,sp add r11,r11,r2 @@ -2542,23 +2606,38 @@ sha256_block_data_order_neon: str r7,[r2],#4 stmia r2,{r8-r11} + ittte ne movne r1,sp ldrne r2,[sp,#0] eorne r12,r12,r12 ldreq sp,[sp,#76] @ restore original sp + itt ne eorne r3,r5,r6 bne .L_00_48 ldmia sp!,{r4-r12,pc} .size sha256_block_data_order_neon,.-sha256_block_data_order_neon #endif -#if __ARM_MAX_ARCH__>=7 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + +# ifdef __thumb2__ +# define INST(a,b,c,d) .byte c,d|0xc,a,b +# else +# define INST(a,b,c,d) .byte a,b,c,d +# endif + .type sha256_block_data_order_armv8,%function .align 5 sha256_block_data_order_armv8: .LARMv8: vld1.32 {q0,q1},[r0] - sub r3,r3,#sha256_block_data_order-K256 +# ifdef __thumb2__ + adr r3,.LARMv8 + sub r3,r3,#.LARMv8-K256 +# else + adrl r3,K256 +# endif + add r2,r1,r2,lsl#6 @ len to point at the end of inp .Loop_v8: vld1.8 {q8-q9},[r1]! @@ -2573,114 +2652,115 @@ sha256_block_data_order_armv8: teq r1,r2 vld1.32 {q13},[r3]! vadd.i32 q12,q12,q8 - .byte 0xe2,0x03,0xfa,0xf3 @ sha256su0 q8,q9 + INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 vmov q2,q0 - .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 - .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 - .byte 0xe6,0x0c,0x64,0xf3 @ sha256su1 q8,q10,q11 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 vld1.32 {q12},[r3]! vadd.i32 q13,q13,q9 - .byte 0xe4,0x23,0xfa,0xf3 @ sha256su0 q9,q10 + INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 vmov q2,q0 - .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 - .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 - .byte 0xe0,0x2c,0x66,0xf3 @ sha256su1 q9,q11,q8 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 vld1.32 {q13},[r3]! vadd.i32 q12,q12,q10 - .byte 0xe6,0x43,0xfa,0xf3 @ sha256su0 q10,q11 + INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 vmov q2,q0 - .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 - .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 - .byte 0xe2,0x4c,0x60,0xf3 @ sha256su1 q10,q8,q9 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 vld1.32 {q12},[r3]! vadd.i32 q13,q13,q11 - .byte 0xe0,0x63,0xfa,0xf3 @ sha256su0 q11,q8 + INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 vmov q2,q0 - .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 - .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 - .byte 0xe4,0x6c,0x62,0xf3 @ sha256su1 q11,q9,q10 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 vld1.32 {q13},[r3]! vadd.i32 q12,q12,q8 - .byte 0xe2,0x03,0xfa,0xf3 @ sha256su0 q8,q9 + INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 vmov q2,q0 - .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 - .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 - .byte 0xe6,0x0c,0x64,0xf3 @ sha256su1 q8,q10,q11 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 vld1.32 {q12},[r3]! vadd.i32 q13,q13,q9 - .byte 0xe4,0x23,0xfa,0xf3 @ sha256su0 q9,q10 + INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 vmov q2,q0 - .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 - .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 - .byte 0xe0,0x2c,0x66,0xf3 @ sha256su1 q9,q11,q8 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 vld1.32 {q13},[r3]! vadd.i32 q12,q12,q10 - .byte 0xe6,0x43,0xfa,0xf3 @ sha256su0 q10,q11 + INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 vmov q2,q0 - .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 - .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 - .byte 0xe2,0x4c,0x60,0xf3 @ sha256su1 q10,q8,q9 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 vld1.32 {q12},[r3]! vadd.i32 q13,q13,q11 - .byte 0xe0,0x63,0xfa,0xf3 @ sha256su0 q11,q8 + INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 vmov q2,q0 - .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 - .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 - .byte 0xe4,0x6c,0x62,0xf3 @ sha256su1 q11,q9,q10 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 vld1.32 {q13},[r3]! vadd.i32 q12,q12,q8 - .byte 0xe2,0x03,0xfa,0xf3 @ sha256su0 q8,q9 + INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 vmov q2,q0 - .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 - .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 - .byte 0xe6,0x0c,0x64,0xf3 @ sha256su1 q8,q10,q11 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 vld1.32 {q12},[r3]! vadd.i32 q13,q13,q9 - .byte 0xe4,0x23,0xfa,0xf3 @ sha256su0 q9,q10 + INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 vmov q2,q0 - .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 - .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 - .byte 0xe0,0x2c,0x66,0xf3 @ sha256su1 q9,q11,q8 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 vld1.32 {q13},[r3]! vadd.i32 q12,q12,q10 - .byte 0xe6,0x43,0xfa,0xf3 @ sha256su0 q10,q11 + INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 vmov q2,q0 - .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 - .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 - .byte 0xe2,0x4c,0x60,0xf3 @ sha256su1 q10,q8,q9 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 vld1.32 {q12},[r3]! vadd.i32 q13,q13,q11 - .byte 0xe0,0x63,0xfa,0xf3 @ sha256su0 q11,q8 + INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 vmov q2,q0 - .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 - .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 - .byte 0xe4,0x6c,0x62,0xf3 @ sha256su1 q11,q9,q10 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 vld1.32 {q13},[r3]! vadd.i32 q12,q12,q8 vmov q2,q0 - .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 - .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 vld1.32 {q12},[r3]! vadd.i32 q13,q13,q9 vmov q2,q0 - .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 - .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 vld1.32 {q13},[r3] vadd.i32 q12,q12,q10 sub r3,r3,#256-16 @ rewind vmov q2,q0 - .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 - .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 vadd.i32 q13,q13,q11 vmov q2,q0 - .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 - .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 vadd.i32 q0,q0,q14 vadd.i32 q1,q1,q15 + it ne bne .Loop_v8 vst1.32 {q0,q1},[r0] @@ -2690,6 +2770,6 @@ sha256_block_data_order_armv8: #endif .asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by " .align 2 -#if __ARM_MAX_ARCH__>=7 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) .comm OPENSSL_armcap_P,4,4 #endif diff --git a/deps/openssl/asm_obsolete/arm64-linux64-gas/aes/aesv8-armx.S b/deps/openssl/asm_obsolete/arm64-linux64-gas/aes/aesv8-armx.S index 0a4b1ac4c40082..f5dd6cbb86234e 100644 --- a/deps/openssl/asm_obsolete/arm64-linux64-gas/aes/aesv8-armx.S +++ b/deps/openssl/asm_obsolete/arm64-linux64-gas/aes/aesv8-armx.S @@ -227,17 +227,17 @@ aes_v8_encrypt: .Loop_enc: aese v2.16b,v0.16b - ld1 {v0.4s},[x2],#16 aesmc v2.16b,v2.16b + ld1 {v0.4s},[x2],#16 subs w3,w3,#2 aese v2.16b,v1.16b - ld1 {v1.4s},[x2],#16 aesmc v2.16b,v2.16b + ld1 {v1.4s},[x2],#16 b.gt .Loop_enc aese v2.16b,v0.16b - ld1 {v0.4s},[x2] aesmc v2.16b,v2.16b + ld1 {v0.4s},[x2] aese v2.16b,v1.16b eor v2.16b,v2.16b,v0.16b @@ -256,17 +256,17 @@ aes_v8_decrypt: .Loop_dec: aesd v2.16b,v0.16b - ld1 {v0.4s},[x2],#16 aesimc v2.16b,v2.16b + ld1 {v0.4s},[x2],#16 subs w3,w3,#2 aesd v2.16b,v1.16b - ld1 {v1.4s},[x2],#16 aesimc v2.16b,v2.16b + ld1 {v1.4s},[x2],#16 b.gt .Loop_dec aesd v2.16b,v0.16b - ld1 {v0.4s},[x2] aesimc v2.16b,v2.16b + ld1 {v0.4s},[x2] aesd v2.16b,v1.16b eor v2.16b,v2.16b,v0.16b @@ -308,16 +308,42 @@ aes_v8_cbc_encrypt: eor v5.16b,v16.16b,v7.16b b.eq .Lcbc_enc128 + ld1 {v2.4s-v3.4s},[x7] + add x7,x3,#16 + add x6,x3,#16*4 + add x12,x3,#16*5 + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + add x14,x3,#16*6 + add x3,x3,#16*7 + b .Lenter_cbc_enc + +.align 4 .Loop_cbc_enc: aese v0.16b,v16.16b - ld1 {v16.4s},[x7],#16 aesmc v0.16b,v0.16b - subs w6,w6,#2 + st1 {v6.16b},[x1],#16 +.Lenter_cbc_enc: aese v0.16b,v17.16b - ld1 {v17.4s},[x7],#16 aesmc v0.16b,v0.16b - b.gt .Loop_cbc_enc + aese v0.16b,v2.16b + aesmc v0.16b,v0.16b + ld1 {v16.4s},[x6] + cmp w5,#4 + aese v0.16b,v3.16b + aesmc v0.16b,v0.16b + ld1 {v17.4s},[x12] + b.eq .Lcbc_enc192 + + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + ld1 {v16.4s},[x14] + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + ld1 {v17.4s},[x3] + nop +.Lcbc_enc192: aese v0.16b,v16.16b aesmc v0.16b,v0.16b subs x2,x2,#16 @@ -326,7 +352,6 @@ aes_v8_cbc_encrypt: csel x8,xzr,x8,eq aese v0.16b,v18.16b aesmc v0.16b,v0.16b - add x7,x3,#16 aese v0.16b,v19.16b aesmc v0.16b,v0.16b ld1 {v16.16b},[x0],x8 @@ -335,16 +360,14 @@ aes_v8_cbc_encrypt: eor v16.16b,v16.16b,v5.16b aese v0.16b,v21.16b aesmc v0.16b,v0.16b - ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + ld1 {v17.4s},[x7] // re-pre-load rndkey[1] aese v0.16b,v22.16b aesmc v0.16b,v0.16b aese v0.16b,v23.16b - - mov w6,w5 eor v6.16b,v0.16b,v7.16b - st1 {v6.16b},[x1],#16 b.hs .Loop_cbc_enc + st1 {v6.16b},[x1],#16 b .Lcbc_done .align 5 @@ -402,79 +425,78 @@ aes_v8_cbc_encrypt: .Loop3x_cbc_dec: aesd v0.16b,v16.16b - aesd v1.16b,v16.16b - aesd v18.16b,v16.16b - ld1 {v16.4s},[x7],#16 aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b aesimc v1.16b,v1.16b + aesd v18.16b,v16.16b aesimc v18.16b,v18.16b + ld1 {v16.4s},[x7],#16 subs w6,w6,#2 aesd v0.16b,v17.16b - aesd v1.16b,v17.16b - aesd v18.16b,v17.16b - ld1 {v17.4s},[x7],#16 aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b aesimc v1.16b,v1.16b + aesd v18.16b,v17.16b aesimc v18.16b,v18.16b + ld1 {v17.4s},[x7],#16 b.gt .Loop3x_cbc_dec aesd v0.16b,v16.16b - aesd v1.16b,v16.16b - aesd v18.16b,v16.16b - eor v4.16b,v6.16b,v7.16b aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b aesimc v1.16b,v1.16b + aesd v18.16b,v16.16b aesimc v18.16b,v18.16b + eor v4.16b,v6.16b,v7.16b + subs x2,x2,#0x30 eor v5.16b,v2.16b,v7.16b + csel x6,x2,x6,lo // x6, w6, is zero at this point aesd v0.16b,v17.16b - aesd v1.16b,v17.16b - aesd v18.16b,v17.16b - eor v17.16b,v3.16b,v7.16b - subs x2,x2,#0x30 aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b aesimc v1.16b,v1.16b + aesd v18.16b,v17.16b aesimc v18.16b,v18.16b - orr v6.16b,v19.16b,v19.16b - csel x6,x2,x6,lo // x6, w6, is zero at this point - aesd v0.16b,v20.16b - aesd v1.16b,v20.16b - aesd v18.16b,v20.16b + eor v17.16b,v3.16b,v7.16b add x0,x0,x6 // x0 is adjusted in such way that // at exit from the loop v1.16b-v18.16b // are loaded with last "words" + orr v6.16b,v19.16b,v19.16b + mov x7,x3 + aesd v0.16b,v20.16b aesimc v0.16b,v0.16b + aesd v1.16b,v20.16b aesimc v1.16b,v1.16b + aesd v18.16b,v20.16b aesimc v18.16b,v18.16b - mov x7,x3 - aesd v0.16b,v21.16b - aesd v1.16b,v21.16b - aesd v18.16b,v21.16b ld1 {v2.16b},[x0],#16 + aesd v0.16b,v21.16b aesimc v0.16b,v0.16b + aesd v1.16b,v21.16b aesimc v1.16b,v1.16b + aesd v18.16b,v21.16b aesimc v18.16b,v18.16b ld1 {v3.16b},[x0],#16 aesd v0.16b,v22.16b - aesd v1.16b,v22.16b - aesd v18.16b,v22.16b - ld1 {v19.16b},[x0],#16 aesimc v0.16b,v0.16b + aesd v1.16b,v22.16b aesimc v1.16b,v1.16b + aesd v18.16b,v22.16b aesimc v18.16b,v18.16b - ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + ld1 {v19.16b},[x0],#16 aesd v0.16b,v23.16b aesd v1.16b,v23.16b aesd v18.16b,v23.16b - + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] add w6,w5,#2 eor v4.16b,v4.16b,v0.16b eor v5.16b,v5.16b,v1.16b eor v18.16b,v18.16b,v17.16b ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] - orr v0.16b,v2.16b,v2.16b st1 {v4.16b},[x1],#16 - orr v1.16b,v3.16b,v3.16b + orr v0.16b,v2.16b,v2.16b st1 {v5.16b},[x1],#16 + orr v1.16b,v3.16b,v3.16b st1 {v18.16b},[x1],#16 orr v18.16b,v19.16b,v19.16b b.hs .Loop3x_cbc_dec @@ -485,39 +507,39 @@ aes_v8_cbc_encrypt: .Lcbc_dec_tail: aesd v1.16b,v16.16b - aesd v18.16b,v16.16b - ld1 {v16.4s},[x7],#16 aesimc v1.16b,v1.16b + aesd v18.16b,v16.16b aesimc v18.16b,v18.16b + ld1 {v16.4s},[x7],#16 subs w6,w6,#2 aesd v1.16b,v17.16b - aesd v18.16b,v17.16b - ld1 {v17.4s},[x7],#16 aesimc v1.16b,v1.16b + aesd v18.16b,v17.16b aesimc v18.16b,v18.16b + ld1 {v17.4s},[x7],#16 b.gt .Lcbc_dec_tail aesd v1.16b,v16.16b - aesd v18.16b,v16.16b aesimc v1.16b,v1.16b + aesd v18.16b,v16.16b aesimc v18.16b,v18.16b aesd v1.16b,v17.16b - aesd v18.16b,v17.16b aesimc v1.16b,v1.16b + aesd v18.16b,v17.16b aesimc v18.16b,v18.16b aesd v1.16b,v20.16b - aesd v18.16b,v20.16b aesimc v1.16b,v1.16b + aesd v18.16b,v20.16b aesimc v18.16b,v18.16b cmn x2,#0x20 aesd v1.16b,v21.16b - aesd v18.16b,v21.16b aesimc v1.16b,v1.16b + aesd v18.16b,v21.16b aesimc v18.16b,v18.16b eor v5.16b,v6.16b,v7.16b aesd v1.16b,v22.16b - aesd v18.16b,v22.16b aesimc v1.16b,v1.16b + aesd v18.16b,v22.16b aesimc v18.16b,v18.16b eor v17.16b,v3.16b,v7.16b aesd v1.16b,v23.16b @@ -583,70 +605,69 @@ aes_v8_ctr32_encrypt_blocks: .align 4 .Loop3x_ctr32: aese v0.16b,v16.16b - aese v1.16b,v16.16b - aese v18.16b,v16.16b - ld1 {v16.4s},[x7],#16 aesmc v0.16b,v0.16b + aese v1.16b,v16.16b aesmc v1.16b,v1.16b + aese v18.16b,v16.16b aesmc v18.16b,v18.16b + ld1 {v16.4s},[x7],#16 subs w6,w6,#2 aese v0.16b,v17.16b - aese v1.16b,v17.16b - aese v18.16b,v17.16b - ld1 {v17.4s},[x7],#16 aesmc v0.16b,v0.16b + aese v1.16b,v17.16b aesmc v1.16b,v1.16b + aese v18.16b,v17.16b aesmc v18.16b,v18.16b + ld1 {v17.4s},[x7],#16 b.gt .Loop3x_ctr32 aese v0.16b,v16.16b - aese v1.16b,v16.16b - aese v18.16b,v16.16b - mov x7,x3 aesmc v4.16b,v0.16b - ld1 {v2.16b},[x0],#16 + aese v1.16b,v16.16b aesmc v5.16b,v1.16b - aesmc v18.16b,v18.16b + ld1 {v2.16b},[x0],#16 orr v0.16b,v6.16b,v6.16b - aese v4.16b,v17.16b + aese v18.16b,v16.16b + aesmc v18.16b,v18.16b ld1 {v3.16b},[x0],#16 - aese v5.16b,v17.16b - aese v18.16b,v17.16b orr v1.16b,v6.16b,v6.16b + aese v4.16b,v17.16b aesmc v4.16b,v4.16b - ld1 {v19.16b},[x0],#16 + aese v5.16b,v17.16b aesmc v5.16b,v5.16b + ld1 {v19.16b},[x0],#16 + mov x7,x3 + aese v18.16b,v17.16b aesmc v17.16b,v18.16b orr v18.16b,v6.16b,v6.16b add w9,w8,#1 aese v4.16b,v20.16b + aesmc v4.16b,v4.16b aese v5.16b,v20.16b - aese v17.16b,v20.16b + aesmc v5.16b,v5.16b eor v2.16b,v2.16b,v7.16b add w10,w8,#2 - aesmc v4.16b,v4.16b - aesmc v5.16b,v5.16b + aese v17.16b,v20.16b aesmc v17.16b,v17.16b eor v3.16b,v3.16b,v7.16b add w8,w8,#3 aese v4.16b,v21.16b + aesmc v4.16b,v4.16b aese v5.16b,v21.16b - aese v17.16b,v21.16b + aesmc v5.16b,v5.16b eor v19.16b,v19.16b,v7.16b rev w9,w9 - aesmc v4.16b,v4.16b - ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] - aesmc v5.16b,v5.16b + aese v17.16b,v21.16b aesmc v17.16b,v17.16b mov v0.s[3], w9 rev w10,w10 aese v4.16b,v22.16b + aesmc v4.16b,v4.16b aese v5.16b,v22.16b - aese v17.16b,v22.16b + aesmc v5.16b,v5.16b mov v1.s[3], w10 rev w12,w8 - aesmc v4.16b,v4.16b - aesmc v5.16b,v5.16b + aese v17.16b,v22.16b aesmc v17.16b,v17.16b mov v18.s[3], w12 subs x2,x2,#3 @@ -654,13 +675,14 @@ aes_v8_ctr32_encrypt_blocks: aese v5.16b,v23.16b aese v17.16b,v23.16b - mov w6,w5 eor v2.16b,v2.16b,v4.16b + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + st1 {v2.16b},[x1],#16 eor v3.16b,v3.16b,v5.16b + mov w6,w5 + st1 {v3.16b},[x1],#16 eor v19.16b,v19.16b,v17.16b ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] - st1 {v2.16b},[x1],#16 - st1 {v3.16b},[x1],#16 st1 {v19.16b},[x1],#16 b.hs .Loop3x_ctr32 @@ -672,40 +694,40 @@ aes_v8_ctr32_encrypt_blocks: .Lctr32_tail: aese v0.16b,v16.16b - aese v1.16b,v16.16b - ld1 {v16.4s},[x7],#16 aesmc v0.16b,v0.16b + aese v1.16b,v16.16b aesmc v1.16b,v1.16b + ld1 {v16.4s},[x7],#16 subs w6,w6,#2 aese v0.16b,v17.16b - aese v1.16b,v17.16b - ld1 {v17.4s},[x7],#16 aesmc v0.16b,v0.16b + aese v1.16b,v17.16b aesmc v1.16b,v1.16b + ld1 {v17.4s},[x7],#16 b.gt .Lctr32_tail aese v0.16b,v16.16b - aese v1.16b,v16.16b aesmc v0.16b,v0.16b + aese v1.16b,v16.16b aesmc v1.16b,v1.16b aese v0.16b,v17.16b - aese v1.16b,v17.16b aesmc v0.16b,v0.16b + aese v1.16b,v17.16b aesmc v1.16b,v1.16b ld1 {v2.16b},[x0],x12 aese v0.16b,v20.16b - aese v1.16b,v20.16b - ld1 {v3.16b},[x0] aesmc v0.16b,v0.16b + aese v1.16b,v20.16b aesmc v1.16b,v1.16b + ld1 {v3.16b},[x0] aese v0.16b,v21.16b - aese v1.16b,v21.16b aesmc v0.16b,v0.16b + aese v1.16b,v21.16b aesmc v1.16b,v1.16b - aese v0.16b,v22.16b - aese v1.16b,v22.16b eor v2.16b,v2.16b,v7.16b + aese v0.16b,v22.16b aesmc v0.16b,v0.16b + aese v1.16b,v22.16b aesmc v1.16b,v1.16b eor v3.16b,v3.16b,v7.16b aese v0.16b,v23.16b diff --git a/deps/openssl/asm_obsolete/arm64-linux64-gas/modes/ghashv8-armx.S b/deps/openssl/asm_obsolete/arm64-linux64-gas/modes/ghashv8-armx.S index 1bfb26340a6e9e..479007dc54dfab 100644 --- a/deps/openssl/asm_obsolete/arm64-linux64-gas/modes/ghashv8-armx.S +++ b/deps/openssl/asm_obsolete/arm64-linux64-gas/modes/ghashv8-armx.S @@ -6,103 +6,215 @@ .type gcm_init_v8,%function .align 4 gcm_init_v8: - ld1 {v17.2d},[x1] //load H - movi v16.16b,#0xe1 + ld1 {v17.2d},[x1] //load input H + movi v19.16b,#0xe1 + shl v19.2d,v19.2d,#57 //0xc2.0 ext v3.16b,v17.16b,v17.16b,#8 - shl v16.2d,v16.2d,#57 - ushr v18.2d,v16.2d,#63 - ext v16.16b,v18.16b,v16.16b,#8 //t0=0xc2....01 + ushr v18.2d,v19.2d,#63 dup v17.4s,v17.s[1] - ushr v19.2d,v3.2d,#63 + ext v16.16b,v18.16b,v19.16b,#8 //t0=0xc2....01 + ushr v18.2d,v3.2d,#63 sshr v17.4s,v17.4s,#31 //broadcast carry bit - and v19.16b,v19.16b,v16.16b + and v18.16b,v18.16b,v16.16b shl v3.2d,v3.2d,#1 - ext v19.16b,v19.16b,v19.16b,#8 + ext v18.16b,v18.16b,v18.16b,#8 and v16.16b,v16.16b,v17.16b - orr v3.16b,v3.16b,v19.16b //H<<<=1 - eor v3.16b,v3.16b,v16.16b //twisted H - st1 {v3.2d},[x0] + orr v3.16b,v3.16b,v18.16b //H<<<=1 + eor v20.16b,v3.16b,v16.16b //twisted H + st1 {v20.2d},[x0],#16 //store Htable[0] + + //calculate H^2 + ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing + pmull v0.1q,v20.1d,v20.1d + eor v16.16b,v16.16b,v20.16b + pmull2 v2.1q,v20.2d,v20.2d + pmull v1.1q,v16.1d,v16.1d + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase + + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + eor v0.16b,v1.16b,v18.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v22.16b,v0.16b,v18.16b + + ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing + eor v17.16b,v17.16b,v22.16b + ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed + st1 {v21.2d-v22.2d},[x0] //store Htable[1..2] ret .size gcm_init_v8,.-gcm_init_v8 - .global gcm_gmult_v8 .type gcm_gmult_v8,%function .align 4 gcm_gmult_v8: ld1 {v17.2d},[x0] //load Xi movi v19.16b,#0xe1 - ld1 {v20.2d},[x1] //load twisted H + ld1 {v20.2d-v21.2d},[x1] //load twisted H, ... shl v19.2d,v19.2d,#57 #ifndef __ARMEB__ rev64 v17.16b,v17.16b #endif - ext v21.16b,v20.16b,v20.16b,#8 - mov x3,#0 ext v3.16b,v17.16b,v17.16b,#8 - mov x12,#0 - eor v21.16b,v21.16b,v20.16b //Karatsuba pre-processing - mov x2,x0 - b .Lgmult_v8 -.size gcm_gmult_v8,.-gcm_gmult_v8 + pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo + eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing + pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi + pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi) + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + eor v0.16b,v1.16b,v18.16b + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v18.16b,v18.16b,v2.16b + eor v0.16b,v0.16b,v18.16b + +#ifndef __ARMEB__ + rev64 v0.16b,v0.16b +#endif + ext v0.16b,v0.16b,v0.16b,#8 + st1 {v0.2d},[x0] //write out Xi + + ret +.size gcm_gmult_v8,.-gcm_gmult_v8 .global gcm_ghash_v8 .type gcm_ghash_v8,%function .align 4 gcm_ghash_v8: ld1 {v0.2d},[x0] //load [rotated] Xi - subs x3,x3,#16 + //"[rotated]" means that + //loaded value would have + //to be rotated in order to + //make it appear as in + //alorithm specification + subs x3,x3,#32 //see if x3 is 32 or larger + mov x12,#16 //x12 is used as post- + //increment for input pointer; + //as loop is modulo-scheduled + //x12 is zeroed just in time + //to preclude oversteping + //inp[len], which means that + //last block[s] are actually + //loaded twice, but last + //copy is not processed + ld1 {v20.2d-v21.2d},[x1],#32 //load twisted H, ..., H^2 movi v19.16b,#0xe1 - mov x12,#16 - ld1 {v20.2d},[x1] //load twisted H - csel x12,xzr,x12,eq - ext v0.16b,v0.16b,v0.16b,#8 - shl v19.2d,v19.2d,#57 - ld1 {v17.2d},[x2],x12 //load [rotated] inp - ext v21.16b,v20.16b,v20.16b,#8 + ld1 {v22.2d},[x1] + csel x12,xzr,x12,eq //is it time to zero x12? + ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi + ld1 {v16.2d},[x2],#16 //load [rotated] I[0] + shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant #ifndef __ARMEB__ + rev64 v16.16b,v16.16b rev64 v0.16b,v0.16b +#endif + ext v3.16b,v16.16b,v16.16b,#8 //rotate I[0] + b.lo .Lodd_tail_v8 //x3 was less than 32 + ld1 {v17.2d},[x2],x12 //load [rotated] I[1] +#ifndef __ARMEB__ rev64 v17.16b,v17.16b #endif - eor v21.16b,v21.16b,v20.16b //Karatsuba pre-processing - ext v3.16b,v17.16b,v17.16b,#8 - b .Loop_v8 + ext v7.16b,v17.16b,v17.16b,#8 + eor v3.16b,v3.16b,v0.16b //I[i]^=Xi + pmull v4.1q,v20.1d,v7.1d //H·Ii+1 + eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing + pmull2 v6.1q,v20.2d,v7.2d + b .Loop_mod2x_v8 .align 4 -.Loop_v8: +.Loop_mod2x_v8: + ext v18.16b,v3.16b,v3.16b,#8 + subs x3,x3,#32 //is there more data? + pmull v0.1q,v22.1d,v3.1d //H^2.lo·Xi.lo + csel x12,xzr,x12,lo //is it time to zero x12? + + pmull v5.1q,v21.1d,v17.1d + eor v18.16b,v18.16b,v3.16b //Karatsuba pre-processing + pmull2 v2.1q,v22.2d,v3.2d //H^2.hi·Xi.hi + eor v0.16b,v0.16b,v4.16b //accumulate + pmull2 v1.1q,v21.2d,v18.2d //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi) + ld1 {v16.2d},[x2],x12 //load [rotated] I[i+2] + + eor v2.16b,v2.16b,v6.16b + csel x12,xzr,x12,eq //is it time to zero x12? + eor v1.16b,v1.16b,v5.16b + + ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing + eor v18.16b,v0.16b,v2.16b + eor v1.16b,v1.16b,v17.16b + ld1 {v17.2d},[x2],x12 //load [rotated] I[i+3] +#ifndef __ARMEB__ + rev64 v16.16b,v16.16b +#endif + eor v1.16b,v1.16b,v18.16b + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction + +#ifndef __ARMEB__ + rev64 v17.16b,v17.16b +#endif + ins v2.d[0],v1.d[1] + ins v1.d[1],v0.d[0] + ext v7.16b,v17.16b,v17.16b,#8 + ext v3.16b,v16.16b,v16.16b,#8 + eor v0.16b,v1.16b,v18.16b + pmull v4.1q,v20.1d,v7.1d //H·Ii+1 + eor v3.16b,v3.16b,v2.16b //accumulate v3.16b early + + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction + pmull v0.1q,v0.1d,v19.1d + eor v3.16b,v3.16b,v18.16b + eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing + eor v3.16b,v3.16b,v0.16b + pmull2 v6.1q,v20.2d,v7.2d + b.hs .Loop_mod2x_v8 //there was at least 32 more bytes + + eor v2.16b,v2.16b,v18.16b + ext v3.16b,v16.16b,v16.16b,#8 //re-construct v3.16b + adds x3,x3,#32 //re-construct x3 + eor v0.16b,v0.16b,v2.16b //re-construct v0.16b + b.eq .Ldone_v8 //is x3 zero? +.Lodd_tail_v8: ext v18.16b,v0.16b,v0.16b,#8 eor v3.16b,v3.16b,v0.16b //inp^=Xi - eor v17.16b,v17.16b,v18.16b //v17.16b is rotated inp^Xi + eor v17.16b,v16.16b,v18.16b //v17.16b is rotated inp^Xi -.Lgmult_v8: pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi - subs x3,x3,#16 pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi) - csel x12,xzr,x12,eq ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing eor v18.16b,v0.16b,v2.16b eor v1.16b,v1.16b,v17.16b - ld1 {v17.2d},[x2],x12 //load [rotated] inp eor v1.16b,v1.16b,v18.16b - pmull v18.1q,v0.1d,v19.1d //1st phase + pmull v18.1q,v0.1d,v19.1d //1st phase of reduction ins v2.d[0],v1.d[1] ins v1.d[1],v0.d[0] -#ifndef __ARMEB__ - rev64 v17.16b,v17.16b -#endif eor v0.16b,v1.16b,v18.16b - ext v3.16b,v17.16b,v17.16b,#8 - ext v18.16b,v0.16b,v0.16b,#8 //2nd phase + ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction pmull v0.1q,v0.1d,v19.1d eor v18.16b,v18.16b,v2.16b eor v0.16b,v0.16b,v18.16b - b.hs .Loop_v8 +.Ldone_v8: #ifndef __ARMEB__ rev64 v0.16b,v0.16b #endif diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/aes/aesni-x86_64.s b/deps/openssl/asm_obsolete/x64-elf-gas/aes/aesni-x86_64.s index 84708afbbb352f..6573fe4be3494d 100644 --- a/deps/openssl/asm_obsolete/x64-elf-gas/aes/aesni-x86_64.s +++ b/deps/openssl/asm_obsolete/x64-elf-gas/aes/aesni-x86_64.s @@ -17,7 +17,10 @@ aesni_encrypt: leaq 16(%rdx),%rdx jnz .Loop_enc1_1 .byte 102,15,56,221,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 .byte 0xf3,0xc3 .size aesni_encrypt,.-aesni_encrypt @@ -38,7 +41,10 @@ aesni_decrypt: leaq 16(%rdx),%rdx jnz .Loop_dec1_2 .byte 102,15,56,223,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 .byte 0xf3,0xc3 .size aesni_decrypt, .-aesni_decrypt .type _aesni_encrypt2,@function @@ -264,21 +270,18 @@ _aesni_encrypt6: pxor %xmm0,%xmm6 .byte 102,15,56,220,225 pxor %xmm0,%xmm7 + movups (%rcx,%rax,1),%xmm0 addq $16,%rax -.byte 102,15,56,220,233 -.byte 102,15,56,220,241 -.byte 102,15,56,220,249 - movups -16(%rcx,%rax,1),%xmm0 jmp .Lenc_loop6_enter .align 16 .Lenc_loop6: .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 +.Lenc_loop6_enter: .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 -.Lenc_loop6_enter: movups (%rcx,%rax,1),%xmm1 addq $32,%rax .byte 102,15,56,220,208 @@ -321,21 +324,18 @@ _aesni_decrypt6: pxor %xmm0,%xmm6 .byte 102,15,56,222,225 pxor %xmm0,%xmm7 + movups (%rcx,%rax,1),%xmm0 addq $16,%rax -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 - movups -16(%rcx,%rax,1),%xmm0 jmp .Ldec_loop6_enter .align 16 .Ldec_loop6: .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 +.Ldec_loop6_enter: .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 -.Ldec_loop6_enter: movups (%rcx,%rax,1),%xmm1 addq $32,%rax .byte 102,15,56,222,208 @@ -375,23 +375,18 @@ _aesni_encrypt8: leaq 32(%rcx,%rax,1),%rcx negq %rax .byte 102,15,56,220,209 - addq $16,%rax pxor %xmm0,%xmm7 -.byte 102,15,56,220,217 pxor %xmm0,%xmm8 +.byte 102,15,56,220,217 pxor %xmm0,%xmm9 -.byte 102,15,56,220,225 -.byte 102,15,56,220,233 -.byte 102,15,56,220,241 -.byte 102,15,56,220,249 -.byte 102,68,15,56,220,193 -.byte 102,68,15,56,220,201 - movups -16(%rcx,%rax,1),%xmm0 - jmp .Lenc_loop8_enter + movups (%rcx,%rax,1),%xmm0 + addq $16,%rax + jmp .Lenc_loop8_inner .align 16 .Lenc_loop8: .byte 102,15,56,220,209 .byte 102,15,56,220,217 +.Lenc_loop8_inner: .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 @@ -444,23 +439,18 @@ _aesni_decrypt8: leaq 32(%rcx,%rax,1),%rcx negq %rax .byte 102,15,56,222,209 - addq $16,%rax pxor %xmm0,%xmm7 -.byte 102,15,56,222,217 pxor %xmm0,%xmm8 +.byte 102,15,56,222,217 pxor %xmm0,%xmm9 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 -.byte 102,68,15,56,222,193 -.byte 102,68,15,56,222,201 - movups -16(%rcx,%rax,1),%xmm0 - jmp .Ldec_loop8_enter + movups (%rcx,%rax,1),%xmm0 + addq $16,%rax + jmp .Ldec_loop8_inner .align 16 .Ldec_loop8: .byte 102,15,56,222,209 .byte 102,15,56,222,217 +.Ldec_loop8_inner: .byte 102,15,56,222,225 .byte 102,15,56,222,233 .byte 102,15,56,222,241 @@ -587,6 +577,7 @@ aesni_ecb_encrypt: movups 80(%rdi),%xmm7 je .Lecb_enc_six movdqu 96(%rdi),%xmm8 + xorps %xmm9,%xmm9 call _aesni_encrypt8 movups %xmm2,(%rsi) movups %xmm3,16(%rsi) @@ -700,15 +691,23 @@ aesni_ecb_encrypt: jnc .Lecb_dec_loop8 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 movq %r11,%rcx movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 movl %r10d,%eax movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 movups %xmm7,80(%rsi) + pxor %xmm7,%xmm7 movups %xmm8,96(%rsi) + pxor %xmm8,%xmm8 movups %xmm9,112(%rsi) + pxor %xmm9,%xmm9 leaq 128(%rsi),%rsi addq $128,%rdx jz .Lecb_ret @@ -731,14 +730,23 @@ aesni_ecb_encrypt: je .Lecb_dec_six movups 96(%rdi),%xmm8 movups (%rcx),%xmm0 + xorps %xmm9,%xmm9 call _aesni_decrypt8 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 movups %xmm7,80(%rsi) + pxor %xmm7,%xmm7 movups %xmm8,96(%rsi) + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 jmp .Lecb_ret .align 16 .Lecb_dec_one: @@ -754,49 +762,73 @@ aesni_ecb_encrypt: jnz .Loop_dec1_4 .byte 102,15,56,223,209 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 jmp .Lecb_ret .align 16 .Lecb_dec_two: call _aesni_decrypt2 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 jmp .Lecb_ret .align 16 .Lecb_dec_three: call _aesni_decrypt3 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 jmp .Lecb_ret .align 16 .Lecb_dec_four: call _aesni_decrypt4 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 jmp .Lecb_ret .align 16 .Lecb_dec_five: xorps %xmm7,%xmm7 call _aesni_decrypt6 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 jmp .Lecb_ret .align 16 .Lecb_dec_six: call _aesni_decrypt6 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 movups %xmm7,80(%rsi) + pxor %xmm7,%xmm7 .Lecb_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 .byte 0xf3,0xc3 .size aesni_ecb_encrypt,.-aesni_ecb_encrypt .globl aesni_ccm64_encrypt_blocks @@ -853,7 +885,13 @@ aesni_ccm64_encrypt_blocks: leaq 16(%rsi),%rsi jnz .Lccm64_enc_outer + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 movups %xmm3,(%r9) + pxor %xmm3,%xmm3 + pxor %xmm8,%xmm8 + pxor %xmm6,%xmm6 .byte 0xf3,0xc3 .size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks .globl aesni_ccm64_decrypt_blocks @@ -944,21 +982,56 @@ aesni_ccm64_decrypt_blocks: leaq 16(%r11),%r11 jnz .Loop_enc1_6 .byte 102,15,56,221,217 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 movups %xmm3,(%r9) + pxor %xmm3,%xmm3 + pxor %xmm8,%xmm8 + pxor %xmm6,%xmm6 .byte 0xf3,0xc3 .size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks .globl aesni_ctr32_encrypt_blocks .type aesni_ctr32_encrypt_blocks,@function .align 16 aesni_ctr32_encrypt_blocks: + cmpq $1,%rdx + jne .Lctr32_bulk + + + + movups (%r8),%xmm2 + movups (%rdi),%xmm3 + movl 240(%rcx),%edx + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_enc1_7: +.byte 102,15,56,220,209 + decl %edx + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_enc1_7 +.byte 102,15,56,221,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + xorps %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movups %xmm2,(%rsi) + xorps %xmm2,%xmm2 + jmp .Lctr32_epilogue + +.align 16 +.Lctr32_bulk: leaq (%rsp),%rax pushq %rbp subq $128,%rsp andq $-16,%rsp leaq -8(%rax),%rbp - cmpq $1,%rdx - je .Lctr32_one_shortcut + + movdqu (%r8),%xmm2 movdqu (%rcx),%xmm0 @@ -1349,11 +1422,14 @@ aesni_ctr32_encrypt_blocks: leaq -128(%rcx),%rcx .Lctr32_tail: + + leaq 16(%rcx),%rcx cmpq $4,%rdx jb .Lctr32_loop3 je .Lctr32_loop4 + shll $4,%eax movdqa 96(%rsp),%xmm8 pxor %xmm9,%xmm9 @@ -1456,30 +1532,33 @@ aesni_ctr32_encrypt_blocks: movups 32(%rdi),%xmm12 xorps %xmm12,%xmm4 movups %xmm4,32(%rsi) - jmp .Lctr32_done -.align 16 -.Lctr32_one_shortcut: - movups (%r8),%xmm2 - movups (%rdi),%xmm10 - movl 240(%rcx),%eax - movups (%rcx),%xmm0 - movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx - xorps %xmm0,%xmm2 -.Loop_enc1_7: -.byte 102,15,56,220,209 - decl %eax - movups (%rcx),%xmm1 - leaq 16(%rcx),%rcx - jnz .Loop_enc1_7 -.byte 102,15,56,221,209 - xorps %xmm10,%xmm2 - movups %xmm2,(%rsi) - jmp .Lctr32_done - -.align 16 .Lctr32_done: + xorps %xmm0,%xmm0 + xorl %r11d,%r11d + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + movaps %xmm0,0(%rsp) + pxor %xmm8,%xmm8 + movaps %xmm0,16(%rsp) + pxor %xmm9,%xmm9 + movaps %xmm0,32(%rsp) + pxor %xmm10,%xmm10 + movaps %xmm0,48(%rsp) + pxor %xmm11,%xmm11 + movaps %xmm0,64(%rsp) + pxor %xmm12,%xmm12 + movaps %xmm0,80(%rsp) + pxor %xmm13,%xmm13 + movaps %xmm0,96(%rsp) + pxor %xmm14,%xmm14 + movaps %xmm0,112(%rsp) + pxor %xmm15,%xmm15 leaq (%rbp),%rsp popq %rbp .Lctr32_epilogue: @@ -1750,6 +1829,7 @@ aesni_xts_encrypt: shrl $4,%eax .Lxts_enc_short: + movl %eax,%r10d pxor %xmm0,%xmm10 addq $96,%rdx @@ -1778,6 +1858,7 @@ aesni_xts_encrypt: pxor %xmm12,%xmm4 pxor %xmm13,%xmm5 pxor %xmm14,%xmm6 + pxor %xmm7,%xmm7 call _aesni_encrypt6 @@ -1920,6 +2001,29 @@ aesni_xts_encrypt: movups %xmm2,-16(%rsi) .Lxts_enc_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + movaps %xmm0,0(%rsp) + pxor %xmm8,%xmm8 + movaps %xmm0,16(%rsp) + pxor %xmm9,%xmm9 + movaps %xmm0,32(%rsp) + pxor %xmm10,%xmm10 + movaps %xmm0,48(%rsp) + pxor %xmm11,%xmm11 + movaps %xmm0,64(%rsp) + pxor %xmm12,%xmm12 + movaps %xmm0,80(%rsp) + pxor %xmm13,%xmm13 + movaps %xmm0,96(%rsp) + pxor %xmm14,%xmm14 + pxor %xmm15,%xmm15 leaq (%rbp),%rsp popq %rbp .Lxts_enc_epilogue: @@ -2196,6 +2300,7 @@ aesni_xts_decrypt: shrl $4,%eax .Lxts_dec_short: + movl %eax,%r10d pxor %xmm0,%xmm10 pxor %xmm0,%xmm11 @@ -2398,6 +2503,29 @@ aesni_xts_decrypt: movups %xmm2,(%rsi) .Lxts_dec_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + movaps %xmm0,0(%rsp) + pxor %xmm8,%xmm8 + movaps %xmm0,16(%rsp) + pxor %xmm9,%xmm9 + movaps %xmm0,32(%rsp) + pxor %xmm10,%xmm10 + movaps %xmm0,48(%rsp) + pxor %xmm11,%xmm11 + movaps %xmm0,64(%rsp) + pxor %xmm12,%xmm12 + movaps %xmm0,80(%rsp) + pxor %xmm13,%xmm13 + movaps %xmm0,96(%rsp) + pxor %xmm14,%xmm14 + pxor %xmm15,%xmm15 leaq (%rbp),%rsp popq %rbp .Lxts_dec_epilogue: @@ -2446,7 +2574,11 @@ aesni_cbc_encrypt: jnc .Lcbc_enc_loop addq $16,%rdx jnz .Lcbc_enc_tail + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 movups %xmm2,(%r8) + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 jmp .Lcbc_ret .Lcbc_enc_tail: @@ -2466,6 +2598,35 @@ aesni_cbc_encrypt: .align 16 .Lcbc_decrypt: + cmpq $16,%rdx + jne .Lcbc_decrypt_bulk + + + + movdqu (%rdi),%xmm2 + movdqu (%r8),%xmm3 + movdqa %xmm2,%xmm4 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +.Loop_dec1_16: +.byte 102,15,56,222,209 + decl %r10d + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz .Loop_dec1_16 +.byte 102,15,56,223,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movdqu %xmm4,(%r8) + xorps %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + jmp .Lcbc_ret +.align 16 +.Lcbc_decrypt_bulk: leaq (%rsp),%rax pushq %rbp subq $16,%rsp @@ -2702,7 +2863,7 @@ aesni_cbc_encrypt: movaps %xmm9,%xmm2 leaq -112(%rcx),%rcx addq $112,%rdx - jle .Lcbc_dec_tail_collected + jle .Lcbc_dec_clear_tail_collected movups %xmm9,(%rsi) leaq 16(%rsi),%rsi cmpq $80,%rdx @@ -2721,14 +2882,19 @@ aesni_cbc_encrypt: movdqu %xmm2,(%rsi) pxor %xmm12,%xmm4 movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 pxor %xmm13,%xmm5 movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 pxor %xmm14,%xmm6 movdqu %xmm5,48(%rsi) + pxor %xmm5,%xmm5 pxor %xmm15,%xmm7 movdqu %xmm6,64(%rsi) + pxor %xmm6,%xmm6 leaq 80(%rsi),%rsi movdqa %xmm7,%xmm2 + pxor %xmm7,%xmm7 jmp .Lcbc_dec_tail_collected .align 16 @@ -2743,16 +2909,23 @@ aesni_cbc_encrypt: movdqu %xmm2,(%rsi) pxor %xmm12,%xmm4 movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 pxor %xmm13,%xmm5 movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 pxor %xmm14,%xmm6 movdqu %xmm5,48(%rsi) + pxor %xmm5,%xmm5 pxor %xmm15,%xmm7 movdqu %xmm6,64(%rsi) + pxor %xmm6,%xmm6 pxor %xmm9,%xmm8 movdqu %xmm7,80(%rsi) + pxor %xmm7,%xmm7 leaq 96(%rsi),%rsi movdqa %xmm8,%xmm2 + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 jmp .Lcbc_dec_tail_collected .align 16 @@ -2796,7 +2969,7 @@ aesni_cbc_encrypt: movdqa %xmm7,%xmm2 addq $80,%rdx - jle .Lcbc_dec_tail_collected + jle .Lcbc_dec_clear_tail_collected movups %xmm7,(%rsi) leaq 16(%rsi),%rsi @@ -2831,12 +3004,17 @@ aesni_cbc_encrypt: movdqu %xmm2,(%rsi) pxor %xmm12,%xmm4 movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 pxor %xmm13,%xmm5 movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 pxor %xmm14,%xmm6 movdqu %xmm5,48(%rsi) + pxor %xmm5,%xmm5 leaq 64(%rsi),%rsi movdqa %xmm6,%xmm2 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 subq $16,%rdx jmp .Lcbc_dec_tail_collected @@ -2847,12 +3025,12 @@ aesni_cbc_encrypt: movups 16(%rcx),%xmm1 leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 -.Loop_dec1_16: +.Loop_dec1_17: .byte 102,15,56,222,209 decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz .Loop_dec1_16 + jnz .Loop_dec1_17 .byte 102,15,56,223,209 xorps %xmm10,%xmm2 movaps %xmm11,%xmm10 @@ -2866,6 +3044,7 @@ aesni_cbc_encrypt: pxor %xmm11,%xmm3 movdqu %xmm2,(%rsi) movdqa %xmm3,%xmm2 + pxor %xmm3,%xmm3 leaq 16(%rsi),%rsi jmp .Lcbc_dec_tail_collected .align 16 @@ -2878,7 +3057,9 @@ aesni_cbc_encrypt: movdqu %xmm2,(%rsi) pxor %xmm12,%xmm4 movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 movdqa %xmm4,%xmm2 + pxor %xmm4,%xmm4 leaq 32(%rsi),%rsi jmp .Lcbc_dec_tail_collected .align 16 @@ -2891,29 +3072,45 @@ aesni_cbc_encrypt: movdqu %xmm2,(%rsi) pxor %xmm12,%xmm4 movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 pxor %xmm13,%xmm5 movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 movdqa %xmm5,%xmm2 + pxor %xmm5,%xmm5 leaq 48(%rsi),%rsi jmp .Lcbc_dec_tail_collected .align 16 +.Lcbc_dec_clear_tail_collected: + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 .Lcbc_dec_tail_collected: movups %xmm10,(%r8) andq $15,%rdx jnz .Lcbc_dec_tail_partial movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 jmp .Lcbc_dec_ret .align 16 .Lcbc_dec_tail_partial: movaps %xmm2,(%rsp) + pxor %xmm2,%xmm2 movq $16,%rcx movq %rsi,%rdi subq %rdx,%rcx leaq (%rsp),%rsi .long 0x9066A4F3 + movdqa %xmm2,(%rsp) .Lcbc_dec_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 leaq (%rbp),%rsp popq %rbp .Lcbc_ret: @@ -2951,7 +3148,9 @@ aesni_set_decrypt_key: movups (%rdx),%xmm0 .byte 102,15,56,219,192 + pxor %xmm1,%xmm1 movups %xmm0,(%rdi) + pxor %xmm0,%xmm0 .Ldec_key_ret: addq $8,%rsp .byte 0xf3,0xc3 @@ -2969,8 +3168,10 @@ __aesni_set_encrypt_key: testq %rdx,%rdx jz .Lenc_key_ret + movl $268437504,%r10d movups (%rdi),%xmm0 xorps %xmm4,%xmm4 + andl OPENSSL_ia32cap_P+4(%rip),%r10d leaq 16(%rdx),%rax cmpl $256,%esi je .L14rounds @@ -2981,6 +3182,9 @@ __aesni_set_encrypt_key: .L10rounds: movl $9,%esi + cmpl $268435456,%r10d + je .L10rounds_alt + movups %xmm0,(%rdx) .byte 102,15,58,223,200,1 call .Lkey_expansion_128_cold @@ -3007,10 +3211,80 @@ __aesni_set_encrypt_key: xorl %eax,%eax jmp .Lenc_key_ret +.align 16 +.L10rounds_alt: + movdqa .Lkey_rotate(%rip),%xmm5 + movl $8,%r10d + movdqa .Lkey_rcon1(%rip),%xmm4 + movdqa %xmm0,%xmm2 + movdqu %xmm0,(%rdx) + jmp .Loop_key128 + +.align 16 +.Loop_key128: +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + leaq 16(%rax),%rax + + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + + pxor %xmm2,%xmm0 + movdqu %xmm0,-16(%rax) + movdqa %xmm0,%xmm2 + + decl %r10d + jnz .Loop_key128 + + movdqa .Lkey_rcon1b(%rip),%xmm4 + +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + + pxor %xmm2,%xmm0 + movdqu %xmm0,(%rax) + + movdqa %xmm0,%xmm2 +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + + pxor %xmm2,%xmm0 + movdqu %xmm0,16(%rax) + + movl %esi,96(%rax) + xorl %eax,%eax + jmp .Lenc_key_ret + .align 16 .L12rounds: movq 16(%rdi),%xmm2 movl $11,%esi + cmpl $268435456,%r10d + je .L12rounds_alt + movups %xmm0,(%rdx) .byte 102,15,58,223,202,1 call .Lkey_expansion_192a_cold @@ -3033,11 +3307,55 @@ __aesni_set_encrypt_key: xorq %rax,%rax jmp .Lenc_key_ret +.align 16 +.L12rounds_alt: + movdqa .Lkey_rotate192(%rip),%xmm5 + movdqa .Lkey_rcon1(%rip),%xmm4 + movl $8,%r10d + movdqu %xmm0,(%rdx) + jmp .Loop_key192 + +.align 16 +.Loop_key192: + movq %xmm2,0(%rax) + movdqa %xmm2,%xmm1 +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + pslld $1,%xmm4 + leaq 24(%rax),%rax + + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + + pshufd $255,%xmm0,%xmm3 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + + pxor %xmm2,%xmm0 + pxor %xmm3,%xmm2 + movdqu %xmm0,-16(%rax) + + decl %r10d + jnz .Loop_key192 + + movl %esi,32(%rax) + xorl %eax,%eax + jmp .Lenc_key_ret + .align 16 .L14rounds: movups 16(%rdi),%xmm2 movl $13,%esi leaq 16(%rax),%rax + cmpl $268435456,%r10d + je .L14rounds_alt + movups %xmm0,(%rdx) movups %xmm2,16(%rdx) .byte 102,15,58,223,202,1 @@ -3071,10 +3389,70 @@ __aesni_set_encrypt_key: xorq %rax,%rax jmp .Lenc_key_ret +.align 16 +.L14rounds_alt: + movdqa .Lkey_rotate(%rip),%xmm5 + movdqa .Lkey_rcon1(%rip),%xmm4 + movl $7,%r10d + movdqu %xmm0,0(%rdx) + movdqa %xmm2,%xmm1 + movdqu %xmm2,16(%rdx) + jmp .Loop_key256 + +.align 16 +.Loop_key256: +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + pslld $1,%xmm4 + + pxor %xmm2,%xmm0 + movdqu %xmm0,(%rax) + + decl %r10d + jz .Ldone_key256 + + pshufd $255,%xmm0,%xmm2 + pxor %xmm3,%xmm3 +.byte 102,15,56,221,211 + + movdqa %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm3,%xmm1 + + pxor %xmm1,%xmm2 + movdqu %xmm2,16(%rax) + leaq 32(%rax),%rax + movdqa %xmm2,%xmm1 + + jmp .Loop_key256 + +.Ldone_key256: + movl %esi,16(%rax) + xorl %eax,%eax + jmp .Lenc_key_ret + .align 16 .Lbad_keybits: movq $-2,%rax .Lenc_key_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 addq $8,%rsp .byte 0xf3,0xc3 .LSEH_end_set_encrypt_key: @@ -3160,6 +3538,14 @@ __aesni_set_encrypt_key: .long 0x87,0,1,0 .Lincrement1: .byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +.Lkey_rotate: +.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d +.Lkey_rotate192: +.long 0x04070605,0x04070605,0x04070605,0x04070605 +.Lkey_rcon1: +.long 1,1,1,1 +.Lkey_rcon1b: +.long 0x1b,0x1b,0x1b,0x1b .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .align 64 diff --git a/deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont5.s b/deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont5.s index 1bf368c7eb29bf..5f98ff2237b711 100644 --- a/deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont5.s +++ b/deps/openssl/asm_obsolete/x64-elf-gas/bn/x86_64-mont5.s @@ -1755,11 +1755,16 @@ bn_from_mont8x: .type bn_get_bits5,@function .align 16 bn_get_bits5: - movq %rdi,%r10 + leaq 0(%rdi),%r10 + leaq 1(%rdi),%r11 movl %esi,%ecx - shrl $3,%esi - movzwl (%r10,%rsi,1),%eax - andl $7,%ecx + shrl $4,%esi + andl $15,%ecx + leal -8(%rcx),%eax + cmpl $11,%ecx + cmovaq %r11,%r10 + cmoval %eax,%ecx + movzwl (%r10,%rsi,2),%eax shrl %cl,%eax andl $31,%eax .byte 0xf3,0xc3 diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/aes/aesni-x86_64.s b/deps/openssl/asm_obsolete/x64-macosx-gas/aes/aesni-x86_64.s index 57509ae7196c08..41ad80eebd1f89 100644 --- a/deps/openssl/asm_obsolete/x64-macosx-gas/aes/aesni-x86_64.s +++ b/deps/openssl/asm_obsolete/x64-macosx-gas/aes/aesni-x86_64.s @@ -17,7 +17,10 @@ L$oop_enc1_1: leaq 16(%rdx),%rdx jnz L$oop_enc1_1 .byte 102,15,56,221,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 .byte 0xf3,0xc3 @@ -38,7 +41,10 @@ L$oop_dec1_2: leaq 16(%rdx),%rdx jnz L$oop_dec1_2 .byte 102,15,56,223,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 .byte 0xf3,0xc3 @@ -264,21 +270,18 @@ _aesni_encrypt6: pxor %xmm0,%xmm6 .byte 102,15,56,220,225 pxor %xmm0,%xmm7 + movups (%rcx,%rax,1),%xmm0 addq $16,%rax -.byte 102,15,56,220,233 -.byte 102,15,56,220,241 -.byte 102,15,56,220,249 - movups -16(%rcx,%rax,1),%xmm0 jmp L$enc_loop6_enter .p2align 4 L$enc_loop6: .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 +L$enc_loop6_enter: .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 -L$enc_loop6_enter: movups (%rcx,%rax,1),%xmm1 addq $32,%rax .byte 102,15,56,220,208 @@ -321,21 +324,18 @@ _aesni_decrypt6: pxor %xmm0,%xmm6 .byte 102,15,56,222,225 pxor %xmm0,%xmm7 + movups (%rcx,%rax,1),%xmm0 addq $16,%rax -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 - movups -16(%rcx,%rax,1),%xmm0 jmp L$dec_loop6_enter .p2align 4 L$dec_loop6: .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 +L$dec_loop6_enter: .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 -L$dec_loop6_enter: movups (%rcx,%rax,1),%xmm1 addq $32,%rax .byte 102,15,56,222,208 @@ -375,23 +375,18 @@ _aesni_encrypt8: leaq 32(%rcx,%rax,1),%rcx negq %rax .byte 102,15,56,220,209 - addq $16,%rax pxor %xmm0,%xmm7 -.byte 102,15,56,220,217 pxor %xmm0,%xmm8 +.byte 102,15,56,220,217 pxor %xmm0,%xmm9 -.byte 102,15,56,220,225 -.byte 102,15,56,220,233 -.byte 102,15,56,220,241 -.byte 102,15,56,220,249 -.byte 102,68,15,56,220,193 -.byte 102,68,15,56,220,201 - movups -16(%rcx,%rax,1),%xmm0 - jmp L$enc_loop8_enter + movups (%rcx,%rax,1),%xmm0 + addq $16,%rax + jmp L$enc_loop8_inner .p2align 4 L$enc_loop8: .byte 102,15,56,220,209 .byte 102,15,56,220,217 +L$enc_loop8_inner: .byte 102,15,56,220,225 .byte 102,15,56,220,233 .byte 102,15,56,220,241 @@ -444,23 +439,18 @@ _aesni_decrypt8: leaq 32(%rcx,%rax,1),%rcx negq %rax .byte 102,15,56,222,209 - addq $16,%rax pxor %xmm0,%xmm7 -.byte 102,15,56,222,217 pxor %xmm0,%xmm8 +.byte 102,15,56,222,217 pxor %xmm0,%xmm9 -.byte 102,15,56,222,225 -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 -.byte 102,68,15,56,222,193 -.byte 102,68,15,56,222,201 - movups -16(%rcx,%rax,1),%xmm0 - jmp L$dec_loop8_enter + movups (%rcx,%rax,1),%xmm0 + addq $16,%rax + jmp L$dec_loop8_inner .p2align 4 L$dec_loop8: .byte 102,15,56,222,209 .byte 102,15,56,222,217 +L$dec_loop8_inner: .byte 102,15,56,222,225 .byte 102,15,56,222,233 .byte 102,15,56,222,241 @@ -587,6 +577,7 @@ L$ecb_enc_tail: movups 80(%rdi),%xmm7 je L$ecb_enc_six movdqu 96(%rdi),%xmm8 + xorps %xmm9,%xmm9 call _aesni_encrypt8 movups %xmm2,(%rsi) movups %xmm3,16(%rsi) @@ -700,15 +691,23 @@ L$ecb_dec_loop8_enter: jnc L$ecb_dec_loop8 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 movq %r11,%rcx movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 movl %r10d,%eax movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 movups %xmm7,80(%rsi) + pxor %xmm7,%xmm7 movups %xmm8,96(%rsi) + pxor %xmm8,%xmm8 movups %xmm9,112(%rsi) + pxor %xmm9,%xmm9 leaq 128(%rsi),%rsi addq $128,%rdx jz L$ecb_ret @@ -731,14 +730,23 @@ L$ecb_dec_tail: je L$ecb_dec_six movups 96(%rdi),%xmm8 movups (%rcx),%xmm0 + xorps %xmm9,%xmm9 call _aesni_decrypt8 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 movups %xmm7,80(%rsi) + pxor %xmm7,%xmm7 movups %xmm8,96(%rsi) + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 jmp L$ecb_ret .p2align 4 L$ecb_dec_one: @@ -754,49 +762,73 @@ L$oop_dec1_4: jnz L$oop_dec1_4 .byte 102,15,56,223,209 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 jmp L$ecb_ret .p2align 4 L$ecb_dec_two: call _aesni_decrypt2 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 jmp L$ecb_ret .p2align 4 L$ecb_dec_three: call _aesni_decrypt3 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 jmp L$ecb_ret .p2align 4 L$ecb_dec_four: call _aesni_decrypt4 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 jmp L$ecb_ret .p2align 4 L$ecb_dec_five: xorps %xmm7,%xmm7 call _aesni_decrypt6 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 jmp L$ecb_ret .p2align 4 L$ecb_dec_six: call _aesni_decrypt6 movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 movups %xmm3,16(%rsi) + pxor %xmm3,%xmm3 movups %xmm4,32(%rsi) + pxor %xmm4,%xmm4 movups %xmm5,48(%rsi) + pxor %xmm5,%xmm5 movups %xmm6,64(%rsi) + pxor %xmm6,%xmm6 movups %xmm7,80(%rsi) + pxor %xmm7,%xmm7 L$ecb_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 .byte 0xf3,0xc3 .globl _aesni_ccm64_encrypt_blocks @@ -853,7 +885,13 @@ L$ccm64_enc2_loop: leaq 16(%rsi),%rsi jnz L$ccm64_enc_outer + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 movups %xmm3,(%r9) + pxor %xmm3,%xmm3 + pxor %xmm8,%xmm8 + pxor %xmm6,%xmm6 .byte 0xf3,0xc3 .globl _aesni_ccm64_decrypt_blocks @@ -944,21 +982,56 @@ L$oop_enc1_6: leaq 16(%r11),%r11 jnz L$oop_enc1_6 .byte 102,15,56,221,217 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 movups %xmm3,(%r9) + pxor %xmm3,%xmm3 + pxor %xmm8,%xmm8 + pxor %xmm6,%xmm6 .byte 0xf3,0xc3 .globl _aesni_ctr32_encrypt_blocks .p2align 4 _aesni_ctr32_encrypt_blocks: + cmpq $1,%rdx + jne L$ctr32_bulk + + + + movups (%r8),%xmm2 + movups (%rdi),%xmm3 + movl 240(%rcx),%edx + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_enc1_7: +.byte 102,15,56,220,209 + decl %edx + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_enc1_7 +.byte 102,15,56,221,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + xorps %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movups %xmm2,(%rsi) + xorps %xmm2,%xmm2 + jmp L$ctr32_epilogue + +.p2align 4 +L$ctr32_bulk: leaq (%rsp),%rax pushq %rbp subq $128,%rsp andq $-16,%rsp leaq -8(%rax),%rbp - cmpq $1,%rdx - je L$ctr32_one_shortcut + + movdqu (%r8),%xmm2 movdqu (%rcx),%xmm0 @@ -1349,11 +1422,14 @@ L$ctr32_enc_done: leaq -128(%rcx),%rcx L$ctr32_tail: + + leaq 16(%rcx),%rcx cmpq $4,%rdx jb L$ctr32_loop3 je L$ctr32_loop4 + shll $4,%eax movdqa 96(%rsp),%xmm8 pxor %xmm9,%xmm9 @@ -1456,30 +1532,33 @@ L$ctr32_loop3: movups 32(%rdi),%xmm12 xorps %xmm12,%xmm4 movups %xmm4,32(%rsi) - jmp L$ctr32_done -.p2align 4 -L$ctr32_one_shortcut: - movups (%r8),%xmm2 - movups (%rdi),%xmm10 - movl 240(%rcx),%eax - movups (%rcx),%xmm0 - movups 16(%rcx),%xmm1 - leaq 32(%rcx),%rcx - xorps %xmm0,%xmm2 -L$oop_enc1_7: -.byte 102,15,56,220,209 - decl %eax - movups (%rcx),%xmm1 - leaq 16(%rcx),%rcx - jnz L$oop_enc1_7 -.byte 102,15,56,221,209 - xorps %xmm10,%xmm2 - movups %xmm2,(%rsi) - jmp L$ctr32_done - -.p2align 4 L$ctr32_done: + xorps %xmm0,%xmm0 + xorl %r11d,%r11d + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + movaps %xmm0,0(%rsp) + pxor %xmm8,%xmm8 + movaps %xmm0,16(%rsp) + pxor %xmm9,%xmm9 + movaps %xmm0,32(%rsp) + pxor %xmm10,%xmm10 + movaps %xmm0,48(%rsp) + pxor %xmm11,%xmm11 + movaps %xmm0,64(%rsp) + pxor %xmm12,%xmm12 + movaps %xmm0,80(%rsp) + pxor %xmm13,%xmm13 + movaps %xmm0,96(%rsp) + pxor %xmm14,%xmm14 + movaps %xmm0,112(%rsp) + pxor %xmm15,%xmm15 leaq (%rbp),%rsp popq %rbp L$ctr32_epilogue: @@ -1750,6 +1829,7 @@ L$xts_enc_loop6: shrl $4,%eax L$xts_enc_short: + movl %eax,%r10d pxor %xmm0,%xmm10 addq $96,%rdx @@ -1778,6 +1858,7 @@ L$xts_enc_short: pxor %xmm12,%xmm4 pxor %xmm13,%xmm5 pxor %xmm14,%xmm6 + pxor %xmm7,%xmm7 call _aesni_encrypt6 @@ -1920,6 +2001,29 @@ L$oop_enc1_10: movups %xmm2,-16(%rsi) L$xts_enc_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + movaps %xmm0,0(%rsp) + pxor %xmm8,%xmm8 + movaps %xmm0,16(%rsp) + pxor %xmm9,%xmm9 + movaps %xmm0,32(%rsp) + pxor %xmm10,%xmm10 + movaps %xmm0,48(%rsp) + pxor %xmm11,%xmm11 + movaps %xmm0,64(%rsp) + pxor %xmm12,%xmm12 + movaps %xmm0,80(%rsp) + pxor %xmm13,%xmm13 + movaps %xmm0,96(%rsp) + pxor %xmm14,%xmm14 + pxor %xmm15,%xmm15 leaq (%rbp),%rsp popq %rbp L$xts_enc_epilogue: @@ -2196,6 +2300,7 @@ L$xts_dec_loop6: shrl $4,%eax L$xts_dec_short: + movl %eax,%r10d pxor %xmm0,%xmm10 pxor %xmm0,%xmm11 @@ -2398,6 +2503,29 @@ L$oop_dec1_14: movups %xmm2,(%rsi) L$xts_dec_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + movaps %xmm0,0(%rsp) + pxor %xmm8,%xmm8 + movaps %xmm0,16(%rsp) + pxor %xmm9,%xmm9 + movaps %xmm0,32(%rsp) + pxor %xmm10,%xmm10 + movaps %xmm0,48(%rsp) + pxor %xmm11,%xmm11 + movaps %xmm0,64(%rsp) + pxor %xmm12,%xmm12 + movaps %xmm0,80(%rsp) + pxor %xmm13,%xmm13 + movaps %xmm0,96(%rsp) + pxor %xmm14,%xmm14 + pxor %xmm15,%xmm15 leaq (%rbp),%rsp popq %rbp L$xts_dec_epilogue: @@ -2446,7 +2574,11 @@ L$oop_enc1_15: jnc L$cbc_enc_loop addq $16,%rdx jnz L$cbc_enc_tail + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 movups %xmm2,(%r8) + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 jmp L$cbc_ret L$cbc_enc_tail: @@ -2466,6 +2598,35 @@ L$cbc_enc_tail: .p2align 4 L$cbc_decrypt: + cmpq $16,%rdx + jne L$cbc_decrypt_bulk + + + + movdqu (%rdi),%xmm2 + movdqu (%r8),%xmm3 + movdqa %xmm2,%xmm4 + movups (%rcx),%xmm0 + movups 16(%rcx),%xmm1 + leaq 32(%rcx),%rcx + xorps %xmm0,%xmm2 +L$oop_dec1_16: +.byte 102,15,56,222,209 + decl %r10d + movups (%rcx),%xmm1 + leaq 16(%rcx),%rcx + jnz L$oop_dec1_16 +.byte 102,15,56,223,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + movdqu %xmm4,(%r8) + xorps %xmm3,%xmm2 + pxor %xmm3,%xmm3 + movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 + jmp L$cbc_ret +.p2align 4 +L$cbc_decrypt_bulk: leaq (%rsp),%rax pushq %rbp subq $16,%rsp @@ -2702,7 +2863,7 @@ L$cbc_dec_done: movaps %xmm9,%xmm2 leaq -112(%rcx),%rcx addq $112,%rdx - jle L$cbc_dec_tail_collected + jle L$cbc_dec_clear_tail_collected movups %xmm9,(%rsi) leaq 16(%rsi),%rsi cmpq $80,%rdx @@ -2721,14 +2882,19 @@ L$cbc_dec_six_or_seven: movdqu %xmm2,(%rsi) pxor %xmm12,%xmm4 movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 pxor %xmm13,%xmm5 movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 pxor %xmm14,%xmm6 movdqu %xmm5,48(%rsi) + pxor %xmm5,%xmm5 pxor %xmm15,%xmm7 movdqu %xmm6,64(%rsi) + pxor %xmm6,%xmm6 leaq 80(%rsi),%rsi movdqa %xmm7,%xmm2 + pxor %xmm7,%xmm7 jmp L$cbc_dec_tail_collected .p2align 4 @@ -2743,16 +2909,23 @@ L$cbc_dec_seven: movdqu %xmm2,(%rsi) pxor %xmm12,%xmm4 movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 pxor %xmm13,%xmm5 movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 pxor %xmm14,%xmm6 movdqu %xmm5,48(%rsi) + pxor %xmm5,%xmm5 pxor %xmm15,%xmm7 movdqu %xmm6,64(%rsi) + pxor %xmm6,%xmm6 pxor %xmm9,%xmm8 movdqu %xmm7,80(%rsi) + pxor %xmm7,%xmm7 leaq 96(%rsi),%rsi movdqa %xmm8,%xmm2 + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 jmp L$cbc_dec_tail_collected .p2align 4 @@ -2796,7 +2969,7 @@ L$cbc_dec_loop6_enter: movdqa %xmm7,%xmm2 addq $80,%rdx - jle L$cbc_dec_tail_collected + jle L$cbc_dec_clear_tail_collected movups %xmm7,(%rsi) leaq 16(%rsi),%rsi @@ -2831,12 +3004,17 @@ L$cbc_dec_tail: movdqu %xmm2,(%rsi) pxor %xmm12,%xmm4 movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 pxor %xmm13,%xmm5 movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 pxor %xmm14,%xmm6 movdqu %xmm5,48(%rsi) + pxor %xmm5,%xmm5 leaq 64(%rsi),%rsi movdqa %xmm6,%xmm2 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 subq $16,%rdx jmp L$cbc_dec_tail_collected @@ -2847,12 +3025,12 @@ L$cbc_dec_one: movups 16(%rcx),%xmm1 leaq 32(%rcx),%rcx xorps %xmm0,%xmm2 -L$oop_dec1_16: +L$oop_dec1_17: .byte 102,15,56,222,209 decl %eax movups (%rcx),%xmm1 leaq 16(%rcx),%rcx - jnz L$oop_dec1_16 + jnz L$oop_dec1_17 .byte 102,15,56,223,209 xorps %xmm10,%xmm2 movaps %xmm11,%xmm10 @@ -2866,6 +3044,7 @@ L$cbc_dec_two: pxor %xmm11,%xmm3 movdqu %xmm2,(%rsi) movdqa %xmm3,%xmm2 + pxor %xmm3,%xmm3 leaq 16(%rsi),%rsi jmp L$cbc_dec_tail_collected .p2align 4 @@ -2878,7 +3057,9 @@ L$cbc_dec_three: movdqu %xmm2,(%rsi) pxor %xmm12,%xmm4 movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 movdqa %xmm4,%xmm2 + pxor %xmm4,%xmm4 leaq 32(%rsi),%rsi jmp L$cbc_dec_tail_collected .p2align 4 @@ -2891,29 +3072,45 @@ L$cbc_dec_four: movdqu %xmm2,(%rsi) pxor %xmm12,%xmm4 movdqu %xmm3,16(%rsi) + pxor %xmm3,%xmm3 pxor %xmm13,%xmm5 movdqu %xmm4,32(%rsi) + pxor %xmm4,%xmm4 movdqa %xmm5,%xmm2 + pxor %xmm5,%xmm5 leaq 48(%rsi),%rsi jmp L$cbc_dec_tail_collected .p2align 4 +L$cbc_dec_clear_tail_collected: + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + pxor %xmm8,%xmm8 + pxor %xmm9,%xmm9 L$cbc_dec_tail_collected: movups %xmm10,(%r8) andq $15,%rdx jnz L$cbc_dec_tail_partial movups %xmm2,(%rsi) + pxor %xmm2,%xmm2 jmp L$cbc_dec_ret .p2align 4 L$cbc_dec_tail_partial: movaps %xmm2,(%rsp) + pxor %xmm2,%xmm2 movq $16,%rcx movq %rsi,%rdi subq %rdx,%rcx leaq (%rsp),%rsi .long 0x9066A4F3 + movdqa %xmm2,(%rsp) L$cbc_dec_ret: + xorps %xmm0,%xmm0 + pxor %xmm1,%xmm1 leaq (%rbp),%rsp popq %rbp L$cbc_ret: @@ -2951,7 +3148,9 @@ L$dec_key_inverse: movups (%rdx),%xmm0 .byte 102,15,56,219,192 + pxor %xmm1,%xmm1 movups %xmm0,(%rdi) + pxor %xmm0,%xmm0 L$dec_key_ret: addq $8,%rsp .byte 0xf3,0xc3 @@ -2969,8 +3168,10 @@ __aesni_set_encrypt_key: testq %rdx,%rdx jz L$enc_key_ret + movl $268437504,%r10d movups (%rdi),%xmm0 xorps %xmm4,%xmm4 + andl _OPENSSL_ia32cap_P+4(%rip),%r10d leaq 16(%rdx),%rax cmpl $256,%esi je L$14rounds @@ -2981,6 +3182,9 @@ __aesni_set_encrypt_key: L$10rounds: movl $9,%esi + cmpl $268435456,%r10d + je L$10rounds_alt + movups %xmm0,(%rdx) .byte 102,15,58,223,200,1 call L$key_expansion_128_cold @@ -3007,10 +3211,80 @@ L$10rounds: xorl %eax,%eax jmp L$enc_key_ret +.p2align 4 +L$10rounds_alt: + movdqa L$key_rotate(%rip),%xmm5 + movl $8,%r10d + movdqa L$key_rcon1(%rip),%xmm4 + movdqa %xmm0,%xmm2 + movdqu %xmm0,(%rdx) + jmp L$oop_key128 + +.p2align 4 +L$oop_key128: +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + leaq 16(%rax),%rax + + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + + pxor %xmm2,%xmm0 + movdqu %xmm0,-16(%rax) + movdqa %xmm0,%xmm2 + + decl %r10d + jnz L$oop_key128 + + movdqa L$key_rcon1b(%rip),%xmm4 + +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + + pxor %xmm2,%xmm0 + movdqu %xmm0,(%rax) + + movdqa %xmm0,%xmm2 +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + + pxor %xmm2,%xmm0 + movdqu %xmm0,16(%rax) + + movl %esi,96(%rax) + xorl %eax,%eax + jmp L$enc_key_ret + .p2align 4 L$12rounds: movq 16(%rdi),%xmm2 movl $11,%esi + cmpl $268435456,%r10d + je L$12rounds_alt + movups %xmm0,(%rdx) .byte 102,15,58,223,202,1 call L$key_expansion_192a_cold @@ -3033,11 +3307,55 @@ L$12rounds: xorq %rax,%rax jmp L$enc_key_ret +.p2align 4 +L$12rounds_alt: + movdqa L$key_rotate192(%rip),%xmm5 + movdqa L$key_rcon1(%rip),%xmm4 + movl $8,%r10d + movdqu %xmm0,(%rdx) + jmp L$oop_key192 + +.p2align 4 +L$oop_key192: + movq %xmm2,0(%rax) + movdqa %xmm2,%xmm1 +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + pslld $1,%xmm4 + leaq 24(%rax),%rax + + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + + pshufd $255,%xmm0,%xmm3 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + + pxor %xmm2,%xmm0 + pxor %xmm3,%xmm2 + movdqu %xmm0,-16(%rax) + + decl %r10d + jnz L$oop_key192 + + movl %esi,32(%rax) + xorl %eax,%eax + jmp L$enc_key_ret + .p2align 4 L$14rounds: movups 16(%rdi),%xmm2 movl $13,%esi leaq 16(%rax),%rax + cmpl $268435456,%r10d + je L$14rounds_alt + movups %xmm0,(%rdx) movups %xmm2,16(%rdx) .byte 102,15,58,223,202,1 @@ -3071,10 +3389,70 @@ L$14rounds: xorq %rax,%rax jmp L$enc_key_ret +.p2align 4 +L$14rounds_alt: + movdqa L$key_rotate(%rip),%xmm5 + movdqa L$key_rcon1(%rip),%xmm4 + movl $7,%r10d + movdqu %xmm0,0(%rdx) + movdqa %xmm2,%xmm1 + movdqu %xmm2,16(%rdx) + jmp L$oop_key256 + +.p2align 4 +L$oop_key256: +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + pslld $1,%xmm4 + + pxor %xmm2,%xmm0 + movdqu %xmm0,(%rax) + + decl %r10d + jz L$done_key256 + + pshufd $255,%xmm0,%xmm2 + pxor %xmm3,%xmm3 +.byte 102,15,56,221,211 + + movdqa %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm3,%xmm1 + + pxor %xmm1,%xmm2 + movdqu %xmm2,16(%rax) + leaq 32(%rax),%rax + movdqa %xmm2,%xmm1 + + jmp L$oop_key256 + +L$done_key256: + movl %esi,16(%rax) + xorl %eax,%eax + jmp L$enc_key_ret + .p2align 4 L$bad_keybits: movq $-2,%rax L$enc_key_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 addq $8,%rsp .byte 0xf3,0xc3 L$SEH_end_set_encrypt_key: @@ -3160,6 +3538,14 @@ L$xts_magic: .long 0x87,0,1,0 L$increment1: .byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +L$key_rotate: +.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d +L$key_rotate192: +.long 0x04070605,0x04070605,0x04070605,0x04070605 +L$key_rcon1: +.long 1,1,1,1 +L$key_rcon1b: +.long 0x1b,0x1b,0x1b,0x1b .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 .p2align 6 diff --git a/deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont5.s b/deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont5.s index ba4d62157cc5f4..049bf06473ae02 100644 --- a/deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont5.s +++ b/deps/openssl/asm_obsolete/x64-macosx-gas/bn/x86_64-mont5.s @@ -1755,11 +1755,16 @@ L$from_epilogue: .p2align 4 _bn_get_bits5: - movq %rdi,%r10 + leaq 0(%rdi),%r10 + leaq 1(%rdi),%r11 movl %esi,%ecx - shrl $3,%esi - movzwl (%r10,%rsi,1),%eax - andl $7,%ecx + shrl $4,%esi + andl $15,%ecx + leal -8(%rcx),%eax + cmpl $11,%ecx + cmovaq %r11,%r10 + cmoval %eax,%ecx + movzwl (%r10,%rsi,2),%eax shrl %cl,%eax andl $31,%eax .byte 0xf3,0xc3 diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/aes/aesni-sha256-x86_64.asm b/deps/openssl/asm_obsolete/x64-win32-masm/aes/aesni-sha256-x86_64.asm index 94733526388550..34b554f9a91281 100644 --- a/deps/openssl/asm_obsolete/x64-win32-masm/aes/aesni-sha256-x86_64.asm +++ b/deps/openssl/asm_obsolete/x64-win32-masm/aes/aesni-sha256-x86_64.asm @@ -60,77 +60,6 @@ DB 54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98 DB 121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108 DB 46,111,114,103,62,0 ALIGN 64 - mov rsi,rax - mov rax,QWORD PTR[((64+56))+rax] - lea rax,QWORD PTR[48+rax] - - mov rbx,QWORD PTR[((-8))+rax] - mov rbp,QWORD PTR[((-16))+rax] - mov r12,QWORD PTR[((-24))+rax] - mov r13,QWORD PTR[((-32))+rax] - mov r14,QWORD PTR[((-40))+rax] - mov r15,QWORD PTR[((-48))+rax] - mov QWORD PTR[144+r8],rbx - mov QWORD PTR[160+r8],rbp - mov QWORD PTR[216+r8],r12 - mov QWORD PTR[224+r8],r13 - mov QWORD PTR[232+r8],r14 - mov QWORD PTR[240+r8],r15 - - lea rsi,QWORD PTR[((64+64))+rsi] - lea rdi,QWORD PTR[512+r8] - mov ecx,20 - DD 0a548f3fch - -$L$in_prologue:: - mov rdi,QWORD PTR[8+rax] - mov rsi,QWORD PTR[16+rax] - mov QWORD PTR[152+r8],rax - mov QWORD PTR[168+r8],rsi - mov QWORD PTR[176+r8],rdi - - mov rdi,QWORD PTR[40+r9] - mov rsi,r8 - mov ecx,154 - DD 0a548f3fch - - mov rsi,r9 - xor rcx,rcx - mov rdx,QWORD PTR[8+rsi] - mov r8,QWORD PTR[rsi] - mov r9,QWORD PTR[16+rsi] - mov r10,QWORD PTR[40+rsi] - lea r11,QWORD PTR[56+rsi] - lea r12,QWORD PTR[24+rsi] - mov QWORD PTR[32+rsp],r10 - mov QWORD PTR[40+rsp],r11 - mov QWORD PTR[48+rsp],r12 - mov QWORD PTR[56+rsp],rcx - call QWORD PTR[__imp_RtlVirtualUnwind] - - mov eax,1 - add rsp,64 - popfq - pop r15 - pop r14 - pop r13 - pop r12 - pop rbp - pop rbx - pop rdi - pop rsi - DB 0F3h,0C3h ;repret - .text$ ENDS -.pdata SEGMENT READONLY ALIGN(4) - DD imagerel $L$SEH_begin_aesni_cbc_sha256_enc_xop - DD imagerel $L$SEH_end_aesni_cbc_sha256_enc_xop - DD imagerel $L$SEH_info_aesni_cbc_sha256_enc_xop - - DD imagerel $L$SEH_begin_aesni_cbc_sha256_enc_avx - DD imagerel $L$SEH_end_aesni_cbc_sha256_enc_avx - DD imagerel $L$SEH_info_aesni_cbc_sha256_enc_avx - -.pdata ENDS END diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/aes/aesni-x86_64.asm b/deps/openssl/asm_obsolete/x64-win32-masm/aes/aesni-x86_64.asm index 53d8afc950f174..5e848125d679fe 100644 --- a/deps/openssl/asm_obsolete/x64-win32-masm/aes/aesni-x86_64.asm +++ b/deps/openssl/asm_obsolete/x64-win32-masm/aes/aesni-x86_64.asm @@ -18,7 +18,10 @@ DB 102,15,56,220,209 lea r8,QWORD PTR[16+r8] jnz $L$oop_enc1_1 DB 102,15,56,221,209 + pxor xmm0,xmm0 + pxor xmm1,xmm1 movups XMMWORD PTR[rdx],xmm2 + pxor xmm2,xmm2 DB 0F3h,0C3h ;repret aesni_encrypt ENDP @@ -39,7 +42,10 @@ DB 102,15,56,222,209 lea r8,QWORD PTR[16+r8] jnz $L$oop_dec1_2 DB 102,15,56,223,209 + pxor xmm0,xmm0 + pxor xmm1,xmm1 movups XMMWORD PTR[rdx],xmm2 + pxor xmm2,xmm2 DB 0F3h,0C3h ;repret aesni_decrypt ENDP @@ -265,21 +271,18 @@ DB 102,15,56,220,217 pxor xmm6,xmm0 DB 102,15,56,220,225 pxor xmm7,xmm0 + movups xmm0,XMMWORD PTR[rax*1+rcx] add rax,16 -DB 102,15,56,220,233 -DB 102,15,56,220,241 -DB 102,15,56,220,249 - movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] jmp $L$enc_loop6_enter ALIGN 16 $L$enc_loop6:: DB 102,15,56,220,209 DB 102,15,56,220,217 DB 102,15,56,220,225 +$L$enc_loop6_enter:: DB 102,15,56,220,233 DB 102,15,56,220,241 DB 102,15,56,220,249 -$L$enc_loop6_enter:: movups xmm1,XMMWORD PTR[rax*1+rcx] add rax,32 DB 102,15,56,220,208 @@ -322,21 +325,18 @@ DB 102,15,56,222,217 pxor xmm6,xmm0 DB 102,15,56,222,225 pxor xmm7,xmm0 + movups xmm0,XMMWORD PTR[rax*1+rcx] add rax,16 -DB 102,15,56,222,233 -DB 102,15,56,222,241 -DB 102,15,56,222,249 - movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] jmp $L$dec_loop6_enter ALIGN 16 $L$dec_loop6:: DB 102,15,56,222,209 DB 102,15,56,222,217 DB 102,15,56,222,225 +$L$dec_loop6_enter:: DB 102,15,56,222,233 DB 102,15,56,222,241 DB 102,15,56,222,249 -$L$dec_loop6_enter:: movups xmm1,XMMWORD PTR[rax*1+rcx] add rax,32 DB 102,15,56,222,208 @@ -376,23 +376,18 @@ _aesni_encrypt8 PROC PRIVATE lea rcx,QWORD PTR[32+rax*1+rcx] neg rax DB 102,15,56,220,209 - add rax,16 pxor xmm7,xmm0 -DB 102,15,56,220,217 pxor xmm8,xmm0 +DB 102,15,56,220,217 pxor xmm9,xmm0 -DB 102,15,56,220,225 -DB 102,15,56,220,233 -DB 102,15,56,220,241 -DB 102,15,56,220,249 -DB 102,68,15,56,220,193 -DB 102,68,15,56,220,201 - movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] - jmp $L$enc_loop8_enter + movups xmm0,XMMWORD PTR[rax*1+rcx] + add rax,16 + jmp $L$enc_loop8_inner ALIGN 16 $L$enc_loop8:: DB 102,15,56,220,209 DB 102,15,56,220,217 +$L$enc_loop8_inner:: DB 102,15,56,220,225 DB 102,15,56,220,233 DB 102,15,56,220,241 @@ -445,23 +440,18 @@ _aesni_decrypt8 PROC PRIVATE lea rcx,QWORD PTR[32+rax*1+rcx] neg rax DB 102,15,56,222,209 - add rax,16 pxor xmm7,xmm0 -DB 102,15,56,222,217 pxor xmm8,xmm0 +DB 102,15,56,222,217 pxor xmm9,xmm0 -DB 102,15,56,222,225 -DB 102,15,56,222,233 -DB 102,15,56,222,241 -DB 102,15,56,222,249 -DB 102,68,15,56,222,193 -DB 102,68,15,56,222,201 - movups xmm0,XMMWORD PTR[((-16))+rax*1+rcx] - jmp $L$dec_loop8_enter + movups xmm0,XMMWORD PTR[rax*1+rcx] + add rax,16 + jmp $L$dec_loop8_inner ALIGN 16 $L$dec_loop8:: DB 102,15,56,222,209 DB 102,15,56,222,217 +$L$dec_loop8_inner:: DB 102,15,56,222,225 DB 102,15,56,222,233 DB 102,15,56,222,241 @@ -605,6 +595,7 @@ $L$ecb_enc_tail:: movups xmm7,XMMWORD PTR[80+rdi] je $L$ecb_enc_six movdqu xmm8,XMMWORD PTR[96+rdi] + xorps xmm9,xmm9 call _aesni_encrypt8 movups XMMWORD PTR[rsi],xmm2 movups XMMWORD PTR[16+rsi],xmm3 @@ -718,15 +709,23 @@ $L$ecb_dec_loop8_enter:: jnc $L$ecb_dec_loop8 movups XMMWORD PTR[rsi],xmm2 + pxor xmm2,xmm2 mov rcx,r11 movups XMMWORD PTR[16+rsi],xmm3 + pxor xmm3,xmm3 mov eax,r10d movups XMMWORD PTR[32+rsi],xmm4 + pxor xmm4,xmm4 movups XMMWORD PTR[48+rsi],xmm5 + pxor xmm5,xmm5 movups XMMWORD PTR[64+rsi],xmm6 + pxor xmm6,xmm6 movups XMMWORD PTR[80+rsi],xmm7 + pxor xmm7,xmm7 movups XMMWORD PTR[96+rsi],xmm8 + pxor xmm8,xmm8 movups XMMWORD PTR[112+rsi],xmm9 + pxor xmm9,xmm9 lea rsi,QWORD PTR[128+rsi] add rdx,080h jz $L$ecb_ret @@ -749,14 +748,23 @@ $L$ecb_dec_tail:: je $L$ecb_dec_six movups xmm8,XMMWORD PTR[96+rdi] movups xmm0,XMMWORD PTR[rcx] + xorps xmm9,xmm9 call _aesni_decrypt8 movups XMMWORD PTR[rsi],xmm2 + pxor xmm2,xmm2 movups XMMWORD PTR[16+rsi],xmm3 + pxor xmm3,xmm3 movups XMMWORD PTR[32+rsi],xmm4 + pxor xmm4,xmm4 movups XMMWORD PTR[48+rsi],xmm5 + pxor xmm5,xmm5 movups XMMWORD PTR[64+rsi],xmm6 + pxor xmm6,xmm6 movups XMMWORD PTR[80+rsi],xmm7 + pxor xmm7,xmm7 movups XMMWORD PTR[96+rsi],xmm8 + pxor xmm8,xmm8 + pxor xmm9,xmm9 jmp $L$ecb_ret ALIGN 16 $L$ecb_dec_one:: @@ -772,53 +780,81 @@ DB 102,15,56,222,209 jnz $L$oop_dec1_4 DB 102,15,56,223,209 movups XMMWORD PTR[rsi],xmm2 + pxor xmm2,xmm2 jmp $L$ecb_ret ALIGN 16 $L$ecb_dec_two:: call _aesni_decrypt2 movups XMMWORD PTR[rsi],xmm2 + pxor xmm2,xmm2 movups XMMWORD PTR[16+rsi],xmm3 + pxor xmm3,xmm3 jmp $L$ecb_ret ALIGN 16 $L$ecb_dec_three:: call _aesni_decrypt3 movups XMMWORD PTR[rsi],xmm2 + pxor xmm2,xmm2 movups XMMWORD PTR[16+rsi],xmm3 + pxor xmm3,xmm3 movups XMMWORD PTR[32+rsi],xmm4 + pxor xmm4,xmm4 jmp $L$ecb_ret ALIGN 16 $L$ecb_dec_four:: call _aesni_decrypt4 movups XMMWORD PTR[rsi],xmm2 + pxor xmm2,xmm2 movups XMMWORD PTR[16+rsi],xmm3 + pxor xmm3,xmm3 movups XMMWORD PTR[32+rsi],xmm4 + pxor xmm4,xmm4 movups XMMWORD PTR[48+rsi],xmm5 + pxor xmm5,xmm5 jmp $L$ecb_ret ALIGN 16 $L$ecb_dec_five:: xorps xmm7,xmm7 call _aesni_decrypt6 movups XMMWORD PTR[rsi],xmm2 + pxor xmm2,xmm2 movups XMMWORD PTR[16+rsi],xmm3 + pxor xmm3,xmm3 movups XMMWORD PTR[32+rsi],xmm4 + pxor xmm4,xmm4 movups XMMWORD PTR[48+rsi],xmm5 + pxor xmm5,xmm5 movups XMMWORD PTR[64+rsi],xmm6 + pxor xmm6,xmm6 + pxor xmm7,xmm7 jmp $L$ecb_ret ALIGN 16 $L$ecb_dec_six:: call _aesni_decrypt6 movups XMMWORD PTR[rsi],xmm2 + pxor xmm2,xmm2 movups XMMWORD PTR[16+rsi],xmm3 + pxor xmm3,xmm3 movups XMMWORD PTR[32+rsi],xmm4 + pxor xmm4,xmm4 movups XMMWORD PTR[48+rsi],xmm5 + pxor xmm5,xmm5 movups XMMWORD PTR[64+rsi],xmm6 + pxor xmm6,xmm6 movups XMMWORD PTR[80+rsi],xmm7 + pxor xmm7,xmm7 $L$ecb_ret:: + xorps xmm0,xmm0 + pxor xmm1,xmm1 movaps xmm6,XMMWORD PTR[rsp] + movaps XMMWORD PTR[rsp],xmm0 movaps xmm7,XMMWORD PTR[16+rsp] + movaps XMMWORD PTR[16+rsp],xmm0 movaps xmm8,XMMWORD PTR[32+rsp] + movaps XMMWORD PTR[32+rsp],xmm0 movaps xmm9,XMMWORD PTR[48+rsp] + movaps XMMWORD PTR[48+rsp],xmm0 lea rsp,QWORD PTR[88+rsp] $L$ecb_enc_ret:: mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue @@ -898,11 +934,21 @@ DB 102,15,56,0,215 lea rsi,QWORD PTR[16+rsi] jnz $L$ccm64_enc_outer + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 movups XMMWORD PTR[r9],xmm3 + pxor xmm3,xmm3 + pxor xmm8,xmm8 + pxor xmm6,xmm6 movaps xmm6,XMMWORD PTR[rsp] + movaps XMMWORD PTR[rsp],xmm0 movaps xmm7,XMMWORD PTR[16+rsp] + movaps XMMWORD PTR[16+rsp],xmm0 movaps xmm8,XMMWORD PTR[32+rsp] + movaps XMMWORD PTR[32+rsp],xmm0 movaps xmm9,XMMWORD PTR[48+rsp] + movaps XMMWORD PTR[48+rsp],xmm0 lea rsp,QWORD PTR[88+rsp] $L$ccm64_enc_ret:: mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue @@ -1016,11 +1062,21 @@ DB 102,15,56,220,217 lea r11,QWORD PTR[16+r11] jnz $L$oop_enc1_6 DB 102,15,56,221,217 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 movups XMMWORD PTR[r9],xmm3 + pxor xmm3,xmm3 + pxor xmm8,xmm8 + pxor xmm6,xmm6 movaps xmm6,XMMWORD PTR[rsp] + movaps XMMWORD PTR[rsp],xmm0 movaps xmm7,XMMWORD PTR[16+rsp] + movaps XMMWORD PTR[16+rsp],xmm0 movaps xmm8,XMMWORD PTR[32+rsp] + movaps XMMWORD PTR[32+rsp],xmm0 movaps xmm9,XMMWORD PTR[48+rsp] + movaps XMMWORD PTR[48+rsp],xmm0 lea rsp,QWORD PTR[88+rsp] $L$ccm64_dec_ret:: mov rdi,QWORD PTR[8+rsp] ;WIN64 epilogue @@ -1043,6 +1099,35 @@ $L$SEH_begin_aesni_ctr32_encrypt_blocks:: mov r8,QWORD PTR[40+rsp] + cmp rdx,1 + jne $L$ctr32_bulk + + + + movups xmm2,XMMWORD PTR[r8] + movups xmm3,XMMWORD PTR[rdi] + mov edx,DWORD PTR[240+rcx] + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_enc1_7:: +DB 102,15,56,220,209 + dec edx + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_enc1_7 +DB 102,15,56,221,209 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + xorps xmm2,xmm3 + pxor xmm3,xmm3 + movups XMMWORD PTR[rsi],xmm2 + xorps xmm2,xmm2 + jmp $L$ctr32_epilogue + +ALIGN 16 +$L$ctr32_bulk:: lea rax,QWORD PTR[rsp] push rbp sub rsp,288 @@ -1060,8 +1145,8 @@ $L$SEH_begin_aesni_ctr32_encrypt_blocks:: $L$ctr32_body:: lea rbp,QWORD PTR[((-8))+rax] - cmp rdx,1 - je $L$ctr32_one_shortcut + + movdqu xmm2,XMMWORD PTR[r8] movdqu xmm0,XMMWORD PTR[rcx] @@ -1452,11 +1537,14 @@ DB 102,69,15,56,221,202 lea rcx,QWORD PTR[((-128))+rcx] $L$ctr32_tail:: + + lea rcx,QWORD PTR[16+rcx] cmp rdx,4 jb $L$ctr32_loop3 je $L$ctr32_loop4 + shl eax,4 movdqa xmm8,XMMWORD PTR[96+rsp] pxor xmm9,xmm9 @@ -1559,40 +1647,43 @@ DB 102,15,56,221,225 movups xmm12,XMMWORD PTR[32+rdi] xorps xmm4,xmm12 movups XMMWORD PTR[32+rsi],xmm4 - jmp $L$ctr32_done -ALIGN 16 -$L$ctr32_one_shortcut:: - movups xmm2,XMMWORD PTR[r8] - movups xmm10,XMMWORD PTR[rdi] - mov eax,DWORD PTR[240+rcx] - movups xmm0,XMMWORD PTR[rcx] - movups xmm1,XMMWORD PTR[16+rcx] - lea rcx,QWORD PTR[32+rcx] - xorps xmm2,xmm0 -$L$oop_enc1_7:: -DB 102,15,56,220,209 - dec eax - movups xmm1,XMMWORD PTR[rcx] - lea rcx,QWORD PTR[16+rcx] - jnz $L$oop_enc1_7 -DB 102,15,56,221,209 - xorps xmm2,xmm10 - movups XMMWORD PTR[rsi],xmm2 - jmp $L$ctr32_done - -ALIGN 16 $L$ctr32_done:: + xorps xmm0,xmm0 + xor r11d,r11d + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 movaps xmm6,XMMWORD PTR[((-160))+rbp] + movaps XMMWORD PTR[(-160)+rbp],xmm0 movaps xmm7,XMMWORD PTR[((-144))+rbp] + movaps XMMWORD PTR[(-144)+rbp],xmm0 movaps xmm8,XMMWORD PTR[((-128))+rbp] + movaps XMMWORD PTR[(-128)+rbp],xmm0 movaps xmm9,XMMWORD PTR[((-112))+rbp] + movaps XMMWORD PTR[(-112)+rbp],xmm0 movaps xmm10,XMMWORD PTR[((-96))+rbp] + movaps XMMWORD PTR[(-96)+rbp],xmm0 movaps xmm11,XMMWORD PTR[((-80))+rbp] + movaps XMMWORD PTR[(-80)+rbp],xmm0 movaps xmm12,XMMWORD PTR[((-64))+rbp] + movaps XMMWORD PTR[(-64)+rbp],xmm0 movaps xmm13,XMMWORD PTR[((-48))+rbp] + movaps XMMWORD PTR[(-48)+rbp],xmm0 movaps xmm14,XMMWORD PTR[((-32))+rbp] + movaps XMMWORD PTR[(-32)+rbp],xmm0 movaps xmm15,XMMWORD PTR[((-16))+rbp] + movaps XMMWORD PTR[(-16)+rbp],xmm0 + movaps XMMWORD PTR[rsp],xmm0 + movaps XMMWORD PTR[16+rsp],xmm0 + movaps XMMWORD PTR[32+rsp],xmm0 + movaps XMMWORD PTR[48+rsp],xmm0 + movaps XMMWORD PTR[64+rsp],xmm0 + movaps XMMWORD PTR[80+rsp],xmm0 + movaps XMMWORD PTR[96+rsp],xmm0 + movaps XMMWORD PTR[112+rsp],xmm0 lea rsp,QWORD PTR[rbp] pop rbp $L$ctr32_epilogue:: @@ -1889,6 +1980,7 @@ DB 102,15,56,221,124,36,80 shr eax,4 $L$xts_enc_short:: + mov r10d,eax pxor xmm10,xmm0 add rdx,16*6 @@ -1917,6 +2009,7 @@ $L$xts_enc_short:: pxor xmm4,xmm12 pxor xmm5,xmm13 pxor xmm6,xmm14 + pxor xmm7,xmm7 call _aesni_encrypt6 @@ -2059,16 +2152,39 @@ DB 102,15,56,221,209 movups XMMWORD PTR[(-16)+rsi],xmm2 $L$xts_enc_ret:: + xorps xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 movaps xmm6,XMMWORD PTR[((-160))+rbp] + movaps XMMWORD PTR[(-160)+rbp],xmm0 movaps xmm7,XMMWORD PTR[((-144))+rbp] + movaps XMMWORD PTR[(-144)+rbp],xmm0 movaps xmm8,XMMWORD PTR[((-128))+rbp] + movaps XMMWORD PTR[(-128)+rbp],xmm0 movaps xmm9,XMMWORD PTR[((-112))+rbp] + movaps XMMWORD PTR[(-112)+rbp],xmm0 movaps xmm10,XMMWORD PTR[((-96))+rbp] + movaps XMMWORD PTR[(-96)+rbp],xmm0 movaps xmm11,XMMWORD PTR[((-80))+rbp] + movaps XMMWORD PTR[(-80)+rbp],xmm0 movaps xmm12,XMMWORD PTR[((-64))+rbp] + movaps XMMWORD PTR[(-64)+rbp],xmm0 movaps xmm13,XMMWORD PTR[((-48))+rbp] + movaps XMMWORD PTR[(-48)+rbp],xmm0 movaps xmm14,XMMWORD PTR[((-32))+rbp] + movaps XMMWORD PTR[(-32)+rbp],xmm0 movaps xmm15,XMMWORD PTR[((-16))+rbp] + movaps XMMWORD PTR[(-16)+rbp],xmm0 + movaps XMMWORD PTR[rsp],xmm0 + movaps XMMWORD PTR[16+rsp],xmm0 + movaps XMMWORD PTR[32+rsp],xmm0 + movaps XMMWORD PTR[48+rsp],xmm0 + movaps XMMWORD PTR[64+rsp],xmm0 + movaps XMMWORD PTR[80+rsp],xmm0 + movaps XMMWORD PTR[96+rsp],xmm0 lea rsp,QWORD PTR[rbp] pop rbp $L$xts_enc_epilogue:: @@ -2371,6 +2487,7 @@ DB 102,15,56,223,124,36,80 shr eax,4 $L$xts_dec_short:: + mov r10d,eax pxor xmm10,xmm0 pxor xmm11,xmm0 @@ -2573,16 +2690,39 @@ DB 102,15,56,223,209 movups XMMWORD PTR[rsi],xmm2 $L$xts_dec_ret:: + xorps xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 movaps xmm6,XMMWORD PTR[((-160))+rbp] + movaps XMMWORD PTR[(-160)+rbp],xmm0 movaps xmm7,XMMWORD PTR[((-144))+rbp] + movaps XMMWORD PTR[(-144)+rbp],xmm0 movaps xmm8,XMMWORD PTR[((-128))+rbp] + movaps XMMWORD PTR[(-128)+rbp],xmm0 movaps xmm9,XMMWORD PTR[((-112))+rbp] + movaps XMMWORD PTR[(-112)+rbp],xmm0 movaps xmm10,XMMWORD PTR[((-96))+rbp] + movaps XMMWORD PTR[(-96)+rbp],xmm0 movaps xmm11,XMMWORD PTR[((-80))+rbp] + movaps XMMWORD PTR[(-80)+rbp],xmm0 movaps xmm12,XMMWORD PTR[((-64))+rbp] + movaps XMMWORD PTR[(-64)+rbp],xmm0 movaps xmm13,XMMWORD PTR[((-48))+rbp] + movaps XMMWORD PTR[(-48)+rbp],xmm0 movaps xmm14,XMMWORD PTR[((-32))+rbp] + movaps XMMWORD PTR[(-32)+rbp],xmm0 movaps xmm15,XMMWORD PTR[((-16))+rbp] + movaps XMMWORD PTR[(-16)+rbp],xmm0 + movaps XMMWORD PTR[rsp],xmm0 + movaps XMMWORD PTR[16+rsp],xmm0 + movaps XMMWORD PTR[32+rsp],xmm0 + movaps XMMWORD PTR[48+rsp],xmm0 + movaps XMMWORD PTR[64+rsp],xmm0 + movaps XMMWORD PTR[80+rsp],xmm0 + movaps XMMWORD PTR[96+rsp],xmm0 lea rsp,QWORD PTR[rbp] pop rbp $L$xts_dec_epilogue:: @@ -2646,7 +2786,11 @@ DB 102,15,56,221,209 jnc $L$cbc_enc_loop add rdx,16 jnz $L$cbc_enc_tail + pxor xmm0,xmm0 + pxor xmm1,xmm1 movups XMMWORD PTR[r8],xmm2 + pxor xmm2,xmm2 + pxor xmm3,xmm3 jmp $L$cbc_ret $L$cbc_enc_tail:: @@ -2666,6 +2810,35 @@ $L$cbc_enc_tail:: ALIGN 16 $L$cbc_decrypt:: + cmp rdx,16 + jne $L$cbc_decrypt_bulk + + + + movdqu xmm2,XMMWORD PTR[rdi] + movdqu xmm3,XMMWORD PTR[r8] + movdqa xmm4,xmm2 + movups xmm0,XMMWORD PTR[rcx] + movups xmm1,XMMWORD PTR[16+rcx] + lea rcx,QWORD PTR[32+rcx] + xorps xmm2,xmm0 +$L$oop_dec1_16:: +DB 102,15,56,222,209 + dec r10d + movups xmm1,XMMWORD PTR[rcx] + lea rcx,QWORD PTR[16+rcx] + jnz $L$oop_dec1_16 +DB 102,15,56,223,209 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + movdqu XMMWORD PTR[r8],xmm4 + xorps xmm2,xmm3 + pxor xmm3,xmm3 + movups XMMWORD PTR[rsi],xmm2 + pxor xmm2,xmm2 + jmp $L$cbc_ret +ALIGN 16 +$L$cbc_decrypt_bulk:: lea rax,QWORD PTR[rsp] push rbp sub rsp,176 @@ -2913,7 +3086,7 @@ DB 102,69,15,56,223,202 movaps xmm2,xmm9 lea rcx,QWORD PTR[((-112))+rcx] add rdx,070h - jle $L$cbc_dec_tail_collected + jle $L$cbc_dec_clear_tail_collected movups XMMWORD PTR[rsi],xmm9 lea rsi,QWORD PTR[16+rsi] cmp rdx,050h @@ -2932,14 +3105,19 @@ $L$cbc_dec_six_or_seven:: movdqu XMMWORD PTR[rsi],xmm2 pxor xmm4,xmm12 movdqu XMMWORD PTR[16+rsi],xmm3 + pxor xmm3,xmm3 pxor xmm5,xmm13 movdqu XMMWORD PTR[32+rsi],xmm4 + pxor xmm4,xmm4 pxor xmm6,xmm14 movdqu XMMWORD PTR[48+rsi],xmm5 + pxor xmm5,xmm5 pxor xmm7,xmm15 movdqu XMMWORD PTR[64+rsi],xmm6 + pxor xmm6,xmm6 lea rsi,QWORD PTR[80+rsi] movdqa xmm2,xmm7 + pxor xmm7,xmm7 jmp $L$cbc_dec_tail_collected ALIGN 16 @@ -2954,16 +3132,23 @@ $L$cbc_dec_seven:: movdqu XMMWORD PTR[rsi],xmm2 pxor xmm4,xmm12 movdqu XMMWORD PTR[16+rsi],xmm3 + pxor xmm3,xmm3 pxor xmm5,xmm13 movdqu XMMWORD PTR[32+rsi],xmm4 + pxor xmm4,xmm4 pxor xmm6,xmm14 movdqu XMMWORD PTR[48+rsi],xmm5 + pxor xmm5,xmm5 pxor xmm7,xmm15 movdqu XMMWORD PTR[64+rsi],xmm6 + pxor xmm6,xmm6 pxor xmm8,xmm9 movdqu XMMWORD PTR[80+rsi],xmm7 + pxor xmm7,xmm7 lea rsi,QWORD PTR[96+rsi] movdqa xmm2,xmm8 + pxor xmm8,xmm8 + pxor xmm9,xmm9 jmp $L$cbc_dec_tail_collected ALIGN 16 @@ -3007,7 +3192,7 @@ $L$cbc_dec_loop6_enter:: movdqa xmm2,xmm7 add rdx,050h - jle $L$cbc_dec_tail_collected + jle $L$cbc_dec_clear_tail_collected movups XMMWORD PTR[rsi],xmm7 lea rsi,QWORD PTR[16+rsi] @@ -3042,12 +3227,17 @@ $L$cbc_dec_tail:: movdqu XMMWORD PTR[rsi],xmm2 pxor xmm4,xmm12 movdqu XMMWORD PTR[16+rsi],xmm3 + pxor xmm3,xmm3 pxor xmm5,xmm13 movdqu XMMWORD PTR[32+rsi],xmm4 + pxor xmm4,xmm4 pxor xmm6,xmm14 movdqu XMMWORD PTR[48+rsi],xmm5 + pxor xmm5,xmm5 lea rsi,QWORD PTR[64+rsi] movdqa xmm2,xmm6 + pxor xmm6,xmm6 + pxor xmm7,xmm7 sub rdx,010h jmp $L$cbc_dec_tail_collected @@ -3058,12 +3248,12 @@ $L$cbc_dec_one:: movups xmm1,XMMWORD PTR[16+rcx] lea rcx,QWORD PTR[32+rcx] xorps xmm2,xmm0 -$L$oop_dec1_16:: +$L$oop_dec1_17:: DB 102,15,56,222,209 dec eax movups xmm1,XMMWORD PTR[rcx] lea rcx,QWORD PTR[16+rcx] - jnz $L$oop_dec1_16 + jnz $L$oop_dec1_17 DB 102,15,56,223,209 xorps xmm2,xmm10 movaps xmm10,xmm11 @@ -3077,6 +3267,7 @@ $L$cbc_dec_two:: pxor xmm3,xmm11 movdqu XMMWORD PTR[rsi],xmm2 movdqa xmm2,xmm3 + pxor xmm3,xmm3 lea rsi,QWORD PTR[16+rsi] jmp $L$cbc_dec_tail_collected ALIGN 16 @@ -3089,7 +3280,9 @@ $L$cbc_dec_three:: movdqu XMMWORD PTR[rsi],xmm2 pxor xmm4,xmm12 movdqu XMMWORD PTR[16+rsi],xmm3 + pxor xmm3,xmm3 movdqa xmm2,xmm4 + pxor xmm4,xmm4 lea rsi,QWORD PTR[32+rsi] jmp $L$cbc_dec_tail_collected ALIGN 16 @@ -3102,39 +3295,61 @@ $L$cbc_dec_four:: movdqu XMMWORD PTR[rsi],xmm2 pxor xmm4,xmm12 movdqu XMMWORD PTR[16+rsi],xmm3 + pxor xmm3,xmm3 pxor xmm5,xmm13 movdqu XMMWORD PTR[32+rsi],xmm4 + pxor xmm4,xmm4 movdqa xmm2,xmm5 + pxor xmm5,xmm5 lea rsi,QWORD PTR[48+rsi] jmp $L$cbc_dec_tail_collected ALIGN 16 +$L$cbc_dec_clear_tail_collected:: + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 $L$cbc_dec_tail_collected:: movups XMMWORD PTR[r8],xmm10 and rdx,15 jnz $L$cbc_dec_tail_partial movups XMMWORD PTR[rsi],xmm2 + pxor xmm2,xmm2 jmp $L$cbc_dec_ret ALIGN 16 $L$cbc_dec_tail_partial:: movaps XMMWORD PTR[rsp],xmm2 + pxor xmm2,xmm2 mov rcx,16 mov rdi,rsi sub rcx,rdx lea rsi,QWORD PTR[rsp] DD 09066A4F3h + movdqa XMMWORD PTR[rsp],xmm2 $L$cbc_dec_ret:: + xorps xmm0,xmm0 + pxor xmm1,xmm1 movaps xmm6,XMMWORD PTR[16+rsp] + movaps XMMWORD PTR[16+rsp],xmm0 movaps xmm7,XMMWORD PTR[32+rsp] + movaps XMMWORD PTR[32+rsp],xmm0 movaps xmm8,XMMWORD PTR[48+rsp] + movaps XMMWORD PTR[48+rsp],xmm0 movaps xmm9,XMMWORD PTR[64+rsp] + movaps XMMWORD PTR[64+rsp],xmm0 movaps xmm10,XMMWORD PTR[80+rsp] + movaps XMMWORD PTR[80+rsp],xmm0 movaps xmm11,XMMWORD PTR[96+rsp] + movaps XMMWORD PTR[96+rsp],xmm0 movaps xmm12,XMMWORD PTR[112+rsp] + movaps XMMWORD PTR[112+rsp],xmm0 movaps xmm13,XMMWORD PTR[128+rsp] + movaps XMMWORD PTR[128+rsp],xmm0 movaps xmm14,XMMWORD PTR[144+rsp] + movaps XMMWORD PTR[144+rsp],xmm0 movaps xmm15,XMMWORD PTR[160+rsp] + movaps XMMWORD PTR[160+rsp],xmm0 lea rsp,QWORD PTR[rbp] pop rbp $L$cbc_ret:: @@ -3175,7 +3390,9 @@ DB 102,15,56,219,201 movups xmm0,XMMWORD PTR[r8] DB 102,15,56,219,192 + pxor xmm1,xmm1 movups XMMWORD PTR[rcx],xmm0 + pxor xmm0,xmm0 $L$dec_key_ret:: add rsp,8 DB 0F3h,0C3h ;repret @@ -3193,8 +3410,10 @@ DB 048h,083h,0ECh,008h test r8,r8 jz $L$enc_key_ret + mov r10d,268437504 movups xmm0,XMMWORD PTR[rcx] xorps xmm4,xmm4 + and r10d,DWORD PTR[((OPENSSL_ia32cap_P+4))] lea rax,QWORD PTR[16+r8] cmp edx,256 je $L$14rounds @@ -3205,6 +3424,9 @@ DB 048h,083h,0ECh,008h $L$10rounds:: mov edx,9 + cmp r10d,268435456 + je $L$10rounds_alt + movups XMMWORD PTR[r8],xmm0 DB 102,15,58,223,200,1 call $L$key_expansion_128_cold @@ -3231,10 +3453,80 @@ DB 102,15,58,223,200,54 xor eax,eax jmp $L$enc_key_ret +ALIGN 16 +$L$10rounds_alt:: + movdqa xmm5,XMMWORD PTR[$L$key_rotate] + mov r10d,8 + movdqa xmm4,XMMWORD PTR[$L$key_rcon1] + movdqa xmm2,xmm0 + movdqu XMMWORD PTR[r8],xmm0 + jmp $L$oop_key128 + +ALIGN 16 +$L$oop_key128:: +DB 102,15,56,0,197 +DB 102,15,56,221,196 + pslld xmm4,1 + lea rax,QWORD PTR[16+rax] + + movdqa xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm2,xmm3 + + pxor xmm0,xmm2 + movdqu XMMWORD PTR[(-16)+rax],xmm0 + movdqa xmm2,xmm0 + + dec r10d + jnz $L$oop_key128 + + movdqa xmm4,XMMWORD PTR[$L$key_rcon1b] + +DB 102,15,56,0,197 +DB 102,15,56,221,196 + pslld xmm4,1 + + movdqa xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm2,xmm3 + + pxor xmm0,xmm2 + movdqu XMMWORD PTR[rax],xmm0 + + movdqa xmm2,xmm0 +DB 102,15,56,0,197 +DB 102,15,56,221,196 + + movdqa xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm2,xmm3 + + pxor xmm0,xmm2 + movdqu XMMWORD PTR[16+rax],xmm0 + + mov DWORD PTR[96+rax],edx + xor eax,eax + jmp $L$enc_key_ret + ALIGN 16 $L$12rounds:: movq xmm2,QWORD PTR[16+rcx] mov edx,11 + cmp r10d,268435456 + je $L$12rounds_alt + movups XMMWORD PTR[r8],xmm0 DB 102,15,58,223,202,1 call $L$key_expansion_192a_cold @@ -3257,11 +3549,55 @@ DB 102,15,58,223,202,128 xor rax,rax jmp $L$enc_key_ret +ALIGN 16 +$L$12rounds_alt:: + movdqa xmm5,XMMWORD PTR[$L$key_rotate192] + movdqa xmm4,XMMWORD PTR[$L$key_rcon1] + mov r10d,8 + movdqu XMMWORD PTR[r8],xmm0 + jmp $L$oop_key192 + +ALIGN 16 +$L$oop_key192:: + movq QWORD PTR[rax],xmm2 + movdqa xmm1,xmm2 +DB 102,15,56,0,213 +DB 102,15,56,221,212 + pslld xmm4,1 + lea rax,QWORD PTR[24+rax] + + movdqa xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm0,xmm3 + + pshufd xmm3,xmm0,0ffh + pxor xmm3,xmm1 + pslldq xmm1,4 + pxor xmm3,xmm1 + + pxor xmm0,xmm2 + pxor xmm2,xmm3 + movdqu XMMWORD PTR[(-16)+rax],xmm0 + + dec r10d + jnz $L$oop_key192 + + mov DWORD PTR[32+rax],edx + xor eax,eax + jmp $L$enc_key_ret + ALIGN 16 $L$14rounds:: movups xmm2,XMMWORD PTR[16+rcx] mov edx,13 lea rax,QWORD PTR[16+rax] + cmp r10d,268435456 + je $L$14rounds_alt + movups XMMWORD PTR[r8],xmm0 movups XMMWORD PTR[16+r8],xmm2 DB 102,15,58,223,202,1 @@ -3295,10 +3631,70 @@ DB 102,15,58,223,202,64 xor rax,rax jmp $L$enc_key_ret +ALIGN 16 +$L$14rounds_alt:: + movdqa xmm5,XMMWORD PTR[$L$key_rotate] + movdqa xmm4,XMMWORD PTR[$L$key_rcon1] + mov r10d,7 + movdqu XMMWORD PTR[r8],xmm0 + movdqa xmm1,xmm2 + movdqu XMMWORD PTR[16+r8],xmm2 + jmp $L$oop_key256 + +ALIGN 16 +$L$oop_key256:: +DB 102,15,56,0,213 +DB 102,15,56,221,212 + + movdqa xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm0,xmm3 + pslld xmm4,1 + + pxor xmm0,xmm2 + movdqu XMMWORD PTR[rax],xmm0 + + dec r10d + jz $L$done_key256 + + pshufd xmm2,xmm0,0ffh + pxor xmm3,xmm3 +DB 102,15,56,221,211 + + movdqa xmm3,xmm1 + pslldq xmm1,4 + pxor xmm3,xmm1 + pslldq xmm1,4 + pxor xmm3,xmm1 + pslldq xmm1,4 + pxor xmm1,xmm3 + + pxor xmm2,xmm1 + movdqu XMMWORD PTR[16+rax],xmm2 + lea rax,QWORD PTR[32+rax] + movdqa xmm1,xmm2 + + jmp $L$oop_key256 + +$L$done_key256:: + mov DWORD PTR[16+rax],edx + xor eax,eax + jmp $L$enc_key_ret + ALIGN 16 $L$bad_keybits:: mov rax,-2 $L$enc_key_ret:: + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 add rsp,8 DB 0F3h,0C3h ;repret $L$SEH_end_set_encrypt_key:: @@ -3384,6 +3780,14 @@ $L$xts_magic:: DD 087h,0,1,0 $L$increment1:: DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +$L$key_rotate:: + DD 00c0f0e0dh,00c0f0e0dh,00c0f0e0dh,00c0f0e0dh +$L$key_rotate192:: + DD 004070605h,004070605h,004070605h,004070605h +$L$key_rcon1:: + DD 1,1,1,1 +$L$key_rcon1b:: + DD 01bh,01bh,01bh,01bh DB 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 DB 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 @@ -3489,7 +3893,7 @@ cbc_se_handler PROC PRIVATE mov rax,QWORD PTR[152+r8] mov rbx,QWORD PTR[248+r8] - lea r10,QWORD PTR[$L$cbc_decrypt] + lea r10,QWORD PTR[$L$cbc_decrypt_bulk] cmp rbx,r10 jb $L$common_seh_tail diff --git a/deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont5.asm b/deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont5.asm index c47130f44c90a1..f690ba58d37f19 100644 --- a/deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont5.asm +++ b/deps/openssl/asm_obsolete/x64-win32-masm/bn/x86_64-mont5.asm @@ -1832,11 +1832,16 @@ PUBLIC bn_get_bits5 ALIGN 16 bn_get_bits5 PROC PUBLIC - mov r10,rcx + lea r10,QWORD PTR[rcx] + lea r11,QWORD PTR[1+rcx] mov ecx,edx - shr edx,3 - movzx eax,WORD PTR[rdx*1+r10] - and ecx,7 + shr edx,4 + and ecx,15 + lea eax,DWORD PTR[((-8))+rcx] + cmp ecx,11 + cmova r10,r11 + cmova ecx,eax + movzx eax,WORD PTR[rdx*2+r10] shr eax,cl and eax,31 DB 0F3h,0C3h ;repret diff --git a/deps/openssl/asm_obsolete/x86-elf-gas/aes/aesni-x86.s b/deps/openssl/asm_obsolete/x86-elf-gas/aes/aesni-x86.s index a68f7cdbe9cbe6..3bbc4e47d612a8 100644 --- a/deps/openssl/asm_obsolete/x86-elf-gas/aes/aesni-x86.s +++ b/deps/openssl/asm_obsolete/x86-elf-gas/aes/aesni-x86.s @@ -21,7 +21,10 @@ aesni_encrypt: leal 16(%edx),%edx jnz .L000enc1_loop_1 .byte 102,15,56,221,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 movups %xmm2,(%eax) + pxor %xmm2,%xmm2 ret .size aesni_encrypt,.-.L_aesni_encrypt_begin .globl aesni_decrypt @@ -45,7 +48,10 @@ aesni_decrypt: leal 16(%edx),%edx jnz .L001dec1_loop_2 .byte 102,15,56,223,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 movups %xmm2,(%eax) + pxor %xmm2,%xmm2 ret .size aesni_decrypt,.-.L_aesni_decrypt_begin .type _aesni_encrypt2,@function @@ -259,17 +265,15 @@ _aesni_encrypt6: negl %ecx .byte 102,15,56,220,225 pxor %xmm0,%xmm7 + movups (%edx,%ecx,1),%xmm0 addl $16,%ecx -.byte 102,15,56,220,233 -.byte 102,15,56,220,241 -.byte 102,15,56,220,249 - movups -16(%edx,%ecx,1),%xmm0 - jmp .L_aesni_encrypt6_enter + jmp .L008_aesni_encrypt6_inner .align 16 -.L008enc6_loop: +.L009enc6_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 +.L008_aesni_encrypt6_inner: .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 @@ -283,7 +287,7 @@ _aesni_encrypt6: .byte 102,15,56,220,240 .byte 102,15,56,220,248 movups -16(%edx,%ecx,1),%xmm0 - jnz .L008enc6_loop + jnz .L009enc6_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 @@ -315,17 +319,15 @@ _aesni_decrypt6: negl %ecx .byte 102,15,56,222,225 pxor %xmm0,%xmm7 + movups (%edx,%ecx,1),%xmm0 addl $16,%ecx -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 - movups -16(%edx,%ecx,1),%xmm0 - jmp .L_aesni_decrypt6_enter + jmp .L010_aesni_decrypt6_inner .align 16 -.L009dec6_loop: +.L011dec6_loop: .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 +.L010_aesni_decrypt6_inner: .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 @@ -339,7 +341,7 @@ _aesni_decrypt6: .byte 102,15,56,222,240 .byte 102,15,56,222,248 movups -16(%edx,%ecx,1),%xmm0 - jnz .L009dec6_loop + jnz .L011dec6_loop .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 @@ -369,14 +371,14 @@ aesni_ecb_encrypt: movl 32(%esp),%edx movl 36(%esp),%ebx andl $-16,%eax - jz .L010ecb_ret + jz .L012ecb_ret movl 240(%edx),%ecx testl %ebx,%ebx - jz .L011ecb_decrypt + jz .L013ecb_decrypt movl %edx,%ebp movl %ecx,%ebx cmpl $96,%eax - jb .L012ecb_enc_tail + jb .L014ecb_enc_tail movdqu (%esi),%xmm2 movdqu 16(%esi),%xmm3 movdqu 32(%esi),%xmm4 @@ -385,9 +387,9 @@ aesni_ecb_encrypt: movdqu 80(%esi),%xmm7 leal 96(%esi),%esi subl $96,%eax - jmp .L013ecb_enc_loop6_enter + jmp .L015ecb_enc_loop6_enter .align 16 -.L014ecb_enc_loop6: +.L016ecb_enc_loop6: movups %xmm2,(%edi) movdqu (%esi),%xmm2 movups %xmm3,16(%edi) @@ -402,12 +404,12 @@ aesni_ecb_encrypt: leal 96(%edi),%edi movdqu 80(%esi),%xmm7 leal 96(%esi),%esi -.L013ecb_enc_loop6_enter: +.L015ecb_enc_loop6_enter: call _aesni_encrypt6 movl %ebp,%edx movl %ebx,%ecx subl $96,%eax - jnc .L014ecb_enc_loop6 + jnc .L016ecb_enc_loop6 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) @@ -416,18 +418,18 @@ aesni_ecb_encrypt: movups %xmm7,80(%edi) leal 96(%edi),%edi addl $96,%eax - jz .L010ecb_ret -.L012ecb_enc_tail: + jz .L012ecb_ret +.L014ecb_enc_tail: movups (%esi),%xmm2 cmpl $32,%eax - jb .L015ecb_enc_one + jb .L017ecb_enc_one movups 16(%esi),%xmm3 - je .L016ecb_enc_two + je .L018ecb_enc_two movups 32(%esi),%xmm4 cmpl $64,%eax - jb .L017ecb_enc_three + jb .L019ecb_enc_three movups 48(%esi),%xmm5 - je .L018ecb_enc_four + je .L020ecb_enc_four movups 64(%esi),%xmm6 xorps %xmm7,%xmm7 call _aesni_encrypt6 @@ -436,49 +438,49 @@ aesni_ecb_encrypt: movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) - jmp .L010ecb_ret + jmp .L012ecb_ret .align 16 -.L015ecb_enc_one: +.L017ecb_enc_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L019enc1_loop_3: +.L021enc1_loop_3: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L019enc1_loop_3 + jnz .L021enc1_loop_3 .byte 102,15,56,221,209 movups %xmm2,(%edi) - jmp .L010ecb_ret + jmp .L012ecb_ret .align 16 -.L016ecb_enc_two: +.L018ecb_enc_two: call _aesni_encrypt2 movups %xmm2,(%edi) movups %xmm3,16(%edi) - jmp .L010ecb_ret + jmp .L012ecb_ret .align 16 -.L017ecb_enc_three: +.L019ecb_enc_three: call _aesni_encrypt3 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) - jmp .L010ecb_ret + jmp .L012ecb_ret .align 16 -.L018ecb_enc_four: +.L020ecb_enc_four: call _aesni_encrypt4 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) movups %xmm5,48(%edi) - jmp .L010ecb_ret + jmp .L012ecb_ret .align 16 -.L011ecb_decrypt: +.L013ecb_decrypt: movl %edx,%ebp movl %ecx,%ebx cmpl $96,%eax - jb .L020ecb_dec_tail + jb .L022ecb_dec_tail movdqu (%esi),%xmm2 movdqu 16(%esi),%xmm3 movdqu 32(%esi),%xmm4 @@ -487,9 +489,9 @@ aesni_ecb_encrypt: movdqu 80(%esi),%xmm7 leal 96(%esi),%esi subl $96,%eax - jmp .L021ecb_dec_loop6_enter + jmp .L023ecb_dec_loop6_enter .align 16 -.L022ecb_dec_loop6: +.L024ecb_dec_loop6: movups %xmm2,(%edi) movdqu (%esi),%xmm2 movups %xmm3,16(%edi) @@ -504,12 +506,12 @@ aesni_ecb_encrypt: leal 96(%edi),%edi movdqu 80(%esi),%xmm7 leal 96(%esi),%esi -.L021ecb_dec_loop6_enter: +.L023ecb_dec_loop6_enter: call _aesni_decrypt6 movl %ebp,%edx movl %ebx,%ecx subl $96,%eax - jnc .L022ecb_dec_loop6 + jnc .L024ecb_dec_loop6 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) @@ -518,18 +520,18 @@ aesni_ecb_encrypt: movups %xmm7,80(%edi) leal 96(%edi),%edi addl $96,%eax - jz .L010ecb_ret -.L020ecb_dec_tail: + jz .L012ecb_ret +.L022ecb_dec_tail: movups (%esi),%xmm2 cmpl $32,%eax - jb .L023ecb_dec_one + jb .L025ecb_dec_one movups 16(%esi),%xmm3 - je .L024ecb_dec_two + je .L026ecb_dec_two movups 32(%esi),%xmm4 cmpl $64,%eax - jb .L025ecb_dec_three + jb .L027ecb_dec_three movups 48(%esi),%xmm5 - je .L026ecb_dec_four + je .L028ecb_dec_four movups 64(%esi),%xmm6 xorps %xmm7,%xmm7 call _aesni_decrypt6 @@ -538,43 +540,51 @@ aesni_ecb_encrypt: movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) - jmp .L010ecb_ret + jmp .L012ecb_ret .align 16 -.L023ecb_dec_one: +.L025ecb_dec_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L027dec1_loop_4: +.L029dec1_loop_4: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L027dec1_loop_4 + jnz .L029dec1_loop_4 .byte 102,15,56,223,209 movups %xmm2,(%edi) - jmp .L010ecb_ret + jmp .L012ecb_ret .align 16 -.L024ecb_dec_two: +.L026ecb_dec_two: call _aesni_decrypt2 movups %xmm2,(%edi) movups %xmm3,16(%edi) - jmp .L010ecb_ret + jmp .L012ecb_ret .align 16 -.L025ecb_dec_three: +.L027ecb_dec_three: call _aesni_decrypt3 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) - jmp .L010ecb_ret + jmp .L012ecb_ret .align 16 -.L026ecb_dec_four: +.L028ecb_dec_four: call _aesni_decrypt4 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) movups %xmm5,48(%edi) -.L010ecb_ret: +.L012ecb_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 popl %edi popl %esi popl %ebx @@ -621,7 +631,7 @@ aesni_ccm64_encrypt_blocks: leal 32(%edx,%ecx,1),%edx subl %ecx,%ebx .byte 102,15,56,0,253 -.L028ccm64_enc_outer: +.L030ccm64_enc_outer: movups (%ebp),%xmm0 movl %ebx,%ecx movups (%esi),%xmm6 @@ -630,7 +640,7 @@ aesni_ccm64_encrypt_blocks: xorps %xmm6,%xmm0 xorps %xmm0,%xmm3 movups 32(%ebp),%xmm0 -.L029ccm64_enc2_loop: +.L031ccm64_enc2_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 movups (%edx,%ecx,1),%xmm1 @@ -638,7 +648,7 @@ aesni_ccm64_encrypt_blocks: .byte 102,15,56,220,208 .byte 102,15,56,220,216 movups -16(%edx,%ecx,1),%xmm0 - jnz .L029ccm64_enc2_loop + jnz .L031ccm64_enc2_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 paddq 16(%esp),%xmm7 @@ -651,10 +661,18 @@ aesni_ccm64_encrypt_blocks: movups %xmm6,(%edi) .byte 102,15,56,0,213 leal 16(%edi),%edi - jnz .L028ccm64_enc_outer + jnz .L030ccm64_enc_outer movl 48(%esp),%esp movl 40(%esp),%edi movups %xmm3,(%edi) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 popl %edi popl %esi popl %ebx @@ -702,12 +720,12 @@ aesni_ccm64_decrypt_blocks: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L030enc1_loop_5: +.L032enc1_loop_5: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L030enc1_loop_5 + jnz .L032enc1_loop_5 .byte 102,15,56,221,209 shll $4,%ebx movl $16,%ecx @@ -717,16 +735,16 @@ aesni_ccm64_decrypt_blocks: subl %ebx,%ecx leal 32(%ebp,%ebx,1),%edx movl %ecx,%ebx - jmp .L031ccm64_dec_outer + jmp .L033ccm64_dec_outer .align 16 -.L031ccm64_dec_outer: +.L033ccm64_dec_outer: xorps %xmm2,%xmm6 movdqa %xmm7,%xmm2 movups %xmm6,(%edi) leal 16(%edi),%edi .byte 102,15,56,0,213 subl $1,%eax - jz .L032ccm64_dec_break + jz .L034ccm64_dec_break movups (%ebp),%xmm0 movl %ebx,%ecx movups 16(%ebp),%xmm1 @@ -734,7 +752,7 @@ aesni_ccm64_decrypt_blocks: xorps %xmm0,%xmm2 xorps %xmm6,%xmm3 movups 32(%ebp),%xmm0 -.L033ccm64_dec2_loop: +.L035ccm64_dec2_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 movups (%edx,%ecx,1),%xmm1 @@ -742,7 +760,7 @@ aesni_ccm64_decrypt_blocks: .byte 102,15,56,220,208 .byte 102,15,56,220,216 movups -16(%edx,%ecx,1),%xmm0 - jnz .L033ccm64_dec2_loop + jnz .L035ccm64_dec2_loop movups (%esi),%xmm6 paddq 16(%esp),%xmm7 .byte 102,15,56,220,209 @@ -750,9 +768,9 @@ aesni_ccm64_decrypt_blocks: .byte 102,15,56,221,208 .byte 102,15,56,221,216 leal 16(%esi),%esi - jmp .L031ccm64_dec_outer + jmp .L033ccm64_dec_outer .align 16 -.L032ccm64_dec_break: +.L034ccm64_dec_break: movl 240(%ebp),%ecx movl %ebp,%edx movups (%edx),%xmm0 @@ -760,16 +778,24 @@ aesni_ccm64_decrypt_blocks: xorps %xmm0,%xmm6 leal 32(%edx),%edx xorps %xmm6,%xmm3 -.L034enc1_loop_6: +.L036enc1_loop_6: .byte 102,15,56,220,217 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L034enc1_loop_6 + jnz .L036enc1_loop_6 .byte 102,15,56,221,217 movl 48(%esp),%esp movl 40(%esp),%edi movups %xmm3,(%edi) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 popl %edi popl %esi popl %ebx @@ -795,7 +821,7 @@ aesni_ctr32_encrypt_blocks: andl $-16,%esp movl %ebp,80(%esp) cmpl $1,%eax - je .L035ctr32_one_shortcut + je .L037ctr32_one_shortcut movdqu (%ebx),%xmm7 movl $202182159,(%esp) movl $134810123,4(%esp) @@ -833,7 +859,7 @@ aesni_ctr32_encrypt_blocks: pshufd $192,%xmm0,%xmm2 pshufd $128,%xmm0,%xmm3 cmpl $6,%eax - jb .L036ctr32_tail + jb .L038ctr32_tail pxor %xmm6,%xmm7 shll $4,%ecx movl $16,%ebx @@ -842,9 +868,9 @@ aesni_ctr32_encrypt_blocks: subl %ecx,%ebx leal 32(%edx,%ecx,1),%edx subl $6,%eax - jmp .L037ctr32_loop6 + jmp .L039ctr32_loop6 .align 16 -.L037ctr32_loop6: +.L039ctr32_loop6: pshufd $64,%xmm0,%xmm4 movdqa 32(%esp),%xmm0 pshufd $192,%xmm1,%xmm5 @@ -898,27 +924,27 @@ aesni_ctr32_encrypt_blocks: leal 96(%edi),%edi pshufd $128,%xmm0,%xmm3 subl $6,%eax - jnc .L037ctr32_loop6 + jnc .L039ctr32_loop6 addl $6,%eax - jz .L038ctr32_ret + jz .L040ctr32_ret movdqu (%ebp),%xmm7 movl %ebp,%edx pxor 32(%esp),%xmm7 movl 240(%ebp),%ecx -.L036ctr32_tail: +.L038ctr32_tail: por %xmm7,%xmm2 cmpl $2,%eax - jb .L039ctr32_one + jb .L041ctr32_one pshufd $64,%xmm0,%xmm4 por %xmm7,%xmm3 - je .L040ctr32_two + je .L042ctr32_two pshufd $192,%xmm1,%xmm5 por %xmm7,%xmm4 cmpl $4,%eax - jb .L041ctr32_three + jb .L043ctr32_three pshufd $128,%xmm1,%xmm6 por %xmm7,%xmm5 - je .L042ctr32_four + je .L044ctr32_four por %xmm7,%xmm6 call _aesni_encrypt6 movups (%esi),%xmm1 @@ -936,29 +962,29 @@ aesni_ctr32_encrypt_blocks: movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) - jmp .L038ctr32_ret + jmp .L040ctr32_ret .align 16 -.L035ctr32_one_shortcut: +.L037ctr32_one_shortcut: movups (%ebx),%xmm2 movl 240(%edx),%ecx -.L039ctr32_one: +.L041ctr32_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L043enc1_loop_7: +.L045enc1_loop_7: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L043enc1_loop_7 + jnz .L045enc1_loop_7 .byte 102,15,56,221,209 movups (%esi),%xmm6 xorps %xmm2,%xmm6 movups %xmm6,(%edi) - jmp .L038ctr32_ret + jmp .L040ctr32_ret .align 16 -.L040ctr32_two: +.L042ctr32_two: call _aesni_encrypt2 movups (%esi),%xmm5 movups 16(%esi),%xmm6 @@ -966,9 +992,9 @@ aesni_ctr32_encrypt_blocks: xorps %xmm6,%xmm3 movups %xmm2,(%edi) movups %xmm3,16(%edi) - jmp .L038ctr32_ret + jmp .L040ctr32_ret .align 16 -.L041ctr32_three: +.L043ctr32_three: call _aesni_encrypt3 movups (%esi),%xmm5 movups 16(%esi),%xmm6 @@ -979,9 +1005,9 @@ aesni_ctr32_encrypt_blocks: xorps %xmm7,%xmm4 movups %xmm3,16(%edi) movups %xmm4,32(%edi) - jmp .L038ctr32_ret + jmp .L040ctr32_ret .align 16 -.L042ctr32_four: +.L044ctr32_four: call _aesni_encrypt4 movups (%esi),%xmm6 movups 16(%esi),%xmm7 @@ -995,7 +1021,18 @@ aesni_ctr32_encrypt_blocks: xorps %xmm0,%xmm5 movups %xmm4,32(%edi) movups %xmm5,48(%edi) -.L038ctr32_ret: +.L040ctr32_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 movl 80(%esp),%esp popl %edi popl %esi @@ -1020,12 +1057,12 @@ aesni_xts_encrypt: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L044enc1_loop_8: +.L046enc1_loop_8: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L044enc1_loop_8 + jnz .L046enc1_loop_8 .byte 102,15,56,221,209 movl 20(%esp),%esi movl 24(%esp),%edi @@ -1049,14 +1086,14 @@ aesni_xts_encrypt: movl %edx,%ebp movl %ecx,%ebx subl $96,%eax - jc .L045xts_enc_short + jc .L047xts_enc_short shll $4,%ecx movl $16,%ebx subl %ecx,%ebx leal 32(%edx,%ecx,1),%edx - jmp .L046xts_enc_loop6 + jmp .L048xts_enc_loop6 .align 16 -.L046xts_enc_loop6: +.L048xts_enc_loop6: pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,(%esp) @@ -1145,23 +1182,23 @@ aesni_xts_encrypt: pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 subl $96,%eax - jnc .L046xts_enc_loop6 + jnc .L048xts_enc_loop6 movl 240(%ebp),%ecx movl %ebp,%edx movl %ecx,%ebx -.L045xts_enc_short: +.L047xts_enc_short: addl $96,%eax - jz .L047xts_enc_done6x + jz .L049xts_enc_done6x movdqa %xmm1,%xmm5 cmpl $32,%eax - jb .L048xts_enc_one + jb .L050xts_enc_one pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 - je .L049xts_enc_two + je .L051xts_enc_two pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm6 @@ -1170,7 +1207,7 @@ aesni_xts_encrypt: pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 cmpl $64,%eax - jb .L050xts_enc_three + jb .L052xts_enc_three pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm7 @@ -1180,7 +1217,7 @@ aesni_xts_encrypt: pxor %xmm2,%xmm1 movdqa %xmm5,(%esp) movdqa %xmm6,16(%esp) - je .L051xts_enc_four + je .L053xts_enc_four movdqa %xmm7,32(%esp) pshufd $19,%xmm0,%xmm7 movdqa %xmm1,48(%esp) @@ -1212,9 +1249,9 @@ aesni_xts_encrypt: movups %xmm5,48(%edi) movups %xmm6,64(%edi) leal 80(%edi),%edi - jmp .L052xts_enc_done + jmp .L054xts_enc_done .align 16 -.L048xts_enc_one: +.L050xts_enc_one: movups (%esi),%xmm2 leal 16(%esi),%esi xorps %xmm5,%xmm2 @@ -1222,20 +1259,20 @@ aesni_xts_encrypt: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L053enc1_loop_9: +.L055enc1_loop_9: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L053enc1_loop_9 + jnz .L055enc1_loop_9 .byte 102,15,56,221,209 xorps %xmm5,%xmm2 movups %xmm2,(%edi) leal 16(%edi),%edi movdqa %xmm5,%xmm1 - jmp .L052xts_enc_done + jmp .L054xts_enc_done .align 16 -.L049xts_enc_two: +.L051xts_enc_two: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1249,9 +1286,9 @@ aesni_xts_encrypt: movups %xmm3,16(%edi) leal 32(%edi),%edi movdqa %xmm6,%xmm1 - jmp .L052xts_enc_done + jmp .L054xts_enc_done .align 16 -.L050xts_enc_three: +.L052xts_enc_three: movaps %xmm1,%xmm7 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1269,9 +1306,9 @@ aesni_xts_encrypt: movups %xmm4,32(%edi) leal 48(%edi),%edi movdqa %xmm7,%xmm1 - jmp .L052xts_enc_done + jmp .L054xts_enc_done .align 16 -.L051xts_enc_four: +.L053xts_enc_four: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1293,28 +1330,28 @@ aesni_xts_encrypt: movups %xmm5,48(%edi) leal 64(%edi),%edi movdqa %xmm6,%xmm1 - jmp .L052xts_enc_done + jmp .L054xts_enc_done .align 16 -.L047xts_enc_done6x: +.L049xts_enc_done6x: movl 112(%esp),%eax andl $15,%eax - jz .L054xts_enc_ret + jz .L056xts_enc_ret movdqa %xmm1,%xmm5 movl %eax,112(%esp) - jmp .L055xts_enc_steal + jmp .L057xts_enc_steal .align 16 -.L052xts_enc_done: +.L054xts_enc_done: movl 112(%esp),%eax pxor %xmm0,%xmm0 andl $15,%eax - jz .L054xts_enc_ret + jz .L056xts_enc_ret pcmpgtd %xmm1,%xmm0 movl %eax,112(%esp) pshufd $19,%xmm0,%xmm5 paddq %xmm1,%xmm1 pand 96(%esp),%xmm5 pxor %xmm1,%xmm5 -.L055xts_enc_steal: +.L057xts_enc_steal: movzbl (%esi),%ecx movzbl -16(%edi),%edx leal 1(%esi),%esi @@ -1322,7 +1359,7 @@ aesni_xts_encrypt: movb %dl,(%edi) leal 1(%edi),%edi subl $1,%eax - jnz .L055xts_enc_steal + jnz .L057xts_enc_steal subl 112(%esp),%edi movl %ebp,%edx movl %ebx,%ecx @@ -1332,16 +1369,30 @@ aesni_xts_encrypt: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L056enc1_loop_10: +.L058enc1_loop_10: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L056enc1_loop_10 + jnz .L058enc1_loop_10 .byte 102,15,56,221,209 xorps %xmm5,%xmm2 movups %xmm2,-16(%edi) -.L054xts_enc_ret: +.L056xts_enc_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + movdqa %xmm0,(%esp) + pxor %xmm3,%xmm3 + movdqa %xmm0,16(%esp) + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 + movdqa %xmm0,80(%esp) movl 116(%esp),%esp popl %edi popl %esi @@ -1366,12 +1417,12 @@ aesni_xts_decrypt: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L057enc1_loop_11: +.L059enc1_loop_11: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L057enc1_loop_11 + jnz .L059enc1_loop_11 .byte 102,15,56,221,209 movl 20(%esp),%esi movl 24(%esp),%edi @@ -1400,14 +1451,14 @@ aesni_xts_decrypt: pcmpgtd %xmm1,%xmm0 andl $-16,%eax subl $96,%eax - jc .L058xts_dec_short + jc .L060xts_dec_short shll $4,%ecx movl $16,%ebx subl %ecx,%ebx leal 32(%edx,%ecx,1),%edx - jmp .L059xts_dec_loop6 + jmp .L061xts_dec_loop6 .align 16 -.L059xts_dec_loop6: +.L061xts_dec_loop6: pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,(%esp) @@ -1496,23 +1547,23 @@ aesni_xts_decrypt: pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 subl $96,%eax - jnc .L059xts_dec_loop6 + jnc .L061xts_dec_loop6 movl 240(%ebp),%ecx movl %ebp,%edx movl %ecx,%ebx -.L058xts_dec_short: +.L060xts_dec_short: addl $96,%eax - jz .L060xts_dec_done6x + jz .L062xts_dec_done6x movdqa %xmm1,%xmm5 cmpl $32,%eax - jb .L061xts_dec_one + jb .L063xts_dec_one pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 - je .L062xts_dec_two + je .L064xts_dec_two pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm6 @@ -1521,7 +1572,7 @@ aesni_xts_decrypt: pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 cmpl $64,%eax - jb .L063xts_dec_three + jb .L065xts_dec_three pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm7 @@ -1531,7 +1582,7 @@ aesni_xts_decrypt: pxor %xmm2,%xmm1 movdqa %xmm5,(%esp) movdqa %xmm6,16(%esp) - je .L064xts_dec_four + je .L066xts_dec_four movdqa %xmm7,32(%esp) pshufd $19,%xmm0,%xmm7 movdqa %xmm1,48(%esp) @@ -1563,9 +1614,9 @@ aesni_xts_decrypt: movups %xmm5,48(%edi) movups %xmm6,64(%edi) leal 80(%edi),%edi - jmp .L065xts_dec_done + jmp .L067xts_dec_done .align 16 -.L061xts_dec_one: +.L063xts_dec_one: movups (%esi),%xmm2 leal 16(%esi),%esi xorps %xmm5,%xmm2 @@ -1573,20 +1624,20 @@ aesni_xts_decrypt: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L066dec1_loop_12: +.L068dec1_loop_12: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L066dec1_loop_12 + jnz .L068dec1_loop_12 .byte 102,15,56,223,209 xorps %xmm5,%xmm2 movups %xmm2,(%edi) leal 16(%edi),%edi movdqa %xmm5,%xmm1 - jmp .L065xts_dec_done + jmp .L067xts_dec_done .align 16 -.L062xts_dec_two: +.L064xts_dec_two: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1600,9 +1651,9 @@ aesni_xts_decrypt: movups %xmm3,16(%edi) leal 32(%edi),%edi movdqa %xmm6,%xmm1 - jmp .L065xts_dec_done + jmp .L067xts_dec_done .align 16 -.L063xts_dec_three: +.L065xts_dec_three: movaps %xmm1,%xmm7 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1620,9 +1671,9 @@ aesni_xts_decrypt: movups %xmm4,32(%edi) leal 48(%edi),%edi movdqa %xmm7,%xmm1 - jmp .L065xts_dec_done + jmp .L067xts_dec_done .align 16 -.L064xts_dec_four: +.L066xts_dec_four: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1644,20 +1695,20 @@ aesni_xts_decrypt: movups %xmm5,48(%edi) leal 64(%edi),%edi movdqa %xmm6,%xmm1 - jmp .L065xts_dec_done + jmp .L067xts_dec_done .align 16 -.L060xts_dec_done6x: +.L062xts_dec_done6x: movl 112(%esp),%eax andl $15,%eax - jz .L067xts_dec_ret + jz .L069xts_dec_ret movl %eax,112(%esp) - jmp .L068xts_dec_only_one_more + jmp .L070xts_dec_only_one_more .align 16 -.L065xts_dec_done: +.L067xts_dec_done: movl 112(%esp),%eax pxor %xmm0,%xmm0 andl $15,%eax - jz .L067xts_dec_ret + jz .L069xts_dec_ret pcmpgtd %xmm1,%xmm0 movl %eax,112(%esp) pshufd $19,%xmm0,%xmm2 @@ -1667,7 +1718,7 @@ aesni_xts_decrypt: pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 -.L068xts_dec_only_one_more: +.L070xts_dec_only_one_more: pshufd $19,%xmm0,%xmm5 movdqa %xmm1,%xmm6 paddq %xmm1,%xmm1 @@ -1681,16 +1732,16 @@ aesni_xts_decrypt: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L069dec1_loop_13: +.L071dec1_loop_13: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L069dec1_loop_13 + jnz .L071dec1_loop_13 .byte 102,15,56,223,209 xorps %xmm5,%xmm2 movups %xmm2,(%edi) -.L070xts_dec_steal: +.L072xts_dec_steal: movzbl 16(%esi),%ecx movzbl (%edi),%edx leal 1(%esi),%esi @@ -1698,7 +1749,7 @@ aesni_xts_decrypt: movb %dl,16(%edi) leal 1(%edi),%edi subl $1,%eax - jnz .L070xts_dec_steal + jnz .L072xts_dec_steal subl 112(%esp),%edi movl %ebp,%edx movl %ebx,%ecx @@ -1708,16 +1759,30 @@ aesni_xts_decrypt: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L071dec1_loop_14: +.L073dec1_loop_14: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L071dec1_loop_14 + jnz .L073dec1_loop_14 .byte 102,15,56,223,209 xorps %xmm6,%xmm2 movups %xmm2,(%edi) -.L067xts_dec_ret: +.L069xts_dec_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + movdqa %xmm0,(%esp) + pxor %xmm3,%xmm3 + movdqa %xmm0,16(%esp) + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 + movdqa %xmm0,80(%esp) movl 116(%esp),%esp popl %edi popl %esi @@ -1743,7 +1808,7 @@ aesni_cbc_encrypt: movl 32(%esp),%edx movl 36(%esp),%ebp testl %eax,%eax - jz .L072cbc_abort + jz .L074cbc_abort cmpl $0,40(%esp) xchgl %esp,%ebx movups (%ebp),%xmm7 @@ -1751,14 +1816,14 @@ aesni_cbc_encrypt: movl %edx,%ebp movl %ebx,16(%esp) movl %ecx,%ebx - je .L073cbc_decrypt + je .L075cbc_decrypt movaps %xmm7,%xmm2 cmpl $16,%eax - jb .L074cbc_enc_tail + jb .L076cbc_enc_tail subl $16,%eax - jmp .L075cbc_enc_loop + jmp .L077cbc_enc_loop .align 16 -.L075cbc_enc_loop: +.L077cbc_enc_loop: movups (%esi),%xmm7 leal 16(%esi),%esi movups (%edx),%xmm0 @@ -1766,24 +1831,25 @@ aesni_cbc_encrypt: xorps %xmm0,%xmm7 leal 32(%edx),%edx xorps %xmm7,%xmm2 -.L076enc1_loop_15: +.L078enc1_loop_15: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L076enc1_loop_15 + jnz .L078enc1_loop_15 .byte 102,15,56,221,209 movl %ebx,%ecx movl %ebp,%edx movups %xmm2,(%edi) leal 16(%edi),%edi subl $16,%eax - jnc .L075cbc_enc_loop + jnc .L077cbc_enc_loop addl $16,%eax - jnz .L074cbc_enc_tail + jnz .L076cbc_enc_tail movaps %xmm2,%xmm7 - jmp .L077cbc_ret -.L074cbc_enc_tail: + pxor %xmm2,%xmm2 + jmp .L079cbc_ret +.L076cbc_enc_tail: movl %eax,%ecx .long 2767451785 movl $16,%ecx @@ -1794,20 +1860,20 @@ aesni_cbc_encrypt: movl %ebx,%ecx movl %edi,%esi movl %ebp,%edx - jmp .L075cbc_enc_loop + jmp .L077cbc_enc_loop .align 16 -.L073cbc_decrypt: +.L075cbc_decrypt: cmpl $80,%eax - jbe .L078cbc_dec_tail + jbe .L080cbc_dec_tail movaps %xmm7,(%esp) subl $80,%eax - jmp .L079cbc_dec_loop6_enter + jmp .L081cbc_dec_loop6_enter .align 16 -.L080cbc_dec_loop6: +.L082cbc_dec_loop6: movaps %xmm0,(%esp) movups %xmm7,(%edi) leal 16(%edi),%edi -.L079cbc_dec_loop6_enter: +.L081cbc_dec_loop6_enter: movdqu (%esi),%xmm2 movdqu 16(%esi),%xmm3 movdqu 32(%esi),%xmm4 @@ -1837,28 +1903,28 @@ aesni_cbc_encrypt: movups %xmm6,64(%edi) leal 80(%edi),%edi subl $96,%eax - ja .L080cbc_dec_loop6 + ja .L082cbc_dec_loop6 movaps %xmm7,%xmm2 movaps %xmm0,%xmm7 addl $80,%eax - jle .L081cbc_dec_tail_collected + jle .L083cbc_dec_clear_tail_collected movups %xmm2,(%edi) leal 16(%edi),%edi -.L078cbc_dec_tail: +.L080cbc_dec_tail: movups (%esi),%xmm2 movaps %xmm2,%xmm6 cmpl $16,%eax - jbe .L082cbc_dec_one + jbe .L084cbc_dec_one movups 16(%esi),%xmm3 movaps %xmm3,%xmm5 cmpl $32,%eax - jbe .L083cbc_dec_two + jbe .L085cbc_dec_two movups 32(%esi),%xmm4 cmpl $48,%eax - jbe .L084cbc_dec_three + jbe .L086cbc_dec_three movups 48(%esi),%xmm5 cmpl $64,%eax - jbe .L085cbc_dec_four + jbe .L087cbc_dec_four movups 64(%esi),%xmm6 movaps %xmm7,(%esp) movups (%esi),%xmm2 @@ -1876,55 +1942,62 @@ aesni_cbc_encrypt: xorps %xmm0,%xmm6 movups %xmm2,(%edi) movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 movups %xmm4,32(%edi) + pxor %xmm4,%xmm4 movups %xmm5,48(%edi) + pxor %xmm5,%xmm5 leal 64(%edi),%edi movaps %xmm6,%xmm2 + pxor %xmm6,%xmm6 subl $80,%eax - jmp .L081cbc_dec_tail_collected + jmp .L088cbc_dec_tail_collected .align 16 -.L082cbc_dec_one: +.L084cbc_dec_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -.L086dec1_loop_16: +.L089dec1_loop_16: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz .L086dec1_loop_16 + jnz .L089dec1_loop_16 .byte 102,15,56,223,209 xorps %xmm7,%xmm2 movaps %xmm6,%xmm7 subl $16,%eax - jmp .L081cbc_dec_tail_collected + jmp .L088cbc_dec_tail_collected .align 16 -.L083cbc_dec_two: +.L085cbc_dec_two: call _aesni_decrypt2 xorps %xmm7,%xmm2 xorps %xmm6,%xmm3 movups %xmm2,(%edi) movaps %xmm3,%xmm2 + pxor %xmm3,%xmm3 leal 16(%edi),%edi movaps %xmm5,%xmm7 subl $32,%eax - jmp .L081cbc_dec_tail_collected + jmp .L088cbc_dec_tail_collected .align 16 -.L084cbc_dec_three: +.L086cbc_dec_three: call _aesni_decrypt3 xorps %xmm7,%xmm2 xorps %xmm6,%xmm3 xorps %xmm5,%xmm4 movups %xmm2,(%edi) movaps %xmm4,%xmm2 + pxor %xmm4,%xmm4 movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 leal 32(%edi),%edi movups 32(%esi),%xmm7 subl $48,%eax - jmp .L081cbc_dec_tail_collected + jmp .L088cbc_dec_tail_collected .align 16 -.L085cbc_dec_four: +.L087cbc_dec_four: call _aesni_decrypt4 movups 16(%esi),%xmm1 movups 32(%esi),%xmm0 @@ -1934,28 +2007,44 @@ aesni_cbc_encrypt: movups %xmm2,(%edi) xorps %xmm1,%xmm4 movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 xorps %xmm0,%xmm5 movups %xmm4,32(%edi) + pxor %xmm4,%xmm4 leal 48(%edi),%edi movaps %xmm5,%xmm2 + pxor %xmm5,%xmm5 subl $64,%eax -.L081cbc_dec_tail_collected: + jmp .L088cbc_dec_tail_collected +.align 16 +.L083cbc_dec_clear_tail_collected: + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 +.L088cbc_dec_tail_collected: andl $15,%eax - jnz .L087cbc_dec_tail_partial + jnz .L090cbc_dec_tail_partial movups %xmm2,(%edi) - jmp .L077cbc_ret + pxor %xmm0,%xmm0 + jmp .L079cbc_ret .align 16 -.L087cbc_dec_tail_partial: +.L090cbc_dec_tail_partial: movaps %xmm2,(%esp) + pxor %xmm0,%xmm0 movl $16,%ecx movl %esp,%esi subl %eax,%ecx .long 2767451785 -.L077cbc_ret: + movdqa %xmm2,(%esp) +.L079cbc_ret: movl 16(%esp),%esp movl 36(%esp),%ebp + pxor %xmm2,%xmm2 + pxor %xmm1,%xmm1 movups %xmm7,(%ebp) -.L072cbc_abort: + pxor %xmm7,%xmm7 +.L074cbc_abort: popl %edi popl %esi popl %ebx @@ -1965,52 +2054,62 @@ aesni_cbc_encrypt: .type _aesni_set_encrypt_key,@function .align 16 _aesni_set_encrypt_key: + pushl %ebp + pushl %ebx testl %eax,%eax - jz .L088bad_pointer + jz .L091bad_pointer testl %edx,%edx - jz .L088bad_pointer + jz .L091bad_pointer + call .L092pic +.L092pic: + popl %ebx + leal .Lkey_const-.L092pic(%ebx),%ebx + leal OPENSSL_ia32cap_P,%ebp movups (%eax),%xmm0 xorps %xmm4,%xmm4 + movl 4(%ebp),%ebp leal 16(%edx),%edx + andl $268437504,%ebp cmpl $256,%ecx - je .L08914rounds + je .L09314rounds cmpl $192,%ecx - je .L09012rounds + je .L09412rounds cmpl $128,%ecx - jne .L091bad_keybits + jne .L095bad_keybits .align 16 -.L09210rounds: +.L09610rounds: + cmpl $268435456,%ebp + je .L09710rounds_alt movl $9,%ecx movups %xmm0,-16(%edx) .byte 102,15,58,223,200,1 - call .L093key_128_cold + call .L098key_128_cold .byte 102,15,58,223,200,2 - call .L094key_128 + call .L099key_128 .byte 102,15,58,223,200,4 - call .L094key_128 + call .L099key_128 .byte 102,15,58,223,200,8 - call .L094key_128 + call .L099key_128 .byte 102,15,58,223,200,16 - call .L094key_128 + call .L099key_128 .byte 102,15,58,223,200,32 - call .L094key_128 + call .L099key_128 .byte 102,15,58,223,200,64 - call .L094key_128 + call .L099key_128 .byte 102,15,58,223,200,128 - call .L094key_128 + call .L099key_128 .byte 102,15,58,223,200,27 - call .L094key_128 + call .L099key_128 .byte 102,15,58,223,200,54 - call .L094key_128 + call .L099key_128 movups %xmm0,(%edx) movl %ecx,80(%edx) - xorl %eax,%eax - ret + jmp .L100good_key .align 16 -.L094key_128: +.L099key_128: movups %xmm0,(%edx) leal 16(%edx),%edx -.L093key_128_cold: +.L098key_128_cold: shufps $16,%xmm0,%xmm4 xorps %xmm4,%xmm0 shufps $140,%xmm0,%xmm4 @@ -2019,38 +2118,91 @@ _aesni_set_encrypt_key: xorps %xmm1,%xmm0 ret .align 16 -.L09012rounds: +.L09710rounds_alt: + movdqa (%ebx),%xmm5 + movl $8,%ecx + movdqa 32(%ebx),%xmm4 + movdqa %xmm0,%xmm2 + movdqu %xmm0,-16(%edx) +.L101loop_key128: +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + leal 16(%edx),%edx + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,-16(%edx) + movdqa %xmm0,%xmm2 + decl %ecx + jnz .L101loop_key128 + movdqa 48(%ebx),%xmm4 +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,(%edx) + movdqa %xmm0,%xmm2 +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,16(%edx) + movl $9,%ecx + movl %ecx,96(%edx) + jmp .L100good_key +.align 16 +.L09412rounds: movq 16(%eax),%xmm2 + cmpl $268435456,%ebp + je .L10212rounds_alt movl $11,%ecx movups %xmm0,-16(%edx) .byte 102,15,58,223,202,1 - call .L095key_192a_cold + call .L103key_192a_cold .byte 102,15,58,223,202,2 - call .L096key_192b + call .L104key_192b .byte 102,15,58,223,202,4 - call .L097key_192a + call .L105key_192a .byte 102,15,58,223,202,8 - call .L096key_192b + call .L104key_192b .byte 102,15,58,223,202,16 - call .L097key_192a + call .L105key_192a .byte 102,15,58,223,202,32 - call .L096key_192b + call .L104key_192b .byte 102,15,58,223,202,64 - call .L097key_192a + call .L105key_192a .byte 102,15,58,223,202,128 - call .L096key_192b + call .L104key_192b movups %xmm0,(%edx) movl %ecx,48(%edx) - xorl %eax,%eax - ret + jmp .L100good_key .align 16 -.L097key_192a: +.L105key_192a: movups %xmm0,(%edx) leal 16(%edx),%edx .align 16 -.L095key_192a_cold: +.L103key_192a_cold: movaps %xmm2,%xmm5 -.L098key_192b_warm: +.L106key_192b_warm: shufps $16,%xmm0,%xmm4 movdqa %xmm2,%xmm3 xorps %xmm4,%xmm0 @@ -2064,56 +2216,90 @@ _aesni_set_encrypt_key: pxor %xmm3,%xmm2 ret .align 16 -.L096key_192b: +.L104key_192b: movaps %xmm0,%xmm3 shufps $68,%xmm0,%xmm5 movups %xmm5,(%edx) shufps $78,%xmm2,%xmm3 movups %xmm3,16(%edx) leal 32(%edx),%edx - jmp .L098key_192b_warm + jmp .L106key_192b_warm +.align 16 +.L10212rounds_alt: + movdqa 16(%ebx),%xmm5 + movdqa 32(%ebx),%xmm4 + movl $8,%ecx + movdqu %xmm0,-16(%edx) +.L107loop_key192: + movq %xmm2,(%edx) + movdqa %xmm2,%xmm1 +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + pslld $1,%xmm4 + leal 24(%edx),%edx + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pxor %xmm2,%xmm0 + pxor %xmm3,%xmm2 + movdqu %xmm0,-16(%edx) + decl %ecx + jnz .L107loop_key192 + movl $11,%ecx + movl %ecx,32(%edx) + jmp .L100good_key .align 16 -.L08914rounds: +.L09314rounds: movups 16(%eax),%xmm2 - movl $13,%ecx leal 16(%edx),%edx + cmpl $268435456,%ebp + je .L10814rounds_alt + movl $13,%ecx movups %xmm0,-32(%edx) movups %xmm2,-16(%edx) .byte 102,15,58,223,202,1 - call .L099key_256a_cold + call .L109key_256a_cold .byte 102,15,58,223,200,1 - call .L100key_256b + call .L110key_256b .byte 102,15,58,223,202,2 - call .L101key_256a + call .L111key_256a .byte 102,15,58,223,200,2 - call .L100key_256b + call .L110key_256b .byte 102,15,58,223,202,4 - call .L101key_256a + call .L111key_256a .byte 102,15,58,223,200,4 - call .L100key_256b + call .L110key_256b .byte 102,15,58,223,202,8 - call .L101key_256a + call .L111key_256a .byte 102,15,58,223,200,8 - call .L100key_256b + call .L110key_256b .byte 102,15,58,223,202,16 - call .L101key_256a + call .L111key_256a .byte 102,15,58,223,200,16 - call .L100key_256b + call .L110key_256b .byte 102,15,58,223,202,32 - call .L101key_256a + call .L111key_256a .byte 102,15,58,223,200,32 - call .L100key_256b + call .L110key_256b .byte 102,15,58,223,202,64 - call .L101key_256a + call .L111key_256a movups %xmm0,(%edx) movl %ecx,16(%edx) xorl %eax,%eax - ret + jmp .L100good_key .align 16 -.L101key_256a: +.L111key_256a: movups %xmm2,(%edx) leal 16(%edx),%edx -.L099key_256a_cold: +.L109key_256a_cold: shufps $16,%xmm0,%xmm4 xorps %xmm4,%xmm0 shufps $140,%xmm0,%xmm4 @@ -2122,7 +2308,7 @@ _aesni_set_encrypt_key: xorps %xmm1,%xmm0 ret .align 16 -.L100key_256b: +.L110key_256b: movups %xmm0,(%edx) leal 16(%edx),%edx shufps $16,%xmm2,%xmm4 @@ -2132,13 +2318,70 @@ _aesni_set_encrypt_key: shufps $170,%xmm1,%xmm1 xorps %xmm1,%xmm2 ret +.align 16 +.L10814rounds_alt: + movdqa (%ebx),%xmm5 + movdqa 32(%ebx),%xmm4 + movl $7,%ecx + movdqu %xmm0,-32(%edx) + movdqa %xmm2,%xmm1 + movdqu %xmm2,-16(%edx) +.L112loop_key256: +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + pslld $1,%xmm4 + pxor %xmm2,%xmm0 + movdqu %xmm0,(%edx) + decl %ecx + jz .L113done_key256 + pshufd $255,%xmm0,%xmm2 + pxor %xmm3,%xmm3 +.byte 102,15,56,221,211 + movdqa %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm3,%xmm1 + pxor %xmm1,%xmm2 + movdqu %xmm2,16(%edx) + leal 32(%edx),%edx + movdqa %xmm2,%xmm1 + jmp .L112loop_key256 +.L113done_key256: + movl $13,%ecx + movl %ecx,16(%edx) +.L100good_key: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + xorl %eax,%eax + popl %ebx + popl %ebp + ret .align 4 -.L088bad_pointer: +.L091bad_pointer: movl $-1,%eax + popl %ebx + popl %ebp ret .align 4 -.L091bad_keybits: +.L095bad_keybits: + pxor %xmm0,%xmm0 movl $-2,%eax + popl %ebx + popl %ebp ret .size _aesni_set_encrypt_key,.-_aesni_set_encrypt_key .globl aesni_set_encrypt_key @@ -2164,7 +2407,7 @@ aesni_set_decrypt_key: movl 12(%esp),%edx shll $4,%ecx testl %eax,%eax - jnz .L102dec_key_ret + jnz .L114dec_key_ret leal 16(%edx,%ecx,1),%eax movups (%edx),%xmm0 movups (%eax),%xmm1 @@ -2172,7 +2415,7 @@ aesni_set_decrypt_key: movups %xmm1,(%edx) leal 16(%edx),%edx leal -16(%eax),%eax -.L103dec_key_inverse: +.L115dec_key_inverse: movups (%edx),%xmm0 movups (%eax),%xmm1 .byte 102,15,56,219,192 @@ -2182,15 +2425,24 @@ aesni_set_decrypt_key: movups %xmm0,16(%eax) movups %xmm1,-16(%edx) cmpl %edx,%eax - ja .L103dec_key_inverse + ja .L115dec_key_inverse movups (%edx),%xmm0 .byte 102,15,56,219,192 movups %xmm0,(%edx) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 xorl %eax,%eax -.L102dec_key_ret: +.L114dec_key_ret: ret .size aesni_set_decrypt_key,.-.L_aesni_set_decrypt_key_begin +.align 64 +.Lkey_const: +.long 202313229,202313229,202313229,202313229 +.long 67569157,67569157,67569157,67569157 +.long 1,1,1,1 +.long 27,27,27,27 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 .byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 .byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 .byte 115,108,46,111,114,103,62,0 +.comm OPENSSL_ia32cap_P,16,4 diff --git a/deps/openssl/asm_obsolete/x86-macosx-gas/aes/aesni-x86.s b/deps/openssl/asm_obsolete/x86-macosx-gas/aes/aesni-x86.s index cecd5f83f71e6d..c1f5aec62ce4e3 100644 --- a/deps/openssl/asm_obsolete/x86-macosx-gas/aes/aesni-x86.s +++ b/deps/openssl/asm_obsolete/x86-macosx-gas/aes/aesni-x86.s @@ -20,7 +20,10 @@ L000enc1_loop_1: leal 16(%edx),%edx jnz L000enc1_loop_1 .byte 102,15,56,221,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 movups %xmm2,(%eax) + pxor %xmm2,%xmm2 ret .globl _aesni_decrypt .align 4 @@ -42,7 +45,10 @@ L001dec1_loop_2: leal 16(%edx),%edx jnz L001dec1_loop_2 .byte 102,15,56,223,209 + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 movups %xmm2,(%eax) + pxor %xmm2,%xmm2 ret .align 4 __aesni_encrypt2: @@ -242,17 +248,15 @@ __aesni_encrypt6: negl %ecx .byte 102,15,56,220,225 pxor %xmm0,%xmm7 + movups (%edx,%ecx,1),%xmm0 addl $16,%ecx -.byte 102,15,56,220,233 -.byte 102,15,56,220,241 -.byte 102,15,56,220,249 - movups -16(%edx,%ecx,1),%xmm0 - jmp L_aesni_encrypt6_enter + jmp L008_aesni_encrypt6_inner .align 4,0x90 -L008enc6_loop: +L009enc6_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 +L008_aesni_encrypt6_inner: .byte 102,15,56,220,233 .byte 102,15,56,220,241 .byte 102,15,56,220,249 @@ -266,7 +270,7 @@ L_aesni_encrypt6_enter: .byte 102,15,56,220,240 .byte 102,15,56,220,248 movups -16(%edx,%ecx,1),%xmm0 - jnz L008enc6_loop + jnz L009enc6_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 .byte 102,15,56,220,225 @@ -296,17 +300,15 @@ __aesni_decrypt6: negl %ecx .byte 102,15,56,222,225 pxor %xmm0,%xmm7 + movups (%edx,%ecx,1),%xmm0 addl $16,%ecx -.byte 102,15,56,222,233 -.byte 102,15,56,222,241 -.byte 102,15,56,222,249 - movups -16(%edx,%ecx,1),%xmm0 - jmp L_aesni_decrypt6_enter + jmp L010_aesni_decrypt6_inner .align 4,0x90 -L009dec6_loop: +L011dec6_loop: .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 +L010_aesni_decrypt6_inner: .byte 102,15,56,222,233 .byte 102,15,56,222,241 .byte 102,15,56,222,249 @@ -320,7 +322,7 @@ L_aesni_decrypt6_enter: .byte 102,15,56,222,240 .byte 102,15,56,222,248 movups -16(%edx,%ecx,1),%xmm0 - jnz L009dec6_loop + jnz L011dec6_loop .byte 102,15,56,222,209 .byte 102,15,56,222,217 .byte 102,15,56,222,225 @@ -348,14 +350,14 @@ L_aesni_ecb_encrypt_begin: movl 32(%esp),%edx movl 36(%esp),%ebx andl $-16,%eax - jz L010ecb_ret + jz L012ecb_ret movl 240(%edx),%ecx testl %ebx,%ebx - jz L011ecb_decrypt + jz L013ecb_decrypt movl %edx,%ebp movl %ecx,%ebx cmpl $96,%eax - jb L012ecb_enc_tail + jb L014ecb_enc_tail movdqu (%esi),%xmm2 movdqu 16(%esi),%xmm3 movdqu 32(%esi),%xmm4 @@ -364,9 +366,9 @@ L_aesni_ecb_encrypt_begin: movdqu 80(%esi),%xmm7 leal 96(%esi),%esi subl $96,%eax - jmp L013ecb_enc_loop6_enter + jmp L015ecb_enc_loop6_enter .align 4,0x90 -L014ecb_enc_loop6: +L016ecb_enc_loop6: movups %xmm2,(%edi) movdqu (%esi),%xmm2 movups %xmm3,16(%edi) @@ -381,12 +383,12 @@ L014ecb_enc_loop6: leal 96(%edi),%edi movdqu 80(%esi),%xmm7 leal 96(%esi),%esi -L013ecb_enc_loop6_enter: +L015ecb_enc_loop6_enter: call __aesni_encrypt6 movl %ebp,%edx movl %ebx,%ecx subl $96,%eax - jnc L014ecb_enc_loop6 + jnc L016ecb_enc_loop6 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) @@ -395,18 +397,18 @@ L013ecb_enc_loop6_enter: movups %xmm7,80(%edi) leal 96(%edi),%edi addl $96,%eax - jz L010ecb_ret -L012ecb_enc_tail: + jz L012ecb_ret +L014ecb_enc_tail: movups (%esi),%xmm2 cmpl $32,%eax - jb L015ecb_enc_one + jb L017ecb_enc_one movups 16(%esi),%xmm3 - je L016ecb_enc_two + je L018ecb_enc_two movups 32(%esi),%xmm4 cmpl $64,%eax - jb L017ecb_enc_three + jb L019ecb_enc_three movups 48(%esi),%xmm5 - je L018ecb_enc_four + je L020ecb_enc_four movups 64(%esi),%xmm6 xorps %xmm7,%xmm7 call __aesni_encrypt6 @@ -415,49 +417,49 @@ L012ecb_enc_tail: movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) - jmp L010ecb_ret + jmp L012ecb_ret .align 4,0x90 -L015ecb_enc_one: +L017ecb_enc_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L019enc1_loop_3: +L021enc1_loop_3: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L019enc1_loop_3 + jnz L021enc1_loop_3 .byte 102,15,56,221,209 movups %xmm2,(%edi) - jmp L010ecb_ret + jmp L012ecb_ret .align 4,0x90 -L016ecb_enc_two: +L018ecb_enc_two: call __aesni_encrypt2 movups %xmm2,(%edi) movups %xmm3,16(%edi) - jmp L010ecb_ret + jmp L012ecb_ret .align 4,0x90 -L017ecb_enc_three: +L019ecb_enc_three: call __aesni_encrypt3 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) - jmp L010ecb_ret + jmp L012ecb_ret .align 4,0x90 -L018ecb_enc_four: +L020ecb_enc_four: call __aesni_encrypt4 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) movups %xmm5,48(%edi) - jmp L010ecb_ret + jmp L012ecb_ret .align 4,0x90 -L011ecb_decrypt: +L013ecb_decrypt: movl %edx,%ebp movl %ecx,%ebx cmpl $96,%eax - jb L020ecb_dec_tail + jb L022ecb_dec_tail movdqu (%esi),%xmm2 movdqu 16(%esi),%xmm3 movdqu 32(%esi),%xmm4 @@ -466,9 +468,9 @@ L011ecb_decrypt: movdqu 80(%esi),%xmm7 leal 96(%esi),%esi subl $96,%eax - jmp L021ecb_dec_loop6_enter + jmp L023ecb_dec_loop6_enter .align 4,0x90 -L022ecb_dec_loop6: +L024ecb_dec_loop6: movups %xmm2,(%edi) movdqu (%esi),%xmm2 movups %xmm3,16(%edi) @@ -483,12 +485,12 @@ L022ecb_dec_loop6: leal 96(%edi),%edi movdqu 80(%esi),%xmm7 leal 96(%esi),%esi -L021ecb_dec_loop6_enter: +L023ecb_dec_loop6_enter: call __aesni_decrypt6 movl %ebp,%edx movl %ebx,%ecx subl $96,%eax - jnc L022ecb_dec_loop6 + jnc L024ecb_dec_loop6 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) @@ -497,18 +499,18 @@ L021ecb_dec_loop6_enter: movups %xmm7,80(%edi) leal 96(%edi),%edi addl $96,%eax - jz L010ecb_ret -L020ecb_dec_tail: + jz L012ecb_ret +L022ecb_dec_tail: movups (%esi),%xmm2 cmpl $32,%eax - jb L023ecb_dec_one + jb L025ecb_dec_one movups 16(%esi),%xmm3 - je L024ecb_dec_two + je L026ecb_dec_two movups 32(%esi),%xmm4 cmpl $64,%eax - jb L025ecb_dec_three + jb L027ecb_dec_three movups 48(%esi),%xmm5 - je L026ecb_dec_four + je L028ecb_dec_four movups 64(%esi),%xmm6 xorps %xmm7,%xmm7 call __aesni_decrypt6 @@ -517,43 +519,51 @@ L020ecb_dec_tail: movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) - jmp L010ecb_ret + jmp L012ecb_ret .align 4,0x90 -L023ecb_dec_one: +L025ecb_dec_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L027dec1_loop_4: +L029dec1_loop_4: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L027dec1_loop_4 + jnz L029dec1_loop_4 .byte 102,15,56,223,209 movups %xmm2,(%edi) - jmp L010ecb_ret + jmp L012ecb_ret .align 4,0x90 -L024ecb_dec_two: +L026ecb_dec_two: call __aesni_decrypt2 movups %xmm2,(%edi) movups %xmm3,16(%edi) - jmp L010ecb_ret + jmp L012ecb_ret .align 4,0x90 -L025ecb_dec_three: +L027ecb_dec_three: call __aesni_decrypt3 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) - jmp L010ecb_ret + jmp L012ecb_ret .align 4,0x90 -L026ecb_dec_four: +L028ecb_dec_four: call __aesni_decrypt4 movups %xmm2,(%edi) movups %xmm3,16(%edi) movups %xmm4,32(%edi) movups %xmm5,48(%edi) -L010ecb_ret: +L012ecb_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 popl %edi popl %esi popl %ebx @@ -598,7 +608,7 @@ L_aesni_ccm64_encrypt_blocks_begin: leal 32(%edx,%ecx,1),%edx subl %ecx,%ebx .byte 102,15,56,0,253 -L028ccm64_enc_outer: +L030ccm64_enc_outer: movups (%ebp),%xmm0 movl %ebx,%ecx movups (%esi),%xmm6 @@ -607,7 +617,7 @@ L028ccm64_enc_outer: xorps %xmm6,%xmm0 xorps %xmm0,%xmm3 movups 32(%ebp),%xmm0 -L029ccm64_enc2_loop: +L031ccm64_enc2_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 movups (%edx,%ecx,1),%xmm1 @@ -615,7 +625,7 @@ L029ccm64_enc2_loop: .byte 102,15,56,220,208 .byte 102,15,56,220,216 movups -16(%edx,%ecx,1),%xmm0 - jnz L029ccm64_enc2_loop + jnz L031ccm64_enc2_loop .byte 102,15,56,220,209 .byte 102,15,56,220,217 paddq 16(%esp),%xmm7 @@ -628,10 +638,18 @@ L029ccm64_enc2_loop: movups %xmm6,(%edi) .byte 102,15,56,0,213 leal 16(%edi),%edi - jnz L028ccm64_enc_outer + jnz L030ccm64_enc_outer movl 48(%esp),%esp movl 40(%esp),%edi movups %xmm3,(%edi) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 popl %edi popl %esi popl %ebx @@ -677,12 +695,12 @@ L_aesni_ccm64_decrypt_blocks_begin: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L030enc1_loop_5: +L032enc1_loop_5: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L030enc1_loop_5 + jnz L032enc1_loop_5 .byte 102,15,56,221,209 shll $4,%ebx movl $16,%ecx @@ -692,16 +710,16 @@ L030enc1_loop_5: subl %ebx,%ecx leal 32(%ebp,%ebx,1),%edx movl %ecx,%ebx - jmp L031ccm64_dec_outer + jmp L033ccm64_dec_outer .align 4,0x90 -L031ccm64_dec_outer: +L033ccm64_dec_outer: xorps %xmm2,%xmm6 movdqa %xmm7,%xmm2 movups %xmm6,(%edi) leal 16(%edi),%edi .byte 102,15,56,0,213 subl $1,%eax - jz L032ccm64_dec_break + jz L034ccm64_dec_break movups (%ebp),%xmm0 movl %ebx,%ecx movups 16(%ebp),%xmm1 @@ -709,7 +727,7 @@ L031ccm64_dec_outer: xorps %xmm0,%xmm2 xorps %xmm6,%xmm3 movups 32(%ebp),%xmm0 -L033ccm64_dec2_loop: +L035ccm64_dec2_loop: .byte 102,15,56,220,209 .byte 102,15,56,220,217 movups (%edx,%ecx,1),%xmm1 @@ -717,7 +735,7 @@ L033ccm64_dec2_loop: .byte 102,15,56,220,208 .byte 102,15,56,220,216 movups -16(%edx,%ecx,1),%xmm0 - jnz L033ccm64_dec2_loop + jnz L035ccm64_dec2_loop movups (%esi),%xmm6 paddq 16(%esp),%xmm7 .byte 102,15,56,220,209 @@ -725,9 +743,9 @@ L033ccm64_dec2_loop: .byte 102,15,56,221,208 .byte 102,15,56,221,216 leal 16(%esi),%esi - jmp L031ccm64_dec_outer + jmp L033ccm64_dec_outer .align 4,0x90 -L032ccm64_dec_break: +L034ccm64_dec_break: movl 240(%ebp),%ecx movl %ebp,%edx movups (%edx),%xmm0 @@ -735,16 +753,24 @@ L032ccm64_dec_break: xorps %xmm0,%xmm6 leal 32(%edx),%edx xorps %xmm6,%xmm3 -L034enc1_loop_6: +L036enc1_loop_6: .byte 102,15,56,220,217 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L034enc1_loop_6 + jnz L036enc1_loop_6 .byte 102,15,56,221,217 movl 48(%esp),%esp movl 40(%esp),%edi movups %xmm3,(%edi) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 popl %edi popl %esi popl %ebx @@ -768,7 +794,7 @@ L_aesni_ctr32_encrypt_blocks_begin: andl $-16,%esp movl %ebp,80(%esp) cmpl $1,%eax - je L035ctr32_one_shortcut + je L037ctr32_one_shortcut movdqu (%ebx),%xmm7 movl $202182159,(%esp) movl $134810123,4(%esp) @@ -806,7 +832,7 @@ L_aesni_ctr32_encrypt_blocks_begin: pshufd $192,%xmm0,%xmm2 pshufd $128,%xmm0,%xmm3 cmpl $6,%eax - jb L036ctr32_tail + jb L038ctr32_tail pxor %xmm6,%xmm7 shll $4,%ecx movl $16,%ebx @@ -815,9 +841,9 @@ L_aesni_ctr32_encrypt_blocks_begin: subl %ecx,%ebx leal 32(%edx,%ecx,1),%edx subl $6,%eax - jmp L037ctr32_loop6 + jmp L039ctr32_loop6 .align 4,0x90 -L037ctr32_loop6: +L039ctr32_loop6: pshufd $64,%xmm0,%xmm4 movdqa 32(%esp),%xmm0 pshufd $192,%xmm1,%xmm5 @@ -871,27 +897,27 @@ L037ctr32_loop6: leal 96(%edi),%edi pshufd $128,%xmm0,%xmm3 subl $6,%eax - jnc L037ctr32_loop6 + jnc L039ctr32_loop6 addl $6,%eax - jz L038ctr32_ret + jz L040ctr32_ret movdqu (%ebp),%xmm7 movl %ebp,%edx pxor 32(%esp),%xmm7 movl 240(%ebp),%ecx -L036ctr32_tail: +L038ctr32_tail: por %xmm7,%xmm2 cmpl $2,%eax - jb L039ctr32_one + jb L041ctr32_one pshufd $64,%xmm0,%xmm4 por %xmm7,%xmm3 - je L040ctr32_two + je L042ctr32_two pshufd $192,%xmm1,%xmm5 por %xmm7,%xmm4 cmpl $4,%eax - jb L041ctr32_three + jb L043ctr32_three pshufd $128,%xmm1,%xmm6 por %xmm7,%xmm5 - je L042ctr32_four + je L044ctr32_four por %xmm7,%xmm6 call __aesni_encrypt6 movups (%esi),%xmm1 @@ -909,29 +935,29 @@ L036ctr32_tail: movups %xmm4,32(%edi) movups %xmm5,48(%edi) movups %xmm6,64(%edi) - jmp L038ctr32_ret + jmp L040ctr32_ret .align 4,0x90 -L035ctr32_one_shortcut: +L037ctr32_one_shortcut: movups (%ebx),%xmm2 movl 240(%edx),%ecx -L039ctr32_one: +L041ctr32_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L043enc1_loop_7: +L045enc1_loop_7: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L043enc1_loop_7 + jnz L045enc1_loop_7 .byte 102,15,56,221,209 movups (%esi),%xmm6 xorps %xmm2,%xmm6 movups %xmm6,(%edi) - jmp L038ctr32_ret + jmp L040ctr32_ret .align 4,0x90 -L040ctr32_two: +L042ctr32_two: call __aesni_encrypt2 movups (%esi),%xmm5 movups 16(%esi),%xmm6 @@ -939,9 +965,9 @@ L040ctr32_two: xorps %xmm6,%xmm3 movups %xmm2,(%edi) movups %xmm3,16(%edi) - jmp L038ctr32_ret + jmp L040ctr32_ret .align 4,0x90 -L041ctr32_three: +L043ctr32_three: call __aesni_encrypt3 movups (%esi),%xmm5 movups 16(%esi),%xmm6 @@ -952,9 +978,9 @@ L041ctr32_three: xorps %xmm7,%xmm4 movups %xmm3,16(%edi) movups %xmm4,32(%edi) - jmp L038ctr32_ret + jmp L040ctr32_ret .align 4,0x90 -L042ctr32_four: +L044ctr32_four: call __aesni_encrypt4 movups (%esi),%xmm6 movups 16(%esi),%xmm7 @@ -968,7 +994,18 @@ L042ctr32_four: xorps %xmm0,%xmm5 movups %xmm4,32(%edi) movups %xmm5,48(%edi) -L038ctr32_ret: +L040ctr32_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 movl 80(%esp),%esp popl %edi popl %esi @@ -991,12 +1028,12 @@ L_aesni_xts_encrypt_begin: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L044enc1_loop_8: +L046enc1_loop_8: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L044enc1_loop_8 + jnz L046enc1_loop_8 .byte 102,15,56,221,209 movl 20(%esp),%esi movl 24(%esp),%edi @@ -1020,14 +1057,14 @@ L044enc1_loop_8: movl %edx,%ebp movl %ecx,%ebx subl $96,%eax - jc L045xts_enc_short + jc L047xts_enc_short shll $4,%ecx movl $16,%ebx subl %ecx,%ebx leal 32(%edx,%ecx,1),%edx - jmp L046xts_enc_loop6 + jmp L048xts_enc_loop6 .align 4,0x90 -L046xts_enc_loop6: +L048xts_enc_loop6: pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,(%esp) @@ -1116,23 +1153,23 @@ L046xts_enc_loop6: pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 subl $96,%eax - jnc L046xts_enc_loop6 + jnc L048xts_enc_loop6 movl 240(%ebp),%ecx movl %ebp,%edx movl %ecx,%ebx -L045xts_enc_short: +L047xts_enc_short: addl $96,%eax - jz L047xts_enc_done6x + jz L049xts_enc_done6x movdqa %xmm1,%xmm5 cmpl $32,%eax - jb L048xts_enc_one + jb L050xts_enc_one pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 - je L049xts_enc_two + je L051xts_enc_two pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm6 @@ -1141,7 +1178,7 @@ L045xts_enc_short: pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 cmpl $64,%eax - jb L050xts_enc_three + jb L052xts_enc_three pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm7 @@ -1151,7 +1188,7 @@ L045xts_enc_short: pxor %xmm2,%xmm1 movdqa %xmm5,(%esp) movdqa %xmm6,16(%esp) - je L051xts_enc_four + je L053xts_enc_four movdqa %xmm7,32(%esp) pshufd $19,%xmm0,%xmm7 movdqa %xmm1,48(%esp) @@ -1183,9 +1220,9 @@ L045xts_enc_short: movups %xmm5,48(%edi) movups %xmm6,64(%edi) leal 80(%edi),%edi - jmp L052xts_enc_done + jmp L054xts_enc_done .align 4,0x90 -L048xts_enc_one: +L050xts_enc_one: movups (%esi),%xmm2 leal 16(%esi),%esi xorps %xmm5,%xmm2 @@ -1193,20 +1230,20 @@ L048xts_enc_one: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L053enc1_loop_9: +L055enc1_loop_9: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L053enc1_loop_9 + jnz L055enc1_loop_9 .byte 102,15,56,221,209 xorps %xmm5,%xmm2 movups %xmm2,(%edi) leal 16(%edi),%edi movdqa %xmm5,%xmm1 - jmp L052xts_enc_done + jmp L054xts_enc_done .align 4,0x90 -L049xts_enc_two: +L051xts_enc_two: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1220,9 +1257,9 @@ L049xts_enc_two: movups %xmm3,16(%edi) leal 32(%edi),%edi movdqa %xmm6,%xmm1 - jmp L052xts_enc_done + jmp L054xts_enc_done .align 4,0x90 -L050xts_enc_three: +L052xts_enc_three: movaps %xmm1,%xmm7 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1240,9 +1277,9 @@ L050xts_enc_three: movups %xmm4,32(%edi) leal 48(%edi),%edi movdqa %xmm7,%xmm1 - jmp L052xts_enc_done + jmp L054xts_enc_done .align 4,0x90 -L051xts_enc_four: +L053xts_enc_four: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1264,28 +1301,28 @@ L051xts_enc_four: movups %xmm5,48(%edi) leal 64(%edi),%edi movdqa %xmm6,%xmm1 - jmp L052xts_enc_done + jmp L054xts_enc_done .align 4,0x90 -L047xts_enc_done6x: +L049xts_enc_done6x: movl 112(%esp),%eax andl $15,%eax - jz L054xts_enc_ret + jz L056xts_enc_ret movdqa %xmm1,%xmm5 movl %eax,112(%esp) - jmp L055xts_enc_steal + jmp L057xts_enc_steal .align 4,0x90 -L052xts_enc_done: +L054xts_enc_done: movl 112(%esp),%eax pxor %xmm0,%xmm0 andl $15,%eax - jz L054xts_enc_ret + jz L056xts_enc_ret pcmpgtd %xmm1,%xmm0 movl %eax,112(%esp) pshufd $19,%xmm0,%xmm5 paddq %xmm1,%xmm1 pand 96(%esp),%xmm5 pxor %xmm1,%xmm5 -L055xts_enc_steal: +L057xts_enc_steal: movzbl (%esi),%ecx movzbl -16(%edi),%edx leal 1(%esi),%esi @@ -1293,7 +1330,7 @@ L055xts_enc_steal: movb %dl,(%edi) leal 1(%edi),%edi subl $1,%eax - jnz L055xts_enc_steal + jnz L057xts_enc_steal subl 112(%esp),%edi movl %ebp,%edx movl %ebx,%ecx @@ -1303,16 +1340,30 @@ L055xts_enc_steal: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L056enc1_loop_10: +L058enc1_loop_10: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L056enc1_loop_10 + jnz L058enc1_loop_10 .byte 102,15,56,221,209 xorps %xmm5,%xmm2 movups %xmm2,-16(%edi) -L054xts_enc_ret: +L056xts_enc_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + movdqa %xmm0,(%esp) + pxor %xmm3,%xmm3 + movdqa %xmm0,16(%esp) + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 + movdqa %xmm0,80(%esp) movl 116(%esp),%esp popl %edi popl %esi @@ -1335,12 +1386,12 @@ L_aesni_xts_decrypt_begin: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L057enc1_loop_11: +L059enc1_loop_11: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L057enc1_loop_11 + jnz L059enc1_loop_11 .byte 102,15,56,221,209 movl 20(%esp),%esi movl 24(%esp),%edi @@ -1369,14 +1420,14 @@ L057enc1_loop_11: pcmpgtd %xmm1,%xmm0 andl $-16,%eax subl $96,%eax - jc L058xts_dec_short + jc L060xts_dec_short shll $4,%ecx movl $16,%ebx subl %ecx,%ebx leal 32(%edx,%ecx,1),%edx - jmp L059xts_dec_loop6 + jmp L061xts_dec_loop6 .align 4,0x90 -L059xts_dec_loop6: +L061xts_dec_loop6: pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,(%esp) @@ -1465,23 +1516,23 @@ L059xts_dec_loop6: pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 subl $96,%eax - jnc L059xts_dec_loop6 + jnc L061xts_dec_loop6 movl 240(%ebp),%ecx movl %ebp,%edx movl %ecx,%ebx -L058xts_dec_short: +L060xts_dec_short: addl $96,%eax - jz L060xts_dec_done6x + jz L062xts_dec_done6x movdqa %xmm1,%xmm5 cmpl $32,%eax - jb L061xts_dec_one + jb L063xts_dec_one pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 paddq %xmm1,%xmm1 pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 - je L062xts_dec_two + je L064xts_dec_two pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm6 @@ -1490,7 +1541,7 @@ L058xts_dec_short: pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 cmpl $64,%eax - jb L063xts_dec_three + jb L065xts_dec_three pshufd $19,%xmm0,%xmm2 pxor %xmm0,%xmm0 movdqa %xmm1,%xmm7 @@ -1500,7 +1551,7 @@ L058xts_dec_short: pxor %xmm2,%xmm1 movdqa %xmm5,(%esp) movdqa %xmm6,16(%esp) - je L064xts_dec_four + je L066xts_dec_four movdqa %xmm7,32(%esp) pshufd $19,%xmm0,%xmm7 movdqa %xmm1,48(%esp) @@ -1532,9 +1583,9 @@ L058xts_dec_short: movups %xmm5,48(%edi) movups %xmm6,64(%edi) leal 80(%edi),%edi - jmp L065xts_dec_done + jmp L067xts_dec_done .align 4,0x90 -L061xts_dec_one: +L063xts_dec_one: movups (%esi),%xmm2 leal 16(%esi),%esi xorps %xmm5,%xmm2 @@ -1542,20 +1593,20 @@ L061xts_dec_one: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L066dec1_loop_12: +L068dec1_loop_12: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L066dec1_loop_12 + jnz L068dec1_loop_12 .byte 102,15,56,223,209 xorps %xmm5,%xmm2 movups %xmm2,(%edi) leal 16(%edi),%edi movdqa %xmm5,%xmm1 - jmp L065xts_dec_done + jmp L067xts_dec_done .align 4,0x90 -L062xts_dec_two: +L064xts_dec_two: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1569,9 +1620,9 @@ L062xts_dec_two: movups %xmm3,16(%edi) leal 32(%edi),%edi movdqa %xmm6,%xmm1 - jmp L065xts_dec_done + jmp L067xts_dec_done .align 4,0x90 -L063xts_dec_three: +L065xts_dec_three: movaps %xmm1,%xmm7 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1589,9 +1640,9 @@ L063xts_dec_three: movups %xmm4,32(%edi) leal 48(%edi),%edi movdqa %xmm7,%xmm1 - jmp L065xts_dec_done + jmp L067xts_dec_done .align 4,0x90 -L064xts_dec_four: +L066xts_dec_four: movaps %xmm1,%xmm6 movups (%esi),%xmm2 movups 16(%esi),%xmm3 @@ -1613,20 +1664,20 @@ L064xts_dec_four: movups %xmm5,48(%edi) leal 64(%edi),%edi movdqa %xmm6,%xmm1 - jmp L065xts_dec_done + jmp L067xts_dec_done .align 4,0x90 -L060xts_dec_done6x: +L062xts_dec_done6x: movl 112(%esp),%eax andl $15,%eax - jz L067xts_dec_ret + jz L069xts_dec_ret movl %eax,112(%esp) - jmp L068xts_dec_only_one_more + jmp L070xts_dec_only_one_more .align 4,0x90 -L065xts_dec_done: +L067xts_dec_done: movl 112(%esp),%eax pxor %xmm0,%xmm0 andl $15,%eax - jz L067xts_dec_ret + jz L069xts_dec_ret pcmpgtd %xmm1,%xmm0 movl %eax,112(%esp) pshufd $19,%xmm0,%xmm2 @@ -1636,7 +1687,7 @@ L065xts_dec_done: pand %xmm3,%xmm2 pcmpgtd %xmm1,%xmm0 pxor %xmm2,%xmm1 -L068xts_dec_only_one_more: +L070xts_dec_only_one_more: pshufd $19,%xmm0,%xmm5 movdqa %xmm1,%xmm6 paddq %xmm1,%xmm1 @@ -1650,16 +1701,16 @@ L068xts_dec_only_one_more: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L069dec1_loop_13: +L071dec1_loop_13: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L069dec1_loop_13 + jnz L071dec1_loop_13 .byte 102,15,56,223,209 xorps %xmm5,%xmm2 movups %xmm2,(%edi) -L070xts_dec_steal: +L072xts_dec_steal: movzbl 16(%esi),%ecx movzbl (%edi),%edx leal 1(%esi),%esi @@ -1667,7 +1718,7 @@ L070xts_dec_steal: movb %dl,16(%edi) leal 1(%edi),%edi subl $1,%eax - jnz L070xts_dec_steal + jnz L072xts_dec_steal subl 112(%esp),%edi movl %ebp,%edx movl %ebx,%ecx @@ -1677,16 +1728,30 @@ L070xts_dec_steal: movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L071dec1_loop_14: +L073dec1_loop_14: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L071dec1_loop_14 + jnz L073dec1_loop_14 .byte 102,15,56,223,209 xorps %xmm6,%xmm2 movups %xmm2,(%edi) -L067xts_dec_ret: +L069xts_dec_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + movdqa %xmm0,(%esp) + pxor %xmm3,%xmm3 + movdqa %xmm0,16(%esp) + pxor %xmm4,%xmm4 + movdqa %xmm0,32(%esp) + pxor %xmm5,%xmm5 + movdqa %xmm0,48(%esp) + pxor %xmm6,%xmm6 + movdqa %xmm0,64(%esp) + pxor %xmm7,%xmm7 + movdqa %xmm0,80(%esp) movl 116(%esp),%esp popl %edi popl %esi @@ -1710,7 +1775,7 @@ L_aesni_cbc_encrypt_begin: movl 32(%esp),%edx movl 36(%esp),%ebp testl %eax,%eax - jz L072cbc_abort + jz L074cbc_abort cmpl $0,40(%esp) xchgl %esp,%ebx movups (%ebp),%xmm7 @@ -1718,14 +1783,14 @@ L_aesni_cbc_encrypt_begin: movl %edx,%ebp movl %ebx,16(%esp) movl %ecx,%ebx - je L073cbc_decrypt + je L075cbc_decrypt movaps %xmm7,%xmm2 cmpl $16,%eax - jb L074cbc_enc_tail + jb L076cbc_enc_tail subl $16,%eax - jmp L075cbc_enc_loop + jmp L077cbc_enc_loop .align 4,0x90 -L075cbc_enc_loop: +L077cbc_enc_loop: movups (%esi),%xmm7 leal 16(%esi),%esi movups (%edx),%xmm0 @@ -1733,24 +1798,25 @@ L075cbc_enc_loop: xorps %xmm0,%xmm7 leal 32(%edx),%edx xorps %xmm7,%xmm2 -L076enc1_loop_15: +L078enc1_loop_15: .byte 102,15,56,220,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L076enc1_loop_15 + jnz L078enc1_loop_15 .byte 102,15,56,221,209 movl %ebx,%ecx movl %ebp,%edx movups %xmm2,(%edi) leal 16(%edi),%edi subl $16,%eax - jnc L075cbc_enc_loop + jnc L077cbc_enc_loop addl $16,%eax - jnz L074cbc_enc_tail + jnz L076cbc_enc_tail movaps %xmm2,%xmm7 - jmp L077cbc_ret -L074cbc_enc_tail: + pxor %xmm2,%xmm2 + jmp L079cbc_ret +L076cbc_enc_tail: movl %eax,%ecx .long 2767451785 movl $16,%ecx @@ -1761,20 +1827,20 @@ L074cbc_enc_tail: movl %ebx,%ecx movl %edi,%esi movl %ebp,%edx - jmp L075cbc_enc_loop + jmp L077cbc_enc_loop .align 4,0x90 -L073cbc_decrypt: +L075cbc_decrypt: cmpl $80,%eax - jbe L078cbc_dec_tail + jbe L080cbc_dec_tail movaps %xmm7,(%esp) subl $80,%eax - jmp L079cbc_dec_loop6_enter + jmp L081cbc_dec_loop6_enter .align 4,0x90 -L080cbc_dec_loop6: +L082cbc_dec_loop6: movaps %xmm0,(%esp) movups %xmm7,(%edi) leal 16(%edi),%edi -L079cbc_dec_loop6_enter: +L081cbc_dec_loop6_enter: movdqu (%esi),%xmm2 movdqu 16(%esi),%xmm3 movdqu 32(%esi),%xmm4 @@ -1804,28 +1870,28 @@ L079cbc_dec_loop6_enter: movups %xmm6,64(%edi) leal 80(%edi),%edi subl $96,%eax - ja L080cbc_dec_loop6 + ja L082cbc_dec_loop6 movaps %xmm7,%xmm2 movaps %xmm0,%xmm7 addl $80,%eax - jle L081cbc_dec_tail_collected + jle L083cbc_dec_clear_tail_collected movups %xmm2,(%edi) leal 16(%edi),%edi -L078cbc_dec_tail: +L080cbc_dec_tail: movups (%esi),%xmm2 movaps %xmm2,%xmm6 cmpl $16,%eax - jbe L082cbc_dec_one + jbe L084cbc_dec_one movups 16(%esi),%xmm3 movaps %xmm3,%xmm5 cmpl $32,%eax - jbe L083cbc_dec_two + jbe L085cbc_dec_two movups 32(%esi),%xmm4 cmpl $48,%eax - jbe L084cbc_dec_three + jbe L086cbc_dec_three movups 48(%esi),%xmm5 cmpl $64,%eax - jbe L085cbc_dec_four + jbe L087cbc_dec_four movups 64(%esi),%xmm6 movaps %xmm7,(%esp) movups (%esi),%xmm2 @@ -1843,55 +1909,62 @@ L078cbc_dec_tail: xorps %xmm0,%xmm6 movups %xmm2,(%edi) movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 movups %xmm4,32(%edi) + pxor %xmm4,%xmm4 movups %xmm5,48(%edi) + pxor %xmm5,%xmm5 leal 64(%edi),%edi movaps %xmm6,%xmm2 + pxor %xmm6,%xmm6 subl $80,%eax - jmp L081cbc_dec_tail_collected + jmp L088cbc_dec_tail_collected .align 4,0x90 -L082cbc_dec_one: +L084cbc_dec_one: movups (%edx),%xmm0 movups 16(%edx),%xmm1 leal 32(%edx),%edx xorps %xmm0,%xmm2 -L086dec1_loop_16: +L089dec1_loop_16: .byte 102,15,56,222,209 decl %ecx movups (%edx),%xmm1 leal 16(%edx),%edx - jnz L086dec1_loop_16 + jnz L089dec1_loop_16 .byte 102,15,56,223,209 xorps %xmm7,%xmm2 movaps %xmm6,%xmm7 subl $16,%eax - jmp L081cbc_dec_tail_collected + jmp L088cbc_dec_tail_collected .align 4,0x90 -L083cbc_dec_two: +L085cbc_dec_two: call __aesni_decrypt2 xorps %xmm7,%xmm2 xorps %xmm6,%xmm3 movups %xmm2,(%edi) movaps %xmm3,%xmm2 + pxor %xmm3,%xmm3 leal 16(%edi),%edi movaps %xmm5,%xmm7 subl $32,%eax - jmp L081cbc_dec_tail_collected + jmp L088cbc_dec_tail_collected .align 4,0x90 -L084cbc_dec_three: +L086cbc_dec_three: call __aesni_decrypt3 xorps %xmm7,%xmm2 xorps %xmm6,%xmm3 xorps %xmm5,%xmm4 movups %xmm2,(%edi) movaps %xmm4,%xmm2 + pxor %xmm4,%xmm4 movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 leal 32(%edi),%edi movups 32(%esi),%xmm7 subl $48,%eax - jmp L081cbc_dec_tail_collected + jmp L088cbc_dec_tail_collected .align 4,0x90 -L085cbc_dec_four: +L087cbc_dec_four: call __aesni_decrypt4 movups 16(%esi),%xmm1 movups 32(%esi),%xmm0 @@ -1901,28 +1974,44 @@ L085cbc_dec_four: movups %xmm2,(%edi) xorps %xmm1,%xmm4 movups %xmm3,16(%edi) + pxor %xmm3,%xmm3 xorps %xmm0,%xmm5 movups %xmm4,32(%edi) + pxor %xmm4,%xmm4 leal 48(%edi),%edi movaps %xmm5,%xmm2 + pxor %xmm5,%xmm5 subl $64,%eax -L081cbc_dec_tail_collected: + jmp L088cbc_dec_tail_collected +.align 4,0x90 +L083cbc_dec_clear_tail_collected: + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + pxor %xmm6,%xmm6 +L088cbc_dec_tail_collected: andl $15,%eax - jnz L087cbc_dec_tail_partial + jnz L090cbc_dec_tail_partial movups %xmm2,(%edi) - jmp L077cbc_ret + pxor %xmm0,%xmm0 + jmp L079cbc_ret .align 4,0x90 -L087cbc_dec_tail_partial: +L090cbc_dec_tail_partial: movaps %xmm2,(%esp) + pxor %xmm0,%xmm0 movl $16,%ecx movl %esp,%esi subl %eax,%ecx .long 2767451785 -L077cbc_ret: + movdqa %xmm2,(%esp) +L079cbc_ret: movl 16(%esp),%esp movl 36(%esp),%ebp + pxor %xmm2,%xmm2 + pxor %xmm1,%xmm1 movups %xmm7,(%ebp) -L072cbc_abort: + pxor %xmm7,%xmm7 +L074cbc_abort: popl %edi popl %esi popl %ebx @@ -1930,52 +2019,62 @@ L072cbc_abort: ret .align 4 __aesni_set_encrypt_key: + pushl %ebp + pushl %ebx testl %eax,%eax - jz L088bad_pointer + jz L091bad_pointer testl %edx,%edx - jz L088bad_pointer + jz L091bad_pointer + call L092pic +L092pic: + popl %ebx + leal Lkey_const-L092pic(%ebx),%ebx + movl L_OPENSSL_ia32cap_P$non_lazy_ptr-Lkey_const(%ebx),%ebp movups (%eax),%xmm0 xorps %xmm4,%xmm4 + movl 4(%ebp),%ebp leal 16(%edx),%edx + andl $268437504,%ebp cmpl $256,%ecx - je L08914rounds + je L09314rounds cmpl $192,%ecx - je L09012rounds + je L09412rounds cmpl $128,%ecx - jne L091bad_keybits + jne L095bad_keybits .align 4,0x90 -L09210rounds: +L09610rounds: + cmpl $268435456,%ebp + je L09710rounds_alt movl $9,%ecx movups %xmm0,-16(%edx) .byte 102,15,58,223,200,1 - call L093key_128_cold + call L098key_128_cold .byte 102,15,58,223,200,2 - call L094key_128 + call L099key_128 .byte 102,15,58,223,200,4 - call L094key_128 + call L099key_128 .byte 102,15,58,223,200,8 - call L094key_128 + call L099key_128 .byte 102,15,58,223,200,16 - call L094key_128 + call L099key_128 .byte 102,15,58,223,200,32 - call L094key_128 + call L099key_128 .byte 102,15,58,223,200,64 - call L094key_128 + call L099key_128 .byte 102,15,58,223,200,128 - call L094key_128 + call L099key_128 .byte 102,15,58,223,200,27 - call L094key_128 + call L099key_128 .byte 102,15,58,223,200,54 - call L094key_128 + call L099key_128 movups %xmm0,(%edx) movl %ecx,80(%edx) - xorl %eax,%eax - ret + jmp L100good_key .align 4,0x90 -L094key_128: +L099key_128: movups %xmm0,(%edx) leal 16(%edx),%edx -L093key_128_cold: +L098key_128_cold: shufps $16,%xmm0,%xmm4 xorps %xmm4,%xmm0 shufps $140,%xmm0,%xmm4 @@ -1984,38 +2083,91 @@ L093key_128_cold: xorps %xmm1,%xmm0 ret .align 4,0x90 -L09012rounds: +L09710rounds_alt: + movdqa (%ebx),%xmm5 + movl $8,%ecx + movdqa 32(%ebx),%xmm4 + movdqa %xmm0,%xmm2 + movdqu %xmm0,-16(%edx) +L101loop_key128: +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + leal 16(%edx),%edx + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,-16(%edx) + movdqa %xmm0,%xmm2 + decl %ecx + jnz L101loop_key128 + movdqa 48(%ebx),%xmm4 +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + pslld $1,%xmm4 + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,(%edx) + movdqa %xmm0,%xmm2 +.byte 102,15,56,0,197 +.byte 102,15,56,221,196 + movdqa %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm2,%xmm3 + pslldq $4,%xmm2 + pxor %xmm3,%xmm2 + pxor %xmm2,%xmm0 + movdqu %xmm0,16(%edx) + movl $9,%ecx + movl %ecx,96(%edx) + jmp L100good_key +.align 4,0x90 +L09412rounds: movq 16(%eax),%xmm2 + cmpl $268435456,%ebp + je L10212rounds_alt movl $11,%ecx movups %xmm0,-16(%edx) .byte 102,15,58,223,202,1 - call L095key_192a_cold + call L103key_192a_cold .byte 102,15,58,223,202,2 - call L096key_192b + call L104key_192b .byte 102,15,58,223,202,4 - call L097key_192a + call L105key_192a .byte 102,15,58,223,202,8 - call L096key_192b + call L104key_192b .byte 102,15,58,223,202,16 - call L097key_192a + call L105key_192a .byte 102,15,58,223,202,32 - call L096key_192b + call L104key_192b .byte 102,15,58,223,202,64 - call L097key_192a + call L105key_192a .byte 102,15,58,223,202,128 - call L096key_192b + call L104key_192b movups %xmm0,(%edx) movl %ecx,48(%edx) - xorl %eax,%eax - ret + jmp L100good_key .align 4,0x90 -L097key_192a: +L105key_192a: movups %xmm0,(%edx) leal 16(%edx),%edx .align 4,0x90 -L095key_192a_cold: +L103key_192a_cold: movaps %xmm2,%xmm5 -L098key_192b_warm: +L106key_192b_warm: shufps $16,%xmm0,%xmm4 movdqa %xmm2,%xmm3 xorps %xmm4,%xmm0 @@ -2029,56 +2181,90 @@ L098key_192b_warm: pxor %xmm3,%xmm2 ret .align 4,0x90 -L096key_192b: +L104key_192b: movaps %xmm0,%xmm3 shufps $68,%xmm0,%xmm5 movups %xmm5,(%edx) shufps $78,%xmm2,%xmm3 movups %xmm3,16(%edx) leal 32(%edx),%edx - jmp L098key_192b_warm + jmp L106key_192b_warm .align 4,0x90 -L08914rounds: +L10212rounds_alt: + movdqa 16(%ebx),%xmm5 + movdqa 32(%ebx),%xmm4 + movl $8,%ecx + movdqu %xmm0,-16(%edx) +L107loop_key192: + movq %xmm2,(%edx) + movdqa %xmm2,%xmm1 +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + pslld $1,%xmm4 + leal 24(%edx),%edx + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + pshufd $255,%xmm0,%xmm3 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pxor %xmm2,%xmm0 + pxor %xmm3,%xmm2 + movdqu %xmm0,-16(%edx) + decl %ecx + jnz L107loop_key192 + movl $11,%ecx + movl %ecx,32(%edx) + jmp L100good_key +.align 4,0x90 +L09314rounds: movups 16(%eax),%xmm2 - movl $13,%ecx leal 16(%edx),%edx + cmpl $268435456,%ebp + je L10814rounds_alt + movl $13,%ecx movups %xmm0,-32(%edx) movups %xmm2,-16(%edx) .byte 102,15,58,223,202,1 - call L099key_256a_cold + call L109key_256a_cold .byte 102,15,58,223,200,1 - call L100key_256b + call L110key_256b .byte 102,15,58,223,202,2 - call L101key_256a + call L111key_256a .byte 102,15,58,223,200,2 - call L100key_256b + call L110key_256b .byte 102,15,58,223,202,4 - call L101key_256a + call L111key_256a .byte 102,15,58,223,200,4 - call L100key_256b + call L110key_256b .byte 102,15,58,223,202,8 - call L101key_256a + call L111key_256a .byte 102,15,58,223,200,8 - call L100key_256b + call L110key_256b .byte 102,15,58,223,202,16 - call L101key_256a + call L111key_256a .byte 102,15,58,223,200,16 - call L100key_256b + call L110key_256b .byte 102,15,58,223,202,32 - call L101key_256a + call L111key_256a .byte 102,15,58,223,200,32 - call L100key_256b + call L110key_256b .byte 102,15,58,223,202,64 - call L101key_256a + call L111key_256a movups %xmm0,(%edx) movl %ecx,16(%edx) xorl %eax,%eax - ret + jmp L100good_key .align 4,0x90 -L101key_256a: +L111key_256a: movups %xmm2,(%edx) leal 16(%edx),%edx -L099key_256a_cold: +L109key_256a_cold: shufps $16,%xmm0,%xmm4 xorps %xmm4,%xmm0 shufps $140,%xmm0,%xmm4 @@ -2087,7 +2273,7 @@ L099key_256a_cold: xorps %xmm1,%xmm0 ret .align 4,0x90 -L100key_256b: +L110key_256b: movups %xmm0,(%edx) leal 16(%edx),%edx shufps $16,%xmm2,%xmm4 @@ -2097,13 +2283,70 @@ L100key_256b: shufps $170,%xmm1,%xmm1 xorps %xmm1,%xmm2 ret +.align 4,0x90 +L10814rounds_alt: + movdqa (%ebx),%xmm5 + movdqa 32(%ebx),%xmm4 + movl $7,%ecx + movdqu %xmm0,-32(%edx) + movdqa %xmm2,%xmm1 + movdqu %xmm2,-16(%edx) +L112loop_key256: +.byte 102,15,56,0,213 +.byte 102,15,56,221,212 + movdqa %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm0,%xmm3 + pslldq $4,%xmm0 + pxor %xmm3,%xmm0 + pslld $1,%xmm4 + pxor %xmm2,%xmm0 + movdqu %xmm0,(%edx) + decl %ecx + jz L113done_key256 + pshufd $255,%xmm0,%xmm2 + pxor %xmm3,%xmm3 +.byte 102,15,56,221,211 + movdqa %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm1,%xmm3 + pslldq $4,%xmm1 + pxor %xmm3,%xmm1 + pxor %xmm1,%xmm2 + movdqu %xmm2,16(%edx) + leal 32(%edx),%edx + movdqa %xmm2,%xmm1 + jmp L112loop_key256 +L113done_key256: + movl $13,%ecx + movl %ecx,16(%edx) +L100good_key: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 + xorl %eax,%eax + popl %ebx + popl %ebp + ret .align 2,0x90 -L088bad_pointer: +L091bad_pointer: movl $-1,%eax + popl %ebx + popl %ebp ret .align 2,0x90 -L091bad_keybits: +L095bad_keybits: + pxor %xmm0,%xmm0 movl $-2,%eax + popl %ebx + popl %ebp ret .globl _aesni_set_encrypt_key .align 4 @@ -2125,7 +2368,7 @@ L_aesni_set_decrypt_key_begin: movl 12(%esp),%edx shll $4,%ecx testl %eax,%eax - jnz L102dec_key_ret + jnz L114dec_key_ret leal 16(%edx,%ecx,1),%eax movups (%edx),%xmm0 movups (%eax),%xmm1 @@ -2133,7 +2376,7 @@ L_aesni_set_decrypt_key_begin: movups %xmm1,(%edx) leal 16(%edx),%edx leal -16(%eax),%eax -L103dec_key_inverse: +L115dec_key_inverse: movups (%edx),%xmm0 movups (%eax),%xmm1 .byte 102,15,56,219,192 @@ -2143,14 +2386,27 @@ L103dec_key_inverse: movups %xmm0,16(%eax) movups %xmm1,-16(%edx) cmpl %edx,%eax - ja L103dec_key_inverse + ja L115dec_key_inverse movups (%edx),%xmm0 .byte 102,15,56,219,192 movups %xmm0,(%edx) + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 xorl %eax,%eax -L102dec_key_ret: +L114dec_key_ret: ret +.align 6,0x90 +Lkey_const: +.long 202313229,202313229,202313229,202313229 +.long 67569157,67569157,67569157,67569157 +.long 1,1,1,1 +.long 27,27,27,27 .byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 .byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 .byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 .byte 115,108,46,111,114,103,62,0 +.section __IMPORT,__pointers,non_lazy_symbol_pointers +L_OPENSSL_ia32cap_P$non_lazy_ptr: +.indirect_symbol _OPENSSL_ia32cap_P +.long 0 +.comm _OPENSSL_ia32cap_P,16,2 diff --git a/deps/openssl/asm_obsolete/x86-win32-masm/aes/aesni-x86.asm b/deps/openssl/asm_obsolete/x86-win32-masm/aes/aesni-x86.asm index 43fdb5a0345e93..6511c21bcf87ca 100644 --- a/deps/openssl/asm_obsolete/x86-win32-masm/aes/aesni-x86.asm +++ b/deps/openssl/asm_obsolete/x86-win32-masm/aes/aesni-x86.asm @@ -17,6 +17,7 @@ IF @Version LT 800 ELSE .text$ SEGMENT ALIGN(64) 'CODE' ENDIF +;EXTERN _OPENSSL_ia32cap_P:NEAR ALIGN 16 _aesni_encrypt PROC PUBLIC $L_aesni_encrypt_begin:: @@ -36,7 +37,10 @@ DB 102,15,56,220,209 lea edx,DWORD PTR 16[edx] jnz $L000enc1_loop_1 DB 102,15,56,221,209 + pxor xmm0,xmm0 + pxor xmm1,xmm1 movups XMMWORD PTR [eax],xmm2 + pxor xmm2,xmm2 ret _aesni_encrypt ENDP ALIGN 16 @@ -58,7 +62,10 @@ DB 102,15,56,222,209 lea edx,DWORD PTR 16[edx] jnz $L001dec1_loop_2 DB 102,15,56,223,209 + pxor xmm0,xmm0 + pxor xmm1,xmm1 movups XMMWORD PTR [eax],xmm2 + pxor xmm2,xmm2 ret _aesni_decrypt ENDP ALIGN 16 @@ -265,17 +272,15 @@ DB 102,15,56,220,217 neg ecx DB 102,15,56,220,225 pxor xmm7,xmm0 + movups xmm0,XMMWORD PTR [ecx*1+edx] add ecx,16 -DB 102,15,56,220,233 -DB 102,15,56,220,241 -DB 102,15,56,220,249 - movups xmm0,XMMWORD PTR [ecx*1+edx-16] - jmp $L_aesni_encrypt6_enter + jmp $L008_aesni_encrypt6_inner ALIGN 16 -$L008enc6_loop: +$L009enc6_loop: DB 102,15,56,220,209 DB 102,15,56,220,217 DB 102,15,56,220,225 +$L008_aesni_encrypt6_inner: DB 102,15,56,220,233 DB 102,15,56,220,241 DB 102,15,56,220,249 @@ -289,7 +294,7 @@ DB 102,15,56,220,232 DB 102,15,56,220,240 DB 102,15,56,220,248 movups xmm0,XMMWORD PTR [ecx*1+edx-16] - jnz $L008enc6_loop + jnz $L009enc6_loop DB 102,15,56,220,209 DB 102,15,56,220,217 DB 102,15,56,220,225 @@ -320,17 +325,15 @@ DB 102,15,56,222,217 neg ecx DB 102,15,56,222,225 pxor xmm7,xmm0 + movups xmm0,XMMWORD PTR [ecx*1+edx] add ecx,16 -DB 102,15,56,222,233 -DB 102,15,56,222,241 -DB 102,15,56,222,249 - movups xmm0,XMMWORD PTR [ecx*1+edx-16] - jmp $L_aesni_decrypt6_enter + jmp $L010_aesni_decrypt6_inner ALIGN 16 -$L009dec6_loop: +$L011dec6_loop: DB 102,15,56,222,209 DB 102,15,56,222,217 DB 102,15,56,222,225 +$L010_aesni_decrypt6_inner: DB 102,15,56,222,233 DB 102,15,56,222,241 DB 102,15,56,222,249 @@ -344,7 +347,7 @@ DB 102,15,56,222,232 DB 102,15,56,222,240 DB 102,15,56,222,248 movups xmm0,XMMWORD PTR [ecx*1+edx-16] - jnz $L009dec6_loop + jnz $L011dec6_loop DB 102,15,56,222,209 DB 102,15,56,222,217 DB 102,15,56,222,225 @@ -372,14 +375,14 @@ $L_aesni_ecb_encrypt_begin:: mov edx,DWORD PTR 32[esp] mov ebx,DWORD PTR 36[esp] and eax,-16 - jz $L010ecb_ret + jz $L012ecb_ret mov ecx,DWORD PTR 240[edx] test ebx,ebx - jz $L011ecb_decrypt + jz $L013ecb_decrypt mov ebp,edx mov ebx,ecx cmp eax,96 - jb $L012ecb_enc_tail + jb $L014ecb_enc_tail movdqu xmm2,XMMWORD PTR [esi] movdqu xmm3,XMMWORD PTR 16[esi] movdqu xmm4,XMMWORD PTR 32[esi] @@ -388,9 +391,9 @@ $L_aesni_ecb_encrypt_begin:: movdqu xmm7,XMMWORD PTR 80[esi] lea esi,DWORD PTR 96[esi] sub eax,96 - jmp $L013ecb_enc_loop6_enter + jmp $L015ecb_enc_loop6_enter ALIGN 16 -$L014ecb_enc_loop6: +$L016ecb_enc_loop6: movups XMMWORD PTR [edi],xmm2 movdqu xmm2,XMMWORD PTR [esi] movups XMMWORD PTR 16[edi],xmm3 @@ -405,12 +408,12 @@ $L014ecb_enc_loop6: lea edi,DWORD PTR 96[edi] movdqu xmm7,XMMWORD PTR 80[esi] lea esi,DWORD PTR 96[esi] -$L013ecb_enc_loop6_enter: +$L015ecb_enc_loop6_enter: call __aesni_encrypt6 mov edx,ebp mov ecx,ebx sub eax,96 - jnc $L014ecb_enc_loop6 + jnc $L016ecb_enc_loop6 movups XMMWORD PTR [edi],xmm2 movups XMMWORD PTR 16[edi],xmm3 movups XMMWORD PTR 32[edi],xmm4 @@ -419,18 +422,18 @@ $L013ecb_enc_loop6_enter: movups XMMWORD PTR 80[edi],xmm7 lea edi,DWORD PTR 96[edi] add eax,96 - jz $L010ecb_ret -$L012ecb_enc_tail: + jz $L012ecb_ret +$L014ecb_enc_tail: movups xmm2,XMMWORD PTR [esi] cmp eax,32 - jb $L015ecb_enc_one + jb $L017ecb_enc_one movups xmm3,XMMWORD PTR 16[esi] - je $L016ecb_enc_two + je $L018ecb_enc_two movups xmm4,XMMWORD PTR 32[esi] cmp eax,64 - jb $L017ecb_enc_three + jb $L019ecb_enc_three movups xmm5,XMMWORD PTR 48[esi] - je $L018ecb_enc_four + je $L020ecb_enc_four movups xmm6,XMMWORD PTR 64[esi] xorps xmm7,xmm7 call __aesni_encrypt6 @@ -439,49 +442,49 @@ $L012ecb_enc_tail: movups XMMWORD PTR 32[edi],xmm4 movups XMMWORD PTR 48[edi],xmm5 movups XMMWORD PTR 64[edi],xmm6 - jmp $L010ecb_ret + jmp $L012ecb_ret ALIGN 16 -$L015ecb_enc_one: +$L017ecb_enc_one: movups xmm0,XMMWORD PTR [edx] movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L019enc1_loop_3: +$L021enc1_loop_3: DB 102,15,56,220,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L019enc1_loop_3 + jnz $L021enc1_loop_3 DB 102,15,56,221,209 movups XMMWORD PTR [edi],xmm2 - jmp $L010ecb_ret + jmp $L012ecb_ret ALIGN 16 -$L016ecb_enc_two: +$L018ecb_enc_two: call __aesni_encrypt2 movups XMMWORD PTR [edi],xmm2 movups XMMWORD PTR 16[edi],xmm3 - jmp $L010ecb_ret + jmp $L012ecb_ret ALIGN 16 -$L017ecb_enc_three: +$L019ecb_enc_three: call __aesni_encrypt3 movups XMMWORD PTR [edi],xmm2 movups XMMWORD PTR 16[edi],xmm3 movups XMMWORD PTR 32[edi],xmm4 - jmp $L010ecb_ret + jmp $L012ecb_ret ALIGN 16 -$L018ecb_enc_four: +$L020ecb_enc_four: call __aesni_encrypt4 movups XMMWORD PTR [edi],xmm2 movups XMMWORD PTR 16[edi],xmm3 movups XMMWORD PTR 32[edi],xmm4 movups XMMWORD PTR 48[edi],xmm5 - jmp $L010ecb_ret + jmp $L012ecb_ret ALIGN 16 -$L011ecb_decrypt: +$L013ecb_decrypt: mov ebp,edx mov ebx,ecx cmp eax,96 - jb $L020ecb_dec_tail + jb $L022ecb_dec_tail movdqu xmm2,XMMWORD PTR [esi] movdqu xmm3,XMMWORD PTR 16[esi] movdqu xmm4,XMMWORD PTR 32[esi] @@ -490,9 +493,9 @@ $L011ecb_decrypt: movdqu xmm7,XMMWORD PTR 80[esi] lea esi,DWORD PTR 96[esi] sub eax,96 - jmp $L021ecb_dec_loop6_enter + jmp $L023ecb_dec_loop6_enter ALIGN 16 -$L022ecb_dec_loop6: +$L024ecb_dec_loop6: movups XMMWORD PTR [edi],xmm2 movdqu xmm2,XMMWORD PTR [esi] movups XMMWORD PTR 16[edi],xmm3 @@ -507,12 +510,12 @@ $L022ecb_dec_loop6: lea edi,DWORD PTR 96[edi] movdqu xmm7,XMMWORD PTR 80[esi] lea esi,DWORD PTR 96[esi] -$L021ecb_dec_loop6_enter: +$L023ecb_dec_loop6_enter: call __aesni_decrypt6 mov edx,ebp mov ecx,ebx sub eax,96 - jnc $L022ecb_dec_loop6 + jnc $L024ecb_dec_loop6 movups XMMWORD PTR [edi],xmm2 movups XMMWORD PTR 16[edi],xmm3 movups XMMWORD PTR 32[edi],xmm4 @@ -521,18 +524,18 @@ $L021ecb_dec_loop6_enter: movups XMMWORD PTR 80[edi],xmm7 lea edi,DWORD PTR 96[edi] add eax,96 - jz $L010ecb_ret -$L020ecb_dec_tail: + jz $L012ecb_ret +$L022ecb_dec_tail: movups xmm2,XMMWORD PTR [esi] cmp eax,32 - jb $L023ecb_dec_one + jb $L025ecb_dec_one movups xmm3,XMMWORD PTR 16[esi] - je $L024ecb_dec_two + je $L026ecb_dec_two movups xmm4,XMMWORD PTR 32[esi] cmp eax,64 - jb $L025ecb_dec_three + jb $L027ecb_dec_three movups xmm5,XMMWORD PTR 48[esi] - je $L026ecb_dec_four + je $L028ecb_dec_four movups xmm6,XMMWORD PTR 64[esi] xorps xmm7,xmm7 call __aesni_decrypt6 @@ -541,43 +544,51 @@ $L020ecb_dec_tail: movups XMMWORD PTR 32[edi],xmm4 movups XMMWORD PTR 48[edi],xmm5 movups XMMWORD PTR 64[edi],xmm6 - jmp $L010ecb_ret + jmp $L012ecb_ret ALIGN 16 -$L023ecb_dec_one: +$L025ecb_dec_one: movups xmm0,XMMWORD PTR [edx] movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L027dec1_loop_4: +$L029dec1_loop_4: DB 102,15,56,222,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L027dec1_loop_4 + jnz $L029dec1_loop_4 DB 102,15,56,223,209 movups XMMWORD PTR [edi],xmm2 - jmp $L010ecb_ret + jmp $L012ecb_ret ALIGN 16 -$L024ecb_dec_two: +$L026ecb_dec_two: call __aesni_decrypt2 movups XMMWORD PTR [edi],xmm2 movups XMMWORD PTR 16[edi],xmm3 - jmp $L010ecb_ret + jmp $L012ecb_ret ALIGN 16 -$L025ecb_dec_three: +$L027ecb_dec_three: call __aesni_decrypt3 movups XMMWORD PTR [edi],xmm2 movups XMMWORD PTR 16[edi],xmm3 movups XMMWORD PTR 32[edi],xmm4 - jmp $L010ecb_ret + jmp $L012ecb_ret ALIGN 16 -$L026ecb_dec_four: +$L028ecb_dec_four: call __aesni_decrypt4 movups XMMWORD PTR [edi],xmm2 movups XMMWORD PTR 16[edi],xmm3 movups XMMWORD PTR 32[edi],xmm4 movups XMMWORD PTR 48[edi],xmm5 -$L010ecb_ret: +$L012ecb_ret: + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 + pxor xmm7,xmm7 pop edi pop esi pop ebx @@ -622,7 +633,7 @@ $L_aesni_ccm64_encrypt_blocks_begin:: lea edx,DWORD PTR 32[ecx*1+edx] sub ebx,ecx DB 102,15,56,0,253 -$L028ccm64_enc_outer: +$L030ccm64_enc_outer: movups xmm0,XMMWORD PTR [ebp] mov ecx,ebx movups xmm6,XMMWORD PTR [esi] @@ -631,7 +642,7 @@ $L028ccm64_enc_outer: xorps xmm0,xmm6 xorps xmm3,xmm0 movups xmm0,XMMWORD PTR 32[ebp] -$L029ccm64_enc2_loop: +$L031ccm64_enc2_loop: DB 102,15,56,220,209 DB 102,15,56,220,217 movups xmm1,XMMWORD PTR [ecx*1+edx] @@ -639,7 +650,7 @@ DB 102,15,56,220,217 DB 102,15,56,220,208 DB 102,15,56,220,216 movups xmm0,XMMWORD PTR [ecx*1+edx-16] - jnz $L029ccm64_enc2_loop + jnz $L031ccm64_enc2_loop DB 102,15,56,220,209 DB 102,15,56,220,217 paddq xmm7,XMMWORD PTR 16[esp] @@ -652,10 +663,18 @@ DB 102,15,56,221,216 movups XMMWORD PTR [edi],xmm6 DB 102,15,56,0,213 lea edi,DWORD PTR 16[edi] - jnz $L028ccm64_enc_outer + jnz $L030ccm64_enc_outer mov esp,DWORD PTR 48[esp] mov edi,DWORD PTR 40[esp] movups XMMWORD PTR [edi],xmm3 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 + pxor xmm7,xmm7 pop edi pop esi pop ebx @@ -701,12 +720,12 @@ DB 102,15,56,0,253 movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L030enc1_loop_5: +$L032enc1_loop_5: DB 102,15,56,220,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L030enc1_loop_5 + jnz $L032enc1_loop_5 DB 102,15,56,221,209 shl ebx,4 mov ecx,16 @@ -716,16 +735,16 @@ DB 102,15,56,221,209 sub ecx,ebx lea edx,DWORD PTR 32[ebx*1+ebp] mov ebx,ecx - jmp $L031ccm64_dec_outer + jmp $L033ccm64_dec_outer ALIGN 16 -$L031ccm64_dec_outer: +$L033ccm64_dec_outer: xorps xmm6,xmm2 movdqa xmm2,xmm7 movups XMMWORD PTR [edi],xmm6 lea edi,DWORD PTR 16[edi] DB 102,15,56,0,213 sub eax,1 - jz $L032ccm64_dec_break + jz $L034ccm64_dec_break movups xmm0,XMMWORD PTR [ebp] mov ecx,ebx movups xmm1,XMMWORD PTR 16[ebp] @@ -733,7 +752,7 @@ DB 102,15,56,0,213 xorps xmm2,xmm0 xorps xmm3,xmm6 movups xmm0,XMMWORD PTR 32[ebp] -$L033ccm64_dec2_loop: +$L035ccm64_dec2_loop: DB 102,15,56,220,209 DB 102,15,56,220,217 movups xmm1,XMMWORD PTR [ecx*1+edx] @@ -741,7 +760,7 @@ DB 102,15,56,220,217 DB 102,15,56,220,208 DB 102,15,56,220,216 movups xmm0,XMMWORD PTR [ecx*1+edx-16] - jnz $L033ccm64_dec2_loop + jnz $L035ccm64_dec2_loop movups xmm6,XMMWORD PTR [esi] paddq xmm7,XMMWORD PTR 16[esp] DB 102,15,56,220,209 @@ -749,9 +768,9 @@ DB 102,15,56,220,217 DB 102,15,56,221,208 DB 102,15,56,221,216 lea esi,QWORD PTR 16[esi] - jmp $L031ccm64_dec_outer + jmp $L033ccm64_dec_outer ALIGN 16 -$L032ccm64_dec_break: +$L034ccm64_dec_break: mov ecx,DWORD PTR 240[ebp] mov edx,ebp movups xmm0,XMMWORD PTR [edx] @@ -759,16 +778,24 @@ $L032ccm64_dec_break: xorps xmm6,xmm0 lea edx,DWORD PTR 32[edx] xorps xmm3,xmm6 -$L034enc1_loop_6: +$L036enc1_loop_6: DB 102,15,56,220,217 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L034enc1_loop_6 + jnz $L036enc1_loop_6 DB 102,15,56,221,217 mov esp,DWORD PTR 48[esp] mov edi,DWORD PTR 40[esp] movups XMMWORD PTR [edi],xmm3 + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 + pxor xmm7,xmm7 pop edi pop esi pop ebx @@ -792,7 +819,7 @@ $L_aesni_ctr32_encrypt_blocks_begin:: and esp,-16 mov DWORD PTR 80[esp],ebp cmp eax,1 - je $L035ctr32_one_shortcut + je $L037ctr32_one_shortcut movdqu xmm7,XMMWORD PTR [ebx] mov DWORD PTR [esp],202182159 mov DWORD PTR 4[esp],134810123 @@ -830,7 +857,7 @@ DB 102,15,56,0,202 pshufd xmm2,xmm0,192 pshufd xmm3,xmm0,128 cmp eax,6 - jb $L036ctr32_tail + jb $L038ctr32_tail pxor xmm7,xmm6 shl ecx,4 mov ebx,16 @@ -839,9 +866,9 @@ DB 102,15,56,0,202 sub ebx,ecx lea edx,DWORD PTR 32[ecx*1+edx] sub eax,6 - jmp $L037ctr32_loop6 + jmp $L039ctr32_loop6 ALIGN 16 -$L037ctr32_loop6: +$L039ctr32_loop6: pshufd xmm4,xmm0,64 movdqa xmm0,XMMWORD PTR 32[esp] pshufd xmm5,xmm1,192 @@ -895,27 +922,27 @@ DB 102,15,56,0,202 lea edi,DWORD PTR 96[edi] pshufd xmm3,xmm0,128 sub eax,6 - jnc $L037ctr32_loop6 + jnc $L039ctr32_loop6 add eax,6 - jz $L038ctr32_ret + jz $L040ctr32_ret movdqu xmm7,XMMWORD PTR [ebp] mov edx,ebp pxor xmm7,XMMWORD PTR 32[esp] mov ecx,DWORD PTR 240[ebp] -$L036ctr32_tail: +$L038ctr32_tail: por xmm2,xmm7 cmp eax,2 - jb $L039ctr32_one + jb $L041ctr32_one pshufd xmm4,xmm0,64 por xmm3,xmm7 - je $L040ctr32_two + je $L042ctr32_two pshufd xmm5,xmm1,192 por xmm4,xmm7 cmp eax,4 - jb $L041ctr32_three + jb $L043ctr32_three pshufd xmm6,xmm1,128 por xmm5,xmm7 - je $L042ctr32_four + je $L044ctr32_four por xmm6,xmm7 call __aesni_encrypt6 movups xmm1,XMMWORD PTR [esi] @@ -933,29 +960,29 @@ $L036ctr32_tail: movups XMMWORD PTR 32[edi],xmm4 movups XMMWORD PTR 48[edi],xmm5 movups XMMWORD PTR 64[edi],xmm6 - jmp $L038ctr32_ret + jmp $L040ctr32_ret ALIGN 16 -$L035ctr32_one_shortcut: +$L037ctr32_one_shortcut: movups xmm2,XMMWORD PTR [ebx] mov ecx,DWORD PTR 240[edx] -$L039ctr32_one: +$L041ctr32_one: movups xmm0,XMMWORD PTR [edx] movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L043enc1_loop_7: +$L045enc1_loop_7: DB 102,15,56,220,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L043enc1_loop_7 + jnz $L045enc1_loop_7 DB 102,15,56,221,209 movups xmm6,XMMWORD PTR [esi] xorps xmm6,xmm2 movups XMMWORD PTR [edi],xmm6 - jmp $L038ctr32_ret + jmp $L040ctr32_ret ALIGN 16 -$L040ctr32_two: +$L042ctr32_two: call __aesni_encrypt2 movups xmm5,XMMWORD PTR [esi] movups xmm6,XMMWORD PTR 16[esi] @@ -963,9 +990,9 @@ $L040ctr32_two: xorps xmm3,xmm6 movups XMMWORD PTR [edi],xmm2 movups XMMWORD PTR 16[edi],xmm3 - jmp $L038ctr32_ret + jmp $L040ctr32_ret ALIGN 16 -$L041ctr32_three: +$L043ctr32_three: call __aesni_encrypt3 movups xmm5,XMMWORD PTR [esi] movups xmm6,XMMWORD PTR 16[esi] @@ -976,9 +1003,9 @@ $L041ctr32_three: xorps xmm4,xmm7 movups XMMWORD PTR 16[edi],xmm3 movups XMMWORD PTR 32[edi],xmm4 - jmp $L038ctr32_ret + jmp $L040ctr32_ret ALIGN 16 -$L042ctr32_four: +$L044ctr32_four: call __aesni_encrypt4 movups xmm6,XMMWORD PTR [esi] movups xmm7,XMMWORD PTR 16[esi] @@ -992,7 +1019,18 @@ $L042ctr32_four: xorps xmm5,xmm0 movups XMMWORD PTR 32[edi],xmm4 movups XMMWORD PTR 48[edi],xmm5 -$L038ctr32_ret: +$L040ctr32_ret: + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + movdqa XMMWORD PTR 32[esp],xmm0 + pxor xmm5,xmm5 + movdqa XMMWORD PTR 48[esp],xmm0 + pxor xmm6,xmm6 + movdqa XMMWORD PTR 64[esp],xmm0 + pxor xmm7,xmm7 mov esp,DWORD PTR 80[esp] pop edi pop esi @@ -1015,12 +1053,12 @@ $L_aesni_xts_encrypt_begin:: movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L044enc1_loop_8: +$L046enc1_loop_8: DB 102,15,56,220,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L044enc1_loop_8 + jnz $L046enc1_loop_8 DB 102,15,56,221,209 mov esi,DWORD PTR 20[esp] mov edi,DWORD PTR 24[esp] @@ -1044,14 +1082,14 @@ DB 102,15,56,221,209 mov ebp,edx mov ebx,ecx sub eax,96 - jc $L045xts_enc_short + jc $L047xts_enc_short shl ecx,4 mov ebx,16 sub ebx,ecx lea edx,DWORD PTR 32[ecx*1+edx] - jmp $L046xts_enc_loop6 + jmp $L048xts_enc_loop6 ALIGN 16 -$L046xts_enc_loop6: +$L048xts_enc_loop6: pshufd xmm2,xmm0,19 pxor xmm0,xmm0 movdqa XMMWORD PTR [esp],xmm1 @@ -1140,23 +1178,23 @@ DB 102,15,56,220,249 pcmpgtd xmm0,xmm1 pxor xmm1,xmm2 sub eax,96 - jnc $L046xts_enc_loop6 + jnc $L048xts_enc_loop6 mov ecx,DWORD PTR 240[ebp] mov edx,ebp mov ebx,ecx -$L045xts_enc_short: +$L047xts_enc_short: add eax,96 - jz $L047xts_enc_done6x + jz $L049xts_enc_done6x movdqa xmm5,xmm1 cmp eax,32 - jb $L048xts_enc_one + jb $L050xts_enc_one pshufd xmm2,xmm0,19 pxor xmm0,xmm0 paddq xmm1,xmm1 pand xmm2,xmm3 pcmpgtd xmm0,xmm1 pxor xmm1,xmm2 - je $L049xts_enc_two + je $L051xts_enc_two pshufd xmm2,xmm0,19 pxor xmm0,xmm0 movdqa xmm6,xmm1 @@ -1165,7 +1203,7 @@ $L045xts_enc_short: pcmpgtd xmm0,xmm1 pxor xmm1,xmm2 cmp eax,64 - jb $L050xts_enc_three + jb $L052xts_enc_three pshufd xmm2,xmm0,19 pxor xmm0,xmm0 movdqa xmm7,xmm1 @@ -1175,7 +1213,7 @@ $L045xts_enc_short: pxor xmm1,xmm2 movdqa XMMWORD PTR [esp],xmm5 movdqa XMMWORD PTR 16[esp],xmm6 - je $L051xts_enc_four + je $L053xts_enc_four movdqa XMMWORD PTR 32[esp],xmm7 pshufd xmm7,xmm0,19 movdqa XMMWORD PTR 48[esp],xmm1 @@ -1207,9 +1245,9 @@ $L045xts_enc_short: movups XMMWORD PTR 48[edi],xmm5 movups XMMWORD PTR 64[edi],xmm6 lea edi,DWORD PTR 80[edi] - jmp $L052xts_enc_done + jmp $L054xts_enc_done ALIGN 16 -$L048xts_enc_one: +$L050xts_enc_one: movups xmm2,XMMWORD PTR [esi] lea esi,DWORD PTR 16[esi] xorps xmm2,xmm5 @@ -1217,20 +1255,20 @@ $L048xts_enc_one: movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L053enc1_loop_9: +$L055enc1_loop_9: DB 102,15,56,220,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L053enc1_loop_9 + jnz $L055enc1_loop_9 DB 102,15,56,221,209 xorps xmm2,xmm5 movups XMMWORD PTR [edi],xmm2 lea edi,DWORD PTR 16[edi] movdqa xmm1,xmm5 - jmp $L052xts_enc_done + jmp $L054xts_enc_done ALIGN 16 -$L049xts_enc_two: +$L051xts_enc_two: movaps xmm6,xmm1 movups xmm2,XMMWORD PTR [esi] movups xmm3,XMMWORD PTR 16[esi] @@ -1244,9 +1282,9 @@ $L049xts_enc_two: movups XMMWORD PTR 16[edi],xmm3 lea edi,DWORD PTR 32[edi] movdqa xmm1,xmm6 - jmp $L052xts_enc_done + jmp $L054xts_enc_done ALIGN 16 -$L050xts_enc_three: +$L052xts_enc_three: movaps xmm7,xmm1 movups xmm2,XMMWORD PTR [esi] movups xmm3,XMMWORD PTR 16[esi] @@ -1264,9 +1302,9 @@ $L050xts_enc_three: movups XMMWORD PTR 32[edi],xmm4 lea edi,DWORD PTR 48[edi] movdqa xmm1,xmm7 - jmp $L052xts_enc_done + jmp $L054xts_enc_done ALIGN 16 -$L051xts_enc_four: +$L053xts_enc_four: movaps xmm6,xmm1 movups xmm2,XMMWORD PTR [esi] movups xmm3,XMMWORD PTR 16[esi] @@ -1288,28 +1326,28 @@ $L051xts_enc_four: movups XMMWORD PTR 48[edi],xmm5 lea edi,DWORD PTR 64[edi] movdqa xmm1,xmm6 - jmp $L052xts_enc_done + jmp $L054xts_enc_done ALIGN 16 -$L047xts_enc_done6x: +$L049xts_enc_done6x: mov eax,DWORD PTR 112[esp] and eax,15 - jz $L054xts_enc_ret + jz $L056xts_enc_ret movdqa xmm5,xmm1 mov DWORD PTR 112[esp],eax - jmp $L055xts_enc_steal + jmp $L057xts_enc_steal ALIGN 16 -$L052xts_enc_done: +$L054xts_enc_done: mov eax,DWORD PTR 112[esp] pxor xmm0,xmm0 and eax,15 - jz $L054xts_enc_ret + jz $L056xts_enc_ret pcmpgtd xmm0,xmm1 mov DWORD PTR 112[esp],eax pshufd xmm5,xmm0,19 paddq xmm1,xmm1 pand xmm5,XMMWORD PTR 96[esp] pxor xmm5,xmm1 -$L055xts_enc_steal: +$L057xts_enc_steal: movzx ecx,BYTE PTR [esi] movzx edx,BYTE PTR [edi-16] lea esi,DWORD PTR 1[esi] @@ -1317,7 +1355,7 @@ $L055xts_enc_steal: mov BYTE PTR [edi],dl lea edi,DWORD PTR 1[edi] sub eax,1 - jnz $L055xts_enc_steal + jnz $L057xts_enc_steal sub edi,DWORD PTR 112[esp] mov edx,ebp mov ecx,ebx @@ -1327,16 +1365,30 @@ $L055xts_enc_steal: movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L056enc1_loop_10: +$L058enc1_loop_10: DB 102,15,56,220,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L056enc1_loop_10 + jnz $L058enc1_loop_10 DB 102,15,56,221,209 xorps xmm2,xmm5 movups XMMWORD PTR [edi-16],xmm2 -$L054xts_enc_ret: +$L056xts_enc_ret: + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + movdqa XMMWORD PTR [esp],xmm0 + pxor xmm3,xmm3 + movdqa XMMWORD PTR 16[esp],xmm0 + pxor xmm4,xmm4 + movdqa XMMWORD PTR 32[esp],xmm0 + pxor xmm5,xmm5 + movdqa XMMWORD PTR 48[esp],xmm0 + pxor xmm6,xmm6 + movdqa XMMWORD PTR 64[esp],xmm0 + pxor xmm7,xmm7 + movdqa XMMWORD PTR 80[esp],xmm0 mov esp,DWORD PTR 116[esp] pop edi pop esi @@ -1359,12 +1411,12 @@ $L_aesni_xts_decrypt_begin:: movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L057enc1_loop_11: +$L059enc1_loop_11: DB 102,15,56,220,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L057enc1_loop_11 + jnz $L059enc1_loop_11 DB 102,15,56,221,209 mov esi,DWORD PTR 20[esp] mov edi,DWORD PTR 24[esp] @@ -1393,14 +1445,14 @@ DB 102,15,56,221,209 pcmpgtd xmm0,xmm1 and eax,-16 sub eax,96 - jc $L058xts_dec_short + jc $L060xts_dec_short shl ecx,4 mov ebx,16 sub ebx,ecx lea edx,DWORD PTR 32[ecx*1+edx] - jmp $L059xts_dec_loop6 + jmp $L061xts_dec_loop6 ALIGN 16 -$L059xts_dec_loop6: +$L061xts_dec_loop6: pshufd xmm2,xmm0,19 pxor xmm0,xmm0 movdqa XMMWORD PTR [esp],xmm1 @@ -1489,23 +1541,23 @@ DB 102,15,56,222,249 pcmpgtd xmm0,xmm1 pxor xmm1,xmm2 sub eax,96 - jnc $L059xts_dec_loop6 + jnc $L061xts_dec_loop6 mov ecx,DWORD PTR 240[ebp] mov edx,ebp mov ebx,ecx -$L058xts_dec_short: +$L060xts_dec_short: add eax,96 - jz $L060xts_dec_done6x + jz $L062xts_dec_done6x movdqa xmm5,xmm1 cmp eax,32 - jb $L061xts_dec_one + jb $L063xts_dec_one pshufd xmm2,xmm0,19 pxor xmm0,xmm0 paddq xmm1,xmm1 pand xmm2,xmm3 pcmpgtd xmm0,xmm1 pxor xmm1,xmm2 - je $L062xts_dec_two + je $L064xts_dec_two pshufd xmm2,xmm0,19 pxor xmm0,xmm0 movdqa xmm6,xmm1 @@ -1514,7 +1566,7 @@ $L058xts_dec_short: pcmpgtd xmm0,xmm1 pxor xmm1,xmm2 cmp eax,64 - jb $L063xts_dec_three + jb $L065xts_dec_three pshufd xmm2,xmm0,19 pxor xmm0,xmm0 movdqa xmm7,xmm1 @@ -1524,7 +1576,7 @@ $L058xts_dec_short: pxor xmm1,xmm2 movdqa XMMWORD PTR [esp],xmm5 movdqa XMMWORD PTR 16[esp],xmm6 - je $L064xts_dec_four + je $L066xts_dec_four movdqa XMMWORD PTR 32[esp],xmm7 pshufd xmm7,xmm0,19 movdqa XMMWORD PTR 48[esp],xmm1 @@ -1556,9 +1608,9 @@ $L058xts_dec_short: movups XMMWORD PTR 48[edi],xmm5 movups XMMWORD PTR 64[edi],xmm6 lea edi,DWORD PTR 80[edi] - jmp $L065xts_dec_done + jmp $L067xts_dec_done ALIGN 16 -$L061xts_dec_one: +$L063xts_dec_one: movups xmm2,XMMWORD PTR [esi] lea esi,DWORD PTR 16[esi] xorps xmm2,xmm5 @@ -1566,20 +1618,20 @@ $L061xts_dec_one: movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L066dec1_loop_12: +$L068dec1_loop_12: DB 102,15,56,222,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L066dec1_loop_12 + jnz $L068dec1_loop_12 DB 102,15,56,223,209 xorps xmm2,xmm5 movups XMMWORD PTR [edi],xmm2 lea edi,DWORD PTR 16[edi] movdqa xmm1,xmm5 - jmp $L065xts_dec_done + jmp $L067xts_dec_done ALIGN 16 -$L062xts_dec_two: +$L064xts_dec_two: movaps xmm6,xmm1 movups xmm2,XMMWORD PTR [esi] movups xmm3,XMMWORD PTR 16[esi] @@ -1593,9 +1645,9 @@ $L062xts_dec_two: movups XMMWORD PTR 16[edi],xmm3 lea edi,DWORD PTR 32[edi] movdqa xmm1,xmm6 - jmp $L065xts_dec_done + jmp $L067xts_dec_done ALIGN 16 -$L063xts_dec_three: +$L065xts_dec_three: movaps xmm7,xmm1 movups xmm2,XMMWORD PTR [esi] movups xmm3,XMMWORD PTR 16[esi] @@ -1613,9 +1665,9 @@ $L063xts_dec_three: movups XMMWORD PTR 32[edi],xmm4 lea edi,DWORD PTR 48[edi] movdqa xmm1,xmm7 - jmp $L065xts_dec_done + jmp $L067xts_dec_done ALIGN 16 -$L064xts_dec_four: +$L066xts_dec_four: movaps xmm6,xmm1 movups xmm2,XMMWORD PTR [esi] movups xmm3,XMMWORD PTR 16[esi] @@ -1637,20 +1689,20 @@ $L064xts_dec_four: movups XMMWORD PTR 48[edi],xmm5 lea edi,DWORD PTR 64[edi] movdqa xmm1,xmm6 - jmp $L065xts_dec_done + jmp $L067xts_dec_done ALIGN 16 -$L060xts_dec_done6x: +$L062xts_dec_done6x: mov eax,DWORD PTR 112[esp] and eax,15 - jz $L067xts_dec_ret + jz $L069xts_dec_ret mov DWORD PTR 112[esp],eax - jmp $L068xts_dec_only_one_more + jmp $L070xts_dec_only_one_more ALIGN 16 -$L065xts_dec_done: +$L067xts_dec_done: mov eax,DWORD PTR 112[esp] pxor xmm0,xmm0 and eax,15 - jz $L067xts_dec_ret + jz $L069xts_dec_ret pcmpgtd xmm0,xmm1 mov DWORD PTR 112[esp],eax pshufd xmm2,xmm0,19 @@ -1660,7 +1712,7 @@ $L065xts_dec_done: pand xmm2,xmm3 pcmpgtd xmm0,xmm1 pxor xmm1,xmm2 -$L068xts_dec_only_one_more: +$L070xts_dec_only_one_more: pshufd xmm5,xmm0,19 movdqa xmm6,xmm1 paddq xmm1,xmm1 @@ -1674,16 +1726,16 @@ $L068xts_dec_only_one_more: movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L069dec1_loop_13: +$L071dec1_loop_13: DB 102,15,56,222,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L069dec1_loop_13 + jnz $L071dec1_loop_13 DB 102,15,56,223,209 xorps xmm2,xmm5 movups XMMWORD PTR [edi],xmm2 -$L070xts_dec_steal: +$L072xts_dec_steal: movzx ecx,BYTE PTR 16[esi] movzx edx,BYTE PTR [edi] lea esi,DWORD PTR 1[esi] @@ -1691,7 +1743,7 @@ $L070xts_dec_steal: mov BYTE PTR 16[edi],dl lea edi,DWORD PTR 1[edi] sub eax,1 - jnz $L070xts_dec_steal + jnz $L072xts_dec_steal sub edi,DWORD PTR 112[esp] mov edx,ebp mov ecx,ebx @@ -1701,16 +1753,30 @@ $L070xts_dec_steal: movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L071dec1_loop_14: +$L073dec1_loop_14: DB 102,15,56,222,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L071dec1_loop_14 + jnz $L073dec1_loop_14 DB 102,15,56,223,209 xorps xmm2,xmm6 movups XMMWORD PTR [edi],xmm2 -$L067xts_dec_ret: +$L069xts_dec_ret: + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + movdqa XMMWORD PTR [esp],xmm0 + pxor xmm3,xmm3 + movdqa XMMWORD PTR 16[esp],xmm0 + pxor xmm4,xmm4 + movdqa XMMWORD PTR 32[esp],xmm0 + pxor xmm5,xmm5 + movdqa XMMWORD PTR 48[esp],xmm0 + pxor xmm6,xmm6 + movdqa XMMWORD PTR 64[esp],xmm0 + pxor xmm7,xmm7 + movdqa XMMWORD PTR 80[esp],xmm0 mov esp,DWORD PTR 116[esp] pop edi pop esi @@ -1734,7 +1800,7 @@ $L_aesni_cbc_encrypt_begin:: mov edx,DWORD PTR 32[esp] mov ebp,DWORD PTR 36[esp] test eax,eax - jz $L072cbc_abort + jz $L074cbc_abort cmp DWORD PTR 40[esp],0 xchg ebx,esp movups xmm7,XMMWORD PTR [ebp] @@ -1742,14 +1808,14 @@ $L_aesni_cbc_encrypt_begin:: mov ebp,edx mov DWORD PTR 16[esp],ebx mov ebx,ecx - je $L073cbc_decrypt + je $L075cbc_decrypt movaps xmm2,xmm7 cmp eax,16 - jb $L074cbc_enc_tail + jb $L076cbc_enc_tail sub eax,16 - jmp $L075cbc_enc_loop + jmp $L077cbc_enc_loop ALIGN 16 -$L075cbc_enc_loop: +$L077cbc_enc_loop: movups xmm7,XMMWORD PTR [esi] lea esi,DWORD PTR 16[esi] movups xmm0,XMMWORD PTR [edx] @@ -1757,24 +1823,25 @@ $L075cbc_enc_loop: xorps xmm7,xmm0 lea edx,DWORD PTR 32[edx] xorps xmm2,xmm7 -$L076enc1_loop_15: +$L078enc1_loop_15: DB 102,15,56,220,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L076enc1_loop_15 + jnz $L078enc1_loop_15 DB 102,15,56,221,209 mov ecx,ebx mov edx,ebp movups XMMWORD PTR [edi],xmm2 lea edi,DWORD PTR 16[edi] sub eax,16 - jnc $L075cbc_enc_loop + jnc $L077cbc_enc_loop add eax,16 - jnz $L074cbc_enc_tail + jnz $L076cbc_enc_tail movaps xmm7,xmm2 - jmp $L077cbc_ret -$L074cbc_enc_tail: + pxor xmm2,xmm2 + jmp $L079cbc_ret +$L076cbc_enc_tail: mov ecx,eax DD 2767451785 mov ecx,16 @@ -1785,20 +1852,20 @@ DD 2868115081 mov ecx,ebx mov esi,edi mov edx,ebp - jmp $L075cbc_enc_loop + jmp $L077cbc_enc_loop ALIGN 16 -$L073cbc_decrypt: +$L075cbc_decrypt: cmp eax,80 - jbe $L078cbc_dec_tail + jbe $L080cbc_dec_tail movaps XMMWORD PTR [esp],xmm7 sub eax,80 - jmp $L079cbc_dec_loop6_enter + jmp $L081cbc_dec_loop6_enter ALIGN 16 -$L080cbc_dec_loop6: +$L082cbc_dec_loop6: movaps XMMWORD PTR [esp],xmm0 movups XMMWORD PTR [edi],xmm7 lea edi,DWORD PTR 16[edi] -$L079cbc_dec_loop6_enter: +$L081cbc_dec_loop6_enter: movdqu xmm2,XMMWORD PTR [esi] movdqu xmm3,XMMWORD PTR 16[esi] movdqu xmm4,XMMWORD PTR 32[esi] @@ -1828,28 +1895,28 @@ $L079cbc_dec_loop6_enter: movups XMMWORD PTR 64[edi],xmm6 lea edi,DWORD PTR 80[edi] sub eax,96 - ja $L080cbc_dec_loop6 + ja $L082cbc_dec_loop6 movaps xmm2,xmm7 movaps xmm7,xmm0 add eax,80 - jle $L081cbc_dec_tail_collected + jle $L083cbc_dec_clear_tail_collected movups XMMWORD PTR [edi],xmm2 lea edi,DWORD PTR 16[edi] -$L078cbc_dec_tail: +$L080cbc_dec_tail: movups xmm2,XMMWORD PTR [esi] movaps xmm6,xmm2 cmp eax,16 - jbe $L082cbc_dec_one + jbe $L084cbc_dec_one movups xmm3,XMMWORD PTR 16[esi] movaps xmm5,xmm3 cmp eax,32 - jbe $L083cbc_dec_two + jbe $L085cbc_dec_two movups xmm4,XMMWORD PTR 32[esi] cmp eax,48 - jbe $L084cbc_dec_three + jbe $L086cbc_dec_three movups xmm5,XMMWORD PTR 48[esi] cmp eax,64 - jbe $L085cbc_dec_four + jbe $L087cbc_dec_four movups xmm6,XMMWORD PTR 64[esi] movaps XMMWORD PTR [esp],xmm7 movups xmm2,XMMWORD PTR [esi] @@ -1867,55 +1934,62 @@ $L078cbc_dec_tail: xorps xmm6,xmm0 movups XMMWORD PTR [edi],xmm2 movups XMMWORD PTR 16[edi],xmm3 + pxor xmm3,xmm3 movups XMMWORD PTR 32[edi],xmm4 + pxor xmm4,xmm4 movups XMMWORD PTR 48[edi],xmm5 + pxor xmm5,xmm5 lea edi,DWORD PTR 64[edi] movaps xmm2,xmm6 + pxor xmm6,xmm6 sub eax,80 - jmp $L081cbc_dec_tail_collected + jmp $L088cbc_dec_tail_collected ALIGN 16 -$L082cbc_dec_one: +$L084cbc_dec_one: movups xmm0,XMMWORD PTR [edx] movups xmm1,XMMWORD PTR 16[edx] lea edx,DWORD PTR 32[edx] xorps xmm2,xmm0 -$L086dec1_loop_16: +$L089dec1_loop_16: DB 102,15,56,222,209 dec ecx movups xmm1,XMMWORD PTR [edx] lea edx,DWORD PTR 16[edx] - jnz $L086dec1_loop_16 + jnz $L089dec1_loop_16 DB 102,15,56,223,209 xorps xmm2,xmm7 movaps xmm7,xmm6 sub eax,16 - jmp $L081cbc_dec_tail_collected + jmp $L088cbc_dec_tail_collected ALIGN 16 -$L083cbc_dec_two: +$L085cbc_dec_two: call __aesni_decrypt2 xorps xmm2,xmm7 xorps xmm3,xmm6 movups XMMWORD PTR [edi],xmm2 movaps xmm2,xmm3 + pxor xmm3,xmm3 lea edi,DWORD PTR 16[edi] movaps xmm7,xmm5 sub eax,32 - jmp $L081cbc_dec_tail_collected + jmp $L088cbc_dec_tail_collected ALIGN 16 -$L084cbc_dec_three: +$L086cbc_dec_three: call __aesni_decrypt3 xorps xmm2,xmm7 xorps xmm3,xmm6 xorps xmm4,xmm5 movups XMMWORD PTR [edi],xmm2 movaps xmm2,xmm4 + pxor xmm4,xmm4 movups XMMWORD PTR 16[edi],xmm3 + pxor xmm3,xmm3 lea edi,DWORD PTR 32[edi] movups xmm7,XMMWORD PTR 32[esi] sub eax,48 - jmp $L081cbc_dec_tail_collected + jmp $L088cbc_dec_tail_collected ALIGN 16 -$L085cbc_dec_four: +$L087cbc_dec_four: call __aesni_decrypt4 movups xmm1,XMMWORD PTR 16[esi] movups xmm0,XMMWORD PTR 32[esi] @@ -1925,28 +1999,44 @@ $L085cbc_dec_four: movups XMMWORD PTR [edi],xmm2 xorps xmm4,xmm1 movups XMMWORD PTR 16[edi],xmm3 + pxor xmm3,xmm3 xorps xmm5,xmm0 movups XMMWORD PTR 32[edi],xmm4 + pxor xmm4,xmm4 lea edi,DWORD PTR 48[edi] movaps xmm2,xmm5 + pxor xmm5,xmm5 sub eax,64 -$L081cbc_dec_tail_collected: + jmp $L088cbc_dec_tail_collected +ALIGN 16 +$L083cbc_dec_clear_tail_collected: + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + pxor xmm6,xmm6 +$L088cbc_dec_tail_collected: and eax,15 - jnz $L087cbc_dec_tail_partial + jnz $L090cbc_dec_tail_partial movups XMMWORD PTR [edi],xmm2 - jmp $L077cbc_ret + pxor xmm0,xmm0 + jmp $L079cbc_ret ALIGN 16 -$L087cbc_dec_tail_partial: +$L090cbc_dec_tail_partial: movaps XMMWORD PTR [esp],xmm2 + pxor xmm0,xmm0 mov ecx,16 mov esi,esp sub ecx,eax DD 2767451785 -$L077cbc_ret: + movdqa XMMWORD PTR [esp],xmm2 +$L079cbc_ret: mov esp,DWORD PTR 16[esp] mov ebp,DWORD PTR 36[esp] + pxor xmm2,xmm2 + pxor xmm1,xmm1 movups XMMWORD PTR [ebp],xmm7 -$L072cbc_abort: + pxor xmm7,xmm7 +$L074cbc_abort: pop edi pop esi pop ebx @@ -1955,52 +2045,62 @@ $L072cbc_abort: _aesni_cbc_encrypt ENDP ALIGN 16 __aesni_set_encrypt_key PROC PRIVATE + push ebp + push ebx test eax,eax - jz $L088bad_pointer + jz $L091bad_pointer test edx,edx - jz $L088bad_pointer + jz $L091bad_pointer + call $L092pic +$L092pic: + pop ebx + lea ebx,DWORD PTR ($Lkey_const-$L092pic)[ebx] + lea ebp,DWORD PTR _OPENSSL_ia32cap_P movups xmm0,XMMWORD PTR [eax] xorps xmm4,xmm4 + mov ebp,DWORD PTR 4[ebp] lea edx,DWORD PTR 16[edx] + and ebp,268437504 cmp ecx,256 - je $L08914rounds + je $L09314rounds cmp ecx,192 - je $L09012rounds + je $L09412rounds cmp ecx,128 - jne $L091bad_keybits + jne $L095bad_keybits ALIGN 16 -$L09210rounds: +$L09610rounds: + cmp ebp,268435456 + je $L09710rounds_alt mov ecx,9 movups XMMWORD PTR [edx-16],xmm0 DB 102,15,58,223,200,1 - call $L093key_128_cold + call $L098key_128_cold DB 102,15,58,223,200,2 - call $L094key_128 + call $L099key_128 DB 102,15,58,223,200,4 - call $L094key_128 + call $L099key_128 DB 102,15,58,223,200,8 - call $L094key_128 + call $L099key_128 DB 102,15,58,223,200,16 - call $L094key_128 + call $L099key_128 DB 102,15,58,223,200,32 - call $L094key_128 + call $L099key_128 DB 102,15,58,223,200,64 - call $L094key_128 + call $L099key_128 DB 102,15,58,223,200,128 - call $L094key_128 + call $L099key_128 DB 102,15,58,223,200,27 - call $L094key_128 + call $L099key_128 DB 102,15,58,223,200,54 - call $L094key_128 + call $L099key_128 movups XMMWORD PTR [edx],xmm0 mov DWORD PTR 80[edx],ecx - xor eax,eax - ret + jmp $L100good_key ALIGN 16 -$L094key_128: +$L099key_128: movups XMMWORD PTR [edx],xmm0 lea edx,DWORD PTR 16[edx] -$L093key_128_cold: +$L098key_128_cold: shufps xmm4,xmm0,16 xorps xmm0,xmm4 shufps xmm4,xmm0,140 @@ -2009,38 +2109,91 @@ $L093key_128_cold: xorps xmm0,xmm1 ret ALIGN 16 -$L09012rounds: +$L09710rounds_alt: + movdqa xmm5,XMMWORD PTR [ebx] + mov ecx,8 + movdqa xmm4,XMMWORD PTR 32[ebx] + movdqa xmm2,xmm0 + movdqu XMMWORD PTR [edx-16],xmm0 +$L101loop_key128: +DB 102,15,56,0,197 +DB 102,15,56,221,196 + pslld xmm4,1 + lea edx,DWORD PTR 16[edx] + movdqa xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm2,xmm3 + pxor xmm0,xmm2 + movdqu XMMWORD PTR [edx-16],xmm0 + movdqa xmm2,xmm0 + dec ecx + jnz $L101loop_key128 + movdqa xmm4,XMMWORD PTR 48[ebx] +DB 102,15,56,0,197 +DB 102,15,56,221,196 + pslld xmm4,1 + movdqa xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm2,xmm3 + pxor xmm0,xmm2 + movdqu XMMWORD PTR [edx],xmm0 + movdqa xmm2,xmm0 +DB 102,15,56,0,197 +DB 102,15,56,221,196 + movdqa xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm3,xmm2 + pslldq xmm2,4 + pxor xmm2,xmm3 + pxor xmm0,xmm2 + movdqu XMMWORD PTR 16[edx],xmm0 + mov ecx,9 + mov DWORD PTR 96[edx],ecx + jmp $L100good_key +ALIGN 16 +$L09412rounds: movq xmm2,QWORD PTR 16[eax] + cmp ebp,268435456 + je $L10212rounds_alt mov ecx,11 movups XMMWORD PTR [edx-16],xmm0 DB 102,15,58,223,202,1 - call $L095key_192a_cold + call $L103key_192a_cold DB 102,15,58,223,202,2 - call $L096key_192b + call $L104key_192b DB 102,15,58,223,202,4 - call $L097key_192a + call $L105key_192a DB 102,15,58,223,202,8 - call $L096key_192b + call $L104key_192b DB 102,15,58,223,202,16 - call $L097key_192a + call $L105key_192a DB 102,15,58,223,202,32 - call $L096key_192b + call $L104key_192b DB 102,15,58,223,202,64 - call $L097key_192a + call $L105key_192a DB 102,15,58,223,202,128 - call $L096key_192b + call $L104key_192b movups XMMWORD PTR [edx],xmm0 mov DWORD PTR 48[edx],ecx - xor eax,eax - ret + jmp $L100good_key ALIGN 16 -$L097key_192a: +$L105key_192a: movups XMMWORD PTR [edx],xmm0 lea edx,DWORD PTR 16[edx] ALIGN 16 -$L095key_192a_cold: +$L103key_192a_cold: movaps xmm5,xmm2 -$L098key_192b_warm: +$L106key_192b_warm: shufps xmm4,xmm0,16 movdqa xmm3,xmm2 xorps xmm0,xmm4 @@ -2054,56 +2207,90 @@ $L098key_192b_warm: pxor xmm2,xmm3 ret ALIGN 16 -$L096key_192b: +$L104key_192b: movaps xmm3,xmm0 shufps xmm5,xmm0,68 movups XMMWORD PTR [edx],xmm5 shufps xmm3,xmm2,78 movups XMMWORD PTR 16[edx],xmm3 lea edx,DWORD PTR 32[edx] - jmp $L098key_192b_warm + jmp $L106key_192b_warm +ALIGN 16 +$L10212rounds_alt: + movdqa xmm5,XMMWORD PTR 16[ebx] + movdqa xmm4,XMMWORD PTR 32[ebx] + mov ecx,8 + movdqu XMMWORD PTR [edx-16],xmm0 +$L107loop_key192: + movq QWORD PTR [edx],xmm2 + movdqa xmm1,xmm2 +DB 102,15,56,0,213 +DB 102,15,56,221,212 + pslld xmm4,1 + lea edx,DWORD PTR 24[edx] + movdqa xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm0,xmm3 + pshufd xmm3,xmm0,255 + pxor xmm3,xmm1 + pslldq xmm1,4 + pxor xmm3,xmm1 + pxor xmm0,xmm2 + pxor xmm2,xmm3 + movdqu XMMWORD PTR [edx-16],xmm0 + dec ecx + jnz $L107loop_key192 + mov ecx,11 + mov DWORD PTR 32[edx],ecx + jmp $L100good_key ALIGN 16 -$L08914rounds: +$L09314rounds: movups xmm2,XMMWORD PTR 16[eax] - mov ecx,13 lea edx,DWORD PTR 16[edx] + cmp ebp,268435456 + je $L10814rounds_alt + mov ecx,13 movups XMMWORD PTR [edx-32],xmm0 movups XMMWORD PTR [edx-16],xmm2 DB 102,15,58,223,202,1 - call $L099key_256a_cold + call $L109key_256a_cold DB 102,15,58,223,200,1 - call $L100key_256b + call $L110key_256b DB 102,15,58,223,202,2 - call $L101key_256a + call $L111key_256a DB 102,15,58,223,200,2 - call $L100key_256b + call $L110key_256b DB 102,15,58,223,202,4 - call $L101key_256a + call $L111key_256a DB 102,15,58,223,200,4 - call $L100key_256b + call $L110key_256b DB 102,15,58,223,202,8 - call $L101key_256a + call $L111key_256a DB 102,15,58,223,200,8 - call $L100key_256b + call $L110key_256b DB 102,15,58,223,202,16 - call $L101key_256a + call $L111key_256a DB 102,15,58,223,200,16 - call $L100key_256b + call $L110key_256b DB 102,15,58,223,202,32 - call $L101key_256a + call $L111key_256a DB 102,15,58,223,200,32 - call $L100key_256b + call $L110key_256b DB 102,15,58,223,202,64 - call $L101key_256a + call $L111key_256a movups XMMWORD PTR [edx],xmm0 mov DWORD PTR 16[edx],ecx xor eax,eax - ret + jmp $L100good_key ALIGN 16 -$L101key_256a: +$L111key_256a: movups XMMWORD PTR [edx],xmm2 lea edx,DWORD PTR 16[edx] -$L099key_256a_cold: +$L109key_256a_cold: shufps xmm4,xmm0,16 xorps xmm0,xmm4 shufps xmm4,xmm0,140 @@ -2112,7 +2299,7 @@ $L099key_256a_cold: xorps xmm0,xmm1 ret ALIGN 16 -$L100key_256b: +$L110key_256b: movups XMMWORD PTR [edx],xmm0 lea edx,DWORD PTR 16[edx] shufps xmm4,xmm2,16 @@ -2122,13 +2309,70 @@ $L100key_256b: shufps xmm1,xmm1,170 xorps xmm2,xmm1 ret +ALIGN 16 +$L10814rounds_alt: + movdqa xmm5,XMMWORD PTR [ebx] + movdqa xmm4,XMMWORD PTR 32[ebx] + mov ecx,7 + movdqu XMMWORD PTR [edx-32],xmm0 + movdqa xmm1,xmm2 + movdqu XMMWORD PTR [edx-16],xmm2 +$L112loop_key256: +DB 102,15,56,0,213 +DB 102,15,56,221,212 + movdqa xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm3,xmm0 + pslldq xmm0,4 + pxor xmm0,xmm3 + pslld xmm4,1 + pxor xmm0,xmm2 + movdqu XMMWORD PTR [edx],xmm0 + dec ecx + jz $L113done_key256 + pshufd xmm2,xmm0,255 + pxor xmm3,xmm3 +DB 102,15,56,221,211 + movdqa xmm3,xmm1 + pslldq xmm1,4 + pxor xmm3,xmm1 + pslldq xmm1,4 + pxor xmm3,xmm1 + pslldq xmm1,4 + pxor xmm1,xmm3 + pxor xmm2,xmm1 + movdqu XMMWORD PTR 16[edx],xmm2 + lea edx,DWORD PTR 32[edx] + movdqa xmm1,xmm2 + jmp $L112loop_key256 +$L113done_key256: + mov ecx,13 + mov DWORD PTR 16[edx],ecx +$L100good_key: + pxor xmm0,xmm0 + pxor xmm1,xmm1 + pxor xmm2,xmm2 + pxor xmm3,xmm3 + pxor xmm4,xmm4 + pxor xmm5,xmm5 + xor eax,eax + pop ebx + pop ebp + ret ALIGN 4 -$L088bad_pointer: +$L091bad_pointer: mov eax,-1 + pop ebx + pop ebp ret ALIGN 4 -$L091bad_keybits: +$L095bad_keybits: + pxor xmm0,xmm0 mov eax,-2 + pop ebx + pop ebp ret __aesni_set_encrypt_key ENDP ALIGN 16 @@ -2150,7 +2394,7 @@ $L_aesni_set_decrypt_key_begin:: mov edx,DWORD PTR 12[esp] shl ecx,4 test eax,eax - jnz $L102dec_key_ret + jnz $L114dec_key_ret lea eax,DWORD PTR 16[ecx*1+edx] movups xmm0,XMMWORD PTR [edx] movups xmm1,XMMWORD PTR [eax] @@ -2158,7 +2402,7 @@ $L_aesni_set_decrypt_key_begin:: movups XMMWORD PTR [edx],xmm1 lea edx,DWORD PTR 16[edx] lea eax,DWORD PTR [eax-16] -$L103dec_key_inverse: +$L115dec_key_inverse: movups xmm0,XMMWORD PTR [edx] movups xmm1,XMMWORD PTR [eax] DB 102,15,56,219,192 @@ -2168,17 +2412,28 @@ DB 102,15,56,219,201 movups XMMWORD PTR 16[eax],xmm0 movups XMMWORD PTR [edx-16],xmm1 cmp eax,edx - ja $L103dec_key_inverse + ja $L115dec_key_inverse movups xmm0,XMMWORD PTR [edx] DB 102,15,56,219,192 movups XMMWORD PTR [edx],xmm0 + pxor xmm0,xmm0 + pxor xmm1,xmm1 xor eax,eax -$L102dec_key_ret: +$L114dec_key_ret: ret _aesni_set_decrypt_key ENDP +ALIGN 64 +$Lkey_const:: +DD 202313229,202313229,202313229,202313229 +DD 67569157,67569157,67569157,67569157 +DD 1,1,1,1 +DD 27,27,27,27 DB 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 DB 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 DB 115,108,46,111,114,103,62,0 .text$ ENDS +.bss SEGMENT 'BSS' +COMM _OPENSSL_ia32cap_P:DWORD:4 +.bss ENDS END diff --git a/deps/openssl/doc/UPGRADING.md b/deps/openssl/doc/UPGRADING.md index 81f129c4326cb0..e9a7a7ce9ec6db 100644 --- a/deps/openssl/doc/UPGRADING.md +++ b/deps/openssl/doc/UPGRADING.md @@ -1,7 +1,7 @@ ## How to upgrade openssl library in io.js -This document describes the procedure to upgrade openssl from 1.0.1m -to 1.0.2a in io.js. +This document describes the procedure to upgrade openssl from 1.0.2a +to 1.0.2b in io.js. ### Build System and Upgrading Overview @@ -91,16 +91,16 @@ https://github.com/openssl/openssl/blob/OpenSSL_1_0_2-stable/crypto/sha/asm/sha5 otherwise asm_obsolete are used. The following is the detail instruction steps how to upgrade openssl -version from 1.0.1m to 1.0.2a in iojs. +version from 1.0.2a to 1.0.2b in iojs. ### 1. Replace openssl source in `deps/openssl/openssl` Remove old openssl sources in `deps/openssl/openssl` . Get original openssl sources from -https://www.openssl.org/source/openssl-1.0.2a.tar.gz and extract all +https://www.openssl.org/source/openssl-1.0.2b.tar.gz and extract all files into `deps/openssl/openssl` . ### 2. Apply private patches -There are three kinds of private patches to be applied in openssl-1.0.2a. +There are three kinds of private patches to be applied in openssl-1.0.2b. - The two fixes of assembly error on ia32 win32. masm is no longer supported in openssl. We should move to use nasm or yasm in future @@ -109,13 +109,8 @@ There are three kinds of private patches to be applied in openssl-1.0.2a. - The fix of openssl-cli built on win. Key press requirement of openssl-cli in win causes timeout failures of several tests. -- Backport patches for alt cert feature from openssl-1.1.x. Root certs - of 1024bit RSA key length were deprecated in io.js. When a tls - server has a cross root cert, io.js client leads CERT_UNTRUSTED - error because openssl does not find alternate cert chains. This fix - supports its feature but was made the current master which is - openssl-1.1.x. We backported them privately into openssl-1.0.2 on - iojs. +- A new `-no_rand_screen` option to openssl s_client. This makes test + time of test-tls-server-verify be much faster. ### 3. Replace openssl header files in `deps/openssl/openssl/include/openssl` all header files in `deps/openssl/openssl/include/openssl/*.h` are @@ -126,35 +121,10 @@ file such as #include "../../crypto/aes/aes.h" ```` ### 4. Change `opensslconf.h` so as to fit each platform. -The opensslconf.h in each target was created in advance by typing -`deps/openssl/openssl/Configure {target}` and copied -into `deps/openssl/conf/archs/{target}/opensslconf.h`. -`deps/openssl/conf/openssconf.h` includes each file according to its -target by checking pre-defined compiler macros. These can be generated -by using `deps/openssl/conf/Makefile` - -We should remove OPENSSL_CPUID_OBJ define in opensslconf.h because it -causes build error when --openss-no-asm option is specified. Instead, -the OPENSSL_CPUID_OBJ is defined in `deps/openssl/openssl.gypi` -according to the configure options. - -One fix of opensslconf.h is needed in 64-bit MacOS. -The current openssl release does not use RC4 asm since it explicitly -specified as `$asm=~s/rc4\-[^:]+//;` in -https://github.com/openssl/openssl/blob/OpenSSL_1_0_1-stable/Configure#L584 -But iojs has used RC4 asm on MacOS for long time. Fix type of RC4_INT -into `unsigned int` in opensslconf.h of darwin64-x86_64-cc to work on -the RC4 asm. +No change. ### 5. Update openssl.gyp and openssl.gypi -Sources, cflags and define parameters that depends on each target can -be obtained via `Configure TABLE`. Its list is put in the table of -[define and cflags changes in openssl-1.0.2a](openssl_define_list.pdf) - -There is no way to verify all necessary sources automatically. We can -only carefully look at the source list and compiled objects in -Makefile of openssl and compare the compiled objects that stored -stored under `out/Release/obj.target/openssl/deps/openssl/' in iojs. +No change. ### 6. ASM files for openssl We provide two sets of asm files. One is for the latest assembler @@ -163,7 +133,7 @@ and the other is the older one. ### 6.1. asm files for the latest compiler This was made in `deps/openssl/asm/Makefile` - Updated asm files for each platforms which are required in - openssl-1.0.2a. + openssl-1.0.2b. - Some perl files need CC and ASM envs. Added a check if these envs exist. Followed asm files are to be generated with CC=gcc and ASM=nasm on Linux. See diff --git a/deps/openssl/openssl/CHANGES b/deps/openssl/openssl/CHANGES index 56f7112faca062..84e076411be153 100644 --- a/deps/openssl/openssl/CHANGES +++ b/deps/openssl/openssl/CHANGES @@ -2,6 +2,90 @@ OpenSSL CHANGES _______________ + Changes between 1.0.2a and 1.0.2b [11 Jun 2015] + + *) Malformed ECParameters causes infinite loop + + When processing an ECParameters structure OpenSSL enters an infinite loop + if the curve specified is over a specially malformed binary polynomial + field. + + This can be used to perform denial of service against any + system which processes public keys, certificate requests or + certificates. This includes TLS clients and TLS servers with + client authentication enabled. + + This issue was reported to OpenSSL by Joseph Barr-Pixton. + (CVE-2015-1788) + [Andy Polyakov] + + *) Exploitable out-of-bounds read in X509_cmp_time + + X509_cmp_time does not properly check the length of the ASN1_TIME + string and can read a few bytes out of bounds. In addition, + X509_cmp_time accepts an arbitrary number of fractional seconds in the + time string. + + An attacker can use this to craft malformed certificates and CRLs of + various sizes and potentially cause a segmentation fault, resulting in + a DoS on applications that verify certificates or CRLs. TLS clients + that verify CRLs are affected. TLS clients and servers with client + authentication enabled may be affected if they use custom verification + callbacks. + + This issue was reported to OpenSSL by Robert Swiecki (Google), and + independently by Hanno Böck. + (CVE-2015-1789) + [Emilia Käsper] + + *) PKCS7 crash with missing EnvelopedContent + + The PKCS#7 parsing code does not handle missing inner EncryptedContent + correctly. An attacker can craft malformed ASN.1-encoded PKCS#7 blobs + with missing content and trigger a NULL pointer dereference on parsing. + + Applications that decrypt PKCS#7 data or otherwise parse PKCS#7 + structures from untrusted sources are affected. OpenSSL clients and + servers are not affected. + + This issue was reported to OpenSSL by Michal Zalewski (Google). + (CVE-2015-1790) + [Emilia Käsper] + + *) CMS verify infinite loop with unknown hash function + + When verifying a signedData message the CMS code can enter an infinite loop + if presented with an unknown hash function OID. This can be used to perform + denial of service against any system which verifies signedData messages using + the CMS code. + This issue was reported to OpenSSL by Johannes Bauer. + (CVE-2015-1792) + [Stephen Henson] + + *) Race condition handling NewSessionTicket + + If a NewSessionTicket is received by a multi-threaded client when attempting to + reuse a previous ticket then a race condition can occur potentially leading to + a double free of the ticket data. + (CVE-2015-1791) + [Matt Caswell] + + *) Removed support for the two export grade static DH ciphersuites + EXP-DH-RSA-DES-CBC-SHA and EXP-DH-DSS-DES-CBC-SHA. These two ciphersuites + were newly added (along with a number of other static DH ciphersuites) to + 1.0.2. However the two export ones have *never* worked since they were + introduced. It seems strange in any case to be adding new export + ciphersuites, and given "logjam" it also does not seem correct to fix them. + [Matt Caswell] + + *) Only support 256-bit or stronger elliptic curves with the + 'ecdh_auto' setting (server) or by default (client). Of supported + curves, prefer P-256 (both). + [Emilia Kasper] + + *) Reject DH handshakes with parameters shorter than 768 bits. + [Kurt Roeckx and Emilia Kasper] + Changes between 1.0.2 and 1.0.2a [19 Mar 2015] *) ClientHello sigalgs DoS fix @@ -373,7 +457,7 @@ [Steve Henson] *) Add new "valid_flags" field to CERT_PKEY structure which determines what - the certificate can be used for (if anything). Set valid_flags field + the certificate can be used for (if anything). Set valid_flags field in new tls1_check_chain function. Simplify ssl_set_cert_masks which used to have similar checks in it. @@ -416,7 +500,7 @@ *) Fix OCSP checking. [Rob Stradling and Ben Laurie] - *) Initial experimental support for explicitly trusted non-root CAs. + *) Initial experimental support for explicitly trusted non-root CAs. OpenSSL still tries to build a complete chain to a root but if an intermediate CA has a trust setting included that is used. The first setting is used: whether to trust (e.g., -addtrust option to the x509 @@ -467,7 +551,7 @@ to set list of supported curves. [Steve Henson] - *) New ctrls to retrieve supported signature algorithms and + *) New ctrls to retrieve supported signature algorithms and supported curve values as an array of NIDs. Extend openssl utility to print out received values. [Steve Henson] diff --git a/deps/openssl/openssl/Configure b/deps/openssl/openssl/Configure index f776e23359e283..d99eed7f9303c7 100755 --- a/deps/openssl/openssl/Configure +++ b/deps/openssl/openssl/Configure @@ -105,7 +105,24 @@ my $usage="Usage: Configure [no- ...] [enable- ...] [experimenta my $gcc_devteam_warn = "-Wall -pedantic -DPEDANTIC -Wno-long-long -Wsign-compare -Wmissing-prototypes -Wshadow -Wformat -Werror -DCRYPTO_MDEBUG_ALL -DCRYPTO_MDEBUG_ABORT -DREF_CHECK -DOPENSSL_NO_DEPRECATED"; -my $clang_disabled_warnings = "-Wno-language-extension-token -Wno-extended-offsetof -Wno-padded -Wno-shorten-64-to-32 -Wno-format-nonliteral -Wno-missing-noreturn -Wno-unused-parameter -Wno-sign-conversion -Wno-unreachable-code -Wno-conversion -Wno-documentation -Wno-missing-variable-declarations -Wno-cast-align -Wno-incompatible-pointer-types-discards-qualifiers -Wno-missing-variable-declarations -Wno-missing-field-initializers -Wno-unused-macros -Wno-disabled-macro-expansion -Wno-conditional-uninitialized -Wno-switch-enum"; +# TODO(openssl-team): fix problems and investigate if (at least) the following +# warnings can also be enabled: +# -Wconditional-uninitialized, -Wswitch-enum, -Wunused-macros, +# -Wmissing-field-initializers, -Wmissing-variable-declarations, +# -Wincompatible-pointer-types-discards-qualifiers, -Wcast-align, +# -Wunreachable-code -Wunused-parameter -Wlanguage-extension-token +# -Wextended-offsetof +my $clang_disabled_warnings = "-Wno-unused-parameter -Wno-missing-field-initializers -Wno-language-extension-token -Wno-extended-offsetof"; + +# These are used in addition to $gcc_devteam_warn when the compiler is clang. +# TODO(openssl-team): fix problems and investigate if (at least) the +# following warnings can also be enabled: -Wconditional-uninitialized, +# -Wswitch-enum, -Wunused-macros, -Wmissing-field-initializers, +# -Wmissing-variable-declarations, +# -Wincompatible-pointer-types-discards-qualifiers, -Wcast-align, +# -Wunreachable-code -Wunused-parameter -Wlanguage-extension-token +# -Wextended-offsetof +my $clang_devteam_warn = "-Wno-unused-parameter -Wno-missing-field-initializers -Wno-language-extension-token -Wno-extended-offsetof -Qunused-arguments"; my $strict_warnings = 0; @@ -233,12 +250,12 @@ my %table=( #### SPARC Solaris with GNU C setups "solaris-sparcv7-gcc","gcc:-O3 -fomit-frame-pointer -Wall -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${no_asm}:dlfcn:solaris-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", -"solaris-sparcv8-gcc","gcc:-mv8 -O3 -fomit-frame-pointer -Wall -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${sparcv8_asm}:dlfcn:solaris-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"solaris-sparcv8-gcc","gcc:-mcpu=v8 -O3 -fomit-frame-pointer -Wall -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${sparcv8_asm}:dlfcn:solaris-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", # -m32 should be safe to add as long as driver recognizes -mcpu=ultrasparc "solaris-sparcv9-gcc","gcc:-m32 -mcpu=ultrasparc -O3 -fomit-frame-pointer -Wall -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${sparcv9_asm}:dlfcn:solaris-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "solaris64-sparcv9-gcc","gcc:-m64 -mcpu=ultrasparc -O3 -Wall -DB_ENDIAN::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR:${sparcv9_asm}:dlfcn:solaris-shared:-fPIC:-m64 -shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::/64", #### -"debug-solaris-sparcv8-gcc","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG_ALL -O -g -mv8 -Wall -DB_ENDIAN::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${sparcv8_asm}:dlfcn:solaris-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"debug-solaris-sparcv8-gcc","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG_ALL -O -g -mcpu=v8 -Wall -DB_ENDIAN::-D_REENTRANT::-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${sparcv8_asm}:dlfcn:solaris-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "debug-solaris-sparcv9-gcc","gcc:-DBN_DEBUG -DREF_CHECK -DCONF_DEBUG -DBN_CTX_DEBUG -DCRYPTO_MDEBUG_ALL -DPEDANTIC -O -g -mcpu=ultrasparc -pedantic -ansi -Wall -Wshadow -Wno-long-long -D__EXTENSIONS__ -DB_ENDIAN -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-lsocket -lnsl -ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${sparcv9_asm}:dlfcn:solaris-shared:-fPIC:-shared:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", #### SPARC Solaris with Sun C setups @@ -255,7 +272,7 @@ my %table=( #### SunOS configs, assuming sparc for the gcc one. #"sunos-cc", "cc:-O4 -DNOPROTO -DNOCONST::(unknown):SUNOS::DES_UNROLL:${no_asm}::", -"sunos-gcc","gcc:-O3 -mv8 -Dssize_t=int::(unknown):SUNOS::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL DES_PTR DES_RISC1:${no_asm}::", +"sunos-gcc","gcc:-O3 -mcpu=v8 -Dssize_t=int::(unknown):SUNOS::BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL DES_PTR DES_RISC1:${no_asm}::", #### IRIX 5.x configs # -mips2 flag is added by ./config when appropriate. @@ -398,7 +415,7 @@ my %table=( "linux-ia64", "gcc:-DL_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-ia64-icc","icc:-DL_ENDIAN -O2 -Wall::-D_REENTRANT::-ldl -no_cpprt:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_INT:${ia64_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "linux-x86_64", "gcc:-m64 -DL_ENDIAN -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64", -"linux-x86_64-clang", "clang: -m64 -DL_ENDIAN -O3 -Weverything $clang_disabled_warnings -Qunused-arguments::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64", +"linux-x86_64-clang", "clang: -m64 -DL_ENDIAN -O3 -Wall -Wextra $clang_disabled_warnings -Qunused-arguments::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64", "linux-x86_64-icc", "icc:-DL_ENDIAN -O2::-D_REENTRANT::-ldl -no_cpprt:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64", "linux-x32", "gcc:-mx32 -DL_ENDIAN -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT RC4_CHUNK_LL DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-mx32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::x32", "linux64-s390x", "gcc:-m64 -DB_ENDIAN -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL:${s390x_asm}:64:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::64", @@ -419,7 +436,7 @@ my %table=( #### SPARC Linux setups # Ray Miller has patiently # assisted with debugging of following two configs. -"linux-sparcv8","gcc:-mv8 -DB_ENDIAN -O3 -fomit-frame-pointer -Wall -DBN_DIV2W::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${sparcv8_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"linux-sparcv8","gcc:-mcpu=v8 -DB_ENDIAN -O3 -fomit-frame-pointer -Wall -DBN_DIV2W::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${sparcv8_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", # it's a real mess with -mcpu=ultrasparc option under Linux, but # -Wa,-Av8plus should do the trick no matter what. "linux-sparcv9","gcc:-m32 -mcpu=ultrasparc -DB_ENDIAN -O3 -fomit-frame-pointer -Wall -Wa,-Av8plus -DBN_DIV2W::-D_REENTRANT:ULTRASPARC:-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR:${sparcv9_asm}:dlfcn:linux-shared:-fPIC:-m32:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", @@ -454,7 +471,7 @@ my %table=( "BSD-x86", "gcc:-DL_ENDIAN -O3 -fomit-frame-pointer -Wall::${BSDthreads}:::BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_asm}:a.out:dlfcn:bsd-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "BSD-x86-elf", "gcc:-DL_ENDIAN -O3 -fomit-frame-pointer -Wall::${BSDthreads}:::BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:bsd-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "debug-BSD-x86-elf", "gcc:-DL_ENDIAN -O3 -Wall -g::${BSDthreads}:::BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:bsd-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", -"BSD-sparcv8", "gcc:-DB_ENDIAN -O3 -mv8 -Wall::${BSDthreads}:::BN_LLONG RC2_CHAR RC4_INDEX DES_INT DES_UNROLL:${sparcv8_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"BSD-sparcv8", "gcc:-DB_ENDIAN -O3 -mcpu=v8 -Wall::${BSDthreads}:::BN_LLONG RC2_CHAR RC4_INDEX DES_INT DES_UNROLL:${sparcv8_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "BSD-generic64","gcc:-O3 -Wall::${BSDthreads}:::SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${no_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", # -DMD32_REG_T=int doesn't actually belong in sparc64 target, it @@ -462,7 +479,7 @@ my %table=( # triggered by RIPEMD160 code. "BSD-sparc64", "gcc:-DB_ENDIAN -O3 -DMD32_REG_T=int -Wall::${BSDthreads}:::BN_LLONG RC2_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC2 BF_PTR:${sparcv9_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "BSD-ia64", "gcc:-DL_ENDIAN -O3 -Wall::${BSDthreads}:::SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_UNROLL DES_INT:${ia64_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", -"BSD-x86_64", "gcc:-DL_ENDIAN -O3 -Wall::${BSDthreads}:::SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"BSD-x86_64", "cc:-DL_ENDIAN -O3 -Wall::${BSDthreads}:::SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "bsdi-elf-gcc", "gcc:-DPERL5 -DL_ENDIAN -fomit-frame-pointer -O3 -march=i486 -Wall::(unknown)::-ldl:BN_LLONG ${x86_gcc_des} ${x86_gcc_opts}:${x86_elf_asm}:dlfcn:bsd-gcc-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", @@ -1195,6 +1212,7 @@ my $cc = $fields[$idx_cc]; if($ENV{CC}) { $cc = $ENV{CC}; } + my $cflags = $fields[$idx_cflags]; my $unistd = $fields[$idx_unistd]; my $thread_cflag = $fields[$idx_thread_cflag]; @@ -1630,12 +1648,21 @@ if ($shlib_version_number =~ /(^[0-9]*)\.([0-9\.]*)/) if ($strict_warnings) { + my $ecc = $cc; + $ecc = "clang" if `$cc --version 2>&1` =~ /clang/; my $wopt; - die "ERROR --strict-warnings requires gcc or clang" unless ($cc =~ /gcc$/ or $cc =~ /clang$/); + die "ERROR --strict-warnings requires gcc or clang" unless ($ecc =~ /gcc$/ or $ecc =~ /clang$/); foreach $wopt (split /\s+/, $gcc_devteam_warn) { $cflags .= " $wopt" unless ($cflags =~ /$wopt/) } + if ($ecc eq "clang") + { + foreach $wopt (split /\s+/, $clang_devteam_warn) + { + $cflags .= " $wopt" unless ($cflags =~ /$wopt/) + } + } } open(IN,'crypto/objects/obj_xref.h - -apps/openssl-vms.cnf: apps/openssl.cnf - $(PERL) VMS/VMSify-conf.pl < apps/openssl.cnf > apps/openssl-vms.cnf - -crypto/bn/bn_prime.h: crypto/bn/bn_prime.pl - $(PERL) crypto/bn/bn_prime.pl >crypto/bn/bn_prime.h - - TABLE: Configure (echo 'Output of `Configure TABLE'"':"; \ $(PERL) Configure TABLE) > TABLE -update: errors stacks util/libeay.num util/ssleay.num crypto/objects/obj_dat.h crypto/objects/obj_xref.h apps/openssl-vms.cnf crypto/bn/bn_prime.h TABLE depend - # Build distribution tar-file. As the list of files returned by "find" is # pretty long, on several platforms a "too many arguments" error or similar # would occur. Therefore the list of files is temporarily stored into a file diff --git a/deps/openssl/openssl/Makefile.bak b/deps/openssl/openssl/Makefile.bak index 7c1a73ff51e975..71c8d9fc6cd98a 100644 --- a/deps/openssl/openssl/Makefile.bak +++ b/deps/openssl/openssl/Makefile.bak @@ -4,7 +4,7 @@ ## Makefile for OpenSSL ## -VERSION=1.0.2a-dev +VERSION=1.0.2b-dev MAJOR=1 MINOR=0.2 SHLIB_VERSION_NUMBER=1.0.0 @@ -90,7 +90,7 @@ PROCESSOR= # CPUID module collects small commonly used assembler snippets CPUID_OBJ= mem_clr.o BN_ASM= bn_asm.o -EC_ASM= +EC_ASM= DES_ENC= des_enc.o fcrypt_b.o AES_ENC= aes_core.o aes_cbc.o BF_ENC= bf_enc.o @@ -187,7 +187,7 @@ WTARFILE= $(NAME)-win.tar EXHEADER= e_os2.h HEADER= e_os.h -all: Makefile build_all openssl.pc libssl.pc libcrypto.pc +all: Makefile build_all # as we stick to -e, CLEARENV ensures that local variables in lower # Makefiles remain local and variable. $${VAR+VAR} is tribute to Korn @@ -273,7 +273,10 @@ reflect: sub_all: build_all build_all: build_libs build_apps build_tests build_tools -build_libs: build_crypto build_ssl build_engines +build_libs: build_libcrypto build_libssl openssl.pc + +build_libcrypto: build_crypto build_engines libcrypto.pc +build_libssl: build_ssl libssl.pc build_crypto: @dir=crypto; target=all; $(BUILD_ONE_CMD) @@ -459,6 +462,9 @@ tests: rehash report: @$(PERL) util/selftest.pl +update: errors stacks util/libeay.num util/ssleay.num TABLE + @set -e; target=update; $(RECURSIVE_BUILD_CMD) + depend: @set -e; target=depend; $(RECURSIVE_BUILD_CMD) @@ -483,26 +489,10 @@ util/libeay.num:: util/ssleay.num:: $(PERL) util/mkdef.pl ssl update -crypto/objects/obj_dat.h: crypto/objects/obj_dat.pl crypto/objects/obj_mac.h - $(PERL) crypto/objects/obj_dat.pl crypto/objects/obj_mac.h crypto/objects/obj_dat.h -crypto/objects/obj_mac.h: crypto/objects/objects.pl crypto/objects/objects.txt crypto/objects/obj_mac.num - $(PERL) crypto/objects/objects.pl crypto/objects/objects.txt crypto/objects/obj_mac.num crypto/objects/obj_mac.h -crypto/objects/obj_xref.h: crypto/objects/objxref.pl crypto/objects/obj_xref.txt crypto/objects/obj_mac.num - $(PERL) crypto/objects/objxref.pl crypto/objects/obj_mac.num crypto/objects/obj_xref.txt >crypto/objects/obj_xref.h - -apps/openssl-vms.cnf: apps/openssl.cnf - $(PERL) VMS/VMSify-conf.pl < apps/openssl.cnf > apps/openssl-vms.cnf - -crypto/bn/bn_prime.h: crypto/bn/bn_prime.pl - $(PERL) crypto/bn/bn_prime.pl >crypto/bn/bn_prime.h - - TABLE: Configure (echo 'Output of `Configure TABLE'"':"; \ $(PERL) Configure TABLE) > TABLE -update: errors stacks util/libeay.num util/ssleay.num crypto/objects/obj_dat.h crypto/objects/obj_xref.h apps/openssl-vms.cnf crypto/bn/bn_prime.h TABLE depend - # Build distribution tar-file. As the list of files returned by "find" is # pretty long, on several platforms a "too many arguments" error or similar # would occur. Therefore the list of files is temporarily stored into a file diff --git a/deps/openssl/openssl/Makefile.org b/deps/openssl/openssl/Makefile.org index b7a3f96c9f4ab5..9f4faae2df9521 100644 --- a/deps/openssl/openssl/Makefile.org +++ b/deps/openssl/openssl/Makefile.org @@ -185,7 +185,7 @@ WTARFILE= $(NAME)-win.tar EXHEADER= e_os2.h HEADER= e_os.h -all: Makefile build_all openssl.pc libssl.pc libcrypto.pc +all: Makefile build_all # as we stick to -e, CLEARENV ensures that local variables in lower # Makefiles remain local and variable. $${VAR+VAR} is tribute to Korn @@ -271,7 +271,10 @@ reflect: sub_all: build_all build_all: build_libs build_apps build_tests build_tools -build_libs: build_crypto build_ssl build_engines +build_libs: build_libcrypto build_libssl openssl.pc + +build_libcrypto: build_crypto build_engines libcrypto.pc +build_libssl: build_ssl libssl.pc build_crypto: @dir=crypto; target=all; $(BUILD_ONE_CMD) @@ -457,6 +460,9 @@ tests: rehash report: @$(PERL) util/selftest.pl +update: errors stacks util/libeay.num util/ssleay.num TABLE + @set -e; target=update; $(RECURSIVE_BUILD_CMD) + depend: @set -e; target=depend; $(RECURSIVE_BUILD_CMD) @@ -481,26 +487,10 @@ util/libeay.num:: util/ssleay.num:: $(PERL) util/mkdef.pl ssl update -crypto/objects/obj_dat.h: crypto/objects/obj_dat.pl crypto/objects/obj_mac.h - $(PERL) crypto/objects/obj_dat.pl crypto/objects/obj_mac.h crypto/objects/obj_dat.h -crypto/objects/obj_mac.h: crypto/objects/objects.pl crypto/objects/objects.txt crypto/objects/obj_mac.num - $(PERL) crypto/objects/objects.pl crypto/objects/objects.txt crypto/objects/obj_mac.num crypto/objects/obj_mac.h -crypto/objects/obj_xref.h: crypto/objects/objxref.pl crypto/objects/obj_xref.txt crypto/objects/obj_mac.num - $(PERL) crypto/objects/objxref.pl crypto/objects/obj_mac.num crypto/objects/obj_xref.txt >crypto/objects/obj_xref.h - -apps/openssl-vms.cnf: apps/openssl.cnf - $(PERL) VMS/VMSify-conf.pl < apps/openssl.cnf > apps/openssl-vms.cnf - -crypto/bn/bn_prime.h: crypto/bn/bn_prime.pl - $(PERL) crypto/bn/bn_prime.pl >crypto/bn/bn_prime.h - - TABLE: Configure (echo 'Output of `Configure TABLE'"':"; \ $(PERL) Configure TABLE) > TABLE -update: errors stacks util/libeay.num util/ssleay.num crypto/objects/obj_dat.h crypto/objects/obj_xref.h apps/openssl-vms.cnf crypto/bn/bn_prime.h TABLE depend - # Build distribution tar-file. As the list of files returned by "find" is # pretty long, on several platforms a "too many arguments" error or similar # would occur. Therefore the list of files is temporarily stored into a file diff --git a/deps/openssl/openssl/NEWS b/deps/openssl/openssl/NEWS index 682c583da56cd7..e6147935b28f89 100644 --- a/deps/openssl/openssl/NEWS +++ b/deps/openssl/openssl/NEWS @@ -5,6 +5,14 @@ This file gives a brief overview of the major changes between each OpenSSL release. For more details please read the CHANGES file. + Major changes between OpenSSL 1.0.2a and OpenSSL 1.0.2b [11 Jun 2015] + + o Malformed ECParameters causes infinite loop (CVE-2015-1788) + o Exploitable out-of-bounds read in X509_cmp_time (CVE-2015-1789) + o PKCS7 crash with missing EnvelopedContent (CVE-2015-1790) + o CMS verify infinite loop with unknown hash function (CVE-2015-1792) + o Race condition handling NewSessionTicket (CVE-2015-1791) + Major changes between OpenSSL 1.0.2 and OpenSSL 1.0.2a [19 Mar 2015] o OpenSSL 1.0.2 ClientHello sigalgs DoS fix (CVE-2015-0291) diff --git a/deps/openssl/openssl/README b/deps/openssl/openssl/README index 8ce093dd43ac3b..8de5c74d8ed46c 100644 --- a/deps/openssl/openssl/README +++ b/deps/openssl/openssl/README @@ -1,5 +1,5 @@ - OpenSSL 1.0.2a 19 Mar 2015 + OpenSSL 1.0.2b 11 Jun 2015 Copyright (c) 1998-2011 The OpenSSL Project Copyright (c) 1995-1998 Eric A. Young, Tim J. Hudson diff --git a/deps/openssl/openssl/apps/Makefile b/deps/openssl/openssl/apps/Makefile index 72657ea658733c..cafe55458ddc14 100644 --- a/deps/openssl/openssl/apps/Makefile +++ b/deps/openssl/openssl/apps/Makefile @@ -94,6 +94,9 @@ req: sreq.o $(A_OBJ) $(DLIBCRYPTO) sreq.o: req.c $(CC) -c $(INCLUDES) $(CFLAG) -o sreq.o req.c +openssl-vms.cnf: openssl.cnf + $(PERL) $(TOP)/VMS/VMSify-conf.pl < openssl.cnf > openssl-vms.cnf + files: $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO @@ -127,12 +130,12 @@ links: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff -depend: - @if [ -z "$(THIS)" ]; then \ - $(MAKE) -f $(TOP)/Makefile reflect THIS=$@; \ - else \ - $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(SRC); \ - fi +update: openssl-vms.cnf local_depend + +depend: local_depend + @if [ -z "$(THIS)" ]; then $(MAKE) -f $(TOP)/Makefile reflect THIS=$@; fi +local_depend: + @[ -z "$(THIS)" ] || $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(SRC); \ dclean: $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKEFILE) >Makefile.new @@ -144,10 +147,10 @@ clean: rm -f req $(DLIBSSL): - (cd ..; $(MAKE) DIRS=ssl all) + (cd ..; $(MAKE) build_libssl) $(DLIBCRYPTO): - (cd ..; $(MAKE) DIRS=crypto all) + (cd ..; $(MAKE) build_libcrypto) $(EXE): progs.h $(E_OBJ) $(PROGRAM).o $(DLIBCRYPTO) $(DLIBSSL) $(RM) $(EXE) diff --git a/deps/openssl/openssl/apps/apps.c b/deps/openssl/openssl/apps/apps.c index b0acbc7c14fa29..7478fc379a55da 100644 --- a/deps/openssl/openssl/apps/apps.c +++ b/deps/openssl/openssl/apps/apps.c @@ -574,7 +574,7 @@ int password_callback(char *buf, int bufsiz, int verify, PW_CB_DATA *cb_tmp) char *prompt = NULL; prompt = UI_construct_prompt(ui, "pass phrase", prompt_info); - if(!prompt) { + if (!prompt) { BIO_printf(bio_err, "Out of memory\n"); UI_free(ui); return 0; @@ -588,7 +588,7 @@ int password_callback(char *buf, int bufsiz, int verify, PW_CB_DATA *cb_tmp) PW_MIN_LENGTH, bufsiz - 1); if (ok >= 0 && verify) { buff = (char *)OPENSSL_malloc(bufsiz); - if(!buff) { + if (!buff) { BIO_printf(bio_err, "Out of memory\n"); UI_free(ui); OPENSSL_free(prompt); @@ -2371,6 +2371,8 @@ int args_verify(char ***pargs, int *pargc, flags |= X509_V_FLAG_SUITEB_192_LOS; else if (!strcmp(arg, "-partial_chain")) flags |= X509_V_FLAG_PARTIAL_CHAIN; + else if (!strcmp(arg, "-no_alt_chains")) + flags |= X509_V_FLAG_NO_ALT_CHAINS; else return 0; diff --git a/deps/openssl/openssl/apps/asn1pars.c b/deps/openssl/openssl/apps/asn1pars.c index 7a0f1694328ecc..11b07875943be8 100644 --- a/deps/openssl/openssl/apps/asn1pars.c +++ b/deps/openssl/openssl/apps/asn1pars.c @@ -375,7 +375,7 @@ static int do_generate(BIO *bio, char *genstr, char *genconf, BUF_MEM *buf) { CONF *cnf = NULL; int len; - long errline; + long errline = 0; unsigned char *p; ASN1_TYPE *atyp = NULL; diff --git a/deps/openssl/openssl/apps/ca.c b/deps/openssl/openssl/apps/ca.c index d64ec4f14ce591..3b7336c0466e5d 100644 --- a/deps/openssl/openssl/apps/ca.c +++ b/deps/openssl/openssl/apps/ca.c @@ -563,7 +563,7 @@ int MAIN(int argc, char **argv) #ifdef OPENSSL_SYS_VMS len = strlen(s) + sizeof(CONFIG_FILE); tofree = OPENSSL_malloc(len); - if(!tofree) { + if (!tofree) { BIO_printf(bio_err, "Out of memory\n"); goto err; } @@ -571,7 +571,7 @@ int MAIN(int argc, char **argv) #else len = strlen(s) + sizeof(CONFIG_FILE) + 1; tofree = OPENSSL_malloc(len); - if(!tofree) { + if (!tofree) { BIO_printf(bio_err, "Out of memory\n"); goto err; } @@ -2821,7 +2821,7 @@ int unpack_revinfo(ASN1_TIME **prevtm, int *preason, ASN1_OBJECT **phold, ASN1_GENERALIZEDTIME *comp_time = NULL; tmp = BUF_strdup(str); - if(!tmp) { + if (!tmp) { BIO_printf(bio_err, "memory allocation failure\n"); goto err; } @@ -2843,7 +2843,7 @@ int unpack_revinfo(ASN1_TIME **prevtm, int *preason, ASN1_OBJECT **phold, if (prevtm) { *prevtm = ASN1_UTCTIME_new(); - if(!*prevtm) { + if (!*prevtm) { BIO_printf(bio_err, "memory allocation failure\n"); goto err; } @@ -2887,7 +2887,7 @@ int unpack_revinfo(ASN1_TIME **prevtm, int *preason, ASN1_OBJECT **phold, goto err; } comp_time = ASN1_GENERALIZEDTIME_new(); - if(!comp_time) { + if (!comp_time) { BIO_printf(bio_err, "memory allocation failure\n"); goto err; } diff --git a/deps/openssl/openssl/apps/cms.c b/deps/openssl/openssl/apps/cms.c index 2c922537c5c0b6..60479374cdf28e 100644 --- a/deps/openssl/openssl/apps/cms.c +++ b/deps/openssl/openssl/apps/cms.c @@ -463,7 +463,7 @@ int MAIN(int argc, char **argv) if (key_param == NULL || key_param->idx != keyidx) { cms_key_param *nparam; nparam = OPENSSL_malloc(sizeof(cms_key_param)); - if(!nparam) { + if (!nparam) { BIO_printf(bio_err, "Out of memory\n"); goto argerr; } @@ -645,6 +645,8 @@ int MAIN(int argc, char **argv) BIO_printf(bio_err, "-CApath dir trusted certificates directory\n"); BIO_printf(bio_err, "-CAfile file trusted certificates file\n"); + BIO_printf(bio_err, + "-no_alt_chains only ever use the first certificate chain found\n"); BIO_printf(bio_err, "-crl_check check revocation status of signer's certificate using CRLs\n"); BIO_printf(bio_err, diff --git a/deps/openssl/openssl/apps/enc.c b/deps/openssl/openssl/apps/enc.c index 5c2cf7a4acf3e2..7b7c70b132d722 100644 --- a/deps/openssl/openssl/apps/enc.c +++ b/deps/openssl/openssl/apps/enc.c @@ -548,9 +548,14 @@ int MAIN(int argc, char **argv) else OPENSSL_cleanse(str, strlen(str)); } - if ((hiv != NULL) && !set_hex(hiv, iv, sizeof iv)) { - BIO_printf(bio_err, "invalid hex iv value\n"); - goto end; + if (hiv != NULL) { + int siz = EVP_CIPHER_iv_length(cipher); + if (siz == 0) { + BIO_printf(bio_err, "warning: iv not use by this cipher\n"); + } else if (!set_hex(hiv, iv, sizeof iv)) { + BIO_printf(bio_err, "invalid hex iv value\n"); + goto end; + } } if ((hiv == NULL) && (str == NULL) && EVP_CIPHER_iv_length(cipher) != 0) { @@ -562,7 +567,7 @@ int MAIN(int argc, char **argv) BIO_printf(bio_err, "iv undefined\n"); goto end; } - if ((hkey != NULL) && !set_hex(hkey, key, sizeof key)) { + if ((hkey != NULL) && !set_hex(hkey, key, EVP_CIPHER_key_length(cipher))) { BIO_printf(bio_err, "invalid hex key value\n"); goto end; } diff --git a/deps/openssl/openssl/apps/ocsp.c b/deps/openssl/openssl/apps/ocsp.c index ebb3732cd76f37..b858b8d3ee0023 100644 --- a/deps/openssl/openssl/apps/ocsp.c +++ b/deps/openssl/openssl/apps/ocsp.c @@ -535,6 +535,8 @@ int MAIN(int argc, char **argv) "-CApath dir trusted certificates directory\n"); BIO_printf(bio_err, "-CAfile file trusted certificates file\n"); + BIO_printf(bio_err, + "-no_alt_chains only ever use the first certificate chain found\n"); BIO_printf(bio_err, "-VAfile file validator certificates file\n"); BIO_printf(bio_err, diff --git a/deps/openssl/openssl/apps/s_cb.c b/deps/openssl/openssl/apps/s_cb.c index f6e6bcd765e1db..dd3aa74e02afd4 100644 --- a/deps/openssl/openssl/apps/s_cb.c +++ b/deps/openssl/openssl/apps/s_cb.c @@ -111,6 +111,7 @@ #include #include +#include /* for memcpy() and strcmp() */ #define USE_SOCKETS #define NON_MAIN #include "apps.h" @@ -456,7 +457,7 @@ int ssl_print_curves(BIO *out, SSL *s, int noshared) if (ncurves <= 0) return 1; curves = OPENSSL_malloc(ncurves * sizeof(int)); - if(!curves) { + if (!curves) { BIO_puts(out, "Malloc error getting supported curves\n"); return 0; } @@ -1012,7 +1013,7 @@ int MS_CALLBACK generate_cookie_callback(SSL *ssl, unsigned char *cookie, /* Initialize a random secret */ if (!cookie_initialized) { - if (!RAND_bytes(cookie_secret, COOKIE_SECRET_LENGTH)) { + if (RAND_bytes(cookie_secret, COOKIE_SECRET_LENGTH) <= 0) { BIO_printf(bio_err, "error setting random cookie secret\n"); return 0; } diff --git a/deps/openssl/openssl/apps/s_client.c b/deps/openssl/openssl/apps/s_client.c index d5297d3070de92..d89f2c51066574 100644 --- a/deps/openssl/openssl/apps/s_client.c +++ b/deps/openssl/openssl/apps/s_client.c @@ -339,6 +339,8 @@ static void sc_usage(void) " -pass arg - private key file pass phrase source\n"); BIO_printf(bio_err, " -CApath arg - PEM format directory of CA's\n"); BIO_printf(bio_err, " -CAfile arg - PEM format file of CA's\n"); + BIO_printf(bio_err, + " -no_alt_chains - only ever use the first certificate chain found\n"); BIO_printf(bio_err, " -reconnect - Drop and re-make the connection with the same Session-ID\n"); BIO_printf(bio_err, @@ -572,7 +574,7 @@ static char *MS_CALLBACK ssl_give_srp_client_pwd_cb(SSL *s, void *arg) PW_CB_DATA cb_tmp; int l; - if(!pass) { + if (!pass) { BIO_printf(bio_err, "Malloc failure\n"); return NULL; } @@ -1352,13 +1354,12 @@ int MAIN(int argc, char **argv) SSL_CTX_set_verify(ctx, verify, verify_callback); - if ((!SSL_CTX_load_verify_locations(ctx, CAfile, CApath)) || - (!SSL_CTX_set_default_verify_paths(ctx))) { - /* - * BIO_printf(bio_err,"error setting default verify locations\n"); - */ + if ((CAfile || CApath) + && !SSL_CTX_load_verify_locations(ctx, CAfile, CApath)) { + ERR_print_errors(bio_err); + } + if (!SSL_CTX_set_default_verify_paths(ctx)) { ERR_print_errors(bio_err); - /* goto end; */ } ssl_ctx_add_crls(ctx, crls, crl_download); diff --git a/deps/openssl/openssl/apps/s_server.c b/deps/openssl/openssl/apps/s_server.c index a8491acfdd3af0..acef382c2c4bb0 100644 --- a/deps/openssl/openssl/apps/s_server.c +++ b/deps/openssl/openssl/apps/s_server.c @@ -222,7 +222,7 @@ static void init_session_cache_ctx(SSL_CTX *sctx); static void free_sessions(void); #ifndef OPENSSL_NO_DH static DH *load_dh_param(const char *dhfile); -static DH *get_dh512(void); +static DH *get_dh2048(void); #endif #ifdef MONOLITH @@ -230,30 +230,48 @@ static void s_server_init(void); #endif #ifndef OPENSSL_NO_DH -static unsigned char dh512_p[] = { - 0xDA, 0x58, 0x3C, 0x16, 0xD9, 0x85, 0x22, 0x89, 0xD0, 0xE4, 0xAF, 0x75, - 0x6F, 0x4C, 0xCA, 0x92, 0xDD, 0x4B, 0xE5, 0x33, 0xB8, 0x04, 0xFB, 0x0F, - 0xED, 0x94, 0xEF, 0x9C, 0x8A, 0x44, 0x03, 0xED, 0x57, 0x46, 0x50, 0xD3, - 0x69, 0x99, 0xDB, 0x29, 0xD7, 0x76, 0x27, 0x6B, 0xA2, 0xD3, 0xD4, 0x12, - 0xE2, 0x18, 0xF4, 0xDD, 0x1E, 0x08, 0x4C, 0xF6, 0xD8, 0x00, 0x3E, 0x7C, - 0x47, 0x74, 0xE8, 0x33, +static unsigned char dh2048_p[] = { + 0xF6,0x42,0x57,0xB7,0x08,0x7F,0x08,0x17,0x72,0xA2,0xBA,0xD6, + 0xA9,0x42,0xF3,0x05,0xE8,0xF9,0x53,0x11,0x39,0x4F,0xB6,0xF1, + 0x6E,0xB9,0x4B,0x38,0x20,0xDA,0x01,0xA7,0x56,0xA3,0x14,0xE9, + 0x8F,0x40,0x55,0xF3,0xD0,0x07,0xC6,0xCB,0x43,0xA9,0x94,0xAD, + 0xF7,0x4C,0x64,0x86,0x49,0xF8,0x0C,0x83,0xBD,0x65,0xE9,0x17, + 0xD4,0xA1,0xD3,0x50,0xF8,0xF5,0x59,0x5F,0xDC,0x76,0x52,0x4F, + 0x3D,0x3D,0x8D,0xDB,0xCE,0x99,0xE1,0x57,0x92,0x59,0xCD,0xFD, + 0xB8,0xAE,0x74,0x4F,0xC5,0xFC,0x76,0xBC,0x83,0xC5,0x47,0x30, + 0x61,0xCE,0x7C,0xC9,0x66,0xFF,0x15,0xF9,0xBB,0xFD,0x91,0x5E, + 0xC7,0x01,0xAA,0xD3,0x5B,0x9E,0x8D,0xA0,0xA5,0x72,0x3A,0xD4, + 0x1A,0xF0,0xBF,0x46,0x00,0x58,0x2B,0xE5,0xF4,0x88,0xFD,0x58, + 0x4E,0x49,0xDB,0xCD,0x20,0xB4,0x9D,0xE4,0x91,0x07,0x36,0x6B, + 0x33,0x6C,0x38,0x0D,0x45,0x1D,0x0F,0x7C,0x88,0xB3,0x1C,0x7C, + 0x5B,0x2D,0x8E,0xF6,0xF3,0xC9,0x23,0xC0,0x43,0xF0,0xA5,0x5B, + 0x18,0x8D,0x8E,0xBB,0x55,0x8C,0xB8,0x5D,0x38,0xD3,0x34,0xFD, + 0x7C,0x17,0x57,0x43,0xA3,0x1D,0x18,0x6C,0xDE,0x33,0x21,0x2C, + 0xB5,0x2A,0xFF,0x3C,0xE1,0xB1,0x29,0x40,0x18,0x11,0x8D,0x7C, + 0x84,0xA7,0x0A,0x72,0xD6,0x86,0xC4,0x03,0x19,0xC8,0x07,0x29, + 0x7A,0xCA,0x95,0x0C,0xD9,0x96,0x9F,0xAB,0xD0,0x0A,0x50,0x9B, + 0x02,0x46,0xD3,0x08,0x3D,0x66,0xA4,0x5D,0x41,0x9F,0x9C,0x7C, + 0xBD,0x89,0x4B,0x22,0x19,0x26,0xBA,0xAB,0xA2,0x5E,0xC3,0x55, + 0xE9,0x32,0x0B,0x3B, }; -static unsigned char dh512_g[] = { +static unsigned char dh2048_g[] = { 0x02, }; -static DH *get_dh512(void) +DH *get_dh2048() { - DH *dh = NULL; + DH *dh; if ((dh = DH_new()) == NULL) - return (NULL); - dh->p = BN_bin2bn(dh512_p, sizeof(dh512_p), NULL); - dh->g = BN_bin2bn(dh512_g, sizeof(dh512_g), NULL); - if ((dh->p == NULL) || (dh->g == NULL)) - return (NULL); - return (dh); + return NULL; + dh->p=BN_bin2bn(dh2048_p, sizeof(dh2048_p), NULL); + dh->g=BN_bin2bn(dh2048_g, sizeof(dh2048_g), NULL); + if (dh->p == NULL || dh->g == NULL) { + DH_free(dh); + return NULL; + } + return dh; } #endif @@ -553,6 +571,8 @@ static void sv_usage(void) BIO_printf(bio_err, " -state - Print the SSL states\n"); BIO_printf(bio_err, " -CApath arg - PEM format directory of CA's\n"); BIO_printf(bio_err, " -CAfile arg - PEM format file of CA's\n"); + BIO_printf(bio_err, + " -no_alt_chains - only ever use the first certificate chain found\n"); BIO_printf(bio_err, " -nocert - Don't use any certificates (Anon-DH)\n"); BIO_printf(bio_err, @@ -754,7 +774,7 @@ static int ebcdic_write(BIO *b, const char *in, int inl) num = inl; wbuf = (EBCDIC_OUTBUFF *) OPENSSL_malloc(sizeof(EBCDIC_OUTBUFF) + num); - if(!wbuf) + if (!wbuf) return 0; OPENSSL_free(b->ptr); @@ -1865,7 +1885,11 @@ int MAIN(int argc, char *argv[]) BIO_printf(bio_s_out, "Setting temp DH parameters\n"); } else { BIO_printf(bio_s_out, "Using default temp DH parameters\n"); - dh = get_dh512(); + dh = get_dh2048(); + if (dh == NULL) { + ERR_print_errors(bio_err); + goto end; + } } (void)BIO_flush(bio_s_out); @@ -2453,8 +2477,10 @@ static int sv_body(char *hostname, int s, int stype, unsigned char *context) ret = 1; goto err; } - l += k; - i -= k; + if (k > 0) { + l += k; + i -= k; + } if (i <= 0) break; } @@ -3281,7 +3307,8 @@ static int generate_session_id(const SSL *ssl, unsigned char *id, { unsigned int count = 0; do { - RAND_pseudo_bytes(id, *id_len); + if (RAND_pseudo_bytes(id, *id_len) < 0) + return 0; /* * Prefix the session_id with the required prefix. NB: If our prefix * is too long, clip it - but there will be worse effects anyway, eg. @@ -3323,7 +3350,7 @@ static int add_session(SSL *ssl, SSL_SESSION *session) unsigned char *p; sess = OPENSSL_malloc(sizeof(simple_ssl_session)); - if(!sess) { + if (!sess) { BIO_printf(bio_err, "Out of memory adding session to external cache\n"); return 0; } @@ -3334,12 +3361,12 @@ static int add_session(SSL *ssl, SSL_SESSION *session) sess->id = BUF_memdup(SSL_SESSION_get_id(session, NULL), sess->idlen); sess->der = OPENSSL_malloc(sess->derlen); - if(!sess->id || !sess->der) { + if (!sess->id || !sess->der) { BIO_printf(bio_err, "Out of memory adding session to external cache\n"); - if(sess->id) + if (sess->id) OPENSSL_free(sess->id); - if(sess->der) + if (sess->der) OPENSSL_free(sess->der); OPENSSL_free(sess); return 0; diff --git a/deps/openssl/openssl/apps/s_time.c b/deps/openssl/openssl/apps/s_time.c index a40997a22e532d..38788f7130c7cc 100644 --- a/deps/openssl/openssl/apps/s_time.c +++ b/deps/openssl/openssl/apps/s_time.c @@ -302,7 +302,7 @@ static int parseArgs(int argc, char **argv) if (--argc < 1) goto bad; maxTime = atoi(*(++argv)); - if(maxTime <= 0) { + if (maxTime <= 0) { BIO_printf(bio_err, "time must be > 0\n"); badop = 1; } diff --git a/deps/openssl/openssl/apps/smime.c b/deps/openssl/openssl/apps/smime.c index 764509f23f47c3..6044ccf5f5905f 100644 --- a/deps/openssl/openssl/apps/smime.c +++ b/deps/openssl/openssl/apps/smime.c @@ -441,6 +441,8 @@ int MAIN(int argc, char **argv) BIO_printf(bio_err, "-CApath dir trusted certificates directory\n"); BIO_printf(bio_err, "-CAfile file trusted certificates file\n"); + BIO_printf(bio_err, + "-no_alt_chains only ever use the first certificate chain found\n"); BIO_printf(bio_err, "-crl_check check revocation status of signer's certificate using CRLs\n"); BIO_printf(bio_err, diff --git a/deps/openssl/openssl/apps/speed.c b/deps/openssl/openssl/apps/speed.c index 7b1acc18994d8d..3697b71ec18b45 100644 --- a/deps/openssl/openssl/apps/speed.c +++ b/deps/openssl/openssl/apps/speed.c @@ -2775,7 +2775,7 @@ static void multiblock_speed(const EVP_CIPHER *evp_cipher) inp = OPENSSL_malloc(mblengths[num - 1]); out = OPENSSL_malloc(mblengths[num - 1] + 1024); - if(!inp || !out) { + if (!inp || !out) { BIO_printf(bio_err,"Out of memory\n"); goto end; } @@ -2791,7 +2791,7 @@ static void multiblock_speed(const EVP_CIPHER *evp_cipher) print_message(alg_name, 0, mblengths[j]); Time_F(START); for (count = 0, run = 1; run && count < 0x7fffffff; count++) { - unsigned char aad[13]; + unsigned char aad[EVP_AEAD_TLS1_AAD_LEN]; EVP_CTRL_TLS1_1_MULTIBLOCK_PARAM mb_param; size_t len = mblengths[j]; int packlen; @@ -2826,7 +2826,8 @@ static void multiblock_speed(const EVP_CIPHER *evp_cipher) aad[11] = len >> 8; aad[12] = len; pad = EVP_CIPHER_CTX_ctrl(&ctx, - EVP_CTRL_AEAD_TLS1_AAD, 13, aad); + EVP_CTRL_AEAD_TLS1_AAD, + EVP_AEAD_TLS1_AAD_LEN, aad); EVP_Cipher(&ctx, out, inp, len + pad); } } @@ -2865,9 +2866,9 @@ static void multiblock_speed(const EVP_CIPHER *evp_cipher) } end: - if(inp) + if (inp) OPENSSL_free(inp); - if(out) + if (out) OPENSSL_free(out); } #endif diff --git a/deps/openssl/openssl/apps/srp.c b/deps/openssl/openssl/apps/srp.c index c679448ee7c55b..c0ff4171cabf8e 100644 --- a/deps/openssl/openssl/apps/srp.c +++ b/deps/openssl/openssl/apps/srp.c @@ -435,7 +435,7 @@ int MAIN(int argc, char **argv) # ifdef OPENSSL_SYS_VMS len = strlen(s) + sizeof(CONFIG_FILE); tofree = OPENSSL_malloc(len); - if(!tofree) { + if (!tofree) { BIO_printf(bio_err, "Out of memory\n"); goto err; } @@ -443,7 +443,7 @@ int MAIN(int argc, char **argv) # else len = strlen(s) + sizeof(CONFIG_FILE) + 1; tofree = OPENSSL_malloc(len); - if(!tofree) { + if (!tofree) { BIO_printf(bio_err, "Out of memory\n"); goto err; } diff --git a/deps/openssl/openssl/apps/verify.c b/deps/openssl/openssl/apps/verify.c index b3ba53d97f02ae..78e729fc890f99 100644 --- a/deps/openssl/openssl/apps/verify.c +++ b/deps/openssl/openssl/apps/verify.c @@ -232,7 +232,7 @@ int MAIN(int argc, char **argv) if (ret == 1) { BIO_printf(bio_err, "usage: verify [-verbose] [-CApath path] [-CAfile file] [-purpose purpose] [-crl_check]"); - BIO_printf(bio_err, " [-attime timestamp]"); + BIO_printf(bio_err, " [-no_alt_chains] [-attime timestamp]"); #ifndef OPENSSL_NO_ENGINE BIO_printf(bio_err, " [-engine e]"); #endif diff --git a/deps/openssl/openssl/crypto/Makefile b/deps/openssl/openssl/crypto/Makefile index 9a39e934ad5c8e..7869996a9c074a 100644 --- a/deps/openssl/openssl/crypto/Makefile +++ b/deps/openssl/openssl/crypto/Makefile @@ -125,12 +125,17 @@ install: lint: @target=lint; $(RECURSIVE_MAKE) -depend: +update: local_depend + @[ -z "$(THIS)" ] || (set -e; target=update; $(RECURSIVE_MAKE) ) + @if [ -z "$(THIS)" ]; then $(MAKE) -f $(TOP)/Makefile reflect THIS=$@; fi + +depend: local_depend + @[ -z "$(THIS)" ] || (set -e; target=depend; $(RECURSIVE_MAKE) ) + @if [ -z "$(THIS)" ]; then $(MAKE) -f $(TOP)/Makefile reflect THIS=$@; fi +local_depend: @[ -z "$(THIS)" -o -f buildinf.h ] || touch buildinf.h # fake buildinf.h if it does not exist @[ -z "$(THIS)" ] || $(MAKEDEPEND) -- $(CFLAG) $(INCLUDE) $(DEPFLAG) -- $(PROGS) $(LIBSRC) @[ -z "$(THIS)" -o -s buildinf.h ] || rm buildinf.h - @[ -z "$(THIS)" ] || (set -e; target=depend; $(RECURSIVE_MAKE) ) - @if [ -z "$(THIS)" ]; then $(MAKE) -f $(TOP)/Makefile reflect THIS=$@; fi clean: rm -f buildinf.h *.s *.o */*.o *.obj lib tags core .pure .nfs* *.old *.bak fluff diff --git a/deps/openssl/openssl/crypto/aes/Makefile b/deps/openssl/openssl/crypto/aes/Makefile index b94ca72a41a343..e825c140194f60 100644 --- a/deps/openssl/openssl/crypto/aes/Makefile +++ b/deps/openssl/openssl/crypto/aes/Makefile @@ -122,6 +122,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/aes/asm/aesni-mb-x86_64.pl b/deps/openssl/openssl/crypto/aes/asm/aesni-mb-x86_64.pl index 9f7bb7c163902f..33b1aed3c0b4c3 100644 --- a/deps/openssl/openssl/crypto/aes/asm/aesni-mb-x86_64.pl +++ b/deps/openssl/openssl/crypto/aes/asm/aesni-mb-x86_64.pl @@ -127,7 +127,7 @@ movaps %xmm10,0x40(%rsp) movaps %xmm11,0x50(%rsp) movaps %xmm12,0x60(%rsp) - movaps %xmm13,-0x68(%rax) # not used, saved to share se_handler + movaps %xmm13,-0x68(%rax) # not used, saved to share se_handler movaps %xmm14,-0x58(%rax) movaps %xmm15,-0x48(%rax) ___ @@ -301,9 +301,9 @@ movups @out[0],-16(@outptr[0],$offset) pxor @inp[0],@out[0] - movups @out[1],-16(@outptr[1],$offset) + movups @out[1],-16(@outptr[1],$offset) pxor @inp[1],@out[1] - movups @out[2],-16(@outptr[2],$offset) + movups @out[2],-16(@outptr[2],$offset) pxor @inp[2],@out[2] movups @out[3],-16(@outptr[3],$offset) pxor @inp[3],@out[3] @@ -386,7 +386,7 @@ movaps %xmm10,0x40(%rsp) movaps %xmm11,0x50(%rsp) movaps %xmm12,0x60(%rsp) - movaps %xmm13,-0x68(%rax) # not used, saved to share se_handler + movaps %xmm13,-0x68(%rax) # not used, saved to share se_handler movaps %xmm14,-0x58(%rax) movaps %xmm15,-0x48(%rax) ___ @@ -556,10 +556,10 @@ movups @out[0],-16(@outptr[0],$offset) movdqu (@inptr[0],$offset),@out[0] - movups @out[1],-16(@outptr[1],$offset) + movups @out[1],-16(@outptr[1],$offset) movdqu (@inptr[1],$offset),@out[1] pxor $zero,@out[0] - movups @out[2],-16(@outptr[2],$offset) + movups @out[2],-16(@outptr[2],$offset) movdqu (@inptr[2],$offset),@out[2] pxor $zero,@out[1] movups @out[3],-16(@outptr[3],$offset) @@ -828,10 +828,10 @@ vmovups @out[0],-16(@ptr[0]) # write output sub $offset,@ptr[0] # switch to input vpxor 0x00($offload),@out[0],@out[0] - vmovups @out[1],-16(@ptr[1]) + vmovups @out[1],-16(@ptr[1]) sub `64+1*8`(%rsp),@ptr[1] vpxor 0x10($offload),@out[1],@out[1] - vmovups @out[2],-16(@ptr[2]) + vmovups @out[2],-16(@ptr[2]) sub `64+2*8`(%rsp),@ptr[2] vpxor 0x20($offload),@out[2],@out[2] vmovups @out[3],-16(@ptr[3]) @@ -840,10 +840,10 @@ vmovups @out[4],-16(@ptr[4]) sub `64+4*8`(%rsp),@ptr[4] vpxor @inp[0],@out[4],@out[4] - vmovups @out[5],-16(@ptr[5]) + vmovups @out[5],-16(@ptr[5]) sub `64+5*8`(%rsp),@ptr[5] vpxor @inp[1],@out[5],@out[5] - vmovups @out[6],-16(@ptr[6]) + vmovups @out[6],-16(@ptr[6]) sub `64+6*8`(%rsp),@ptr[6] vpxor @inp[2],@out[6],@out[6] vmovups @out[7],-16(@ptr[7]) @@ -1121,12 +1121,12 @@ sub $offset,@ptr[0] # switch to input vmovdqu 128+0(%rsp),@out[0] vpxor 0x70($offload),@out[7],@out[7] - vmovups @out[1],-16(@ptr[1]) + vmovups @out[1],-16(@ptr[1]) sub `64+1*8`(%rsp),@ptr[1] vmovdqu @out[0],0x00($offload) vpxor $zero,@out[0],@out[0] vmovdqu 128+16(%rsp),@out[1] - vmovups @out[2],-16(@ptr[2]) + vmovups @out[2],-16(@ptr[2]) sub `64+2*8`(%rsp),@ptr[2] vmovdqu @out[1],0x10($offload) vpxor $zero,@out[1],@out[1] @@ -1142,11 +1142,11 @@ vpxor $zero,@out[3],@out[3] vmovdqu @inp[0],0x40($offload) vpxor @inp[0],$zero,@out[4] - vmovups @out[5],-16(@ptr[5]) + vmovups @out[5],-16(@ptr[5]) sub `64+5*8`(%rsp),@ptr[5] vmovdqu @inp[1],0x50($offload) vpxor @inp[1],$zero,@out[5] - vmovups @out[6],-16(@ptr[6]) + vmovups @out[6],-16(@ptr[6]) sub `64+6*8`(%rsp),@ptr[6] vmovdqu @inp[2],0x60($offload) vpxor @inp[2],$zero,@out[6] diff --git a/deps/openssl/openssl/crypto/aes/asm/aesni-sha1-x86_64.pl b/deps/openssl/openssl/crypto/aes/asm/aesni-sha1-x86_64.pl index a31e49b8bea54d..97992adca7c348 100644 --- a/deps/openssl/openssl/crypto/aes/asm/aesni-sha1-x86_64.pl +++ b/deps/openssl/openssl/crypto/aes/asm/aesni-sha1-x86_64.pl @@ -784,7 +784,7 @@ () sub body_20_39_dec () { # b^d^c # on entry @T[0]=b^d return &body_40_59_dec() if ($rx==39); - + my @r=@body_20_39; unshift (@r,@aes256_dec[$rx]) if (@aes256_dec[$rx]); @@ -2013,7 +2013,7 @@ sub sha1op38 { my $instr = shift; my %opcodelet = ( "sha1nexte" => 0xc8, - "sha1msg1" => 0xc9, + "sha1msg1" => 0xc9, "sha1msg2" => 0xca ); if (defined($opcodelet{$instr}) && @_[0] =~ /%xmm([0-9]+),\s*%xmm([0-9]+)/) { diff --git a/deps/openssl/openssl/crypto/aes/asm/aesni-sha256-x86_64.pl b/deps/openssl/openssl/crypto/aes/asm/aesni-sha256-x86_64.pl index 09b61481850790..19b0433b3b1bdb 100644 --- a/deps/openssl/openssl/crypto/aes/asm/aesni-sha256-x86_64.pl +++ b/deps/openssl/openssl/crypto/aes/asm/aesni-sha256-x86_64.pl @@ -542,7 +542,7 @@ () &XOP_256_00_47($j,\&body_00_15,@X); push(@X,shift(@X)); # rotate(@X) } - &mov ("%r12",$_inp); # borrow $a4 + &mov ("%r12",$_inp); # borrow $a4 &vpand ($temp,$temp,$mask14); &mov ("%r15",$_out); # borrow $a2 &vpor ($iv,$iv,$temp); @@ -793,7 +793,7 @@ () &AVX_256_00_47($j,\&body_00_15,@X); push(@X,shift(@X)); # rotate(@X) } - &mov ("%r12",$_inp); # borrow $a4 + &mov ("%r12",$_inp); # borrow $a4 &vpand ($temp,$temp,$mask14); &mov ("%r15",$_out); # borrow $a2 &vpor ($iv,$iv,$temp); @@ -879,7 +879,7 @@ () ###################################################################### # AVX2+BMI code path # -my $a5=$SZ==4?"%esi":"%rsi"; # zap $inp +my $a5=$SZ==4?"%esi":"%rsi"; # zap $inp my $PUSH8=8*2*$SZ; use integer; @@ -1499,13 +1499,13 @@ () # EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, # CONTEXT *context,DISPATCHER_CONTEXT *disp) -if ($win64) { +if ($win64 && $avx) { $rec="%rcx"; $frame="%rdx"; $context="%r8"; $disp="%r9"; -$code.=<<___ if ($avx); +$code.=<<___; .extern __imp_RtlVirtualUnwind .type se_handler,\@abi-omnipotent .align 16 @@ -1643,7 +1643,7 @@ () .rva .LSEH_end_${func}_shaext .rva .LSEH_info_${func}_shaext ___ -$code.=<<___ if ($avx); +$code.=<<___; .section .xdata .align 8 .LSEH_info_${func}_xop: @@ -1684,7 +1684,7 @@ sub rex { { my %opcodelet = ( "sha256rnds2" => 0xcb, - "sha256msg1" => 0xcc, + "sha256msg1" => 0xcc, "sha256msg2" => 0xcd ); sub sha256op38 { diff --git a/deps/openssl/openssl/crypto/aes/asm/aesni-x86.pl b/deps/openssl/openssl/crypto/aes/asm/aesni-x86.pl index 3deb86aed636e1..f67df8cf13da3e 100644 --- a/deps/openssl/openssl/crypto/aes/asm/aesni-x86.pl +++ b/deps/openssl/openssl/crypto/aes/asm/aesni-x86.pl @@ -51,7 +51,7 @@ # Westmere 3.77/1.37 1.37 1.52 1.27 # * Bridge 5.07/0.98 0.99 1.09 0.91 # Haswell 4.44/0.80 0.97 1.03 0.72 -# Atom 5.77/3.56 3.67 4.03 3.46 +# Silvermont 5.77/3.56 3.67 4.03 3.46 # Bulldozer 5.80/0.98 1.05 1.24 0.93 $PREFIX="aesni"; # if $PREFIX is set to "AES", the script @@ -65,6 +65,9 @@ &asm_init($ARGV[0],$0); +&external_label("OPENSSL_ia32cap_P"); +&static_label("key_const"); + if ($PREFIX eq "aesni") { $movekey=\&movups; } else { $movekey=\&movups; } @@ -181,7 +184,10 @@ sub aesni_generate1 # fully unrolled loop { &aesni_inline_generate1("enc"); } else { &call ("_aesni_encrypt1"); } + &pxor ($rndkey0,$rndkey0); # clear register bank + &pxor ($rndkey1,$rndkey1); &movups (&QWP(0,"eax"),$inout0); + &pxor ($inout0,$inout0); &ret (); &function_end_B("${PREFIX}_encrypt"); @@ -197,7 +203,10 @@ sub aesni_generate1 # fully unrolled loop { &aesni_inline_generate1("dec"); } else { &call ("_aesni_decrypt1"); } + &pxor ($rndkey0,$rndkey0); # clear register bank + &pxor ($rndkey1,$rndkey1); &movups (&QWP(0,"eax"),$inout0); + &pxor ($inout0,$inout0); &ret (); &function_end_B("${PREFIX}_decrypt"); @@ -349,17 +358,15 @@ sub aesni_generate6 &neg ($rounds); eval"&aes${p} ($inout2,$rndkey1)"; &pxor ($inout5,$rndkey0); + &$movekey ($rndkey0,&QWP(0,$key,$rounds)); &add ($rounds,16); - eval"&aes${p} ($inout3,$rndkey1)"; - eval"&aes${p} ($inout4,$rndkey1)"; - eval"&aes${p} ($inout5,$rndkey1)"; - &$movekey ($rndkey0,&QWP(-16,$key,$rounds)); - &jmp (&label("_aesni_${p}rypt6_enter")); + &jmp (&label("_aesni_${p}rypt6_inner")); &set_label("${p}6_loop",16); eval"&aes${p} ($inout0,$rndkey1)"; eval"&aes${p} ($inout1,$rndkey1)"; eval"&aes${p} ($inout2,$rndkey1)"; + &set_label("_aesni_${p}rypt6_inner"); eval"&aes${p} ($inout3,$rndkey1)"; eval"&aes${p} ($inout4,$rndkey1)"; eval"&aes${p} ($inout5,$rndkey1)"; @@ -615,6 +622,14 @@ sub aesni_generate6 &movups (&QWP(0x30,$out),$inout3); &set_label("ecb_ret"); + &pxor ("xmm0","xmm0"); # clear register bank + &pxor ("xmm1","xmm1"); + &pxor ("xmm2","xmm2"); + &pxor ("xmm3","xmm3"); + &pxor ("xmm4","xmm4"); + &pxor ("xmm5","xmm5"); + &pxor ("xmm6","xmm6"); + &pxor ("xmm7","xmm7"); &function_end("aesni_ecb_encrypt"); ###################################################################### @@ -704,6 +719,15 @@ sub aesni_generate6 &mov ("esp",&DWP(48,"esp")); &mov ($out,&wparam(5)); &movups (&QWP(0,$out),$cmac); + + &pxor ("xmm0","xmm0"); # clear register bank + &pxor ("xmm1","xmm1"); + &pxor ("xmm2","xmm2"); + &pxor ("xmm3","xmm3"); + &pxor ("xmm4","xmm4"); + &pxor ("xmm5","xmm5"); + &pxor ("xmm6","xmm6"); + &pxor ("xmm7","xmm7"); &function_end("aesni_ccm64_encrypt_blocks"); &function_begin("aesni_ccm64_decrypt_blocks"); @@ -804,6 +828,15 @@ sub aesni_generate6 &mov ("esp",&DWP(48,"esp")); &mov ($out,&wparam(5)); &movups (&QWP(0,$out),$cmac); + + &pxor ("xmm0","xmm0"); # clear register bank + &pxor ("xmm1","xmm1"); + &pxor ("xmm2","xmm2"); + &pxor ("xmm3","xmm3"); + &pxor ("xmm4","xmm4"); + &pxor ("xmm5","xmm5"); + &pxor ("xmm6","xmm6"); + &pxor ("xmm7","xmm7"); &function_end("aesni_ccm64_decrypt_blocks"); } @@ -1053,6 +1086,17 @@ sub aesni_generate6 &movups (&QWP(0x30,$out),$inout3); &set_label("ctr32_ret"); + &pxor ("xmm0","xmm0"); # clear register bank + &pxor ("xmm1","xmm1"); + &pxor ("xmm2","xmm2"); + &pxor ("xmm3","xmm3"); + &pxor ("xmm4","xmm4"); + &movdqa (&QWP(32,"esp"),"xmm0"); # clear stack + &pxor ("xmm5","xmm5"); + &movdqa (&QWP(48,"esp"),"xmm0"); + &pxor ("xmm6","xmm6"); + &movdqa (&QWP(64,"esp"),"xmm0"); + &pxor ("xmm7","xmm7"); &mov ("esp",&DWP(80,"esp")); &function_end("aesni_ctr32_encrypt_blocks"); @@ -1394,6 +1438,20 @@ sub aesni_generate6 &movups (&QWP(-16,$out),$inout0); # write output &set_label("xts_enc_ret"); + &pxor ("xmm0","xmm0"); # clear register bank + &pxor ("xmm1","xmm1"); + &pxor ("xmm2","xmm2"); + &movdqa (&QWP(16*0,"esp"),"xmm0"); # clear stack + &pxor ("xmm3","xmm3"); + &movdqa (&QWP(16*1,"esp"),"xmm0"); + &pxor ("xmm4","xmm4"); + &movdqa (&QWP(16*2,"esp"),"xmm0"); + &pxor ("xmm5","xmm5"); + &movdqa (&QWP(16*3,"esp"),"xmm0"); + &pxor ("xmm6","xmm6"); + &movdqa (&QWP(16*4,"esp"),"xmm0"); + &pxor ("xmm7","xmm7"); + &movdqa (&QWP(16*5,"esp"),"xmm0"); &mov ("esp",&DWP(16*7+4,"esp")); # restore %esp &function_end("aesni_xts_encrypt"); @@ -1756,6 +1814,20 @@ sub aesni_generate6 &movups (&QWP(0,$out),$inout0); # write output &set_label("xts_dec_ret"); + &pxor ("xmm0","xmm0"); # clear register bank + &pxor ("xmm1","xmm1"); + &pxor ("xmm2","xmm2"); + &movdqa (&QWP(16*0,"esp"),"xmm0"); # clear stack + &pxor ("xmm3","xmm3"); + &movdqa (&QWP(16*1,"esp"),"xmm0"); + &pxor ("xmm4","xmm4"); + &movdqa (&QWP(16*2,"esp"),"xmm0"); + &pxor ("xmm5","xmm5"); + &movdqa (&QWP(16*3,"esp"),"xmm0"); + &pxor ("xmm6","xmm6"); + &movdqa (&QWP(16*4,"esp"),"xmm0"); + &pxor ("xmm7","xmm7"); + &movdqa (&QWP(16*5,"esp"),"xmm0"); &mov ("esp",&DWP(16*7+4,"esp")); # restore %esp &function_end("aesni_xts_decrypt"); } @@ -1808,6 +1880,7 @@ sub aesni_generate6 &add ($len,16); &jnz (&label("cbc_enc_tail")); &movaps ($ivec,$inout0); + &pxor ($inout0,$inout0); &jmp (&label("cbc_ret")); &set_label("cbc_enc_tail"); @@ -1871,7 +1944,7 @@ sub aesni_generate6 &movaps ($inout0,$inout5); &movaps ($ivec,$rndkey0); &add ($len,0x50); - &jle (&label("cbc_dec_tail_collected")); + &jle (&label("cbc_dec_clear_tail_collected")); &movups (&QWP(0,$out),$inout0); &lea ($out,&DWP(0x10,$out)); &set_label("cbc_dec_tail"); @@ -1910,10 +1983,14 @@ sub aesni_generate6 &xorps ($inout4,$rndkey0); &movups (&QWP(0,$out),$inout0); &movups (&QWP(0x10,$out),$inout1); + &pxor ($inout1,$inout1); &movups (&QWP(0x20,$out),$inout2); + &pxor ($inout2,$inout2); &movups (&QWP(0x30,$out),$inout3); + &pxor ($inout3,$inout3); &lea ($out,&DWP(0x40,$out)); &movaps ($inout0,$inout4); + &pxor ($inout4,$inout4); &sub ($len,0x50); &jmp (&label("cbc_dec_tail_collected")); @@ -1933,6 +2010,7 @@ sub aesni_generate6 &xorps ($inout1,$in0); &movups (&QWP(0,$out),$inout0); &movaps ($inout0,$inout1); + &pxor ($inout1,$inout1); &lea ($out,&DWP(0x10,$out)); &movaps ($ivec,$in1); &sub ($len,0x20); @@ -1945,7 +2023,9 @@ sub aesni_generate6 &xorps ($inout2,$in1); &movups (&QWP(0,$out),$inout0); &movaps ($inout0,$inout2); + &pxor ($inout2,$inout2); &movups (&QWP(0x10,$out),$inout1); + &pxor ($inout1,$inout1); &lea ($out,&DWP(0x20,$out)); &movups ($ivec,&QWP(0x20,$inp)); &sub ($len,0x30); @@ -1961,29 +2041,44 @@ sub aesni_generate6 &movups (&QWP(0,$out),$inout0); &xorps ($inout2,$rndkey1); &movups (&QWP(0x10,$out),$inout1); + &pxor ($inout1,$inout1); &xorps ($inout3,$rndkey0); &movups (&QWP(0x20,$out),$inout2); + &pxor ($inout2,$inout2); &lea ($out,&DWP(0x30,$out)); &movaps ($inout0,$inout3); + &pxor ($inout3,$inout3); &sub ($len,0x40); + &jmp (&label("cbc_dec_tail_collected")); +&set_label("cbc_dec_clear_tail_collected",16); + &pxor ($inout1,$inout1); + &pxor ($inout2,$inout2); + &pxor ($inout3,$inout3); + &pxor ($inout4,$inout4); &set_label("cbc_dec_tail_collected"); &and ($len,15); &jnz (&label("cbc_dec_tail_partial")); &movups (&QWP(0,$out),$inout0); + &pxor ($rndkey0,$rndkey0); &jmp (&label("cbc_ret")); &set_label("cbc_dec_tail_partial",16); &movaps (&QWP(0,"esp"),$inout0); + &pxor ($rndkey0,$rndkey0); &mov ("ecx",16); &mov ($inp,"esp"); &sub ("ecx",$len); &data_word(0xA4F3F689); # rep movsb + &movdqa (&QWP(0,"esp"),$inout0); &set_label("cbc_ret"); &mov ("esp",&DWP(16,"esp")); # pull original %esp &mov ($key_,&wparam(4)); + &pxor ($inout0,$inout0); + &pxor ($rndkey1,$rndkey1); &movups (&QWP(0,$key_),$ivec); # output IV + &pxor ($ivec,$ivec); &set_label("cbc_abort"); &function_end("${PREFIX}_cbc_encrypt"); @@ -2000,14 +2095,24 @@ sub aesni_generate6 # $round rounds &function_begin_B("_aesni_set_encrypt_key"); + &push ("ebp"); + &push ("ebx"); &test ("eax","eax"); &jz (&label("bad_pointer")); &test ($key,$key); &jz (&label("bad_pointer")); + &call (&label("pic")); +&set_label("pic"); + &blindpop("ebx"); + &lea ("ebx",&DWP(&label("key_const")."-".&label("pic"),"ebx")); + + &picmeup("ebp","OPENSSL_ia32cap_P","ebx",&label("key_const")); &movups ("xmm0",&QWP(0,"eax")); # pull first 128 bits of *userKey &xorps ("xmm4","xmm4"); # low dword of xmm4 is assumed 0 + &mov ("ebp",&DWP(4,"ebp")); &lea ($key,&DWP(16,$key)); + &and ("ebp",1<<28|1<<11); # AVX and XOP bits &cmp ($rounds,256); &je (&label("14rounds")); &cmp ($rounds,192); @@ -2016,6 +2121,9 @@ sub aesni_generate6 &jne (&label("bad_keybits")); &set_label("10rounds",16); + &cmp ("ebp",1<<28); + &je (&label("10rounds_alt")); + &mov ($rounds,9); &$movekey (&QWP(-16,$key),"xmm0"); # round 0 &aeskeygenassist("xmm1","xmm0",0x01); # round 1 @@ -2040,8 +2148,8 @@ sub aesni_generate6 &call (&label("key_128")); &$movekey (&QWP(0,$key),"xmm0"); &mov (&DWP(80,$key),$rounds); - &xor ("eax","eax"); - &ret(); + + &jmp (&label("good_key")); &set_label("key_128",16); &$movekey (&QWP(0,$key),"xmm0"); @@ -2055,8 +2163,76 @@ sub aesni_generate6 &xorps ("xmm0","xmm1"); &ret(); +&set_label("10rounds_alt",16); + &movdqa ("xmm5",&QWP(0x00,"ebx")); + &mov ($rounds,8); + &movdqa ("xmm4",&QWP(0x20,"ebx")); + &movdqa ("xmm2","xmm0"); + &movdqu (&QWP(-16,$key),"xmm0"); + +&set_label("loop_key128"); + &pshufb ("xmm0","xmm5"); + &aesenclast ("xmm0","xmm4"); + &pslld ("xmm4",1); + &lea ($key,&DWP(16,$key)); + + &movdqa ("xmm3","xmm2"); + &pslldq ("xmm2",4); + &pxor ("xmm3","xmm2"); + &pslldq ("xmm2",4); + &pxor ("xmm3","xmm2"); + &pslldq ("xmm2",4); + &pxor ("xmm2","xmm3"); + + &pxor ("xmm0","xmm2"); + &movdqu (&QWP(-16,$key),"xmm0"); + &movdqa ("xmm2","xmm0"); + + &dec ($rounds); + &jnz (&label("loop_key128")); + + &movdqa ("xmm4",&QWP(0x30,"ebx")); + + &pshufb ("xmm0","xmm5"); + &aesenclast ("xmm0","xmm4"); + &pslld ("xmm4",1); + + &movdqa ("xmm3","xmm2"); + &pslldq ("xmm2",4); + &pxor ("xmm3","xmm2"); + &pslldq ("xmm2",4); + &pxor ("xmm3","xmm2"); + &pslldq ("xmm2",4); + &pxor ("xmm2","xmm3"); + + &pxor ("xmm0","xmm2"); + &movdqu (&QWP(0,$key),"xmm0"); + + &movdqa ("xmm2","xmm0"); + &pshufb ("xmm0","xmm5"); + &aesenclast ("xmm0","xmm4"); + + &movdqa ("xmm3","xmm2"); + &pslldq ("xmm2",4); + &pxor ("xmm3","xmm2"); + &pslldq ("xmm2",4); + &pxor ("xmm3","xmm2"); + &pslldq ("xmm2",4); + &pxor ("xmm2","xmm3"); + + &pxor ("xmm0","xmm2"); + &movdqu (&QWP(16,$key),"xmm0"); + + &mov ($rounds,9); + &mov (&DWP(96,$key),$rounds); + + &jmp (&label("good_key")); + &set_label("12rounds",16); &movq ("xmm2",&QWP(16,"eax")); # remaining 1/3 of *userKey + &cmp ("ebp",1<<28); + &je (&label("12rounds_alt")); + &mov ($rounds,11); &$movekey (&QWP(-16,$key),"xmm0"); # round 0 &aeskeygenassist("xmm1","xmm2",0x01); # round 1,2 @@ -2077,8 +2253,8 @@ sub aesni_generate6 &call (&label("key_192b")); &$movekey (&QWP(0,$key),"xmm0"); &mov (&DWP(48,$key),$rounds); - &xor ("eax","eax"); - &ret(); + + &jmp (&label("good_key")); &set_label("key_192a",16); &$movekey (&QWP(0,$key),"xmm0"); @@ -2108,10 +2284,52 @@ sub aesni_generate6 &lea ($key,&DWP(32,$key)); &jmp (&label("key_192b_warm")); +&set_label("12rounds_alt",16); + &movdqa ("xmm5",&QWP(0x10,"ebx")); + &movdqa ("xmm4",&QWP(0x20,"ebx")); + &mov ($rounds,8); + &movdqu (&QWP(-16,$key),"xmm0"); + +&set_label("loop_key192"); + &movq (&QWP(0,$key),"xmm2"); + &movdqa ("xmm1","xmm2"); + &pshufb ("xmm2","xmm5"); + &aesenclast ("xmm2","xmm4"); + &pslld ("xmm4",1); + &lea ($key,&DWP(24,$key)); + + &movdqa ("xmm3","xmm0"); + &pslldq ("xmm0",4); + &pxor ("xmm3","xmm0"); + &pslldq ("xmm0",4); + &pxor ("xmm3","xmm0"); + &pslldq ("xmm0",4); + &pxor ("xmm0","xmm3"); + + &pshufd ("xmm3","xmm0",0xff); + &pxor ("xmm3","xmm1"); + &pslldq ("xmm1",4); + &pxor ("xmm3","xmm1"); + + &pxor ("xmm0","xmm2"); + &pxor ("xmm2","xmm3"); + &movdqu (&QWP(-16,$key),"xmm0"); + + &dec ($rounds); + &jnz (&label("loop_key192")); + + &mov ($rounds,11); + &mov (&DWP(32,$key),$rounds); + + &jmp (&label("good_key")); + &set_label("14rounds",16); &movups ("xmm2",&QWP(16,"eax")); # remaining half of *userKey - &mov ($rounds,13); &lea ($key,&DWP(16,$key)); + &cmp ("ebp",1<<28); + &je (&label("14rounds_alt")); + + &mov ($rounds,13); &$movekey (&QWP(-32,$key),"xmm0"); # round 0 &$movekey (&QWP(-16,$key),"xmm2"); # round 1 &aeskeygenassist("xmm1","xmm2",0x01); # round 2 @@ -2143,7 +2361,8 @@ sub aesni_generate6 &$movekey (&QWP(0,$key),"xmm0"); &mov (&DWP(16,$key),$rounds); &xor ("eax","eax"); - &ret(); + + &jmp (&label("good_key")); &set_label("key_256a",16); &$movekey (&QWP(0,$key),"xmm2"); @@ -2169,11 +2388,77 @@ sub aesni_generate6 &xorps ("xmm2","xmm1"); &ret(); +&set_label("14rounds_alt",16); + &movdqa ("xmm5",&QWP(0x00,"ebx")); + &movdqa ("xmm4",&QWP(0x20,"ebx")); + &mov ($rounds,7); + &movdqu (&QWP(-32,$key),"xmm0"); + &movdqa ("xmm1","xmm2"); + &movdqu (&QWP(-16,$key),"xmm2"); + +&set_label("loop_key256"); + &pshufb ("xmm2","xmm5"); + &aesenclast ("xmm2","xmm4"); + + &movdqa ("xmm3","xmm0"); + &pslldq ("xmm0",4); + &pxor ("xmm3","xmm0"); + &pslldq ("xmm0",4); + &pxor ("xmm3","xmm0"); + &pslldq ("xmm0",4); + &pxor ("xmm0","xmm3"); + &pslld ("xmm4",1); + + &pxor ("xmm0","xmm2"); + &movdqu (&QWP(0,$key),"xmm0"); + + &dec ($rounds); + &jz (&label("done_key256")); + + &pshufd ("xmm2","xmm0",0xff); + &pxor ("xmm3","xmm3"); + &aesenclast ("xmm2","xmm3"); + + &movdqa ("xmm3","xmm1") + &pslldq ("xmm1",4); + &pxor ("xmm3","xmm1"); + &pslldq ("xmm1",4); + &pxor ("xmm3","xmm1"); + &pslldq ("xmm1",4); + &pxor ("xmm1","xmm3"); + + &pxor ("xmm2","xmm1"); + &movdqu (&QWP(16,$key),"xmm2"); + &lea ($key,&DWP(32,$key)); + &movdqa ("xmm1","xmm2"); + &jmp (&label("loop_key256")); + +&set_label("done_key256"); + &mov ($rounds,13); + &mov (&DWP(16,$key),$rounds); + +&set_label("good_key"); + &pxor ("xmm0","xmm0"); + &pxor ("xmm1","xmm1"); + &pxor ("xmm2","xmm2"); + &pxor ("xmm3","xmm3"); + &pxor ("xmm4","xmm4"); + &pxor ("xmm5","xmm5"); + &xor ("eax","eax"); + &pop ("ebx"); + &pop ("ebp"); + &ret (); + &set_label("bad_pointer",4); &mov ("eax",-1); + &pop ("ebx"); + &pop ("ebp"); &ret (); &set_label("bad_keybits",4); + &pxor ("xmm0","xmm0"); &mov ("eax",-2); + &pop ("ebx"); + &pop ("ebp"); &ret (); &function_end_B("_aesni_set_encrypt_key"); @@ -2223,10 +2508,18 @@ sub aesni_generate6 &aesimc ("xmm0","xmm0"); &$movekey (&QWP(0,$key),"xmm0"); + &pxor ("xmm0","xmm0"); + &pxor ("xmm1","xmm1"); &xor ("eax","eax"); # return success &set_label("dec_key_ret"); &ret (); &function_end_B("${PREFIX}_set_decrypt_key"); + +&set_label("key_const",64); +&data_word(0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d); +&data_word(0x04070605,0x04070605,0x04070605,0x04070605); +&data_word(1,1,1,1); +&data_word(0x1b,0x1b,0x1b,0x1b); &asciz("AES for Intel AES-NI, CRYPTOGAMS by "); &asm_finish(); diff --git a/deps/openssl/openssl/crypto/aes/asm/aesni-x86_64.pl b/deps/openssl/openssl/crypto/aes/asm/aesni-x86_64.pl index c8f36c1eda134c..25ca574f6a2f90 100644 --- a/deps/openssl/openssl/crypto/aes/asm/aesni-x86_64.pl +++ b/deps/openssl/openssl/crypto/aes/asm/aesni-x86_64.pl @@ -165,11 +165,11 @@ # Westmere 3.77/1.25 1.25 1.25 1.26 # * Bridge 5.07/0.74 0.75 0.90 0.85 # Haswell 4.44/0.63 0.63 0.73 0.63 -# Atom 5.75/3.54 3.56 4.12 3.87(*) +# Silvermont 5.75/3.54 3.56 4.12 3.87(*) # Bulldozer 5.77/0.70 0.72 0.90 0.70 # -# (*) Atom ECB result is suboptimal because of penalties incurred -# by operations on %xmm8-15. As ECB is not considered +# (*) Atom Silvermont ECB result is suboptimal because of penalties +# incurred by operations on %xmm8-15. As ECB is not considered # critical, nothing was done to mitigate the problem. $PREFIX="aesni"; # if $PREFIX is set to "AES", the script @@ -263,7 +263,10 @@ sub aesni_generate1 { ___ &aesni_generate1("enc",$key,$rounds); $code.=<<___; + pxor $rndkey0,$rndkey0 # clear register bank + pxor $rndkey1,$rndkey1 movups $inout0,($out) # output + pxor $inout0,$inout0 ret .size ${PREFIX}_encrypt,.-${PREFIX}_encrypt @@ -276,7 +279,10 @@ sub aesni_generate1 { ___ &aesni_generate1("dec",$key,$rounds); $code.=<<___; + pxor $rndkey0,$rndkey0 # clear register bank + pxor $rndkey1,$rndkey1 movups $inout0,($out) # output + pxor $inout0,$inout0 ret .size ${PREFIX}_decrypt, .-${PREFIX}_decrypt ___ @@ -293,7 +299,7 @@ sub aesni_generate1 { # on 2x subroutine on Atom Silvermont account. For processors that # can schedule aes[enc|dec] every cycle optimal interleave factor # equals to corresponding instructions latency. 8x is optimal for -# * Bridge and "super-optimal" for other Intel CPUs... +# * Bridge and "super-optimal" for other Intel CPUs... sub aesni_generate2 { my $dir=shift; @@ -445,21 +451,18 @@ sub aesni_generate6 { pxor $rndkey0,$inout4 aes${dir} $rndkey1,$inout2 pxor $rndkey0,$inout5 + $movkey ($key,%rax),$rndkey0 add \$16,%rax - aes${dir} $rndkey1,$inout3 - aes${dir} $rndkey1,$inout4 - aes${dir} $rndkey1,$inout5 - $movkey -16($key,%rax),$rndkey0 jmp .L${dir}_loop6_enter .align 16 .L${dir}_loop6: aes${dir} $rndkey1,$inout0 aes${dir} $rndkey1,$inout1 aes${dir} $rndkey1,$inout2 +.L${dir}_loop6_enter: aes${dir} $rndkey1,$inout3 aes${dir} $rndkey1,$inout4 aes${dir} $rndkey1,$inout5 -.L${dir}_loop6_enter: $movkey ($key,%rax),$rndkey1 add \$32,%rax aes${dir} $rndkey0,$inout0 @@ -506,23 +509,18 @@ sub aesni_generate8 { lea 32($key,$rounds),$key neg %rax # $rounds aes${dir} $rndkey1,$inout0 - add \$16,%rax pxor $rndkey0,$inout5 - aes${dir} $rndkey1,$inout1 pxor $rndkey0,$inout6 + aes${dir} $rndkey1,$inout1 pxor $rndkey0,$inout7 - aes${dir} $rndkey1,$inout2 - aes${dir} $rndkey1,$inout3 - aes${dir} $rndkey1,$inout4 - aes${dir} $rndkey1,$inout5 - aes${dir} $rndkey1,$inout6 - aes${dir} $rndkey1,$inout7 - $movkey -16($key,%rax),$rndkey0 - jmp .L${dir}_loop8_enter + $movkey ($key,%rax),$rndkey0 + add \$16,%rax + jmp .L${dir}_loop8_inner .align 16 .L${dir}_loop8: aes${dir} $rndkey1,$inout0 aes${dir} $rndkey1,$inout1 +.L${dir}_loop8_inner: aes${dir} $rndkey1,$inout2 aes${dir} $rndkey1,$inout3 aes${dir} $rndkey1,$inout4 @@ -587,15 +585,15 @@ sub aesni_generate8 { ___ $code.=<<___ if ($win64); lea -0x58(%rsp),%rsp - movaps %xmm6,(%rsp) + movaps %xmm6,(%rsp) # offload $inout4..7 movaps %xmm7,0x10(%rsp) movaps %xmm8,0x20(%rsp) movaps %xmm9,0x30(%rsp) .Lecb_enc_body: ___ $code.=<<___; - and \$-16,$len - jz .Lecb_ret + and \$-16,$len # if ($len<16) + jz .Lecb_ret # return mov 240($key),$rounds # key->rounds $movkey ($key),$rndkey0 @@ -604,10 +602,10 @@ sub aesni_generate8 { test %r8d,%r8d # 5th argument jz .Lecb_decrypt #--------------------------- ECB ENCRYPT ------------------------------# - cmp \$0x80,$len - jb .Lecb_enc_tail + cmp \$0x80,$len # if ($len<8*16) + jb .Lecb_enc_tail # short input - movdqu ($inp),$inout0 + movdqu ($inp),$inout0 # load 8 input blocks movdqu 0x10($inp),$inout1 movdqu 0x20($inp),$inout2 movdqu 0x30($inp),$inout3 @@ -615,14 +613,14 @@ sub aesni_generate8 { movdqu 0x50($inp),$inout5 movdqu 0x60($inp),$inout6 movdqu 0x70($inp),$inout7 - lea 0x80($inp),$inp - sub \$0x80,$len + lea 0x80($inp),$inp # $inp+=8*16 + sub \$0x80,$len # $len-=8*16 (can be zero) jmp .Lecb_enc_loop8_enter .align 16 .Lecb_enc_loop8: - movups $inout0,($out) + movups $inout0,($out) # store 8 output blocks mov $key_,$key # restore $key - movdqu ($inp),$inout0 + movdqu ($inp),$inout0 # load 8 input blocks mov $rnds_,$rounds # restore $rounds movups $inout1,0x10($out) movdqu 0x10($inp),$inout1 @@ -637,17 +635,17 @@ sub aesni_generate8 { movups $inout6,0x60($out) movdqu 0x60($inp),$inout6 movups $inout7,0x70($out) - lea 0x80($out),$out + lea 0x80($out),$out # $out+=8*16 movdqu 0x70($inp),$inout7 - lea 0x80($inp),$inp + lea 0x80($inp),$inp # $inp+=8*16 .Lecb_enc_loop8_enter: call _aesni_encrypt8 sub \$0x80,$len - jnc .Lecb_enc_loop8 + jnc .Lecb_enc_loop8 # loop if $len-=8*16 didn't borrow - movups $inout0,($out) + movups $inout0,($out) # store 8 output blocks mov $key_,$key # restore $key movups $inout1,0x10($out) mov $rnds_,$rounds # restore $rounds @@ -657,11 +655,11 @@ sub aesni_generate8 { movups $inout5,0x50($out) movups $inout6,0x60($out) movups $inout7,0x70($out) - lea 0x80($out),$out - add \$0x80,$len - jz .Lecb_ret + lea 0x80($out),$out # $out+=8*16 + add \$0x80,$len # restore real remaining $len + jz .Lecb_ret # done if ($len==0) -.Lecb_enc_tail: +.Lecb_enc_tail: # $len is less than 8*16 movups ($inp),$inout0 cmp \$0x20,$len jb .Lecb_enc_one @@ -678,8 +676,9 @@ sub aesni_generate8 { movups 0x50($inp),$inout5 je .Lecb_enc_six movdqu 0x60($inp),$inout6 + xorps $inout7,$inout7 call _aesni_encrypt8 - movups $inout0,($out) + movups $inout0,($out) # store 7 output blocks movups $inout1,0x10($out) movups $inout2,0x20($out) movups $inout3,0x30($out) @@ -692,25 +691,25 @@ sub aesni_generate8 { ___ &aesni_generate1("enc",$key,$rounds); $code.=<<___; - movups $inout0,($out) + movups $inout0,($out) # store one output block jmp .Lecb_ret .align 16 .Lecb_enc_two: call _aesni_encrypt2 - movups $inout0,($out) + movups $inout0,($out) # store 2 output blocks movups $inout1,0x10($out) jmp .Lecb_ret .align 16 .Lecb_enc_three: call _aesni_encrypt3 - movups $inout0,($out) + movups $inout0,($out) # store 3 output blocks movups $inout1,0x10($out) movups $inout2,0x20($out) jmp .Lecb_ret .align 16 .Lecb_enc_four: call _aesni_encrypt4 - movups $inout0,($out) + movups $inout0,($out) # store 4 output blocks movups $inout1,0x10($out) movups $inout2,0x20($out) movups $inout3,0x30($out) @@ -719,7 +718,7 @@ sub aesni_generate8 { .Lecb_enc_five: xorps $inout5,$inout5 call _aesni_encrypt6 - movups $inout0,($out) + movups $inout0,($out) # store 5 output blocks movups $inout1,0x10($out) movups $inout2,0x20($out) movups $inout3,0x30($out) @@ -728,7 +727,7 @@ sub aesni_generate8 { .align 16 .Lecb_enc_six: call _aesni_encrypt6 - movups $inout0,($out) + movups $inout0,($out) # store 6 output blocks movups $inout1,0x10($out) movups $inout2,0x20($out) movups $inout3,0x30($out) @@ -738,10 +737,10 @@ sub aesni_generate8 { #--------------------------- ECB DECRYPT ------------------------------# .align 16 .Lecb_decrypt: - cmp \$0x80,$len - jb .Lecb_dec_tail + cmp \$0x80,$len # if ($len<8*16) + jb .Lecb_dec_tail # short input - movdqu ($inp),$inout0 + movdqu ($inp),$inout0 # load 8 input blocks movdqu 0x10($inp),$inout1 movdqu 0x20($inp),$inout2 movdqu 0x30($inp),$inout3 @@ -749,14 +748,14 @@ sub aesni_generate8 { movdqu 0x50($inp),$inout5 movdqu 0x60($inp),$inout6 movdqu 0x70($inp),$inout7 - lea 0x80($inp),$inp - sub \$0x80,$len + lea 0x80($inp),$inp # $inp+=8*16 + sub \$0x80,$len # $len-=8*16 (can be zero) jmp .Lecb_dec_loop8_enter .align 16 .Lecb_dec_loop8: - movups $inout0,($out) + movups $inout0,($out) # store 8 output blocks mov $key_,$key # restore $key - movdqu ($inp),$inout0 + movdqu ($inp),$inout0 # load 8 input blocks mov $rnds_,$rounds # restore $rounds movups $inout1,0x10($out) movdqu 0x10($inp),$inout1 @@ -771,30 +770,38 @@ sub aesni_generate8 { movups $inout6,0x60($out) movdqu 0x60($inp),$inout6 movups $inout7,0x70($out) - lea 0x80($out),$out + lea 0x80($out),$out # $out+=8*16 movdqu 0x70($inp),$inout7 - lea 0x80($inp),$inp + lea 0x80($inp),$inp # $inp+=8*16 .Lecb_dec_loop8_enter: call _aesni_decrypt8 $movkey ($key_),$rndkey0 sub \$0x80,$len - jnc .Lecb_dec_loop8 + jnc .Lecb_dec_loop8 # loop if $len-=8*16 didn't borrow - movups $inout0,($out) + movups $inout0,($out) # store 8 output blocks + pxor $inout0,$inout0 # clear register bank mov $key_,$key # restore $key movups $inout1,0x10($out) + pxor $inout1,$inout1 mov $rnds_,$rounds # restore $rounds movups $inout2,0x20($out) + pxor $inout2,$inout2 movups $inout3,0x30($out) + pxor $inout3,$inout3 movups $inout4,0x40($out) + pxor $inout4,$inout4 movups $inout5,0x50($out) + pxor $inout5,$inout5 movups $inout6,0x60($out) + pxor $inout6,$inout6 movups $inout7,0x70($out) - lea 0x80($out),$out - add \$0x80,$len - jz .Lecb_ret + pxor $inout7,$inout7 + lea 0x80($out),$out # $out+=8*16 + add \$0x80,$len # restore real remaining $len + jz .Lecb_ret # done if ($len==0) .Lecb_dec_tail: movups ($inp),$inout0 @@ -814,70 +821,107 @@ sub aesni_generate8 { je .Lecb_dec_six movups 0x60($inp),$inout6 $movkey ($key),$rndkey0 + xorps $inout7,$inout7 call _aesni_decrypt8 - movups $inout0,($out) + movups $inout0,($out) # store 7 output blocks + pxor $inout0,$inout0 # clear register bank movups $inout1,0x10($out) + pxor $inout1,$inout1 movups $inout2,0x20($out) + pxor $inout2,$inout2 movups $inout3,0x30($out) + pxor $inout3,$inout3 movups $inout4,0x40($out) + pxor $inout4,$inout4 movups $inout5,0x50($out) + pxor $inout5,$inout5 movups $inout6,0x60($out) + pxor $inout6,$inout6 + pxor $inout7,$inout7 jmp .Lecb_ret .align 16 .Lecb_dec_one: ___ &aesni_generate1("dec",$key,$rounds); $code.=<<___; - movups $inout0,($out) + movups $inout0,($out) # store one output block + pxor $inout0,$inout0 # clear register bank jmp .Lecb_ret .align 16 .Lecb_dec_two: call _aesni_decrypt2 - movups $inout0,($out) + movups $inout0,($out) # store 2 output blocks + pxor $inout0,$inout0 # clear register bank movups $inout1,0x10($out) + pxor $inout1,$inout1 jmp .Lecb_ret .align 16 .Lecb_dec_three: call _aesni_decrypt3 - movups $inout0,($out) + movups $inout0,($out) # store 3 output blocks + pxor $inout0,$inout0 # clear register bank movups $inout1,0x10($out) + pxor $inout1,$inout1 movups $inout2,0x20($out) + pxor $inout2,$inout2 jmp .Lecb_ret .align 16 .Lecb_dec_four: call _aesni_decrypt4 - movups $inout0,($out) + movups $inout0,($out) # store 4 output blocks + pxor $inout0,$inout0 # clear register bank movups $inout1,0x10($out) + pxor $inout1,$inout1 movups $inout2,0x20($out) + pxor $inout2,$inout2 movups $inout3,0x30($out) + pxor $inout3,$inout3 jmp .Lecb_ret .align 16 .Lecb_dec_five: xorps $inout5,$inout5 call _aesni_decrypt6 - movups $inout0,($out) + movups $inout0,($out) # store 5 output blocks + pxor $inout0,$inout0 # clear register bank movups $inout1,0x10($out) + pxor $inout1,$inout1 movups $inout2,0x20($out) + pxor $inout2,$inout2 movups $inout3,0x30($out) + pxor $inout3,$inout3 movups $inout4,0x40($out) + pxor $inout4,$inout4 + pxor $inout5,$inout5 jmp .Lecb_ret .align 16 .Lecb_dec_six: call _aesni_decrypt6 - movups $inout0,($out) + movups $inout0,($out) # store 6 output blocks + pxor $inout0,$inout0 # clear register bank movups $inout1,0x10($out) + pxor $inout1,$inout1 movups $inout2,0x20($out) + pxor $inout2,$inout2 movups $inout3,0x30($out) + pxor $inout3,$inout3 movups $inout4,0x40($out) + pxor $inout4,$inout4 movups $inout5,0x50($out) + pxor $inout5,$inout5 .Lecb_ret: + xorps $rndkey0,$rndkey0 # %xmm0 + pxor $rndkey1,$rndkey1 ___ $code.=<<___ if ($win64); movaps (%rsp),%xmm6 + movaps %xmm0,(%rsp) # clear stack movaps 0x10(%rsp),%xmm7 + movaps %xmm0,0x10(%rsp) movaps 0x20(%rsp),%xmm8 + movaps %xmm0,0x20(%rsp) movaps 0x30(%rsp),%xmm9 + movaps %xmm0,0x30(%rsp) lea 0x58(%rsp),%rsp .Lecb_enc_ret: ___ @@ -911,10 +955,10 @@ sub aesni_generate8 { ___ $code.=<<___ if ($win64); lea -0x58(%rsp),%rsp - movaps %xmm6,(%rsp) - movaps %xmm7,0x10(%rsp) - movaps %xmm8,0x20(%rsp) - movaps %xmm9,0x30(%rsp) + movaps %xmm6,(%rsp) # $iv + movaps %xmm7,0x10(%rsp) # $bswap_mask + movaps %xmm8,0x20(%rsp) # $in0 + movaps %xmm9,0x30(%rsp) # $increment .Lccm64_enc_body: ___ $code.=<<___; @@ -956,7 +1000,7 @@ sub aesni_generate8 { aesenc $rndkey1,$inout0 aesenc $rndkey1,$inout1 paddq $increment,$iv - dec $len + dec $len # $len-- ($len is in blocks) aesenclast $rndkey0,$inout0 aesenclast $rndkey0,$inout1 @@ -965,16 +1009,26 @@ sub aesni_generate8 { movdqa $iv,$inout0 movups $in0,($out) # save output pshufb $bswap_mask,$inout0 - lea 16($out),$out - jnz .Lccm64_enc_outer + lea 16($out),$out # $out+=16 + jnz .Lccm64_enc_outer # loop if ($len!=0) - movups $inout1,($cmac) + pxor $rndkey0,$rndkey0 # clear register bank + pxor $rndkey1,$rndkey1 + pxor $inout0,$inout0 + movups $inout1,($cmac) # store resulting mac + pxor $inout1,$inout1 + pxor $in0,$in0 + pxor $iv,$iv ___ $code.=<<___ if ($win64); movaps (%rsp),%xmm6 + movaps %xmm0,(%rsp) # clear stack movaps 0x10(%rsp),%xmm7 + movaps %xmm0,0x10(%rsp) movaps 0x20(%rsp),%xmm8 + movaps %xmm0,0x20(%rsp) movaps 0x30(%rsp),%xmm9 + movaps %xmm0,0x30(%rsp) lea 0x58(%rsp),%rsp .Lccm64_enc_ret: ___ @@ -991,10 +1045,10 @@ sub aesni_generate8 { ___ $code.=<<___ if ($win64); lea -0x58(%rsp),%rsp - movaps %xmm6,(%rsp) - movaps %xmm7,0x10(%rsp) - movaps %xmm8,0x20(%rsp) - movaps %xmm9,0x30(%rsp) + movaps %xmm6,(%rsp) # $iv + movaps %xmm7,0x10(%rsp) # $bswap_mask + movaps %xmm8,0x20(%rsp) # $in8 + movaps %xmm9,0x30(%rsp) # $increment .Lccm64_dec_body: ___ $code.=<<___; @@ -1015,7 +1069,7 @@ sub aesni_generate8 { mov \$16,$rounds movups ($inp),$in0 # load inp paddq $increment,$iv - lea 16($inp),$inp + lea 16($inp),$inp # $inp+=16 sub %r10,%rax # twisted $rounds lea 32($key_,$rnds_),$key # end of key schedule mov %rax,%r10 @@ -1025,11 +1079,11 @@ sub aesni_generate8 { xorps $inout0,$in0 # inp ^= E(iv) movdqa $iv,$inout0 movups $in0,($out) # save output - lea 16($out),$out + lea 16($out),$out # $out+=16 pshufb $bswap_mask,$inout0 - sub \$1,$len - jz .Lccm64_dec_break + sub \$1,$len # $len-- ($len is in blocks) + jz .Lccm64_dec_break # if ($len==0) break $movkey ($key_),$rndkey0 mov %r10,%rax @@ -1049,13 +1103,13 @@ sub aesni_generate8 { aesenc $rndkey0,$inout1 $movkey -16($key,%rax),$rndkey0 jnz .Lccm64_dec2_loop - movups ($inp),$in0 # load inp + movups ($inp),$in0 # load input paddq $increment,$iv aesenc $rndkey1,$inout0 aesenc $rndkey1,$inout1 aesenclast $rndkey0,$inout0 aesenclast $rndkey0,$inout1 - lea 16($inp),$inp + lea 16($inp),$inp # $inp+=16 jmp .Lccm64_dec_outer .align 16 @@ -1065,13 +1119,23 @@ sub aesni_generate8 { ___ &aesni_generate1("enc",$key_,$rounds,$inout1,$in0); $code.=<<___; - movups $inout1,($cmac) + pxor $rndkey0,$rndkey0 # clear register bank + pxor $rndkey1,$rndkey1 + pxor $inout0,$inout0 + movups $inout1,($cmac) # store resulting mac + pxor $inout1,$inout1 + pxor $in0,$in0 + pxor $iv,$iv ___ $code.=<<___ if ($win64); movaps (%rsp),%xmm6 + movaps %xmm0,(%rsp) # clear stack movaps 0x10(%rsp),%xmm7 + movaps %xmm0,0x10(%rsp) movaps 0x20(%rsp),%xmm8 + movaps %xmm0,0x20(%rsp) movaps 0x30(%rsp),%xmm9 + movaps %xmm0,0x30(%rsp) lea 0x58(%rsp),%rsp .Lccm64_dec_ret: ___ @@ -1102,13 +1166,34 @@ sub aesni_generate8 { .type aesni_ctr32_encrypt_blocks,\@function,5 .align 16 aesni_ctr32_encrypt_blocks: + cmp \$1,$len + jne .Lctr32_bulk + + # handle single block without allocating stack frame, + # useful when handling edges + movups ($ivp),$inout0 + movups ($inp),$inout1 + mov 240($key),%edx # key->rounds +___ + &aesni_generate1("enc",$key,"%edx"); +$code.=<<___; + pxor $rndkey0,$rndkey0 # clear register bank + pxor $rndkey1,$rndkey1 + xorps $inout1,$inout0 + pxor $inout1,$inout1 + movups $inout0,($out) + xorps $inout0,$inout0 + jmp .Lctr32_epilogue + +.align 16 +.Lctr32_bulk: lea (%rsp),%rax push %rbp sub \$$frame_size,%rsp and \$-16,%rsp # Linux kernel stack can be incorrectly seeded ___ $code.=<<___ if ($win64); - movaps %xmm6,-0xa8(%rax) + movaps %xmm6,-0xa8(%rax) # offload everything movaps %xmm7,-0x98(%rax) movaps %xmm8,-0x88(%rax) movaps %xmm9,-0x78(%rax) @@ -1123,8 +1208,8 @@ sub aesni_generate8 { $code.=<<___; lea -8(%rax),%rbp - cmp \$1,$len - je .Lctr32_one_shortcut + # 8 16-byte words on top of stack are counter values + # xor-ed with zero-round key movdqu ($ivp),$inout0 movdqu ($key),$rndkey0 @@ -1139,7 +1224,7 @@ sub aesni_generate8 { movdqa $inout0,0x40(%rsp) movdqa $inout0,0x50(%rsp) movdqa $inout0,0x60(%rsp) - mov %rdx,%r10 # borrow %rdx + mov %rdx,%r10 # about to borrow %rdx movdqa $inout0,0x70(%rsp) lea 1($ctr),%rax @@ -1173,7 +1258,7 @@ sub aesni_generate8 { lea 7($ctr),%r9 mov %r10d,0x60+12(%rsp) bswap %r9d - mov OPENSSL_ia32cap_P+4(%rip),%r10d + mov OPENSSL_ia32cap_P+4(%rip),%r10d xor $key0,%r9d and \$`1<<26|1<<22`,%r10d # isolate XSAVE+MOVBE mov %r9d,0x70+12(%rsp) @@ -1183,15 +1268,15 @@ sub aesni_generate8 { movdqa 0x40(%rsp),$inout4 movdqa 0x50(%rsp),$inout5 - cmp \$8,$len - jb .Lctr32_tail + cmp \$8,$len # $len is in blocks + jb .Lctr32_tail # short input if ($len<8) - sub \$6,$len + sub \$6,$len # $len is biased by -6 cmp \$`1<<22`,%r10d # check for MOVBE without XSAVE - je .Lctr32_6x + je .Lctr32_6x # [which denotes Atom Silvermont] lea 0x80($key),$key # size optimization - sub \$2,$len + sub \$2,$len # $len is biased by -8 jmp .Lctr32_loop8 .align 16 @@ -1205,13 +1290,13 @@ sub aesni_generate8 { .align 16 .Lctr32_loop6: - add \$6,$ctr + add \$6,$ctr # next counter value $movkey -48($key,$rnds_),$rndkey0 aesenc $rndkey1,$inout0 mov $ctr,%eax xor $key0,%eax aesenc $rndkey1,$inout1 - movbe %eax,`0x00+12`(%rsp) + movbe %eax,`0x00+12`(%rsp) # store next counter value lea 1($ctr),%eax aesenc $rndkey1,$inout2 xor $key0,%eax @@ -1244,16 +1329,16 @@ sub aesni_generate8 { call .Lenc_loop6 - movdqu ($inp),$inout6 + movdqu ($inp),$inout6 # load 6 input blocks movdqu 0x10($inp),$inout7 movdqu 0x20($inp),$in0 movdqu 0x30($inp),$in1 movdqu 0x40($inp),$in2 movdqu 0x50($inp),$in3 - lea 0x60($inp),$inp + lea 0x60($inp),$inp # $inp+=6*16 $movkey -64($key,$rnds_),$rndkey1 - pxor $inout0,$inout6 - movaps 0x00(%rsp),$inout0 + pxor $inout0,$inout6 # inp^=E(ctr) + movaps 0x00(%rsp),$inout0 # load next counter [xor-ed with 0 round] pxor $inout1,$inout7 movaps 0x10(%rsp),$inout1 pxor $inout2,$in0 @@ -1264,19 +1349,19 @@ sub aesni_generate8 { movaps 0x40(%rsp),$inout4 pxor $inout5,$in3 movaps 0x50(%rsp),$inout5 - movdqu $inout6,($out) + movdqu $inout6,($out) # store 6 output blocks movdqu $inout7,0x10($out) movdqu $in0,0x20($out) movdqu $in1,0x30($out) movdqu $in2,0x40($out) movdqu $in3,0x50($out) - lea 0x60($out),$out + lea 0x60($out),$out # $out+=6*16 sub \$6,$len - jnc .Lctr32_loop6 + jnc .Lctr32_loop6 # loop if $len-=6 didn't borrow - add \$6,$len - jz .Lctr32_done + add \$6,$len # restore real remaining $len + jz .Lctr32_done # done if ($len==0) lea -48($rnds_),$rounds lea -80($key,$rnds_),$key # restore $key @@ -1286,7 +1371,7 @@ sub aesni_generate8 { .align 32 .Lctr32_loop8: - add \$8,$ctr + add \$8,$ctr # next counter value movdqa 0x60(%rsp),$inout6 aesenc $rndkey1,$inout0 mov $ctr,%r9d @@ -1298,7 +1383,7 @@ sub aesni_generate8 { xor $key0,%r9d nop aesenc $rndkey1,$inout3 - mov %r9d,0x00+12(%rsp) + mov %r9d,0x00+12(%rsp) # store next counter value lea 1($ctr),%r9 aesenc $rndkey1,$inout4 aesenc $rndkey1,$inout5 @@ -1331,7 +1416,7 @@ sub aesni_generate8 { aesenc $rndkey0,$inout1 aesenc $rndkey0,$inout2 xor $key0,%r9d - movdqu 0x00($inp),$in0 + movdqu 0x00($inp),$in0 # start loading input aesenc $rndkey0,$inout3 mov %r9d,0x70+12(%rsp) cmp \$11,$rounds @@ -1388,7 +1473,7 @@ sub aesni_generate8 { .align 16 .Lctr32_enc_done: movdqu 0x10($inp),$in1 - pxor $rndkey0,$in0 + pxor $rndkey0,$in0 # input^=round[last] movdqu 0x20($inp),$in2 pxor $rndkey0,$in1 movdqu 0x30($inp),$in3 @@ -1406,11 +1491,11 @@ sub aesni_generate8 { aesenc $rndkey1,$inout5 aesenc $rndkey1,$inout6 aesenc $rndkey1,$inout7 - movdqu 0x60($inp),$rndkey1 - lea 0x80($inp),$inp + movdqu 0x60($inp),$rndkey1 # borrow $rndkey1 for inp[6] + lea 0x80($inp),$inp # $inp+=8*16 - aesenclast $in0,$inout0 - pxor $rndkey0,$rndkey1 + aesenclast $in0,$inout0 # $inN is inp[N]^round[last] + pxor $rndkey0,$rndkey1 # borrowed $rndkey movdqu 0x70-0x80($inp),$in0 aesenclast $in1,$inout1 pxor $rndkey0,$in0 @@ -1425,10 +1510,10 @@ sub aesni_generate8 { movdqa 0x40(%rsp),$in5 aesenclast $rndkey1,$inout6 movdqa 0x50(%rsp),$rndkey0 - $movkey 0x10-0x80($key),$rndkey1 + $movkey 0x10-0x80($key),$rndkey1#real 1st-round key aesenclast $in0,$inout7 - movups $inout0,($out) # store output + movups $inout0,($out) # store 8 output blocks movdqa $in1,$inout0 movups $inout1,0x10($out) movdqa $in2,$inout1 @@ -1442,21 +1527,24 @@ sub aesni_generate8 { movdqa $rndkey0,$inout5 movups $inout6,0x60($out) movups $inout7,0x70($out) - lea 0x80($out),$out + lea 0x80($out),$out # $out+=8*16 sub \$8,$len - jnc .Lctr32_loop8 + jnc .Lctr32_loop8 # loop if $len-=8 didn't borrow - add \$8,$len - jz .Lctr32_done + add \$8,$len # restore real remainig $len + jz .Lctr32_done # done if ($len==0) lea -0x80($key),$key .Lctr32_tail: + # note that at this point $inout0..5 are populated with + # counter values xor-ed with 0-round key lea 16($key),$key cmp \$4,$len jb .Lctr32_loop3 je .Lctr32_loop4 + # if ($len>4) compute 7 E(counter) shl \$4,$rounds movdqa 0x60(%rsp),$inout6 pxor $inout7,$inout7 @@ -1464,14 +1552,14 @@ sub aesni_generate8 { $movkey 16($key),$rndkey0 aesenc $rndkey1,$inout0 aesenc $rndkey1,$inout1 - lea 32-16($key,$rounds),$key + lea 32-16($key,$rounds),$key# prepare for .Lenc_loop8_enter neg %rax aesenc $rndkey1,$inout2 - add \$16,%rax + add \$16,%rax # prepare for .Lenc_loop8_enter movups ($inp),$in0 aesenc $rndkey1,$inout3 aesenc $rndkey1,$inout4 - movups 0x10($inp),$in1 + movups 0x10($inp),$in1 # pre-load input movups 0x20($inp),$in2 aesenc $rndkey1,$inout5 aesenc $rndkey1,$inout6 @@ -1482,7 +1570,7 @@ sub aesni_generate8 { pxor $in0,$inout0 movdqu 0x40($inp),$in0 pxor $in1,$inout1 - movdqu $inout0,($out) + movdqu $inout0,($out) # store output pxor $in2,$inout2 movdqu $inout1,0x10($out) pxor $in3,$inout3 @@ -1491,17 +1579,17 @@ sub aesni_generate8 { movdqu $inout3,0x30($out) movdqu $inout4,0x40($out) cmp \$6,$len - jb .Lctr32_done + jb .Lctr32_done # $len was 5, stop store movups 0x50($inp),$in1 xorps $in1,$inout5 movups $inout5,0x50($out) - je .Lctr32_done + je .Lctr32_done # $len was 6, stop store movups 0x60($inp),$in2 xorps $in2,$inout6 movups $inout6,0x60($out) - jmp .Lctr32_done + jmp .Lctr32_done # $len was 7, stop store .align 32 .Lctr32_loop4: @@ -1515,7 +1603,7 @@ sub aesni_generate8 { jnz .Lctr32_loop4 aesenclast $rndkey1,$inout0 aesenclast $rndkey1,$inout1 - movups ($inp),$in0 + movups ($inp),$in0 # load input movups 0x10($inp),$in1 aesenclast $rndkey1,$inout2 aesenclast $rndkey1,$inout3 @@ -1523,14 +1611,14 @@ sub aesni_generate8 { movups 0x30($inp),$in3 xorps $in0,$inout0 - movups $inout0,($out) + movups $inout0,($out) # store output xorps $in1,$inout1 movups $inout1,0x10($out) pxor $in2,$inout2 movdqu $inout2,0x20($out) pxor $in3,$inout3 movdqu $inout3,0x30($out) - jmp .Lctr32_done + jmp .Lctr32_done # $len was 4, stop store .align 32 .Lctr32_loop3: @@ -1545,48 +1633,79 @@ sub aesni_generate8 { aesenclast $rndkey1,$inout1 aesenclast $rndkey1,$inout2 - movups ($inp),$in0 + movups ($inp),$in0 # load input xorps $in0,$inout0 - movups $inout0,($out) + movups $inout0,($out) # store output cmp \$2,$len - jb .Lctr32_done + jb .Lctr32_done # $len was 1, stop store movups 0x10($inp),$in1 xorps $in1,$inout1 movups $inout1,0x10($out) - je .Lctr32_done + je .Lctr32_done # $len was 2, stop store movups 0x20($inp),$in2 xorps $in2,$inout2 - movups $inout2,0x20($out) - jmp .Lctr32_done - -.align 16 -.Lctr32_one_shortcut: - movups ($ivp),$inout0 - movups ($inp),$in0 - mov 240($key),$rounds # key->rounds -___ - &aesni_generate1("enc",$key,$rounds); -$code.=<<___; - xorps $in0,$inout0 - movups $inout0,($out) - jmp .Lctr32_done + movups $inout2,0x20($out) # $len was 3, stop store -.align 16 .Lctr32_done: + xorps %xmm0,%xmm0 # clear regiser bank + xor $key0,$key0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 +___ +$code.=<<___ if (!$win64); + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + movaps %xmm0,0x00(%rsp) # clear stack + pxor %xmm8,%xmm8 + movaps %xmm0,0x10(%rsp) + pxor %xmm9,%xmm9 + movaps %xmm0,0x20(%rsp) + pxor %xmm10,%xmm10 + movaps %xmm0,0x30(%rsp) + pxor %xmm11,%xmm11 + movaps %xmm0,0x40(%rsp) + pxor %xmm12,%xmm12 + movaps %xmm0,0x50(%rsp) + pxor %xmm13,%xmm13 + movaps %xmm0,0x60(%rsp) + pxor %xmm14,%xmm14 + movaps %xmm0,0x70(%rsp) + pxor %xmm15,%xmm15 ___ $code.=<<___ if ($win64); movaps -0xa0(%rbp),%xmm6 + movaps %xmm0,-0xa0(%rbp) # clear stack movaps -0x90(%rbp),%xmm7 + movaps %xmm0,-0x90(%rbp) movaps -0x80(%rbp),%xmm8 + movaps %xmm0,-0x80(%rbp) movaps -0x70(%rbp),%xmm9 + movaps %xmm0,-0x70(%rbp) movaps -0x60(%rbp),%xmm10 + movaps %xmm0,-0x60(%rbp) movaps -0x50(%rbp),%xmm11 + movaps %xmm0,-0x50(%rbp) movaps -0x40(%rbp),%xmm12 + movaps %xmm0,-0x40(%rbp) movaps -0x30(%rbp),%xmm13 + movaps %xmm0,-0x30(%rbp) movaps -0x20(%rbp),%xmm14 + movaps %xmm0,-0x20(%rbp) movaps -0x10(%rbp),%xmm15 + movaps %xmm0,-0x10(%rbp) + movaps %xmm0,0x00(%rsp) + movaps %xmm0,0x10(%rsp) + movaps %xmm0,0x20(%rsp) + movaps %xmm0,0x30(%rsp) + movaps %xmm0,0x40(%rsp) + movaps %xmm0,0x50(%rsp) + movaps %xmm0,0x60(%rsp) + movaps %xmm0,0x70(%rsp) ___ $code.=<<___; lea (%rbp),%rsp @@ -1619,7 +1738,7 @@ sub aesni_generate8 { and \$-16,%rsp # Linux kernel stack can be incorrectly seeded ___ $code.=<<___ if ($win64); - movaps %xmm6,-0xa8(%rax) + movaps %xmm6,-0xa8(%rax) # offload everything movaps %xmm7,-0x98(%rax) movaps %xmm8,-0x88(%rax) movaps %xmm9,-0x78(%rax) @@ -1679,7 +1798,7 @@ sub aesni_generate8 { movaps $rndkey1,0x60(%rsp) # save round[0]^round[last] sub \$16*6,$len - jc .Lxts_enc_short + jc .Lxts_enc_short # if $len-=6*16 borrowed mov \$16+96,$rounds lea 32($key_,$rnds_),$key # end of key schedule @@ -1694,7 +1813,7 @@ sub aesni_generate8 { movdqu `16*0`($inp),$inout0 # load input movdqa $rndkey0,$twmask movdqu `16*1`($inp),$inout1 - pxor @tweak[0],$inout0 + pxor @tweak[0],$inout0 # input^=tweak^round[0] movdqu `16*2`($inp),$inout2 pxor @tweak[1],$inout1 aesenc $rndkey1,$inout0 @@ -1713,10 +1832,10 @@ sub aesni_generate8 { lea `16*6`($inp),$inp pxor $twmask,$inout5 - pxor $twres,@tweak[0] + pxor $twres,@tweak[0] # calclulate tweaks^round[last] aesenc $rndkey1,$inout4 pxor $twres,@tweak[1] - movdqa @tweak[0],`16*0`(%rsp) # put aside tweaks^last round key + movdqa @tweak[0],`16*0`(%rsp) # put aside tweaks^round[last] aesenc $rndkey1,$inout5 $movkey 48($key_),$rndkey1 pxor $twres,@tweak[2] @@ -1757,7 +1876,7 @@ sub aesni_generate8 { $movkey -80($key,%rax),$rndkey0 jnz .Lxts_enc_loop6 - movdqa (%r8),$twmask + movdqa (%r8),$twmask # start calculating next tweak movdqa $twres,$twtmp paddd $twres,$twres aesenc $rndkey1,$inout0 @@ -1851,15 +1970,15 @@ sub aesni_generate8 { aesenclast `16*5`(%rsp),$inout5 pxor $twres,@tweak[5] - lea `16*6`($out),$out - movups $inout0,`-16*6`($out) # write output + lea `16*6`($out),$out # $out+=6*16 + movups $inout0,`-16*6`($out) # store 6 output blocks movups $inout1,`-16*5`($out) movups $inout2,`-16*4`($out) movups $inout3,`-16*3`($out) movups $inout4,`-16*2`($out) movups $inout5,`-16*1`($out) sub \$16*6,$len - jnc .Lxts_enc_grandloop + jnc .Lxts_enc_grandloop # loop if $len-=6*16 didn't borrow mov \$16+96,$rounds sub $rnds_,$rounds @@ -1867,34 +1986,36 @@ sub aesni_generate8 { shr \$4,$rounds # restore original value .Lxts_enc_short: + # at the point @tweak[0..5] are populated with tweak values mov $rounds,$rnds_ # backup $rounds pxor $rndkey0,@tweak[0] - add \$16*6,$len - jz .Lxts_enc_done + add \$16*6,$len # restore real remaining $len + jz .Lxts_enc_done # done if ($len==0) pxor $rndkey0,@tweak[1] cmp \$0x20,$len - jb .Lxts_enc_one + jb .Lxts_enc_one # $len is 1*16 pxor $rndkey0,@tweak[2] - je .Lxts_enc_two + je .Lxts_enc_two # $len is 2*16 pxor $rndkey0,@tweak[3] cmp \$0x40,$len - jb .Lxts_enc_three + jb .Lxts_enc_three # $len is 3*16 pxor $rndkey0,@tweak[4] - je .Lxts_enc_four + je .Lxts_enc_four # $len is 4*16 - movdqu ($inp),$inout0 + movdqu ($inp),$inout0 # $len is 5*16 movdqu 16*1($inp),$inout1 movdqu 16*2($inp),$inout2 pxor @tweak[0],$inout0 movdqu 16*3($inp),$inout3 pxor @tweak[1],$inout1 movdqu 16*4($inp),$inout4 - lea 16*5($inp),$inp + lea 16*5($inp),$inp # $inp+=5*16 pxor @tweak[2],$inout2 pxor @tweak[3],$inout3 pxor @tweak[4],$inout4 + pxor $inout5,$inout5 call _aesni_encrypt6 @@ -1902,35 +2023,35 @@ sub aesni_generate8 { movdqa @tweak[5],@tweak[0] xorps @tweak[1],$inout1 xorps @tweak[2],$inout2 - movdqu $inout0,($out) + movdqu $inout0,($out) # store 5 output blocks xorps @tweak[3],$inout3 movdqu $inout1,16*1($out) xorps @tweak[4],$inout4 movdqu $inout2,16*2($out) movdqu $inout3,16*3($out) movdqu $inout4,16*4($out) - lea 16*5($out),$out + lea 16*5($out),$out # $out+=5*16 jmp .Lxts_enc_done .align 16 .Lxts_enc_one: movups ($inp),$inout0 - lea 16*1($inp),$inp + lea 16*1($inp),$inp # inp+=1*16 xorps @tweak[0],$inout0 ___ &aesni_generate1("enc",$key,$rounds); $code.=<<___; xorps @tweak[0],$inout0 movdqa @tweak[1],@tweak[0] - movups $inout0,($out) - lea 16*1($out),$out + movups $inout0,($out) # store one output block + lea 16*1($out),$out # $out+=1*16 jmp .Lxts_enc_done .align 16 .Lxts_enc_two: movups ($inp),$inout0 movups 16($inp),$inout1 - lea 32($inp),$inp + lea 32($inp),$inp # $inp+=2*16 xorps @tweak[0],$inout0 xorps @tweak[1],$inout1 @@ -1939,9 +2060,9 @@ sub aesni_generate8 { xorps @tweak[0],$inout0 movdqa @tweak[2],@tweak[0] xorps @tweak[1],$inout1 - movups $inout0,($out) + movups $inout0,($out) # store 2 output blocks movups $inout1,16*1($out) - lea 16*2($out),$out + lea 16*2($out),$out # $out+=2*16 jmp .Lxts_enc_done .align 16 @@ -1949,7 +2070,7 @@ sub aesni_generate8 { movups ($inp),$inout0 movups 16*1($inp),$inout1 movups 16*2($inp),$inout2 - lea 16*3($inp),$inp + lea 16*3($inp),$inp # $inp+=3*16 xorps @tweak[0],$inout0 xorps @tweak[1],$inout1 xorps @tweak[2],$inout2 @@ -1960,10 +2081,10 @@ sub aesni_generate8 { movdqa @tweak[3],@tweak[0] xorps @tweak[1],$inout1 xorps @tweak[2],$inout2 - movups $inout0,($out) + movups $inout0,($out) # store 3 output blocks movups $inout1,16*1($out) movups $inout2,16*2($out) - lea 16*3($out),$out + lea 16*3($out),$out # $out+=3*16 jmp .Lxts_enc_done .align 16 @@ -1973,7 +2094,7 @@ sub aesni_generate8 { movups 16*2($inp),$inout2 xorps @tweak[0],$inout0 movups 16*3($inp),$inout3 - lea 16*4($inp),$inp + lea 16*4($inp),$inp # $inp+=4*16 xorps @tweak[1],$inout1 xorps @tweak[2],$inout2 xorps @tweak[3],$inout3 @@ -1984,17 +2105,17 @@ sub aesni_generate8 { movdqa @tweak[4],@tweak[0] pxor @tweak[1],$inout1 pxor @tweak[2],$inout2 - movdqu $inout0,($out) + movdqu $inout0,($out) # store 4 output blocks pxor @tweak[3],$inout3 movdqu $inout1,16*1($out) movdqu $inout2,16*2($out) movdqu $inout3,16*3($out) - lea 16*4($out),$out + lea 16*4($out),$out # $out+=4*16 jmp .Lxts_enc_done .align 16 .Lxts_enc_done: - and \$15,$len_ + and \$15,$len_ # see if $len%16 is 0 jz .Lxts_enc_ret mov $len_,$len @@ -2021,18 +2142,60 @@ sub aesni_generate8 { movups $inout0,-16($out) .Lxts_enc_ret: + xorps %xmm0,%xmm0 # clear register bank + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 +___ +$code.=<<___ if (!$win64); + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + movaps %xmm0,0x00(%rsp) # clear stack + pxor %xmm8,%xmm8 + movaps %xmm0,0x10(%rsp) + pxor %xmm9,%xmm9 + movaps %xmm0,0x20(%rsp) + pxor %xmm10,%xmm10 + movaps %xmm0,0x30(%rsp) + pxor %xmm11,%xmm11 + movaps %xmm0,0x40(%rsp) + pxor %xmm12,%xmm12 + movaps %xmm0,0x50(%rsp) + pxor %xmm13,%xmm13 + movaps %xmm0,0x60(%rsp) + pxor %xmm14,%xmm14 + pxor %xmm15,%xmm15 ___ $code.=<<___ if ($win64); movaps -0xa0(%rbp),%xmm6 + movaps %xmm0,-0xa0(%rbp) # clear stack movaps -0x90(%rbp),%xmm7 + movaps %xmm0,-0x90(%rbp) movaps -0x80(%rbp),%xmm8 + movaps %xmm0,-0x80(%rbp) movaps -0x70(%rbp),%xmm9 + movaps %xmm0,-0x70(%rbp) movaps -0x60(%rbp),%xmm10 + movaps %xmm0,-0x60(%rbp) movaps -0x50(%rbp),%xmm11 + movaps %xmm0,-0x50(%rbp) movaps -0x40(%rbp),%xmm12 + movaps %xmm0,-0x40(%rbp) movaps -0x30(%rbp),%xmm13 + movaps %xmm0,-0x30(%rbp) movaps -0x20(%rbp),%xmm14 + movaps %xmm0,-0x20(%rbp) movaps -0x10(%rbp),%xmm15 + movaps %xmm0,-0x10(%rbp) + movaps %xmm0,0x00(%rsp) + movaps %xmm0,0x10(%rsp) + movaps %xmm0,0x20(%rsp) + movaps %xmm0,0x30(%rsp) + movaps %xmm0,0x40(%rsp) + movaps %xmm0,0x50(%rsp) + movaps %xmm0,0x60(%rsp) ___ $code.=<<___; lea (%rbp),%rsp @@ -2053,7 +2216,7 @@ sub aesni_generate8 { and \$-16,%rsp # Linux kernel stack can be incorrectly seeded ___ $code.=<<___ if ($win64); - movaps %xmm6,-0xa8(%rax) + movaps %xmm6,-0xa8(%rax) # offload everything movaps %xmm7,-0x98(%rax) movaps %xmm8,-0x88(%rax) movaps %xmm9,-0x78(%rax) @@ -2116,7 +2279,7 @@ sub aesni_generate8 { movaps $rndkey1,0x60(%rsp) # save round[0]^round[last] sub \$16*6,$len - jc .Lxts_dec_short + jc .Lxts_dec_short # if $len-=6*16 borrowed mov \$16+96,$rounds lea 32($key_,$rnds_),$key # end of key schedule @@ -2131,7 +2294,7 @@ sub aesni_generate8 { movdqu `16*0`($inp),$inout0 # load input movdqa $rndkey0,$twmask movdqu `16*1`($inp),$inout1 - pxor @tweak[0],$inout0 + pxor @tweak[0],$inout0 # intput^=tweak^round[0] movdqu `16*2`($inp),$inout2 pxor @tweak[1],$inout1 aesdec $rndkey1,$inout0 @@ -2150,7 +2313,7 @@ sub aesni_generate8 { lea `16*6`($inp),$inp pxor $twmask,$inout5 - pxor $twres,@tweak[0] + pxor $twres,@tweak[0] # calclulate tweaks^round[last] aesdec $rndkey1,$inout4 pxor $twres,@tweak[1] movdqa @tweak[0],`16*0`(%rsp) # put aside tweaks^last round key @@ -2194,7 +2357,7 @@ sub aesni_generate8 { $movkey -80($key,%rax),$rndkey0 jnz .Lxts_dec_loop6 - movdqa (%r8),$twmask + movdqa (%r8),$twmask # start calculating next tweak movdqa $twres,$twtmp paddd $twres,$twres aesdec $rndkey1,$inout0 @@ -2288,15 +2451,15 @@ sub aesni_generate8 { aesdeclast `16*5`(%rsp),$inout5 pxor $twres,@tweak[5] - lea `16*6`($out),$out - movups $inout0,`-16*6`($out) # write output + lea `16*6`($out),$out # $out+=6*16 + movups $inout0,`-16*6`($out) # store 6 output blocks movups $inout1,`-16*5`($out) movups $inout2,`-16*4`($out) movups $inout3,`-16*3`($out) movups $inout4,`-16*2`($out) movups $inout5,`-16*1`($out) sub \$16*6,$len - jnc .Lxts_dec_grandloop + jnc .Lxts_dec_grandloop # loop if $len-=6*16 didn't borrow mov \$16+96,$rounds sub $rnds_,$rounds @@ -2304,31 +2467,32 @@ sub aesni_generate8 { shr \$4,$rounds # restore original value .Lxts_dec_short: + # at the point @tweak[0..5] are populated with tweak values mov $rounds,$rnds_ # backup $rounds pxor $rndkey0,@tweak[0] pxor $rndkey0,@tweak[1] - add \$16*6,$len - jz .Lxts_dec_done + add \$16*6,$len # restore real remaining $len + jz .Lxts_dec_done # done if ($len==0) pxor $rndkey0,@tweak[2] cmp \$0x20,$len - jb .Lxts_dec_one + jb .Lxts_dec_one # $len is 1*16 pxor $rndkey0,@tweak[3] - je .Lxts_dec_two + je .Lxts_dec_two # $len is 2*16 pxor $rndkey0,@tweak[4] cmp \$0x40,$len - jb .Lxts_dec_three - je .Lxts_dec_four + jb .Lxts_dec_three # $len is 3*16 + je .Lxts_dec_four # $len is 4*16 - movdqu ($inp),$inout0 + movdqu ($inp),$inout0 # $len is 5*16 movdqu 16*1($inp),$inout1 movdqu 16*2($inp),$inout2 pxor @tweak[0],$inout0 movdqu 16*3($inp),$inout3 pxor @tweak[1],$inout1 movdqu 16*4($inp),$inout4 - lea 16*5($inp),$inp + lea 16*5($inp),$inp # $inp+=5*16 pxor @tweak[2],$inout2 pxor @tweak[3],$inout3 pxor @tweak[4],$inout4 @@ -2338,7 +2502,7 @@ sub aesni_generate8 { xorps @tweak[0],$inout0 xorps @tweak[1],$inout1 xorps @tweak[2],$inout2 - movdqu $inout0,($out) + movdqu $inout0,($out) # store 5 output blocks xorps @tweak[3],$inout3 movdqu $inout1,16*1($out) xorps @tweak[4],$inout4 @@ -2347,7 +2511,7 @@ sub aesni_generate8 { movdqu $inout3,16*3($out) pcmpgtd @tweak[5],$twtmp movdqu $inout4,16*4($out) - lea 16*5($out),$out + lea 16*5($out),$out # $out+=5*16 pshufd \$0x13,$twtmp,@tweak[1] # $twres and \$15,$len_ jz .Lxts_dec_ret @@ -2361,23 +2525,23 @@ sub aesni_generate8 { .align 16 .Lxts_dec_one: movups ($inp),$inout0 - lea 16*1($inp),$inp + lea 16*1($inp),$inp # $inp+=1*16 xorps @tweak[0],$inout0 ___ &aesni_generate1("dec",$key,$rounds); $code.=<<___; xorps @tweak[0],$inout0 movdqa @tweak[1],@tweak[0] - movups $inout0,($out) + movups $inout0,($out) # store one output block movdqa @tweak[2],@tweak[1] - lea 16*1($out),$out + lea 16*1($out),$out # $out+=1*16 jmp .Lxts_dec_done .align 16 .Lxts_dec_two: movups ($inp),$inout0 movups 16($inp),$inout1 - lea 32($inp),$inp + lea 32($inp),$inp # $inp+=2*16 xorps @tweak[0],$inout0 xorps @tweak[1],$inout1 @@ -2387,9 +2551,9 @@ sub aesni_generate8 { movdqa @tweak[2],@tweak[0] xorps @tweak[1],$inout1 movdqa @tweak[3],@tweak[1] - movups $inout0,($out) + movups $inout0,($out) # store 2 output blocks movups $inout1,16*1($out) - lea 16*2($out),$out + lea 16*2($out),$out # $out+=2*16 jmp .Lxts_dec_done .align 16 @@ -2397,7 +2561,7 @@ sub aesni_generate8 { movups ($inp),$inout0 movups 16*1($inp),$inout1 movups 16*2($inp),$inout2 - lea 16*3($inp),$inp + lea 16*3($inp),$inp # $inp+=3*16 xorps @tweak[0],$inout0 xorps @tweak[1],$inout1 xorps @tweak[2],$inout2 @@ -2409,10 +2573,10 @@ sub aesni_generate8 { xorps @tweak[1],$inout1 movdqa @tweak[4],@tweak[1] xorps @tweak[2],$inout2 - movups $inout0,($out) + movups $inout0,($out) # store 3 output blocks movups $inout1,16*1($out) movups $inout2,16*2($out) - lea 16*3($out),$out + lea 16*3($out),$out # $out+=3*16 jmp .Lxts_dec_done .align 16 @@ -2422,7 +2586,7 @@ sub aesni_generate8 { movups 16*2($inp),$inout2 xorps @tweak[0],$inout0 movups 16*3($inp),$inout3 - lea 16*4($inp),$inp + lea 16*4($inp),$inp # $inp+=4*16 xorps @tweak[1],$inout1 xorps @tweak[2],$inout2 xorps @tweak[3],$inout3 @@ -2434,17 +2598,17 @@ sub aesni_generate8 { pxor @tweak[1],$inout1 movdqa @tweak[5],@tweak[1] pxor @tweak[2],$inout2 - movdqu $inout0,($out) + movdqu $inout0,($out) # store 4 output blocks pxor @tweak[3],$inout3 movdqu $inout1,16*1($out) movdqu $inout2,16*2($out) movdqu $inout3,16*3($out) - lea 16*4($out),$out + lea 16*4($out),$out # $out+=4*16 jmp .Lxts_dec_done .align 16 .Lxts_dec_done: - and \$15,$len_ + and \$15,$len_ # see if $len%16 is 0 jz .Lxts_dec_ret .Lxts_dec_done2: mov $len_,$len @@ -2482,18 +2646,60 @@ sub aesni_generate8 { movups $inout0,($out) .Lxts_dec_ret: + xorps %xmm0,%xmm0 # clear register bank + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 +___ +$code.=<<___ if (!$win64); + pxor %xmm6,%xmm6 + pxor %xmm7,%xmm7 + movaps %xmm0,0x00(%rsp) # clear stack + pxor %xmm8,%xmm8 + movaps %xmm0,0x10(%rsp) + pxor %xmm9,%xmm9 + movaps %xmm0,0x20(%rsp) + pxor %xmm10,%xmm10 + movaps %xmm0,0x30(%rsp) + pxor %xmm11,%xmm11 + movaps %xmm0,0x40(%rsp) + pxor %xmm12,%xmm12 + movaps %xmm0,0x50(%rsp) + pxor %xmm13,%xmm13 + movaps %xmm0,0x60(%rsp) + pxor %xmm14,%xmm14 + pxor %xmm15,%xmm15 ___ $code.=<<___ if ($win64); movaps -0xa0(%rbp),%xmm6 + movaps %xmm0,-0xa0(%rbp) # clear stack movaps -0x90(%rbp),%xmm7 + movaps %xmm0,-0x90(%rbp) movaps -0x80(%rbp),%xmm8 + movaps %xmm0,-0x80(%rbp) movaps -0x70(%rbp),%xmm9 + movaps %xmm0,-0x70(%rbp) movaps -0x60(%rbp),%xmm10 + movaps %xmm0,-0x60(%rbp) movaps -0x50(%rbp),%xmm11 + movaps %xmm0,-0x50(%rbp) movaps -0x40(%rbp),%xmm12 + movaps %xmm0,-0x40(%rbp) movaps -0x30(%rbp),%xmm13 + movaps %xmm0,-0x30(%rbp) movaps -0x20(%rbp),%xmm14 + movaps %xmm0,-0x20(%rbp) movaps -0x10(%rbp),%xmm15 + movaps %xmm0,-0x10(%rbp) + movaps %xmm0,0x00(%rsp) + movaps %xmm0,0x10(%rsp) + movaps %xmm0,0x20(%rsp) + movaps %xmm0,0x30(%rsp) + movaps %xmm0,0x40(%rsp) + movaps %xmm0,0x50(%rsp) + movaps %xmm0,0x60(%rsp) ___ $code.=<<___; lea (%rbp),%rsp @@ -2548,7 +2754,11 @@ sub aesni_generate8 { jnc .Lcbc_enc_loop add \$16,$len jnz .Lcbc_enc_tail + pxor $rndkey0,$rndkey0 # clear register bank + pxor $rndkey1,$rndkey1 movups $inout0,($ivp) + pxor $inout0,$inout0 + pxor $inout1,$inout1 jmp .Lcbc_ret .Lcbc_enc_tail: @@ -2568,6 +2778,27 @@ sub aesni_generate8 { #--------------------------- CBC DECRYPT ------------------------------# .align 16 .Lcbc_decrypt: + cmp \$16,$len + jne .Lcbc_decrypt_bulk + + # handle single block without allocating stack frame, + # useful in ciphertext stealing mode + movdqu ($inp),$inout0 # load input + movdqu ($ivp),$inout1 # load iv + movdqa $inout0,$inout2 # future iv +___ + &aesni_generate1("dec",$key,$rnds_); +$code.=<<___; + pxor $rndkey0,$rndkey0 # clear register bank + pxor $rndkey1,$rndkey1 + movdqu $inout2,($ivp) # store iv + xorps $inout1,$inout0 # ^=iv + pxor $inout1,$inout1 + movups $inout0,($out) # store output + pxor $inout0,$inout0 + jmp .Lcbc_ret +.align 16 +.Lcbc_decrypt_bulk: lea (%rsp),%rax push %rbp sub \$$frame_size,%rsp @@ -2610,10 +2841,10 @@ sub aesni_generate8 { jbe .Lcbc_dec_six_or_seven and \$`1<<26|1<<22`,%r9d # isolate XSAVE+MOVBE - sub \$0x50,$len + sub \$0x50,$len # $len is biased by -5*16 cmp \$`1<<22`,%r9d # check for MOVBE without XSAVE - je .Lcbc_dec_loop6_enter - sub \$0x20,$len + je .Lcbc_dec_loop6_enter # [which denotes Atom Silvermont] + sub \$0x20,$len # $len is biased by -7*16 lea 0x70($key),$key # size optimization jmp .Lcbc_dec_loop8_enter .align 16 @@ -2740,7 +2971,7 @@ sub aesni_generate8 { movaps $inout7,$inout0 lea -0x70($key),$key add \$0x70,$len - jle .Lcbc_dec_tail_collected + jle .Lcbc_dec_clear_tail_collected movups $inout7,($out) lea 0x10($out),$out cmp \$0x50,$len @@ -2759,14 +2990,19 @@ sub aesni_generate8 { movdqu $inout0,($out) pxor $in1,$inout2 movdqu $inout1,0x10($out) + pxor $inout1,$inout1 # clear register bank pxor $in2,$inout3 movdqu $inout2,0x20($out) + pxor $inout2,$inout2 pxor $in3,$inout4 movdqu $inout3,0x30($out) + pxor $inout3,$inout3 pxor $in4,$inout5 movdqu $inout4,0x40($out) + pxor $inout4,$inout4 lea 0x50($out),$out movdqa $inout5,$inout0 + pxor $inout5,$inout5 jmp .Lcbc_dec_tail_collected .align 16 @@ -2781,16 +3017,23 @@ sub aesni_generate8 { movdqu $inout0,($out) pxor $in1,$inout2 movdqu $inout1,0x10($out) + pxor $inout1,$inout1 # clear register bank pxor $in2,$inout3 movdqu $inout2,0x20($out) + pxor $inout2,$inout2 pxor $in3,$inout4 movdqu $inout3,0x30($out) + pxor $inout3,$inout3 pxor $in4,$inout5 movdqu $inout4,0x40($out) + pxor $inout4,$inout4 pxor $inout7,$inout6 movdqu $inout5,0x50($out) + pxor $inout5,$inout5 lea 0x60($out),$out movdqa $inout6,$inout0 + pxor $inout6,$inout6 + pxor $inout7,$inout7 jmp .Lcbc_dec_tail_collected .align 16 @@ -2834,31 +3077,31 @@ sub aesni_generate8 { movdqa $inout5,$inout0 add \$0x50,$len - jle .Lcbc_dec_tail_collected + jle .Lcbc_dec_clear_tail_collected movups $inout5,($out) lea 0x10($out),$out .Lcbc_dec_tail: movups ($inp),$inout0 sub \$0x10,$len - jbe .Lcbc_dec_one + jbe .Lcbc_dec_one # $len is 1*16 or less movups 0x10($inp),$inout1 movaps $inout0,$in0 sub \$0x10,$len - jbe .Lcbc_dec_two + jbe .Lcbc_dec_two # $len is 2*16 or less movups 0x20($inp),$inout2 movaps $inout1,$in1 sub \$0x10,$len - jbe .Lcbc_dec_three + jbe .Lcbc_dec_three # $len is 3*16 or less movups 0x30($inp),$inout3 movaps $inout2,$in2 sub \$0x10,$len - jbe .Lcbc_dec_four + jbe .Lcbc_dec_four # $len is 4*16 or less - movups 0x40($inp),$inout4 + movups 0x40($inp),$inout4 # $len is 5*16 or less movaps $inout3,$in3 movaps $inout4,$in4 xorps $inout5,$inout5 @@ -2869,12 +3112,17 @@ sub aesni_generate8 { movdqu $inout0,($out) pxor $in1,$inout2 movdqu $inout1,0x10($out) + pxor $inout1,$inout1 # clear register bank pxor $in2,$inout3 movdqu $inout2,0x20($out) + pxor $inout2,$inout2 pxor $in3,$inout4 movdqu $inout3,0x30($out) + pxor $inout3,$inout3 lea 0x40($out),$out movdqa $inout4,$inout0 + pxor $inout4,$inout4 + pxor $inout5,$inout5 sub \$0x10,$len jmp .Lcbc_dec_tail_collected @@ -2896,6 +3144,7 @@ sub aesni_generate8 { pxor $in0,$inout1 movdqu $inout0,($out) movdqa $inout1,$inout0 + pxor $inout1,$inout1 # clear register bank lea 0x10($out),$out jmp .Lcbc_dec_tail_collected .align 16 @@ -2908,7 +3157,9 @@ sub aesni_generate8 { movdqu $inout0,($out) pxor $in1,$inout2 movdqu $inout1,0x10($out) + pxor $inout1,$inout1 # clear register bank movdqa $inout2,$inout0 + pxor $inout2,$inout2 lea 0x20($out),$out jmp .Lcbc_dec_tail_collected .align 16 @@ -2921,41 +3172,71 @@ sub aesni_generate8 { movdqu $inout0,($out) pxor $in1,$inout2 movdqu $inout1,0x10($out) + pxor $inout1,$inout1 # clear register bank pxor $in2,$inout3 movdqu $inout2,0x20($out) + pxor $inout2,$inout2 movdqa $inout3,$inout0 + pxor $inout3,$inout3 lea 0x30($out),$out jmp .Lcbc_dec_tail_collected .align 16 +.Lcbc_dec_clear_tail_collected: + pxor $inout1,$inout1 # clear register bank + pxor $inout2,$inout2 + pxor $inout3,$inout3 +___ +$code.=<<___ if (!$win64); + pxor $inout4,$inout4 # %xmm6..9 + pxor $inout5,$inout5 + pxor $inout6,$inout6 + pxor $inout7,$inout7 +___ +$code.=<<___; .Lcbc_dec_tail_collected: movups $iv,($ivp) and \$15,$len jnz .Lcbc_dec_tail_partial movups $inout0,($out) + pxor $inout0,$inout0 jmp .Lcbc_dec_ret .align 16 .Lcbc_dec_tail_partial: movaps $inout0,(%rsp) + pxor $inout0,$inout0 mov \$16,%rcx mov $out,%rdi sub $len,%rcx lea (%rsp),%rsi - .long 0x9066A4F3 # rep movsb + .long 0x9066A4F3 # rep movsb + movdqa $inout0,(%rsp) .Lcbc_dec_ret: + xorps $rndkey0,$rndkey0 # %xmm0 + pxor $rndkey1,$rndkey1 ___ $code.=<<___ if ($win64); movaps 0x10(%rsp),%xmm6 + movaps %xmm0,0x10(%rsp) # clear stack movaps 0x20(%rsp),%xmm7 + movaps %xmm0,0x20(%rsp) movaps 0x30(%rsp),%xmm8 + movaps %xmm0,0x30(%rsp) movaps 0x40(%rsp),%xmm9 + movaps %xmm0,0x40(%rsp) movaps 0x50(%rsp),%xmm10 + movaps %xmm0,0x50(%rsp) movaps 0x60(%rsp),%xmm11 + movaps %xmm0,0x60(%rsp) movaps 0x70(%rsp),%xmm12 + movaps %xmm0,0x70(%rsp) movaps 0x80(%rsp),%xmm13 + movaps %xmm0,0x80(%rsp) movaps 0x90(%rsp),%xmm14 + movaps %xmm0,0x90(%rsp) movaps 0xa0(%rsp),%xmm15 + movaps %xmm0,0xa0(%rsp) ___ $code.=<<___; lea (%rbp),%rsp @@ -2965,8 +3246,15 @@ sub aesni_generate8 { .size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt ___ } -# int $PREFIX_set_[en|de]crypt_key (const unsigned char *userKey, +# int ${PREFIX}_set_decrypt_key(const unsigned char *inp, # int bits, AES_KEY *key) +# +# input: $inp user-supplied key +# $bits $inp length in bits +# $key pointer to key schedule +# output: %eax 0 denoting success, -1 or -2 - failure (see C) +# *$key key schedule +# { my ($inp,$bits,$key) = @_4args; $bits =~ s/%r/%e/; @@ -3003,7 +3291,9 @@ sub aesni_generate8 { $movkey ($key),%xmm0 # inverse middle aesimc %xmm0,%xmm0 + pxor %xmm1,%xmm1 $movkey %xmm0,($inp) + pxor %xmm0,%xmm0 .Ldec_key_ret: add \$8,%rsp ret @@ -3020,6 +3310,22 @@ sub aesni_generate8 { # Agressively optimized in respect to aeskeygenassist's critical path # and is contained in %xmm0-5 to meet Win64 ABI requirement. # +# int ${PREFIX}_set_encrypt_key(const unsigned char *inp, +# int bits, AES_KEY * const key); +# +# input: $inp user-supplied key +# $bits $inp length in bits +# $key pointer to key schedule +# output: %eax 0 denoting success, -1 or -2 - failure (see C) +# $bits rounds-1 (used in aesni_set_decrypt_key) +# *$key key schedule +# $key pointer to key schedule (used in +# aesni_set_decrypt_key) +# +# Subroutine is frame-less, which means that only volatile registers +# are used. Note that it's declared "abi-omnipotent", which means that +# amount of volatile registers is smaller on Windows. +# $code.=<<___; .globl ${PREFIX}_set_encrypt_key .type ${PREFIX}_set_encrypt_key,\@abi-omnipotent @@ -3033,9 +3339,11 @@ sub aesni_generate8 { test $key,$key jz .Lenc_key_ret + mov \$`1<<28|1<<11`,%r10d # AVX and XOP bits movups ($inp),%xmm0 # pull first 128 bits of *userKey xorps %xmm4,%xmm4 # low dword of xmm4 is assumed 0 - lea 16($key),%rax + and OPENSSL_ia32cap_P+4(%rip),%r10d + lea 16($key),%rax # %rax is used as modifiable copy of $key cmp \$256,$bits je .L14rounds cmp \$192,$bits @@ -3045,6 +3353,9 @@ sub aesni_generate8 { .L10rounds: mov \$9,$bits # 10 rounds for 128-bit key + cmp \$`1<<28`,%r10d # AVX, bit no XOP + je .L10rounds_alt + $movkey %xmm0,($key) # round 0 aeskeygenassist \$0x1,%xmm0,%xmm1 # round 1 call .Lkey_expansion_128_cold @@ -3071,10 +3382,80 @@ sub aesni_generate8 { xor %eax,%eax jmp .Lenc_key_ret +.align 16 +.L10rounds_alt: + movdqa .Lkey_rotate(%rip),%xmm5 + mov \$8,%r10d + movdqa .Lkey_rcon1(%rip),%xmm4 + movdqa %xmm0,%xmm2 + movdqu %xmm0,($key) + jmp .Loop_key128 + +.align 16 +.Loop_key128: + pshufb %xmm5,%xmm0 + aesenclast %xmm4,%xmm0 + pslld \$1,%xmm4 + lea 16(%rax),%rax + + movdqa %xmm2,%xmm3 + pslldq \$4,%xmm2 + pxor %xmm2,%xmm3 + pslldq \$4,%xmm2 + pxor %xmm2,%xmm3 + pslldq \$4,%xmm2 + pxor %xmm3,%xmm2 + + pxor %xmm2,%xmm0 + movdqu %xmm0,-16(%rax) + movdqa %xmm0,%xmm2 + + dec %r10d + jnz .Loop_key128 + + movdqa .Lkey_rcon1b(%rip),%xmm4 + + pshufb %xmm5,%xmm0 + aesenclast %xmm4,%xmm0 + pslld \$1,%xmm4 + + movdqa %xmm2,%xmm3 + pslldq \$4,%xmm2 + pxor %xmm2,%xmm3 + pslldq \$4,%xmm2 + pxor %xmm2,%xmm3 + pslldq \$4,%xmm2 + pxor %xmm3,%xmm2 + + pxor %xmm2,%xmm0 + movdqu %xmm0,(%rax) + + movdqa %xmm0,%xmm2 + pshufb %xmm5,%xmm0 + aesenclast %xmm4,%xmm0 + + movdqa %xmm2,%xmm3 + pslldq \$4,%xmm2 + pxor %xmm2,%xmm3 + pslldq \$4,%xmm2 + pxor %xmm2,%xmm3 + pslldq \$4,%xmm2 + pxor %xmm3,%xmm2 + + pxor %xmm2,%xmm0 + movdqu %xmm0,16(%rax) + + mov $bits,96(%rax) # 240($key) + xor %eax,%eax + jmp .Lenc_key_ret + .align 16 .L12rounds: movq 16($inp),%xmm2 # remaining 1/3 of *userKey mov \$11,$bits # 12 rounds for 192 + cmp \$`1<<28`,%r10d # AVX, but no XOP + je .L12rounds_alt + $movkey %xmm0,($key) # round 0 aeskeygenassist \$0x1,%xmm2,%xmm1 # round 1,2 call .Lkey_expansion_192a_cold @@ -3097,11 +3478,55 @@ sub aesni_generate8 { xor %rax, %rax jmp .Lenc_key_ret +.align 16 +.L12rounds_alt: + movdqa .Lkey_rotate192(%rip),%xmm5 + movdqa .Lkey_rcon1(%rip),%xmm4 + mov \$8,%r10d + movdqu %xmm0,($key) + jmp .Loop_key192 + +.align 16 +.Loop_key192: + movq %xmm2,0(%rax) + movdqa %xmm2,%xmm1 + pshufb %xmm5,%xmm2 + aesenclast %xmm4,%xmm2 + pslld \$1, %xmm4 + lea 24(%rax),%rax + + movdqa %xmm0,%xmm3 + pslldq \$4,%xmm0 + pxor %xmm0,%xmm3 + pslldq \$4,%xmm0 + pxor %xmm0,%xmm3 + pslldq \$4,%xmm0 + pxor %xmm3,%xmm0 + + pshufd \$0xff,%xmm0,%xmm3 + pxor %xmm1,%xmm3 + pslldq \$4,%xmm1 + pxor %xmm1,%xmm3 + + pxor %xmm2,%xmm0 + pxor %xmm3,%xmm2 + movdqu %xmm0,-16(%rax) + + dec %r10d + jnz .Loop_key192 + + mov $bits,32(%rax) # 240($key) + xor %eax,%eax + jmp .Lenc_key_ret + .align 16 .L14rounds: movups 16($inp),%xmm2 # remaning half of *userKey mov \$13,$bits # 14 rounds for 256 lea 16(%rax),%rax + cmp \$`1<<28`,%r10d # AVX, but no XOP + je .L14rounds_alt + $movkey %xmm0,($key) # round 0 $movkey %xmm2,16($key) # round 1 aeskeygenassist \$0x1,%xmm2,%xmm1 # round 2 @@ -3135,10 +3560,70 @@ sub aesni_generate8 { xor %rax,%rax jmp .Lenc_key_ret +.align 16 +.L14rounds_alt: + movdqa .Lkey_rotate(%rip),%xmm5 + movdqa .Lkey_rcon1(%rip),%xmm4 + mov \$7,%r10d + movdqu %xmm0,0($key) + movdqa %xmm2,%xmm1 + movdqu %xmm2,16($key) + jmp .Loop_key256 + +.align 16 +.Loop_key256: + pshufb %xmm5,%xmm2 + aesenclast %xmm4,%xmm2 + + movdqa %xmm0,%xmm3 + pslldq \$4,%xmm0 + pxor %xmm0,%xmm3 + pslldq \$4,%xmm0 + pxor %xmm0,%xmm3 + pslldq \$4,%xmm0 + pxor %xmm3,%xmm0 + pslld \$1,%xmm4 + + pxor %xmm2,%xmm0 + movdqu %xmm0,(%rax) + + dec %r10d + jz .Ldone_key256 + + pshufd \$0xff,%xmm0,%xmm2 + pxor %xmm3,%xmm3 + aesenclast %xmm3,%xmm2 + + movdqa %xmm1,%xmm3 + pslldq \$4,%xmm1 + pxor %xmm1,%xmm3 + pslldq \$4,%xmm1 + pxor %xmm1,%xmm3 + pslldq \$4,%xmm1 + pxor %xmm3,%xmm1 + + pxor %xmm1,%xmm2 + movdqu %xmm2,16(%rax) + lea 32(%rax),%rax + movdqa %xmm2,%xmm1 + + jmp .Loop_key256 + +.Ldone_key256: + mov $bits,16(%rax) # 240($key) + xor %eax,%eax + jmp .Lenc_key_ret + .align 16 .Lbad_keybits: mov \$-2,%rax .Lenc_key_ret: + pxor %xmm0,%xmm0 + pxor %xmm1,%xmm1 + pxor %xmm2,%xmm2 + pxor %xmm3,%xmm3 + pxor %xmm4,%xmm4 + pxor %xmm5,%xmm5 add \$8,%rsp ret .LSEH_end_set_encrypt_key: @@ -3228,6 +3713,14 @@ sub aesni_generate8 { .long 0x87,0,1,0 .Lincrement1: .byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +.Lkey_rotate: + .long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d +.Lkey_rotate192: + .long 0x04070605,0x04070605,0x04070605,0x04070605 +.Lkey_rcon1: + .long 1,1,1,1 +.Lkey_rcon1b: + .long 0x1b,0x1b,0x1b,0x1b .asciz "AES for Intel AES-NI, CRYPTOGAMS by " .align 64 @@ -3345,7 +3838,7 @@ sub aesni_generate8 { mov 152($context),%rax # pull context->Rsp mov 248($context),%rbx # pull context->Rip - lea .Lcbc_decrypt(%rip),%r10 + lea .Lcbc_decrypt_bulk(%rip),%r10 cmp %r10,%rbx # context->Rip<"prologue" label jb .Lcommon_seh_tail diff --git a/deps/openssl/openssl/crypto/aes/asm/aesp8-ppc.pl b/deps/openssl/openssl/crypto/aes/asm/aesp8-ppc.pl index c653d5a98ac3c1..a1891cc03caa6b 100755 --- a/deps/openssl/openssl/crypto/aes/asm/aesp8-ppc.pl +++ b/deps/openssl/openssl/crypto/aes/asm/aesp8-ppc.pl @@ -1910,7 +1910,7 @@ () if ($flavour =~ /le$/o) { SWITCH: for($conv) { /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; - /\?rev/ && do { @bytes=reverse(@bytes); last; }; + /\?rev/ && do { @bytes=reverse(@bytes); last; }; } } diff --git a/deps/openssl/openssl/crypto/aes/asm/aesv8-armx.pl b/deps/openssl/openssl/crypto/aes/asm/aesv8-armx.pl index 3b38e4c2032b8e..95ebae3beb9b8f 100755 --- a/deps/openssl/openssl/crypto/aes/asm/aesv8-armx.pl +++ b/deps/openssl/openssl/crypto/aes/asm/aesv8-armx.pl @@ -24,8 +24,12 @@ # # CBC enc CBC dec CTR # Apple A7 2.39 1.20 1.20 -# Cortex-A53 2.45 1.87 1.94 -# Cortex-A57 3.64 1.34 1.32 +# Cortex-A53 1.32 1.29 1.46 +# Cortex-A57(*) 1.95 0.85 0.93 +# Denver 1.96 0.86 0.80 +# +# (*) original 3.64/1.34/1.32 results were for r0p0 revision +# and are still same even for updated module; $flavour = shift; open STDOUT,">".shift; @@ -308,17 +312,17 @@ () .Loop_${dir}c: aes$e $inout,$rndkey0 - vld1.32 {$rndkey0},[$key],#16 aes$mc $inout,$inout + vld1.32 {$rndkey0},[$key],#16 subs $rounds,$rounds,#2 aes$e $inout,$rndkey1 - vld1.32 {$rndkey1},[$key],#16 aes$mc $inout,$inout + vld1.32 {$rndkey1},[$key],#16 b.gt .Loop_${dir}c aes$e $inout,$rndkey0 - vld1.32 {$rndkey0},[$key] aes$mc $inout,$inout + vld1.32 {$rndkey0},[$key] aes$e $inout,$rndkey1 veor $inout,$inout,$rndkey0 @@ -336,6 +340,7 @@ () my ($dat0,$dat1,$in0,$in1,$tmp0,$tmp1,$ivec,$rndlast)=map("q$_",(0..7)); my ($dat,$tmp,$rndzero_n_last)=($dat0,$tmp0,$tmp1); +my ($key4,$key5,$key6,$key7)=("x6","x12","x14",$key); ### q8-q15 preloaded key schedule @@ -385,16 +390,42 @@ () veor $rndzero_n_last,q8,$rndlast b.eq .Lcbc_enc128 + vld1.32 {$in0-$in1},[$key_] + add $key_,$key,#16 + add $key4,$key,#16*4 + add $key5,$key,#16*5 + aese $dat,q8 + aesmc $dat,$dat + add $key6,$key,#16*6 + add $key7,$key,#16*7 + b .Lenter_cbc_enc + +.align 4 .Loop_cbc_enc: aese $dat,q8 - vld1.32 {q8},[$key_],#16 aesmc $dat,$dat - subs $cnt,$cnt,#2 + vst1.8 {$ivec},[$out],#16 +.Lenter_cbc_enc: aese $dat,q9 - vld1.32 {q9},[$key_],#16 aesmc $dat,$dat - b.gt .Loop_cbc_enc + aese $dat,$in0 + aesmc $dat,$dat + vld1.32 {q8},[$key4] + cmp $rounds,#4 + aese $dat,$in1 + aesmc $dat,$dat + vld1.32 {q9},[$key5] + b.eq .Lcbc_enc192 + + aese $dat,q8 + aesmc $dat,$dat + vld1.32 {q8},[$key6] + aese $dat,q9 + aesmc $dat,$dat + vld1.32 {q9},[$key7] + nop +.Lcbc_enc192: aese $dat,q8 aesmc $dat,$dat subs $len,$len,#16 @@ -403,7 +434,6 @@ () cclr $step,eq aese $dat,q10 aesmc $dat,$dat - add $key_,$key,#16 aese $dat,q11 aesmc $dat,$dat vld1.8 {q8},[$inp],$step @@ -412,16 +442,14 @@ () veor q8,q8,$rndzero_n_last aese $dat,q13 aesmc $dat,$dat - vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] + vld1.32 {q9},[$key_] // re-pre-load rndkey[1] aese $dat,q14 aesmc $dat,$dat aese $dat,q15 - - mov $cnt,$rounds veor $ivec,$dat,$rndlast - vst1.8 {$ivec},[$out],#16 b.hs .Loop_cbc_enc + vst1.8 {$ivec},[$out],#16 b .Lcbc_done .align 5 @@ -483,79 +511,78 @@ () .Loop3x_cbc_dec: aesd $dat0,q8 - aesd $dat1,q8 - aesd $dat2,q8 - vld1.32 {q8},[$key_],#16 aesimc $dat0,$dat0 + aesd $dat1,q8 aesimc $dat1,$dat1 + aesd $dat2,q8 aesimc $dat2,$dat2 + vld1.32 {q8},[$key_],#16 subs $cnt,$cnt,#2 aesd $dat0,q9 - aesd $dat1,q9 - aesd $dat2,q9 - vld1.32 {q9},[$key_],#16 aesimc $dat0,$dat0 + aesd $dat1,q9 aesimc $dat1,$dat1 + aesd $dat2,q9 aesimc $dat2,$dat2 + vld1.32 {q9},[$key_],#16 b.gt .Loop3x_cbc_dec aesd $dat0,q8 - aesd $dat1,q8 - aesd $dat2,q8 - veor $tmp0,$ivec,$rndlast aesimc $dat0,$dat0 + aesd $dat1,q8 aesimc $dat1,$dat1 + aesd $dat2,q8 aesimc $dat2,$dat2 + veor $tmp0,$ivec,$rndlast + subs $len,$len,#0x30 veor $tmp1,$in0,$rndlast + mov.lo x6,$len // x6, $cnt, is zero at this point aesd $dat0,q9 - aesd $dat1,q9 - aesd $dat2,q9 - veor $tmp2,$in1,$rndlast - subs $len,$len,#0x30 aesimc $dat0,$dat0 + aesd $dat1,q9 aesimc $dat1,$dat1 + aesd $dat2,q9 aesimc $dat2,$dat2 - vorr $ivec,$in2,$in2 - mov.lo x6,$len // x6, $cnt, is zero at this point - aesd $dat0,q12 - aesd $dat1,q12 - aesd $dat2,q12 + veor $tmp2,$in1,$rndlast add $inp,$inp,x6 // $inp is adjusted in such way that // at exit from the loop $dat1-$dat2 // are loaded with last "words" + vorr $ivec,$in2,$in2 + mov $key_,$key + aesd $dat0,q12 aesimc $dat0,$dat0 + aesd $dat1,q12 aesimc $dat1,$dat1 + aesd $dat2,q12 aesimc $dat2,$dat2 - mov $key_,$key - aesd $dat0,q13 - aesd $dat1,q13 - aesd $dat2,q13 vld1.8 {$in0},[$inp],#16 + aesd $dat0,q13 aesimc $dat0,$dat0 + aesd $dat1,q13 aesimc $dat1,$dat1 + aesd $dat2,q13 aesimc $dat2,$dat2 vld1.8 {$in1},[$inp],#16 aesd $dat0,q14 - aesd $dat1,q14 - aesd $dat2,q14 - vld1.8 {$in2},[$inp],#16 aesimc $dat0,$dat0 + aesd $dat1,q14 aesimc $dat1,$dat1 + aesd $dat2,q14 aesimc $dat2,$dat2 - vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] + vld1.8 {$in2},[$inp],#16 aesd $dat0,q15 aesd $dat1,q15 aesd $dat2,q15 - + vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] add $cnt,$rounds,#2 veor $tmp0,$tmp0,$dat0 veor $tmp1,$tmp1,$dat1 veor $dat2,$dat2,$tmp2 vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] - vorr $dat0,$in0,$in0 vst1.8 {$tmp0},[$out],#16 - vorr $dat1,$in1,$in1 + vorr $dat0,$in0,$in0 vst1.8 {$tmp1},[$out],#16 + vorr $dat1,$in1,$in1 vst1.8 {$dat2},[$out],#16 vorr $dat2,$in2,$in2 b.hs .Loop3x_cbc_dec @@ -566,39 +593,39 @@ () .Lcbc_dec_tail: aesd $dat1,q8 - aesd $dat2,q8 - vld1.32 {q8},[$key_],#16 aesimc $dat1,$dat1 + aesd $dat2,q8 aesimc $dat2,$dat2 + vld1.32 {q8},[$key_],#16 subs $cnt,$cnt,#2 aesd $dat1,q9 - aesd $dat2,q9 - vld1.32 {q9},[$key_],#16 aesimc $dat1,$dat1 + aesd $dat2,q9 aesimc $dat2,$dat2 + vld1.32 {q9},[$key_],#16 b.gt .Lcbc_dec_tail aesd $dat1,q8 - aesd $dat2,q8 aesimc $dat1,$dat1 + aesd $dat2,q8 aesimc $dat2,$dat2 aesd $dat1,q9 - aesd $dat2,q9 aesimc $dat1,$dat1 + aesd $dat2,q9 aesimc $dat2,$dat2 aesd $dat1,q12 - aesd $dat2,q12 aesimc $dat1,$dat1 + aesd $dat2,q12 aesimc $dat2,$dat2 cmn $len,#0x20 aesd $dat1,q13 - aesd $dat2,q13 aesimc $dat1,$dat1 + aesd $dat2,q13 aesimc $dat2,$dat2 veor $tmp1,$ivec,$rndlast aesd $dat1,q14 - aesd $dat2,q14 aesimc $dat1,$dat1 + aesd $dat2,q14 aesimc $dat2,$dat2 veor $tmp2,$in1,$rndlast aesd $dat1,q15 @@ -699,70 +726,69 @@ () .align 4 .Loop3x_ctr32: aese $dat0,q8 - aese $dat1,q8 - aese $dat2,q8 - vld1.32 {q8},[$key_],#16 aesmc $dat0,$dat0 + aese $dat1,q8 aesmc $dat1,$dat1 + aese $dat2,q8 aesmc $dat2,$dat2 + vld1.32 {q8},[$key_],#16 subs $cnt,$cnt,#2 aese $dat0,q9 - aese $dat1,q9 - aese $dat2,q9 - vld1.32 {q9},[$key_],#16 aesmc $dat0,$dat0 + aese $dat1,q9 aesmc $dat1,$dat1 + aese $dat2,q9 aesmc $dat2,$dat2 + vld1.32 {q9},[$key_],#16 b.gt .Loop3x_ctr32 aese $dat0,q8 - aese $dat1,q8 - aese $dat2,q8 - mov $key_,$key aesmc $tmp0,$dat0 - vld1.8 {$in0},[$inp],#16 + aese $dat1,q8 aesmc $tmp1,$dat1 - aesmc $dat2,$dat2 + vld1.8 {$in0},[$inp],#16 vorr $dat0,$ivec,$ivec - aese $tmp0,q9 + aese $dat2,q8 + aesmc $dat2,$dat2 vld1.8 {$in1},[$inp],#16 - aese $tmp1,q9 - aese $dat2,q9 vorr $dat1,$ivec,$ivec + aese $tmp0,q9 aesmc $tmp0,$tmp0 - vld1.8 {$in2},[$inp],#16 + aese $tmp1,q9 aesmc $tmp1,$tmp1 + vld1.8 {$in2},[$inp],#16 + mov $key_,$key + aese $dat2,q9 aesmc $tmp2,$dat2 vorr $dat2,$ivec,$ivec add $tctr0,$ctr,#1 aese $tmp0,q12 + aesmc $tmp0,$tmp0 aese $tmp1,q12 - aese $tmp2,q12 + aesmc $tmp1,$tmp1 veor $in0,$in0,$rndlast add $tctr1,$ctr,#2 - aesmc $tmp0,$tmp0 - aesmc $tmp1,$tmp1 + aese $tmp2,q12 aesmc $tmp2,$tmp2 veor $in1,$in1,$rndlast add $ctr,$ctr,#3 aese $tmp0,q13 + aesmc $tmp0,$tmp0 aese $tmp1,q13 - aese $tmp2,q13 + aesmc $tmp1,$tmp1 veor $in2,$in2,$rndlast rev $tctr0,$tctr0 - aesmc $tmp0,$tmp0 - vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] - aesmc $tmp1,$tmp1 + aese $tmp2,q13 aesmc $tmp2,$tmp2 vmov.32 ${dat0}[3], $tctr0 rev $tctr1,$tctr1 aese $tmp0,q14 + aesmc $tmp0,$tmp0 aese $tmp1,q14 - aese $tmp2,q14 + aesmc $tmp1,$tmp1 vmov.32 ${dat1}[3], $tctr1 rev $tctr2,$ctr - aesmc $tmp0,$tmp0 - aesmc $tmp1,$tmp1 + aese $tmp2,q14 aesmc $tmp2,$tmp2 vmov.32 ${dat2}[3], $tctr2 subs $len,$len,#3 @@ -770,13 +796,14 @@ () aese $tmp1,q15 aese $tmp2,q15 - mov $cnt,$rounds veor $in0,$in0,$tmp0 + vld1.32 {q8},[$key_],#16 // re-pre-load rndkey[0] + vst1.8 {$in0},[$out],#16 veor $in1,$in1,$tmp1 + mov $cnt,$rounds + vst1.8 {$in1},[$out],#16 veor $in2,$in2,$tmp2 vld1.32 {q9},[$key_],#16 // re-pre-load rndkey[1] - vst1.8 {$in0},[$out],#16 - vst1.8 {$in1},[$out],#16 vst1.8 {$in2},[$out],#16 b.hs .Loop3x_ctr32 @@ -788,40 +815,40 @@ () .Lctr32_tail: aese $dat0,q8 - aese $dat1,q8 - vld1.32 {q8},[$key_],#16 aesmc $dat0,$dat0 + aese $dat1,q8 aesmc $dat1,$dat1 + vld1.32 {q8},[$key_],#16 subs $cnt,$cnt,#2 aese $dat0,q9 - aese $dat1,q9 - vld1.32 {q9},[$key_],#16 aesmc $dat0,$dat0 + aese $dat1,q9 aesmc $dat1,$dat1 + vld1.32 {q9},[$key_],#16 b.gt .Lctr32_tail aese $dat0,q8 - aese $dat1,q8 aesmc $dat0,$dat0 + aese $dat1,q8 aesmc $dat1,$dat1 aese $dat0,q9 - aese $dat1,q9 aesmc $dat0,$dat0 + aese $dat1,q9 aesmc $dat1,$dat1 vld1.8 {$in0},[$inp],$step aese $dat0,q12 - aese $dat1,q12 - vld1.8 {$in1},[$inp] aesmc $dat0,$dat0 + aese $dat1,q12 aesmc $dat1,$dat1 + vld1.8 {$in1},[$inp] aese $dat0,q13 - aese $dat1,q13 aesmc $dat0,$dat0 + aese $dat1,q13 aesmc $dat1,$dat1 - aese $dat0,q14 - aese $dat1,q14 veor $in0,$in0,$rndlast + aese $dat0,q14 aesmc $dat0,$dat0 + aese $dat1,q14 aesmc $dat1,$dat1 veor $in1,$in1,$rndlast aese $dat0,q15 @@ -918,21 +945,21 @@ () $arg =~ m/q([0-9]+),\s*\{q([0-9]+)\},\s*q([0-9]+)/o && sprintf "vtbl.8 d%d,{q%d},d%d\n\t". - "vtbl.8 d%d,{q%d},d%d", 2*$1,$2,2*$3, 2*$1+1,$2,2*$3+1; + "vtbl.8 d%d,{q%d},d%d", 2*$1,$2,2*$3, 2*$1+1,$2,2*$3+1; } sub unvdup32 { my $arg=shift; $arg =~ m/q([0-9]+),\s*q([0-9]+)\[([0-3])\]/o && - sprintf "vdup.32 q%d,d%d[%d]",$1,2*$2+($3>>1),$3&1; + sprintf "vdup.32 q%d,d%d[%d]",$1,2*$2+($3>>1),$3&1; } sub unvmov32 { my $arg=shift; $arg =~ m/q([0-9]+)\[([0-3])\],(.*)/o && - sprintf "vmov.32 d%d[%d],%s",2*$1+($2>>1),$2&1,$3; + sprintf "vmov.32 d%d[%d],%s",2*$1+($2>>1),$2&1,$3; } foreach(split("\n",$code)) { diff --git a/deps/openssl/openssl/crypto/aes/asm/bsaes-armv7.pl b/deps/openssl/openssl/crypto/aes/asm/bsaes-armv7.pl index 8b418c820419c5..fcc81d1a493374 100644 --- a/deps/openssl/openssl/crypto/aes/asm/bsaes-armv7.pl +++ b/deps/openssl/openssl/crypto/aes/asm/bsaes-armv7.pl @@ -72,7 +72,7 @@ sub Sbox { sub InBasisChange { # input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb -# output in lsb > [b6, b5, b0, b3, b7, b1, b4, b2] < msb +# output in lsb > [b6, b5, b0, b3, b7, b1, b4, b2] < msb my @b=@_[0..7]; $code.=<<___; veor @b[2], @b[2], @b[1] diff --git a/deps/openssl/openssl/crypto/aes/asm/vpaes-ppc.pl b/deps/openssl/openssl/crypto/aes/asm/vpaes-ppc.pl index 25d753addd716c..7fda60ed9e4d59 100644 --- a/deps/openssl/openssl/crypto/aes/asm/vpaes-ppc.pl +++ b/deps/openssl/openssl/crypto/aes/asm/vpaes-ppc.pl @@ -997,7 +997,7 @@ # high round bl _vpaes_schedule_round bdz Lschedule_mangle_last # dec %esi - bl _vpaes_schedule_mangle + bl _vpaes_schedule_mangle # low round. swap xmm7 and xmm6 ?vspltw v0, v0, 3 # vpshufd \$0xFF, %xmm0, %xmm0 @@ -1005,7 +1005,7 @@ vmr v7, v6 # vmovdqa %xmm6, %xmm7 bl _vpaes_schedule_low_round vmr v7, v5 # vmovdqa %xmm5, %xmm7 - + b Loop_schedule_256 ## ## .aes_schedule_mangle_last @@ -1052,7 +1052,7 @@ Lschedule_mangle_last_dec: lvx $iptlo, r11, r12 # reload $ipt lvx $ipthi, r9, r12 - addi $out, $out, -16 # add \$-16, %rdx + addi $out, $out, -16 # add \$-16, %rdx vxor v0, v0, v26 # vpxor .Lk_s63(%rip), %xmm0, %xmm0 bl _vpaes_schedule_transform # output transform @@ -1484,7 +1484,7 @@ if ($flavour =~ /le$/o) { SWITCH: for($conv) { /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; - /\?rev/ && do { @bytes=reverse(@bytes); last; }; + /\?rev/ && do { @bytes=reverse(@bytes); last; }; } } diff --git a/deps/openssl/openssl/crypto/asn1/Makefile b/deps/openssl/openssl/crypto/asn1/Makefile index 2e2a097399be6f..330fe81b740c55 100644 --- a/deps/openssl/openssl/crypto/asn1/Makefile +++ b/deps/openssl/openssl/crypto/asn1/Makefile @@ -93,6 +93,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by top Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/asn1/a_int.c b/deps/openssl/openssl/crypto/asn1/a_int.c index 70c2b8e62c4356..7e26704a5479ca 100644 --- a/deps/openssl/openssl/crypto/asn1/a_int.c +++ b/deps/openssl/openssl/crypto/asn1/a_int.c @@ -124,6 +124,8 @@ int i2c_ASN1_INTEGER(ASN1_INTEGER *a, unsigned char **pp) else { ret = a->length; i = a->data[0]; + if (ret == 1 && i == 0) + neg = 0; if (!neg && (i > 127)) { pad = 1; pb = 0; @@ -162,7 +164,7 @@ int i2c_ASN1_INTEGER(ASN1_INTEGER *a, unsigned char **pp) p += a->length - 1; i = a->length; /* Copy zeros to destination as long as source is zero */ - while (!*n) { + while (!*n && i > 1) { *(p--) = 0; n--; i--; @@ -419,7 +421,7 @@ ASN1_INTEGER *BN_to_ASN1_INTEGER(const BIGNUM *bn, ASN1_INTEGER *ai) ASN1err(ASN1_F_BN_TO_ASN1_INTEGER, ERR_R_NESTED_ASN1_ERROR); goto err; } - if (BN_is_negative(bn)) + if (BN_is_negative(bn) && !BN_is_zero(bn)) ret->type = V_ASN1_NEG_INTEGER; else ret->type = V_ASN1_INTEGER; diff --git a/deps/openssl/openssl/crypto/asn1/ameth_lib.c b/deps/openssl/openssl/crypto/asn1/ameth_lib.c index 02300dfedf0697..5389c0434740a5 100644 --- a/deps/openssl/openssl/crypto/asn1/ameth_lib.c +++ b/deps/openssl/openssl/crypto/asn1/ameth_lib.c @@ -464,3 +464,21 @@ void EVP_PKEY_asn1_set_ctrl(EVP_PKEY_ASN1_METHOD *ameth, { ameth->pkey_ctrl = pkey_ctrl; } + +void EVP_PKEY_asn1_set_item(EVP_PKEY_ASN1_METHOD *ameth, + int (*item_verify) (EVP_MD_CTX *ctx, + const ASN1_ITEM *it, + void *asn, + X509_ALGOR *a, + ASN1_BIT_STRING *sig, + EVP_PKEY *pkey), + int (*item_sign) (EVP_MD_CTX *ctx, + const ASN1_ITEM *it, + void *asn, + X509_ALGOR *alg1, + X509_ALGOR *alg2, + ASN1_BIT_STRING *sig)) +{ + ameth->item_sign = item_sign; + ameth->item_verify = item_verify; +} diff --git a/deps/openssl/openssl/crypto/asn1/asn1_gen.c b/deps/openssl/openssl/crypto/asn1/asn1_gen.c index 11b582dd3744c3..65749239b1a4c3 100644 --- a/deps/openssl/openssl/crypto/asn1/asn1_gen.c +++ b/deps/openssl/openssl/crypto/asn1/asn1_gen.c @@ -74,6 +74,8 @@ #define ASN1_GEN_STR(str,val) {str, sizeof(str) - 1, val} #define ASN1_FLAG_EXP_MAX 20 +/* Maximum number of nested sequences */ +#define ASN1_GEN_SEQ_MAX_DEPTH 50 /* Input formats */ @@ -110,13 +112,16 @@ typedef struct { int exp_count; } tag_exp_arg; +static ASN1_TYPE *generate_v3(char *str, X509V3_CTX *cnf, int depth, + int *perr); static int bitstr_cb(const char *elem, int len, void *bitstr); static int asn1_cb(const char *elem, int len, void *bitstr); static int append_exp(tag_exp_arg *arg, int exp_tag, int exp_class, int exp_constructed, int exp_pad, int imp_ok); static int parse_tagging(const char *vstart, int vlen, int *ptag, int *pclass); -static ASN1_TYPE *asn1_multi(int utype, const char *section, X509V3_CTX *cnf); +static ASN1_TYPE *asn1_multi(int utype, const char *section, X509V3_CTX *cnf, + int depth, int *perr); static ASN1_TYPE *asn1_str2type(const char *str, int format, int utype); static int asn1_str2tag(const char *tagstr, int len); @@ -132,6 +137,16 @@ ASN1_TYPE *ASN1_generate_nconf(char *str, CONF *nconf) } ASN1_TYPE *ASN1_generate_v3(char *str, X509V3_CTX *cnf) +{ + int err = 0; + ASN1_TYPE *ret = generate_v3(str, cnf, 0, &err); + if (err) + ASN1err(ASN1_F_ASN1_GENERATE_V3, err); + return ret; +} + +static ASN1_TYPE *generate_v3(char *str, X509V3_CTX *cnf, int depth, + int *perr) { ASN1_TYPE *ret; tag_exp_arg asn1_tags; @@ -152,17 +167,22 @@ ASN1_TYPE *ASN1_generate_v3(char *str, X509V3_CTX *cnf) asn1_tags.imp_class = -1; asn1_tags.format = ASN1_GEN_FORMAT_ASCII; asn1_tags.exp_count = 0; - if (CONF_parse_list(str, ',', 1, asn1_cb, &asn1_tags) != 0) + if (CONF_parse_list(str, ',', 1, asn1_cb, &asn1_tags) != 0) { + *perr = ASN1_R_UNKNOWN_TAG; return NULL; + } if ((asn1_tags.utype == V_ASN1_SEQUENCE) || (asn1_tags.utype == V_ASN1_SET)) { if (!cnf) { - ASN1err(ASN1_F_ASN1_GENERATE_V3, - ASN1_R_SEQUENCE_OR_SET_NEEDS_CONFIG); + *perr = ASN1_R_SEQUENCE_OR_SET_NEEDS_CONFIG; return NULL; } - ret = asn1_multi(asn1_tags.utype, asn1_tags.str, cnf); + if (depth >= ASN1_GEN_SEQ_MAX_DEPTH) { + *perr = ASN1_R_ILLEGAL_NESTED_TAGGING; + return NULL; + } + ret = asn1_multi(asn1_tags.utype, asn1_tags.str, cnf, depth, perr); } else ret = asn1_str2type(asn1_tags.str, asn1_tags.format, asn1_tags.utype); @@ -280,7 +300,7 @@ static int asn1_cb(const char *elem, int len, void *bitstr) int tmp_tag, tmp_class; if (elem == NULL) - return 0; + return -1; for (i = 0, p = elem; i < len; p++, i++) { /* Look for the ':' in name value pairs */ @@ -353,7 +373,7 @@ static int asn1_cb(const char *elem, int len, void *bitstr) break; case ASN1_GEN_FLAG_FORMAT: - if(!vstart) { + if (!vstart) { ASN1err(ASN1_F_ASN1_CB, ASN1_R_UNKNOWN_FORMAT); return -1; } @@ -435,7 +455,8 @@ static int parse_tagging(const char *vstart, int vlen, int *ptag, int *pclass) /* Handle multiple types: SET and SEQUENCE */ -static ASN1_TYPE *asn1_multi(int utype, const char *section, X509V3_CTX *cnf) +static ASN1_TYPE *asn1_multi(int utype, const char *section, X509V3_CTX *cnf, + int depth, int *perr) { ASN1_TYPE *ret = NULL; STACK_OF(ASN1_TYPE) *sk = NULL; @@ -454,7 +475,8 @@ static ASN1_TYPE *asn1_multi(int utype, const char *section, X509V3_CTX *cnf) goto bad; for (i = 0; i < sk_CONF_VALUE_num(sect); i++) { ASN1_TYPE *typ = - ASN1_generate_v3(sk_CONF_VALUE_value(sect, i)->value, cnf); + generate_v3(sk_CONF_VALUE_value(sect, i)->value, cnf, + depth + 1, perr); if (!typ) goto bad; if (!sk_ASN1_TYPE_push(sk, typ)) diff --git a/deps/openssl/openssl/crypto/asn1/asn_mime.c b/deps/openssl/openssl/crypto/asn1/asn_mime.c index 7e2f28e6d5e622..96110c540f3dba 100644 --- a/deps/openssl/openssl/crypto/asn1/asn_mime.c +++ b/deps/openssl/openssl/crypto/asn1/asn_mime.c @@ -289,7 +289,8 @@ int SMIME_write_ASN1(BIO *bio, ASN1_VALUE *val, BIO *data, int flags, if ((flags & SMIME_DETACHED) && data) { /* We want multipart/signed */ /* Generate a random boundary */ - RAND_pseudo_bytes((unsigned char *)bound, 32); + if (RAND_pseudo_bytes((unsigned char *)bound, 32) < 0) + return 0; for (i = 0; i < 32; i++) { c = bound[i] & 0xf; if (c < 10) diff --git a/deps/openssl/openssl/crypto/asn1/bio_ndef.c b/deps/openssl/openssl/crypto/asn1/bio_ndef.c index 4a73ca9eac514a..31949b87940fa5 100644 --- a/deps/openssl/openssl/crypto/asn1/bio_ndef.c +++ b/deps/openssl/openssl/crypto/asn1/bio_ndef.c @@ -162,7 +162,7 @@ static int ndef_prefix(BIO *b, unsigned char **pbuf, int *plen, void *parg) derlen = ASN1_item_ndef_i2d(ndef_aux->val, NULL, ndef_aux->it); p = OPENSSL_malloc(derlen); - if(!p) + if (!p) return 0; ndef_aux->derbuf = p; @@ -232,7 +232,7 @@ static int ndef_suffix(BIO *b, unsigned char **pbuf, int *plen, void *parg) derlen = ASN1_item_ndef_i2d(ndef_aux->val, NULL, ndef_aux->it); p = OPENSSL_malloc(derlen); - if(!p) + if (!p) return 0; ndef_aux->derbuf = p; diff --git a/deps/openssl/openssl/crypto/asn1/tasn_new.c b/deps/openssl/openssl/crypto/asn1/tasn_new.c index 7d2964f02307b2..b0c73beeb57870 100644 --- a/deps/openssl/openssl/crypto/asn1/tasn_new.c +++ b/deps/openssl/openssl/crypto/asn1/tasn_new.c @@ -100,9 +100,6 @@ static int asn1_item_ex_combine_new(ASN1_VALUE **pval, const ASN1_ITEM *it, else asn1_cb = 0; - if (!combine) - *pval = NULL; - #ifdef CRYPTO_MDEBUG if (it->sname) CRYPTO_push_info(it->sname); diff --git a/deps/openssl/openssl/crypto/asn1/tasn_prn.c b/deps/openssl/openssl/crypto/asn1/tasn_prn.c index 7c54f9d1d4a305..5e7d53e9854a6d 100644 --- a/deps/openssl/openssl/crypto/asn1/tasn_prn.c +++ b/deps/openssl/openssl/crypto/asn1/tasn_prn.c @@ -290,7 +290,7 @@ static int asn1_item_print_ctx(BIO *out, ASN1_VALUE **fld, int indent, for (i = 0, tt = it->templates; i < it->tcount; i++, tt++) { const ASN1_TEMPLATE *seqtt; seqtt = asn1_do_adb(fld, tt, 1); - if(!seqtt) + if (!seqtt) return 0; tmpfld = asn1_get_field_ptr(fld, seqtt); if (!asn1_template_print_ctx(out, tmpfld, diff --git a/deps/openssl/openssl/crypto/asn1/x_x509.c b/deps/openssl/openssl/crypto/asn1/x_x509.c index 55319acf9ce18d..5f266a26b4c28e 100644 --- a/deps/openssl/openssl/crypto/asn1/x_x509.c +++ b/deps/openssl/openssl/crypto/asn1/x_x509.c @@ -177,7 +177,7 @@ X509 *d2i_X509_AUX(X509 **a, const unsigned char **pp, long length) /* Save start position */ q = *pp; - if(!a || *a == NULL) { + if (!a || *a == NULL) { freeret = 1; } ret = d2i_X509(a, pp, length); @@ -192,7 +192,7 @@ X509 *d2i_X509_AUX(X509 **a, const unsigned char **pp, long length) goto err; return ret; err: - if(freeret) { + if (freeret) { X509_free(ret); if (a) *a = NULL; diff --git a/deps/openssl/openssl/crypto/bf/Makefile b/deps/openssl/openssl/crypto/bf/Makefile index d01bfaa31558dd..6dd2015537fe0e 100644 --- a/deps/openssl/openssl/crypto/bf/Makefile +++ b/deps/openssl/openssl/crypto/bf/Makefile @@ -72,6 +72,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/bio/Makefile b/deps/openssl/openssl/crypto/bio/Makefile index c395d804968166..ef526f6beb2c2b 100644 --- a/deps/openssl/openssl/crypto/bio/Makefile +++ b/deps/openssl/openssl/crypto/bio/Makefile @@ -73,6 +73,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/bio/b_print.c b/deps/openssl/openssl/crypto/bio/b_print.c index c2cf6e619ee4c2..7c81e25d482cf1 100644 --- a/deps/openssl/openssl/crypto/bio/b_print.c +++ b/deps/openssl/openssl/crypto/bio/b_print.c @@ -704,32 +704,29 @@ doapr_outch(char **sbuffer, /* If we haven't at least one buffer, someone has doe a big booboo */ assert(*sbuffer != NULL || buffer != NULL); - if (buffer) { - while (*currlen >= *maxlen) { - if (*buffer == NULL) { - if (*maxlen == 0) - *maxlen = 1024; - *buffer = OPENSSL_malloc(*maxlen); - if(!*buffer) { - /* Panic! Can't really do anything sensible. Just return */ - return; - } - if (*currlen > 0) { - assert(*sbuffer != NULL); - memcpy(*buffer, *sbuffer, *currlen); - } - *sbuffer = NULL; - } else { - *maxlen += 1024; - *buffer = OPENSSL_realloc(*buffer, *maxlen); - if(!*buffer) { - /* Panic! Can't really do anything sensible. Just return */ - return; - } + /* |currlen| must always be <= |*maxlen| */ + assert(*currlen <= *maxlen); + + if (buffer && *currlen == *maxlen) { + *maxlen += 1024; + if (*buffer == NULL) { + *buffer = OPENSSL_malloc(*maxlen); + if (!*buffer) { + /* Panic! Can't really do anything sensible. Just return */ + return; + } + if (*currlen > 0) { + assert(*sbuffer != NULL); + memcpy(*buffer, *sbuffer, *currlen); + } + *sbuffer = NULL; + } else { + *buffer = OPENSSL_realloc(*buffer, *maxlen); + if (!*buffer) { + /* Panic! Can't really do anything sensible. Just return */ + return; } } - /* What to do if *buffer is NULL? */ - assert(*sbuffer != NULL || *buffer != NULL); } if (*currlen < *maxlen) { diff --git a/deps/openssl/openssl/crypto/bio/bf_nbio.c b/deps/openssl/openssl/crypto/bio/bf_nbio.c index da88a8a1bfbdbb..a04f32a0081798 100644 --- a/deps/openssl/openssl/crypto/bio/bf_nbio.c +++ b/deps/openssl/openssl/crypto/bio/bf_nbio.c @@ -139,7 +139,8 @@ static int nbiof_read(BIO *b, char *out, int outl) BIO_clear_retry_flags(b); #if 1 - RAND_pseudo_bytes(&n, 1); + if (RAND_pseudo_bytes(&n, 1) < 0) + return -1; num = (n & 0x07); if (outl > num) @@ -178,7 +179,8 @@ static int nbiof_write(BIO *b, const char *in, int inl) num = nt->lwn; nt->lwn = 0; } else { - RAND_pseudo_bytes(&n, 1); + if (RAND_pseudo_bytes(&n, 1) < 0) + return -1; num = (n & 7); } diff --git a/deps/openssl/openssl/crypto/bio/bio_lib.c b/deps/openssl/openssl/crypto/bio/bio_lib.c index 5267010cb0d7ff..07934f8a667b99 100644 --- a/deps/openssl/openssl/crypto/bio/bio_lib.c +++ b/deps/openssl/openssl/crypto/bio/bio_lib.c @@ -536,8 +536,10 @@ BIO *BIO_dup_chain(BIO *in) /* copy app data */ if (!CRYPTO_dup_ex_data(CRYPTO_EX_INDEX_BIO, &new_bio->ex_data, - &bio->ex_data)) + &bio->ex_data)) { + BIO_free(new_bio); goto err; + } if (ret == NULL) { eoc = new_bio; @@ -549,8 +551,8 @@ BIO *BIO_dup_chain(BIO *in) } return (ret); err: - if (ret != NULL) - BIO_free(ret); + BIO_free_all(ret); + return (NULL); } diff --git a/deps/openssl/openssl/crypto/bio/bss_dgram.c b/deps/openssl/openssl/crypto/bio/bss_dgram.c index 388d90d02ef319..7fcd831da06bed 100644 --- a/deps/openssl/openssl/crypto/bio/bss_dgram.c +++ b/deps/openssl/openssl/crypto/bio/bss_dgram.c @@ -303,16 +303,17 @@ static void dgram_adjust_rcv_timeout(BIO *b) /* Calculate time left until timer expires */ memcpy(&timeleft, &(data->next_timeout), sizeof(struct timeval)); - timeleft.tv_sec -= timenow.tv_sec; - timeleft.tv_usec -= timenow.tv_usec; - if (timeleft.tv_usec < 0) { + if (timeleft.tv_usec < timenow.tv_usec) { + timeleft.tv_usec = 1000000 - timenow.tv_usec + timeleft.tv_usec; timeleft.tv_sec--; - timeleft.tv_usec += 1000000; + } else { + timeleft.tv_usec -= timenow.tv_usec; } - - if (timeleft.tv_sec < 0) { + if (timeleft.tv_sec < timenow.tv_sec) { timeleft.tv_sec = 0; timeleft.tv_usec = 1; + } else { + timeleft.tv_sec -= timenow.tv_sec; } /* @@ -896,7 +897,7 @@ static long dgram_ctrl(BIO *b, int cmd, long num, void *ptr) perror("setsockopt"); ret = -1; } -# elif defined(OPENSSL_SYS_LINUX) && defined(IP_MTUDISCOVER) +# elif defined(OPENSSL_SYS_LINUX) && defined(IP_MTU_DISCOVER) && defined (IP_PMTUDISC_PROBE) if ((sockopt_val = num ? IP_PMTUDISC_PROBE : IP_PMTUDISC_DONT), (ret = setsockopt(b->num, IPPROTO_IP, IP_MTU_DISCOVER, &sockopt_val, sizeof(sockopt_val))) < 0) { @@ -1012,7 +1013,7 @@ BIO *BIO_new_dgram_sctp(int fd, int close_flag) */ sockopt_len = (socklen_t) (sizeof(sctp_assoc_t) + 256 * sizeof(uint8_t)); authchunks = OPENSSL_malloc(sockopt_len); - if(!authchunks) { + if (!authchunks) { BIO_vfree(bio); return (NULL); } @@ -1352,7 +1353,7 @@ static int dgram_sctp_read(BIO *b, char *out, int outl) (socklen_t) (sizeof(sctp_assoc_t) + 256 * sizeof(uint8_t)); authchunks = OPENSSL_malloc(optlen); if (!authchunks) { - BIOerr(BIO_F_DGRAM_SCTP_READ, ERR_R_MALLOC_ERROR); + BIOerr(BIO_F_DGRAM_SCTP_READ, ERR_R_MALLOC_FAILURE); return -1; } memset(authchunks, 0, sizeof(optlen)); @@ -1423,8 +1424,8 @@ static int dgram_sctp_write(BIO *b, const char *in, int inl) if (data->save_shutdown && !BIO_dgram_sctp_wait_for_dry(b)) { char *tmp; data->saved_message.bio = b; - if(!(tmp = OPENSSL_malloc(inl))) { - BIOerr(BIO_F_DGRAM_SCTP_WRITE, ERR_R_MALLOC_ERROR); + if (!(tmp = OPENSSL_malloc(inl))) { + BIOerr(BIO_F_DGRAM_SCTP_WRITE, ERR_R_MALLOC_FAILURE); return -1; } if (data->saved_message.data) diff --git a/deps/openssl/openssl/crypto/bn/Makefile b/deps/openssl/openssl/crypto/bn/Makefile index 316a44fbe5610d..61dce05ad71dc6 100644 --- a/deps/openssl/openssl/crypto/bn/Makefile +++ b/deps/openssl/openssl/crypto/bn/Makefile @@ -110,7 +110,7 @@ x86_64-gf2m.s: asm/x86_64-gf2m.pl $(PERL) asm/x86_64-gf2m.pl $(PERLASM_SCHEME) > $@ rsaz-x86_64.s: asm/rsaz-x86_64.pl $(PERL) asm/rsaz-x86_64.pl $(PERLASM_SCHEME) > $@ -rsaz-avx2.s: asm/rsaz-avx2.pl +rsaz-avx2.s: asm/rsaz-avx2.pl $(PERL) asm/rsaz-avx2.pl $(PERLASM_SCHEME) > $@ bn-ia64.s: asm/ia64.S @@ -176,6 +176,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: bn_prime.h depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/bn/asm/armv4-gf2m.pl b/deps/openssl/openssl/crypto/bn/asm/armv4-gf2m.pl index a387a9ec851d7f..8f529c95cf0509 100644 --- a/deps/openssl/openssl/crypto/bn/asm/armv4-gf2m.pl +++ b/deps/openssl/openssl/crypto/bn/asm/armv4-gf2m.pl @@ -29,7 +29,7 @@ # # Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software # Polynomial Multiplication on ARM Processors using the NEON Engine. -# +# # http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} diff --git a/deps/openssl/openssl/crypto/bn/asm/mips3.s b/deps/openssl/openssl/crypto/bn/asm/mips3.s index 705b78c5dd53da..dca4105c7db1b4 100644 --- a/deps/openssl/openssl/crypto/bn/asm/mips3.s +++ b/deps/openssl/openssl/crypto/bn/asm/mips3.s @@ -426,14 +426,14 @@ LEAF(bn_add_words) sltu v0,t2,ta2 sd t2,-16(a0) daddu v0,t8 - + daddu ta3,t3 sltu t9,ta3,t3 daddu t3,ta3,v0 sltu v0,t3,ta3 sd t3,-8(a0) daddu v0,t9 - + .set noreorder bgtzl AT,.L_bn_add_words_loop ld t0,0(a1) @@ -721,7 +721,7 @@ LEAF(bn_div_words) b .L_bn_div_words_inner_loop2 dsubu QT,1 .set reorder -.L_bn_div_words_inner_loop2_done: +.L_bn_div_words_inner_loop2_done: #undef HH dsubu a0,t3,t0 diff --git a/deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl b/deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl index a4b3b6ef276767..3b6ccf83d13e4b 100755 --- a/deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl +++ b/deps/openssl/openssl/crypto/bn/asm/rsaz-avx2.pl @@ -375,7 +375,7 @@ vpaddq $TEMP1, $ACC1, $ACC1 vpmuludq 32*7-128($aap), $B2, $ACC2 vpbroadcastq 32*5-128($tpa), $B2 - vpaddq 32*11-448($tp1), $ACC2, $ACC2 + vpaddq 32*11-448($tp1), $ACC2, $ACC2 vmovdqu $ACC6, 32*6-192($tp0) vmovdqu $ACC7, 32*7-192($tp0) @@ -434,7 +434,7 @@ vmovdqu $ACC7, 32*16-448($tp1) lea 8($tp1), $tp1 - dec $i + dec $i jnz .LOOP_SQR_1024 ___ $ZERO = $ACC9; @@ -779,7 +779,7 @@ vpblendd \$3, $TEMP4, $TEMP5, $TEMP4 vpaddq $TEMP3, $ACC7, $ACC7 vpaddq $TEMP4, $ACC8, $ACC8 - + vpsrlq \$29, $ACC4, $TEMP1 vpand $AND_MASK, $ACC4, $ACC4 vpsrlq \$29, $ACC5, $TEMP2 @@ -1444,7 +1444,7 @@ vpaddq $TEMP4, $ACC8, $ACC8 vmovdqu $ACC4, 128-128($rp) - vmovdqu $ACC5, 160-128($rp) + vmovdqu $ACC5, 160-128($rp) vmovdqu $ACC6, 192-128($rp) vmovdqu $ACC7, 224-128($rp) vmovdqu $ACC8, 256-128($rp) @@ -1534,7 +1534,7 @@ $code.=" xor @T[1],@T[1]\n" if ($i==15); my $k=1; while (29*($j+1)<64*($i+1)) { - $code.=<<___; + $code.=<<___; mov @T[0],@T[-$k] shr \$`29*$j`,@T[-$k] and %rax,@T[-$k] # &0x1fffffff diff --git a/deps/openssl/openssl/crypto/bn/asm/rsaz-x86_64.pl b/deps/openssl/openssl/crypto/bn/asm/rsaz-x86_64.pl index 25272d278e0477..3bd45dbac01d5b 100755 --- a/deps/openssl/openssl/crypto/bn/asm/rsaz-x86_64.pl +++ b/deps/openssl/openssl/crypto/bn/asm/rsaz-x86_64.pl @@ -275,9 +275,9 @@ movq %r9, 16(%rsp) movq %r10, 24(%rsp) shrq \$63, %rbx - + #third iteration - movq 16($inp), %r9 + movq 16($inp), %r9 movq 24($inp), %rax mulq %r9 addq %rax, %r12 @@ -525,7 +525,7 @@ movl $times,128+8(%rsp) movq $out, %xmm0 # off-load movq %rbp, %xmm1 # off-load -#first iteration +#first iteration mulx %rax, %r8, %r9 mulx 16($inp), %rcx, %r10 @@ -561,7 +561,7 @@ mov %rax, (%rsp) mov %r8, 8(%rsp) -#second iteration +#second iteration mulx 16($inp), %rax, %rbx adox %rax, %r10 adcx %rbx, %r11 @@ -600,8 +600,8 @@ mov %r9, 16(%rsp) .byte 0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00 # mov %r10, 24(%rsp) - -#third iteration + +#third iteration .byte 0xc4,0x62,0xc3,0xf6,0x8e,0x18,0x00,0x00,0x00 # mulx 24($inp), $out, %r9 adox $out, %r12 adcx %r9, %r13 @@ -636,8 +636,8 @@ mov %r11, 32(%rsp) .byte 0x4c,0x89,0xa4,0x24,0x28,0x00,0x00,0x00 # mov %r12, 40(%rsp) - -#fourth iteration + +#fourth iteration .byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x20,0x00,0x00,0x00 # mulx 32($inp), %rax, %rbx adox %rax, %r14 adcx %rbx, %r15 @@ -669,8 +669,8 @@ mov %r13, 48(%rsp) mov %r14, 56(%rsp) - -#fifth iteration + +#fifth iteration .byte 0xc4,0x62,0xc3,0xf6,0x9e,0x28,0x00,0x00,0x00 # mulx 40($inp), $out, %r11 adox $out, %r8 adcx %r11, %r9 @@ -697,8 +697,8 @@ mov %r15, 64(%rsp) mov %r8, 72(%rsp) - -#sixth iteration + +#sixth iteration .byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00 # mulx 48($inp), %rax, %rbx adox %rax, %r10 adcx %rbx, %r11 @@ -982,7 +982,7 @@ movq 56($ap), %rax movq %rdx, %r14 adcq \$0, %r14 - + mulq %rbx movq %xmm4, %rbx addq %rax, %r14 @@ -1061,7 +1061,7 @@ movq ($ap), %rax adcq \$0, %rdx addq %r15, %r14 - movq %rdx, %r15 + movq %rdx, %r15 adcq \$0, %r15 leaq 128(%rbp), %rbp @@ -1132,7 +1132,7 @@ mulx 48($ap), %rbx, %r14 lea 128(%rbp), %rbp adcx %rax, %r12 - + mulx 56($ap), %rax, %r15 movq %xmm4, %rdx adcx %rbx, %r13 @@ -1292,7 +1292,7 @@ ___ $code.=<<___ if ($addx); jmp .Lmul_scatter_tail - + .align 32 .Lmulx_scatter: movq ($out), %rdx # pass b[0] @@ -1721,7 +1721,7 @@ movq 56($ap), %rax movq %rdx, %r14 adcq \$0, %r14 - + mulq %rbx addq %rax, %r14 movq ($ap), %rax @@ -1798,7 +1798,7 @@ movq ($ap), %rax adcq \$0, %rdx addq %r15, %r14 - movq %rdx, %r15 + movq %rdx, %r15 adcq \$0, %r15 leaq 8(%rdi), %rdi diff --git a/deps/openssl/openssl/crypto/bn/asm/vis3-mont.pl b/deps/openssl/openssl/crypto/bn/asm/vis3-mont.pl index a1357de0e99af4..263ac02b6f45b7 100644 --- a/deps/openssl/openssl/crypto/bn/asm/vis3-mont.pl +++ b/deps/openssl/openssl/crypto/bn/asm/vis3-mont.pl @@ -100,7 +100,7 @@ ld [$ap+12], $t3 or $t0, $aj, $aj add $ap, 16, $ap - stxa $aj, [$anp]0xe2 ! converted ap[0] + stx $aj, [$anp] ! converted ap[0] mulx $aj, $m0, $lo0 ! ap[0]*bp[0] umulxhi $aj, $m0, $hi0 @@ -150,7 +150,7 @@ sllx $t1, 32, $aj add $ap, 8, $ap or $t0, $aj, $aj - stxa $aj, [$anp]0xe2 ! converted ap[j] + stx $aj, [$anp] ! converted ap[j] ld [$np+0], $t2 ! np[j] addcc $nlo, $hi1, $lo1 @@ -169,7 +169,7 @@ addcc $lo0, $lo1, $lo1 ! np[j]*m1+ap[j]*bp[0] umulxhi $nj, $m1, $nj ! nhi=nj addxc %g0, $hi1, $hi1 - stxa $lo1, [$tp]0xe2 ! tp[j-1] + stx $lo1, [$tp] ! tp[j-1] add $tp, 8, $tp ! tp++ brnz,pt $cnt, .L1st @@ -182,12 +182,12 @@ addxc $nj, %g0, $hi1 addcc $lo0, $lo1, $lo1 ! np[j]*m1+ap[j]*bp[0] addxc %g0, $hi1, $hi1 - stxa $lo1, [$tp]0xe2 ! tp[j-1] + stx $lo1, [$tp] ! tp[j-1] add $tp, 8, $tp addcc $hi0, $hi1, $hi1 addxc %g0, %g0, $ovf ! upmost overflow bit - stxa $hi1, [$tp]0xe2 + stx $hi1, [$tp] add $tp, 8, $tp ba .Louter diff --git a/deps/openssl/openssl/crypto/bn/asm/x86_64-mont.pl b/deps/openssl/openssl/crypto/bn/asm/x86_64-mont.pl index 5b3d1ebfa72154..2989b58f256eaa 100755 --- a/deps/openssl/openssl/crypto/bn/asm/x86_64-mont.pl +++ b/deps/openssl/openssl/crypto/bn/asm/x86_64-mont.pl @@ -790,7 +790,7 @@ sub %r11,%rsp .Lsqr8x_sp_done: and \$-64,%rsp - mov $num,%r10 + mov $num,%r10 neg $num lea 64(%rsp,$num,2),%r11 # copy of modulus diff --git a/deps/openssl/openssl/crypto/bn/asm/x86_64-mont5.pl b/deps/openssl/openssl/crypto/bn/asm/x86_64-mont5.pl index 4fd30dd015b25f..820de3d6f6270b 100755 --- a/deps/openssl/openssl/crypto/bn/asm/x86_64-mont5.pl +++ b/deps/openssl/openssl/crypto/bn/asm/x86_64-mont5.pl @@ -527,7 +527,7 @@ # "above" modulo cache-line size, which effectively precludes # possibility of memory disambiguation logic failure when # accessing the table. - # + # lea 64+8(%rsp,%r11,8),$tp mov %rdx,$A[1] @@ -910,7 +910,7 @@ my $nptr="%rcx"; # const BN_ULONG *nptr, my $n0 ="%r8"; # const BN_ULONG *n0); my $num ="%r9"; # int num, has to be divisible by 8 - # int pwr + # int pwr my ($i,$j,$tptr)=("%rbp","%rcx",$rptr); my @A0=("%r10","%r11"); @@ -975,7 +975,7 @@ sub %r11,%rsp .Lpwr_sp_done: and \$-64,%rsp - mov $num,%r10 + mov $num,%r10 neg $num ############################################################## @@ -1856,7 +1856,7 @@ } $code.=<<___; mov $num,%r10 # prepare for back-to-back call - neg $num # restore $num + neg $num # restore $num ret .size bn_sqr8x_internal,.-bn_sqr8x_internal ___ @@ -1922,7 +1922,7 @@ sub %r11,%rsp .Lfrom_sp_done: and \$-64,%rsp - mov $num,%r10 + mov $num,%r10 neg $num ############################################################## @@ -2071,7 +2071,7 @@ mov \$0,%r10 cmovc %r10,%r11 sub %r11,%rsp -.Lmulx4xsp_done: +.Lmulx4xsp_done: and \$-64,%rsp # ensure alignment ############################################################## # Stack layout @@ -2174,7 +2174,7 @@ # "above" modulo cache-line size, which effectively precludes # possibility of memory disambiguation logic failure when # accessing the table. - # + # lea 64+8*4+8(%rsp,%r11,8),$tptr mov %rdx,$bi @@ -2481,7 +2481,7 @@ sub %r11,%rsp .Lpwrx_sp_done: and \$-64,%rsp - mov $num,%r10 + mov $num,%r10 neg $num ############################################################## @@ -3226,11 +3226,16 @@ .type bn_get_bits5,\@abi-omnipotent .align 16 bn_get_bits5: - mov $inp,%r10 + lea 0($inp),%r10 + lea 1($inp),%r11 mov $num,%ecx - shr \$3,$num - movzw (%r10,$num),%eax - and \$7,%ecx + shr \$4,$num + and \$15,%ecx + lea -8(%ecx),%eax + cmp \$11,%ecx + cmova %r11,%r10 + cmova %eax,%ecx + movzw (%r10,$num,2),%eax shrl %cl,%eax and \$31,%eax ret diff --git a/deps/openssl/openssl/crypto/bn/bn.h b/deps/openssl/openssl/crypto/bn/bn.h index 78709d384082de..5696965e9a09d0 100644 --- a/deps/openssl/openssl/crypto/bn/bn.h +++ b/deps/openssl/openssl/crypto/bn/bn.h @@ -779,6 +779,7 @@ int RAND_pseudo_bytes(unsigned char *buf, int num); * wouldn't be constructed with top!=dmax. */ \ BN_ULONG *_not_const; \ memcpy(&_not_const, &_bnum1->d, sizeof(BN_ULONG*)); \ + /* Debug only - safe to ignore error return */ \ RAND_pseudo_bytes(&_tmp_char, 1); \ memset((unsigned char *)(_not_const + _bnum1->top), _tmp_char, \ (_bnum1->dmax - _bnum1->top) * sizeof(BN_ULONG)); \ @@ -892,6 +893,7 @@ void ERR_load_BN_strings(void); # define BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR 135 # define BN_F_BN_GF2M_MOD_SQR 136 # define BN_F_BN_GF2M_MOD_SQRT 137 +# define BN_F_BN_LSHIFT 145 # define BN_F_BN_MOD_EXP2_MONT 118 # define BN_F_BN_MOD_EXP_MONT 109 # define BN_F_BN_MOD_EXP_MONT_CONSTTIME 124 @@ -907,12 +909,14 @@ void ERR_load_BN_strings(void); # define BN_F_BN_NEW 113 # define BN_F_BN_RAND 114 # define BN_F_BN_RAND_RANGE 122 +# define BN_F_BN_RSHIFT 146 # define BN_F_BN_USUB 115 /* Reason codes. */ # define BN_R_ARG2_LT_ARG3 100 # define BN_R_BAD_RECIPROCAL 101 # define BN_R_BIGNUM_TOO_LONG 114 +# define BN_R_BITS_TOO_SMALL 118 # define BN_R_CALLED_WITH_EVEN_MODULUS 102 # define BN_R_DIV_BY_ZERO 103 # define BN_R_ENCODING_ERROR 104 @@ -920,6 +924,7 @@ void ERR_load_BN_strings(void); # define BN_R_INPUT_NOT_REDUCED 110 # define BN_R_INVALID_LENGTH 106 # define BN_R_INVALID_RANGE 115 +# define BN_R_INVALID_SHIFT 119 # define BN_R_NOT_A_SQUARE 111 # define BN_R_NOT_INITIALIZED 107 # define BN_R_NO_INVERSE 108 diff --git a/deps/openssl/openssl/crypto/bn/bn_err.c b/deps/openssl/openssl/crypto/bn/bn_err.c index faa7e226ba9f3f..e7a703826ee57b 100644 --- a/deps/openssl/openssl/crypto/bn/bn_err.c +++ b/deps/openssl/openssl/crypto/bn/bn_err.c @@ -1,6 +1,6 @@ /* crypto/bn/bn_err.c */ /* ==================================================================== - * Copyright (c) 1999-2007 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2015 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -94,6 +94,7 @@ static ERR_STRING_DATA BN_str_functs[] = { {ERR_FUNC(BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR), "BN_GF2m_mod_solve_quad_arr"}, {ERR_FUNC(BN_F_BN_GF2M_MOD_SQR), "BN_GF2m_mod_sqr"}, {ERR_FUNC(BN_F_BN_GF2M_MOD_SQRT), "BN_GF2m_mod_sqrt"}, + {ERR_FUNC(BN_F_BN_LSHIFT), "BN_lshift"}, {ERR_FUNC(BN_F_BN_MOD_EXP2_MONT), "BN_mod_exp2_mont"}, {ERR_FUNC(BN_F_BN_MOD_EXP_MONT), "BN_mod_exp_mont"}, {ERR_FUNC(BN_F_BN_MOD_EXP_MONT_CONSTTIME), "BN_mod_exp_mont_consttime"}, @@ -109,6 +110,7 @@ static ERR_STRING_DATA BN_str_functs[] = { {ERR_FUNC(BN_F_BN_NEW), "BN_new"}, {ERR_FUNC(BN_F_BN_RAND), "BN_rand"}, {ERR_FUNC(BN_F_BN_RAND_RANGE), "BN_rand_range"}, + {ERR_FUNC(BN_F_BN_RSHIFT), "BN_rshift"}, {ERR_FUNC(BN_F_BN_USUB), "BN_usub"}, {0, NULL} }; @@ -117,6 +119,7 @@ static ERR_STRING_DATA BN_str_reasons[] = { {ERR_REASON(BN_R_ARG2_LT_ARG3), "arg2 lt arg3"}, {ERR_REASON(BN_R_BAD_RECIPROCAL), "bad reciprocal"}, {ERR_REASON(BN_R_BIGNUM_TOO_LONG), "bignum too long"}, + {ERR_REASON(BN_R_BITS_TOO_SMALL), "bits too small"}, {ERR_REASON(BN_R_CALLED_WITH_EVEN_MODULUS), "called with even modulus"}, {ERR_REASON(BN_R_DIV_BY_ZERO), "div by zero"}, {ERR_REASON(BN_R_ENCODING_ERROR), "encoding error"}, @@ -125,6 +128,7 @@ static ERR_STRING_DATA BN_str_reasons[] = { {ERR_REASON(BN_R_INPUT_NOT_REDUCED), "input not reduced"}, {ERR_REASON(BN_R_INVALID_LENGTH), "invalid length"}, {ERR_REASON(BN_R_INVALID_RANGE), "invalid range"}, + {ERR_REASON(BN_R_INVALID_SHIFT), "invalid shift"}, {ERR_REASON(BN_R_NOT_A_SQUARE), "not a square"}, {ERR_REASON(BN_R_NOT_INITIALIZED), "not initialized"}, {ERR_REASON(BN_R_NO_INVERSE), "no inverse"}, diff --git a/deps/openssl/openssl/crypto/bn/bn_gf2m.c b/deps/openssl/openssl/crypto/bn/bn_gf2m.c index aeee49a0156f2a..cfa1c7ce14990a 100644 --- a/deps/openssl/openssl/crypto/bn/bn_gf2m.c +++ b/deps/openssl/openssl/crypto/bn/bn_gf2m.c @@ -450,8 +450,7 @@ int BN_GF2m_mod_arr(BIGNUM *r, const BIGNUM *a, const int p[]) d0 = p[k] % BN_BITS2; d1 = BN_BITS2 - d0; z[n] ^= (zz << d0); - tmp_ulong = zz >> d1; - if (d0 && tmp_ulong) + if (d0 && (tmp_ulong = zz >> d1)) z[n + 1] ^= tmp_ulong; } @@ -694,9 +693,10 @@ int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) } # else { - int i, ubits = BN_num_bits(u), vbits = BN_num_bits(v), /* v is copy - * of p */ - top = p->top; + int i; + int ubits = BN_num_bits(u); + int vbits = BN_num_bits(v); /* v is copy of p */ + int top = p->top; BN_ULONG *udp, *bdp, *vdp, *cdp; bn_wexpand(u, top); @@ -740,8 +740,12 @@ int BN_GF2m_mod_inv(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) ubits--; } - if (ubits <= BN_BITS2 && udp[0] == 1) - break; + if (ubits <= BN_BITS2) { + if (udp[0] == 0) /* poly was reducible */ + goto err; + if (udp[0] == 1) + break; + } if (ubits < vbits) { i = ubits; diff --git a/deps/openssl/openssl/crypto/bn/bn_lcl.h b/deps/openssl/openssl/crypto/bn/bn_lcl.h index 7cd58830e93452..00f4f09945b382 100644 --- a/deps/openssl/openssl/crypto/bn/bn_lcl.h +++ b/deps/openssl/openssl/crypto/bn/bn_lcl.h @@ -294,7 +294,7 @@ unsigned __int64 _umul128(unsigned __int64 a, unsigned __int64 b, # endif # elif defined(__mips) && (defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)) # if defined(__GNUC__) && __GNUC__>=2 -# if __GNUC__>=4 && __GNUC_MINOR__>=4 +# if __GNUC__>4 || (__GNUC__>=4 && __GNUC_MINOR__>=4) /* "h" constraint is no more since 4.4 */ # define BN_UMULT_HIGH(a,b) (((__uint128_t)(a)*(b))>>64) # define BN_UMULT_LOHI(low,high,a,b) ({ \ diff --git a/deps/openssl/openssl/crypto/bn/bn_print.c b/deps/openssl/openssl/crypto/bn/bn_print.c index 4dcaae32bf5c67..ab10b957ba27d4 100644 --- a/deps/openssl/openssl/crypto/bn/bn_print.c +++ b/deps/openssl/openssl/crypto/bn/bn_print.c @@ -71,7 +71,12 @@ char *BN_bn2hex(const BIGNUM *a) char *buf; char *p; - buf = (char *)OPENSSL_malloc(a->top * BN_BYTES * 2 + 2); + if (a->neg && BN_is_zero(a)) { + /* "-0" == 3 bytes including NULL terminator */ + buf = OPENSSL_malloc(3); + } else { + buf = OPENSSL_malloc(a->top * BN_BYTES * 2 + 2); + } if (buf == NULL) { BNerr(BN_F_BN_BN2HEX, ERR_R_MALLOC_FAILURE); goto err; diff --git a/deps/openssl/openssl/crypto/bn/bn_rand.c b/deps/openssl/openssl/crypto/bn/bn_rand.c index 7ac71ec8ed06b9..f9fb2e9e45e01b 100644 --- a/deps/openssl/openssl/crypto/bn/bn_rand.c +++ b/deps/openssl/openssl/crypto/bn/bn_rand.c @@ -121,6 +121,11 @@ static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom) int ret = 0, bit, bytes, mask; time_t tim; + if (bits < 0 || (bits == 1 && top > 0)) { + BNerr(BN_F_BNRAND, BN_R_BITS_TOO_SMALL); + return 0; + } + if (bits == 0) { BN_zero(rnd); return 1; @@ -157,7 +162,8 @@ static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom) unsigned char c; for (i = 0; i < bytes; i++) { - RAND_pseudo_bytes(&c, 1); + if (RAND_pseudo_bytes(&c, 1) < 0) + goto err; if (c >= 128 && i > 0) buf[i] = buf[i - 1]; else if (c < 42) @@ -168,7 +174,7 @@ static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom) } #endif - if (top != -1) { + if (top >= 0) { if (top) { if (bit == 0) { buf[0] = 1; diff --git a/deps/openssl/openssl/crypto/bn/bn_shift.c b/deps/openssl/openssl/crypto/bn/bn_shift.c index 4f3e8ffed7b628..9673d9a3063324 100644 --- a/deps/openssl/openssl/crypto/bn/bn_shift.c +++ b/deps/openssl/openssl/crypto/bn/bn_shift.c @@ -137,6 +137,11 @@ int BN_lshift(BIGNUM *r, const BIGNUM *a, int n) bn_check_top(r); bn_check_top(a); + if (n < 0) { + BNerr(BN_F_BN_LSHIFT, BN_R_INVALID_SHIFT); + return 0; + } + r->neg = a->neg; nw = n / BN_BITS2; if (bn_wexpand(r, a->top + nw + 1) == NULL) @@ -174,6 +179,11 @@ int BN_rshift(BIGNUM *r, const BIGNUM *a, int n) bn_check_top(r); bn_check_top(a); + if (n < 0) { + BNerr(BN_F_BN_RSHIFT, BN_R_INVALID_SHIFT); + return 0; + } + nw = n / BN_BITS2; rb = n % BN_BITS2; lb = BN_BITS2 - rb; diff --git a/deps/openssl/openssl/crypto/buffer/Makefile b/deps/openssl/openssl/crypto/buffer/Makefile index 2efba47f07002c..352efb841ffbc4 100644 --- a/deps/openssl/openssl/crypto/buffer/Makefile +++ b/deps/openssl/openssl/crypto/buffer/Makefile @@ -61,6 +61,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/buffer/buffer.c b/deps/openssl/openssl/crypto/buffer/buffer.c index d287e340a2ba5c..eff3e081576c48 100644 --- a/deps/openssl/openssl/crypto/buffer/buffer.c +++ b/deps/openssl/openssl/crypto/buffer/buffer.c @@ -88,7 +88,7 @@ void BUF_MEM_free(BUF_MEM *a) return; if (a->data != NULL) { - memset(a->data, 0, (unsigned int)a->max); + OPENSSL_cleanse(a->data, a->max); OPENSSL_free(a->data); } OPENSSL_free(a); diff --git a/deps/openssl/openssl/crypto/camellia/Makefile b/deps/openssl/openssl/crypto/camellia/Makefile index 60e896054f494c..ab1225e7d902ac 100644 --- a/deps/openssl/openssl/crypto/camellia/Makefile +++ b/deps/openssl/openssl/crypto/camellia/Makefile @@ -75,6 +75,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/cast/Makefile b/deps/openssl/openssl/crypto/cast/Makefile index f3f4859886a6ce..4c4b5e9baa5513 100644 --- a/deps/openssl/openssl/crypto/cast/Makefile +++ b/deps/openssl/openssl/crypto/cast/Makefile @@ -69,6 +69,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/cmac/Makefile b/deps/openssl/openssl/crypto/cmac/Makefile index 54e7cc39d52b92..6a2840867643c4 100644 --- a/deps/openssl/openssl/crypto/cmac/Makefile +++ b/deps/openssl/openssl/crypto/cmac/Makefile @@ -61,6 +61,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/cmac/cmac.c b/deps/openssl/openssl/crypto/cmac/cmac.c index c5597a3f73b99b..774e6dc919050d 100644 --- a/deps/openssl/openssl/crypto/cmac/cmac.c +++ b/deps/openssl/openssl/crypto/cmac/cmac.c @@ -126,6 +126,8 @@ EVP_CIPHER_CTX *CMAC_CTX_get0_cipher_ctx(CMAC_CTX *ctx) void CMAC_CTX_free(CMAC_CTX *ctx) { + if (!ctx) + return; CMAC_CTX_cleanup(ctx); OPENSSL_free(ctx); } diff --git a/deps/openssl/openssl/crypto/cms/Makefile b/deps/openssl/openssl/crypto/cms/Makefile index 644fef399ee989..6f3a83202638eb 100644 --- a/deps/openssl/openssl/crypto/cms/Makefile +++ b/deps/openssl/openssl/crypto/cms/Makefile @@ -67,6 +67,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/cms/cms_kari.c b/deps/openssl/openssl/crypto/cms/cms_kari.c index f8a6cbadb06c4a..2cfcdb29cd50c8 100644 --- a/deps/openssl/openssl/crypto/cms/cms_kari.c +++ b/deps/openssl/openssl/crypto/cms/cms_kari.c @@ -66,6 +66,7 @@ DECLARE_ASN1_ITEM(CMS_KeyAgreeRecipientInfo) DECLARE_ASN1_ITEM(CMS_RecipientEncryptedKey) DECLARE_ASN1_ITEM(CMS_OriginatorPublicKey) +DECLARE_ASN1_ITEM(CMS_RecipientKeyIdentifier) /* Key Agreement Recipient Info (KARI) routines */ @@ -362,6 +363,9 @@ int cms_RecipientInfo_kari_init(CMS_RecipientInfo *ri, X509 *recip, if (flags & CMS_USE_KEYID) { rek->rid->type = CMS_REK_KEYIDENTIFIER; + rek->rid->d.rKeyId = M_ASN1_new_of(CMS_RecipientKeyIdentifier); + if (rek->rid->d.rKeyId == NULL) + return 0; if (!cms_set1_keyid(&rek->rid->d.rKeyId->subjectKeyIdentifier, recip)) return 0; } else { diff --git a/deps/openssl/openssl/crypto/cms/cms_pwri.c b/deps/openssl/openssl/crypto/cms/cms_pwri.c index 076b54578927b7..a8322dcdf1a62c 100644 --- a/deps/openssl/openssl/crypto/cms/cms_pwri.c +++ b/deps/openssl/openssl/crypto/cms/cms_pwri.c @@ -231,7 +231,7 @@ static int kek_unwrap_key(unsigned char *out, size_t *outlen, return 0; } tmp = OPENSSL_malloc(inlen); - if(!tmp) + if (!tmp) return 0; /* setup IV by decrypting last two blocks */ EVP_DecryptUpdate(ctx, tmp + inlen - 2 * blocklen, &outl, @@ -297,8 +297,9 @@ static int kek_wrap_key(unsigned char *out, size_t *outlen, out[3] = in[2] ^ 0xFF; memcpy(out + 4, in, inlen); /* Add random padding to end */ - if (olen > inlen + 4) - RAND_pseudo_bytes(out + 4 + inlen, olen - 4 - inlen); + if (olen > inlen + 4 + && RAND_pseudo_bytes(out + 4 + inlen, olen - 4 - inlen) < 0) + return 0; /* Encrypt twice */ EVP_EncryptUpdate(ctx, out, &dummy, out, olen); EVP_EncryptUpdate(ctx, out, &dummy, out, olen); diff --git a/deps/openssl/openssl/crypto/cms/cms_smime.c b/deps/openssl/openssl/crypto/cms/cms_smime.c index 8729e3f9c00499..b39ed489989f4e 100644 --- a/deps/openssl/openssl/crypto/cms/cms_smime.c +++ b/deps/openssl/openssl/crypto/cms/cms_smime.c @@ -132,7 +132,7 @@ static void do_free_upto(BIO *f, BIO *upto) BIO_free(f); f = tbio; } - while (f != upto); + while (f && f != upto); } else BIO_free_all(f); } diff --git a/deps/openssl/openssl/crypto/comp/Makefile b/deps/openssl/openssl/crypto/comp/Makefile index efda832dce47f3..a1e9464a11f7ea 100644 --- a/deps/openssl/openssl/crypto/comp/Makefile +++ b/deps/openssl/openssl/crypto/comp/Makefile @@ -64,6 +64,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/conf/Makefile b/deps/openssl/openssl/crypto/conf/Makefile index 78bb3241065d86..d5f5c582414beb 100644 --- a/deps/openssl/openssl/crypto/conf/Makefile +++ b/deps/openssl/openssl/crypto/conf/Makefile @@ -64,6 +64,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/cryptlib.c b/deps/openssl/openssl/crypto/cryptlib.c index 98526d73dc29d0..ca0e3ccc0c7aa0 100644 --- a/deps/openssl/openssl/crypto/cryptlib.c +++ b/deps/openssl/openssl/crypto/cryptlib.c @@ -825,8 +825,6 @@ int OPENSSL_isservice(void) if (_OPENSSL_isservice.p != (void *)-1) return (*_OPENSSL_isservice.f) (); - (void)GetDesktopWindow(); /* return value is ignored */ - h = GetProcessWindowStation(); if (h == NULL) return -1; diff --git a/deps/openssl/openssl/crypto/des/Makefile b/deps/openssl/openssl/crypto/des/Makefile index 060c64795e2974..8b5166ca9ff5fa 100644 --- a/deps/openssl/openssl/crypto/des/Makefile +++ b/deps/openssl/openssl/crypto/des/Makefile @@ -96,6 +96,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/des/des.c b/deps/openssl/openssl/crypto/des/des.c index 2bff281258e836..586aed72378cc1 100644 --- a/deps/openssl/openssl/crypto/des/des.c +++ b/deps/openssl/openssl/crypto/des/des.c @@ -455,8 +455,10 @@ void doencryption(void) rem = l % 8; len = l - rem; if (feof(DES_IN)) { - for (i = 7 - rem; i > 0; i--) - RAND_pseudo_bytes(buf + l++, 1); + for (i = 7 - rem; i > 0; i--) { + if (RAND_pseudo_bytes(buf + l++, 1) < 0) + goto problems; + } buf[l++] = rem; ex = 1; len += rem; diff --git a/deps/openssl/openssl/crypto/des/enc_writ.c b/deps/openssl/openssl/crypto/des/enc_writ.c index b4eecc38120318..bfaabde516ae55 100644 --- a/deps/openssl/openssl/crypto/des/enc_writ.c +++ b/deps/openssl/openssl/crypto/des/enc_writ.c @@ -96,6 +96,9 @@ int DES_enc_write(int fd, const void *_buf, int len, const unsigned char *cp; static int start = 1; + if (len < 0) + return -1; + if (outbuf == NULL) { outbuf = OPENSSL_malloc(BSIZE + HDRSIZE); if (outbuf == NULL) @@ -132,7 +135,9 @@ int DES_enc_write(int fd, const void *_buf, int len, if (len < 8) { cp = shortbuf; memcpy(shortbuf, buf, len); - RAND_pseudo_bytes(shortbuf + len, 8 - len); + if (RAND_pseudo_bytes(shortbuf + len, 8 - len) < 0) { + return -1; + } rnum = 8; } else { cp = buf; diff --git a/deps/openssl/openssl/crypto/dh/Makefile b/deps/openssl/openssl/crypto/dh/Makefile index f447907820c1a6..46fa5ac57b4747 100644 --- a/deps/openssl/openssl/crypto/dh/Makefile +++ b/deps/openssl/openssl/crypto/dh/Makefile @@ -63,6 +63,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/dh/dh_ameth.c b/deps/openssl/openssl/crypto/dh/dh_ameth.c index c6bfc2d3f45bcf..ac72468bd14bf0 100644 --- a/deps/openssl/openssl/crypto/dh/dh_ameth.c +++ b/deps/openssl/openssl/crypto/dh/dh_ameth.c @@ -160,7 +160,7 @@ static int dh_pub_encode(X509_PUBKEY *pk, const EVP_PKEY *pkey) dh = pkey->pkey.dh; str = ASN1_STRING_new(); - if(!str) { + if (!str) { DHerr(DH_F_DH_PUB_ENCODE, ERR_R_MALLOC_FAILURE); goto err; } diff --git a/deps/openssl/openssl/crypto/dh/dh_pmeth.c b/deps/openssl/openssl/crypto/dh/dh_pmeth.c index b3a31472ab2a33..b58e3fa86fad0c 100644 --- a/deps/openssl/openssl/crypto/dh/dh_pmeth.c +++ b/deps/openssl/openssl/crypto/dh/dh_pmeth.c @@ -462,7 +462,7 @@ static int pkey_dh_derive(EVP_PKEY_CTX *ctx, unsigned char *key, ret = 0; Zlen = DH_size(dh); Z = OPENSSL_malloc(Zlen); - if(!Z) { + if (!Z) { goto err; } if (DH_compute_key_padded(Z, dhpub, dh) <= 0) diff --git a/deps/openssl/openssl/crypto/dsa/Makefile b/deps/openssl/openssl/crypto/dsa/Makefile index 5fef4ca5adfc53..81092013773f75 100644 --- a/deps/openssl/openssl/crypto/dsa/Makefile +++ b/deps/openssl/openssl/crypto/dsa/Makefile @@ -63,6 +63,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/dsa/dsa_gen.c b/deps/openssl/openssl/crypto/dsa/dsa_gen.c index 89200369392410..5a328aaab5b408 100644 --- a/deps/openssl/openssl/crypto/dsa/dsa_gen.c +++ b/deps/openssl/openssl/crypto/dsa/dsa_gen.c @@ -204,7 +204,8 @@ int dsa_builtin_paramgen(DSA *ret, size_t bits, size_t qbits, goto err; if (!seed_len) { - RAND_pseudo_bytes(seed, qsize); + if (RAND_pseudo_bytes(seed, qsize) < 0) + goto err; seed_is_random = 1; } else { seed_is_random = 0; diff --git a/deps/openssl/openssl/crypto/dsa/dsa_ossl.c b/deps/openssl/openssl/crypto/dsa/dsa_ossl.c index 665f40a77978d5..f0ec8faa84cc3c 100644 --- a/deps/openssl/openssl/crypto/dsa/dsa_ossl.c +++ b/deps/openssl/openssl/crypto/dsa/dsa_ossl.c @@ -106,23 +106,23 @@ static DSA_METHOD openssl_dsa_meth = { #define DSA_MOD_EXP(err_instr,dsa,rr,a1,p1,a2,p2,m,ctx,in_mont) \ do { \ int _tmp_res53; \ - if((dsa)->meth->dsa_mod_exp) \ + if ((dsa)->meth->dsa_mod_exp) \ _tmp_res53 = (dsa)->meth->dsa_mod_exp((dsa), (rr), (a1), (p1), \ (a2), (p2), (m), (ctx), (in_mont)); \ else \ _tmp_res53 = BN_mod_exp2_mont((rr), (a1), (p1), (a2), (p2), \ (m), (ctx), (in_mont)); \ - if(!_tmp_res53) err_instr; \ + if (!_tmp_res53) err_instr; \ } while(0) #define DSA_BN_MOD_EXP(err_instr,dsa,r,a,p,m,ctx,m_ctx) \ do { \ int _tmp_res53; \ - if((dsa)->meth->bn_mod_exp) \ + if ((dsa)->meth->bn_mod_exp) \ _tmp_res53 = (dsa)->meth->bn_mod_exp((dsa), (r), (a), (p), \ (m), (ctx), (m_ctx)); \ else \ _tmp_res53 = BN_mod_exp_mont((r), (a), (p), (m), (ctx), (m_ctx)); \ - if(!_tmp_res53) err_instr; \ + if (!_tmp_res53) err_instr; \ } while(0) const DSA_METHOD *DSA_OpenSSL(void) diff --git a/deps/openssl/openssl/crypto/dso/Makefile b/deps/openssl/openssl/crypto/dso/Makefile index fb2709ed63a760..36b8ead041c34d 100644 --- a/deps/openssl/openssl/crypto/dso/Makefile +++ b/deps/openssl/openssl/crypto/dso/Makefile @@ -63,6 +63,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/dso/dso_lib.c b/deps/openssl/openssl/crypto/dso/dso_lib.c index d2a48bb6640c4a..09b8eafccacc3d 100644 --- a/deps/openssl/openssl/crypto/dso/dso_lib.c +++ b/deps/openssl/openssl/crypto/dso/dso_lib.c @@ -285,7 +285,7 @@ DSO_FUNC_TYPE DSO_bind_func(DSO *dso, const char *symname) * honest. For one thing, I think I have to return a negative value for any * error because possible DSO_ctrl() commands may return values such as * "size"s that can legitimately be zero (making the standard - * "if(DSO_cmd(...))" form that works almost everywhere else fail at odd + * "if (DSO_cmd(...))" form that works almost everywhere else fail at odd * times. I'd prefer "output" values to be passed by reference and the return * value as success/failure like usual ... but we conform when we must... :-) */ diff --git a/deps/openssl/openssl/crypto/dso/dso_vms.c b/deps/openssl/openssl/crypto/dso/dso_vms.c index 0eff96ec22f992..d0794b8fb044b8 100644 --- a/deps/openssl/openssl/crypto/dso/dso_vms.c +++ b/deps/openssl/openssl/crypto/dso/dso_vms.c @@ -539,7 +539,7 @@ static char *vms_name_converter(DSO *dso, const char *filename) { int len = strlen(filename); char *not_translated = OPENSSL_malloc(len + 1); - if(not_translated) + if (not_translated) strcpy(not_translated, filename); return (not_translated); } diff --git a/deps/openssl/openssl/crypto/ebcdic.c b/deps/openssl/openssl/crypto/ebcdic.c index 4b7652c0ecf7e3..fd6df92b468b00 100644 --- a/deps/openssl/openssl/crypto/ebcdic.c +++ b/deps/openssl/openssl/crypto/ebcdic.c @@ -3,7 +3,7 @@ #ifndef CHARSET_EBCDIC # include -# if defined(PEDANTIC) || defined(__DECC) || defined(OPENSSL_SYS_MACOSX) +# if defined(PEDANTIC) || defined(__DECC) || defined(OPENSSL_SYS_MACOSX) || defined(__clang__) static void *dummy = &dummy; # endif diff --git a/deps/openssl/openssl/crypto/ec/Makefile b/deps/openssl/openssl/crypto/ec/Makefile index 0d9f3ab256a0af..359ef4e40fd4b5 100644 --- a/deps/openssl/openssl/crypto/ec/Makefile +++ b/deps/openssl/openssl/crypto/ec/Makefile @@ -78,6 +78,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl b/deps/openssl/openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl index a34f03cc5e46c8..84379fce1cb95e 100755 --- a/deps/openssl/openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl +++ b/deps/openssl/openssl/crypto/ec/asm/ecp_nistz256-x86_64.pl @@ -30,20 +30,24 @@ # Further optimization by : # -# this/original -# Opteron +12-49% -# Bulldozer +14-45% -# P4 +18-46% -# Westmere +12-34% -# Sandy Bridge +9-35% -# Ivy Bridge +9-35% -# Haswell +8-37% -# Broadwell +18-58% -# Atom +15-50% -# VIA Nano +43-160% +# this/original with/without -DECP_NISTZ256_ASM(*) +# Opteron +12-49% +110-150% +# Bulldozer +14-45% +175-210% +# P4 +18-46% n/a :-( +# Westmere +12-34% +80-87% +# Sandy Bridge +9-35% +110-120% +# Ivy Bridge +9-35% +110-125% +# Haswell +8-37% +140-160% +# Broadwell +18-58% +145-210% +# Atom +15-50% +130-180% +# VIA Nano +43-160% +300-480% +# +# (*) "without -DECP_NISTZ256_ASM" refers to build with +# "enable-ec_nistp_64_gcc_128"; # # Ranges denote minimum and maximum improvement coefficients depending -# on benchmark. +# on benchmark. Lower coefficients are for ECDSA sign, relatively fastest +# server-side operation. Keep in mind that +100% means 2x improvement. $flavour = shift; $output = shift; @@ -599,7 +603,7 @@ adc \$0, $acc0 ######################################################################## - # Second reduction step + # Second reduction step mov $acc1, $t1 shl \$32, $acc1 mulq $poly3 @@ -646,7 +650,7 @@ adc \$0, $acc1 ######################################################################## - # Third reduction step + # Third reduction step mov $acc2, $t1 shl \$32, $acc2 mulq $poly3 @@ -693,7 +697,7 @@ adc \$0, $acc2 ######################################################################## - # Final reduction step + # Final reduction step mov $acc3, $t1 shl \$32, $acc3 mulq $poly3 @@ -706,7 +710,7 @@ mov $acc5, $t1 adc \$0, $acc2 - ######################################################################## + ######################################################################## # Branch-less conditional subtraction of P sub \$-1, $acc4 # .Lpoly[0] mov $acc0, $t2 @@ -2060,7 +2064,7 @@ () movq %xmm1, $r_ptr call __ecp_nistz256_sqr_mont$x # p256_sqr_mont(res_y, S); ___ -{ +{ ######## ecp_nistz256_div_by_2(res_y, res_y); ########################## # operate in 4-5-6-7 "name space" that matches squaring output # @@ -2149,7 +2153,7 @@ () lea $M(%rsp), $b_ptr mov $acc4, $acc6 # harmonize sub output and mul input xor %ecx, %ecx - mov $acc4, $S+8*0(%rsp) # have to save:-( + mov $acc4, $S+8*0(%rsp) # have to save:-( mov $acc5, $acc2 mov $acc5, $S+8*1(%rsp) cmovz $acc0, $acc3 diff --git a/deps/openssl/openssl/crypto/ec/ec.h b/deps/openssl/openssl/crypto/ec/ec.h index 98edfdf8bc93cb..6d3178f609f493 100644 --- a/deps/openssl/openssl/crypto/ec/ec.h +++ b/deps/openssl/openssl/crypto/ec/ec.h @@ -1097,6 +1097,12 @@ void ERR_load_EC_strings(void); # define EC_F_ECPARAMETERS_PRINT_FP 148 # define EC_F_ECPKPARAMETERS_PRINT 149 # define EC_F_ECPKPARAMETERS_PRINT_FP 150 +# define EC_F_ECP_NISTZ256_GET_AFFINE 240 +# define EC_F_ECP_NISTZ256_MULT_PRECOMPUTE 243 +# define EC_F_ECP_NISTZ256_POINTS_MUL 241 +# define EC_F_ECP_NISTZ256_PRE_COMP_NEW 244 +# define EC_F_ECP_NISTZ256_SET_WORDS 245 +# define EC_F_ECP_NISTZ256_WINDOWED_MUL 242 # define EC_F_ECP_NIST_MOD_192 203 # define EC_F_ECP_NIST_MOD_224 204 # define EC_F_ECP_NIST_MOD_256 205 @@ -1208,11 +1214,6 @@ void ERR_load_EC_strings(void); # define EC_F_NISTP224_PRE_COMP_NEW 227 # define EC_F_NISTP256_PRE_COMP_NEW 236 # define EC_F_NISTP521_PRE_COMP_NEW 237 -# define EC_F_ECP_NISTZ256_GET_AFFINE 240 -# define EC_F_ECP_NISTZ256_POINTS_MUL 241 -# define EC_F_ECP_NISTZ256_WINDOWED_MUL 242 -# define EC_F_ECP_NISTZ256_MULT_PRECOMPUTE 243 -# define EC_F_ECP_NISTZ256_PRE_COMP_NEW 244 # define EC_F_O2I_ECPUBLICKEY 152 # define EC_F_OLD_EC_PRIV_DECODE 222 # define EC_F_PKEY_EC_CTRL 197 diff --git a/deps/openssl/openssl/crypto/ec/ec2_oct.c b/deps/openssl/openssl/crypto/ec/ec2_oct.c index c245d886dad11c..0d04cc692303d6 100644 --- a/deps/openssl/openssl/crypto/ec/ec2_oct.c +++ b/deps/openssl/openssl/crypto/ec/ec2_oct.c @@ -387,7 +387,7 @@ int ec_GF2m_simple_oct2point(const EC_GROUP *group, EC_POINT *point, } /* test required by X9.62 */ - if (!EC_POINT_is_on_curve(group, point, ctx)) { + if (EC_POINT_is_on_curve(group, point, ctx) <= 0) { ECerr(EC_F_EC_GF2M_SIMPLE_OCT2POINT, EC_R_POINT_IS_NOT_ON_CURVE); goto err; } diff --git a/deps/openssl/openssl/crypto/ec/ec_asn1.c b/deps/openssl/openssl/crypto/ec/ec_asn1.c index b4b0e9f3b82a2b..4ad8494981bfc4 100644 --- a/deps/openssl/openssl/crypto/ec/ec_asn1.c +++ b/deps/openssl/openssl/crypto/ec/ec_asn1.c @@ -1114,7 +1114,7 @@ int i2d_ECPrivateKey(EC_KEY *a, unsigned char **out) { int ret = 0, ok = 0; unsigned char *buffer = NULL; - size_t buf_len = 0, tmp_len; + size_t buf_len = 0, tmp_len, bn_len; EC_PRIVATEKEY *priv_key = NULL; if (a == NULL || a->group == NULL || a->priv_key == NULL || @@ -1130,18 +1130,32 @@ int i2d_ECPrivateKey(EC_KEY *a, unsigned char **out) priv_key->version = a->version; - buf_len = (size_t)BN_num_bytes(a->priv_key); + bn_len = (size_t)BN_num_bytes(a->priv_key); + + /* Octetstring may need leading zeros if BN is to short */ + + buf_len = (EC_GROUP_get_degree(a->group) + 7) / 8; + + if (bn_len > buf_len) { + ECerr(EC_F_I2D_ECPRIVATEKEY, EC_R_BUFFER_TOO_SMALL); + goto err; + } + buffer = OPENSSL_malloc(buf_len); if (buffer == NULL) { ECerr(EC_F_I2D_ECPRIVATEKEY, ERR_R_MALLOC_FAILURE); goto err; } - if (!BN_bn2bin(a->priv_key, buffer)) { + if (!BN_bn2bin(a->priv_key, buffer + buf_len - bn_len)) { ECerr(EC_F_I2D_ECPRIVATEKEY, ERR_R_BN_LIB); goto err; } + if (buf_len - bn_len > 0) { + memset(buffer, 0, buf_len - bn_len); + } + if (!M_ASN1_OCTET_STRING_set(priv_key->privateKey, buffer, buf_len)) { ECerr(EC_F_I2D_ECPRIVATEKEY, ERR_R_ASN1_LIB); goto err; diff --git a/deps/openssl/openssl/crypto/ec/ec_check.c b/deps/openssl/openssl/crypto/ec/ec_check.c index d3f534999e067c..dd6f0ac409942d 100644 --- a/deps/openssl/openssl/crypto/ec/ec_check.c +++ b/deps/openssl/openssl/crypto/ec/ec_check.c @@ -85,7 +85,7 @@ int EC_GROUP_check(const EC_GROUP *group, BN_CTX *ctx) ECerr(EC_F_EC_GROUP_CHECK, EC_R_UNDEFINED_GENERATOR); goto err; } - if (!EC_POINT_is_on_curve(group, group->generator, ctx)) { + if (EC_POINT_is_on_curve(group, group->generator, ctx) <= 0) { ECerr(EC_F_EC_GROUP_CHECK, EC_R_POINT_IS_NOT_ON_CURVE); goto err; } diff --git a/deps/openssl/openssl/crypto/ec/ec_err.c b/deps/openssl/openssl/crypto/ec/ec_err.c index 13b32c78ac7444..6fe5baafd4b334 100644 --- a/deps/openssl/openssl/crypto/ec/ec_err.c +++ b/deps/openssl/openssl/crypto/ec/ec_err.c @@ -1,6 +1,6 @@ /* crypto/ec/ec_err.c */ /* ==================================================================== - * Copyright (c) 1999-2014 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2015 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -89,6 +89,13 @@ static ERR_STRING_DATA EC_str_functs[] = { {ERR_FUNC(EC_F_ECPARAMETERS_PRINT_FP), "ECParameters_print_fp"}, {ERR_FUNC(EC_F_ECPKPARAMETERS_PRINT), "ECPKParameters_print"}, {ERR_FUNC(EC_F_ECPKPARAMETERS_PRINT_FP), "ECPKParameters_print_fp"}, + {ERR_FUNC(EC_F_ECP_NISTZ256_GET_AFFINE), "ecp_nistz256_get_affine"}, + {ERR_FUNC(EC_F_ECP_NISTZ256_MULT_PRECOMPUTE), + "ecp_nistz256_mult_precompute"}, + {ERR_FUNC(EC_F_ECP_NISTZ256_POINTS_MUL), "ecp_nistz256_points_mul"}, + {ERR_FUNC(EC_F_ECP_NISTZ256_PRE_COMP_NEW), "ecp_nistz256_pre_comp_new"}, + {ERR_FUNC(EC_F_ECP_NISTZ256_SET_WORDS), "ecp_nistz256_set_words"}, + {ERR_FUNC(EC_F_ECP_NISTZ256_WINDOWED_MUL), "ecp_nistz256_windowed_mul"}, {ERR_FUNC(EC_F_ECP_NIST_MOD_192), "ECP_NIST_MOD_192"}, {ERR_FUNC(EC_F_ECP_NIST_MOD_224), "ECP_NIST_MOD_224"}, {ERR_FUNC(EC_F_ECP_NIST_MOD_256), "ECP_NIST_MOD_256"}, @@ -239,12 +246,6 @@ static ERR_STRING_DATA EC_str_functs[] = { {ERR_FUNC(EC_F_NISTP224_PRE_COMP_NEW), "NISTP224_PRE_COMP_NEW"}, {ERR_FUNC(EC_F_NISTP256_PRE_COMP_NEW), "NISTP256_PRE_COMP_NEW"}, {ERR_FUNC(EC_F_NISTP521_PRE_COMP_NEW), "NISTP521_PRE_COMP_NEW"}, - {ERR_FUNC(EC_F_ECP_NISTZ256_GET_AFFINE), "ecp_nistz256_get_affine"}, - {ERR_FUNC(EC_F_ECP_NISTZ256_POINTS_MUL), "ecp_nistz256_points_mul"}, - {ERR_FUNC(EC_F_ECP_NISTZ256_WINDOWED_MUL), "ecp_nistz256_windowed_mul"}, - {ERR_FUNC(EC_F_ECP_NISTZ256_MULT_PRECOMPUTE), - "ecp_nistz256_mult_precompute"}, - {ERR_FUNC(EC_F_ECP_NISTZ256_PRE_COMP_NEW), "ecp_nistz256_pre_comp_new"}, {ERR_FUNC(EC_F_O2I_ECPUBLICKEY), "o2i_ECPublicKey"}, {ERR_FUNC(EC_F_OLD_EC_PRIV_DECODE), "OLD_EC_PRIV_DECODE"}, {ERR_FUNC(EC_F_PKEY_EC_CTRL), "PKEY_EC_CTRL"}, diff --git a/deps/openssl/openssl/crypto/ec/ec_key.c b/deps/openssl/openssl/crypto/ec/ec_key.c index ebdffc82127ecf..55ce3fe9beb2fd 100644 --- a/deps/openssl/openssl/crypto/ec/ec_key.c +++ b/deps/openssl/openssl/crypto/ec/ec_key.c @@ -314,7 +314,7 @@ int EC_KEY_check_key(const EC_KEY *eckey) goto err; /* testing whether the pub_key is on the elliptic curve */ - if (!EC_POINT_is_on_curve(eckey->group, eckey->pub_key, ctx)) { + if (EC_POINT_is_on_curve(eckey->group, eckey->pub_key, ctx) <= 0) { ECerr(EC_F_EC_KEY_CHECK_KEY, EC_R_POINT_IS_NOT_ON_CURVE); goto err; } diff --git a/deps/openssl/openssl/crypto/ec/ec_lcl.h b/deps/openssl/openssl/crypto/ec/ec_lcl.h index 697eeb528ca944..969fd147ef9369 100644 --- a/deps/openssl/openssl/crypto/ec/ec_lcl.h +++ b/deps/openssl/openssl/crypto/ec/ec_lcl.h @@ -459,14 +459,6 @@ int ec_GF2m_simple_mul(const EC_GROUP *group, EC_POINT *r, int ec_GF2m_precompute_mult(EC_GROUP *group, BN_CTX *ctx); int ec_GF2m_have_precompute_mult(const EC_GROUP *group); -/* method functions in ec2_mult.c */ -int ec_GF2m_simple_mul(const EC_GROUP *group, EC_POINT *r, - const BIGNUM *scalar, size_t num, - const EC_POINT *points[], const BIGNUM *scalars[], - BN_CTX *); -int ec_GF2m_precompute_mult(EC_GROUP *group, BN_CTX *ctx); -int ec_GF2m_have_precompute_mult(const EC_GROUP *group); - #ifndef OPENSSL_NO_EC_NISTP_64_GCC_128 /* method functions in ecp_nistp224.c */ int ec_GFp_nistp224_group_init(EC_GROUP *group); diff --git a/deps/openssl/openssl/crypto/ec/ec_lib.c b/deps/openssl/openssl/crypto/ec/ec_lib.c index 6ffd9fc16583aa..3ffa112cc3063e 100644 --- a/deps/openssl/openssl/crypto/ec/ec_lib.c +++ b/deps/openssl/openssl/crypto/ec/ec_lib.c @@ -970,6 +970,13 @@ int EC_POINT_is_at_infinity(const EC_GROUP *group, const EC_POINT *point) return group->meth->is_at_infinity(group, point); } +/* + * Check whether an EC_POINT is on the curve or not. Note that the return + * value for this function should NOT be treated as a boolean. Return values: + * 1: The point is on the curve + * 0: The point is not on the curve + * -1: An error occurred + */ int EC_POINT_is_on_curve(const EC_GROUP *group, const EC_POINT *point, BN_CTX *ctx) { diff --git a/deps/openssl/openssl/crypto/ec/eck_prn.c b/deps/openssl/openssl/crypto/ec/eck_prn.c index 515b262387df0f..df9b37a750d625 100644 --- a/deps/openssl/openssl/crypto/ec/eck_prn.c +++ b/deps/openssl/openssl/crypto/ec/eck_prn.c @@ -346,12 +346,14 @@ static int print_bin(BIO *fp, const char *name, const unsigned char *buf, if (buf == NULL) return 1; - if (off) { + if (off > 0) { if (off > 128) off = 128; memset(str, ' ', off); if (BIO_write(fp, str, off) <= 0) return 0; + } else { + off = 0; } if (BIO_printf(fp, "%s", name) <= 0) diff --git a/deps/openssl/openssl/crypto/ec/ecp_nistz256.c b/deps/openssl/openssl/crypto/ec/ecp_nistz256.c index 2cd6599d8557e1..ca44d0aaeec451 100644 --- a/deps/openssl/openssl/crypto/ec/ecp_nistz256.c +++ b/deps/openssl/openssl/crypto/ec/ecp_nistz256.c @@ -222,6 +222,18 @@ static BN_ULONG is_one(const BN_ULONG a[P256_LIMBS]) return is_zero(res); } +static int ecp_nistz256_set_words(BIGNUM *a, BN_ULONG words[P256_LIMBS]) + { + if (bn_wexpand(a, P256_LIMBS) == NULL) { + ECerr(EC_F_ECP_NISTZ256_SET_WORDS, ERR_R_MALLOC_FAILURE); + return 0; + } + memcpy(a->d, words, sizeof(BN_ULONG) * P256_LIMBS); + a->top = P256_LIMBS; + bn_correct_top(a); + return 1; +} + #ifndef ECP_NISTZ256_REFERENCE_IMPLEMENTATION void ecp_nistz256_point_double(P256_POINT *r, const P256_POINT *a); void ecp_nistz256_point_add(P256_POINT *r, @@ -557,13 +569,14 @@ static int ecp_nistz256_bignum_to_field_elem(BN_ULONG out[P256_LIMBS], } /* r = sum(scalar[i]*point[i]) */ -static void ecp_nistz256_windowed_mul(const EC_GROUP *group, +static int ecp_nistz256_windowed_mul(const EC_GROUP *group, P256_POINT *r, const BIGNUM **scalar, const EC_POINT **point, int num, BN_CTX *ctx) { - int i, j; + + int i, j, ret = 0; unsigned int index; unsigned char (*p_str)[33] = NULL; const unsigned int window_size = 5; @@ -589,6 +602,7 @@ static void ecp_nistz256_windowed_mul(const EC_GROUP *group, for (i = 0; i < num; i++) { P256_POINT *row = table[i]; + /* This is an unusual input, we don't guarantee constant-timeness. */ if ((BN_num_bits(scalar[i]) > 256) || BN_is_negative(scalar[i])) { BIGNUM *mod; @@ -697,6 +711,7 @@ static void ecp_nistz256_windowed_mul(const EC_GROUP *group, ecp_nistz256_point_add(r, r, &h); } + ret = 1; err: if (table_storage) OPENSSL_free(table_storage); @@ -704,6 +719,7 @@ static void ecp_nistz256_windowed_mul(const EC_GROUP *group, OPENSSL_free(p_str); if (scalars) OPENSSL_free(scalars); + return ret; } /* Coordinates of G, for which we have precomputed tables */ @@ -742,6 +758,7 @@ static int ecp_nistz256_mult_precompute(EC_GROUP *group, BN_CTX *ctx) EC_POINT *P = NULL, *T = NULL; const EC_POINT *generator; EC_PRE_COMP *pre_comp; + BN_CTX *new_ctx = NULL; int i, j, k, ret = 0; size_t w; @@ -771,7 +788,7 @@ static int ecp_nistz256_mult_precompute(EC_GROUP *group, BN_CTX *ctx) return 0; if (ctx == NULL) { - ctx = BN_CTX_new(); + ctx = new_ctx = BN_CTX_new(); if (ctx == NULL) goto err; } @@ -802,30 +819,41 @@ static int ecp_nistz256_mult_precompute(EC_GROUP *group, BN_CTX *ctx) P = EC_POINT_new(group); T = EC_POINT_new(group); + if (P == NULL || T == NULL) + goto err; /* * The zero entry is implicitly infinity, and we skip it, storing other * values with -1 offset. */ - EC_POINT_copy(T, generator); + if (!EC_POINT_copy(T, generator)) + goto err; for (k = 0; k < 64; k++) { - EC_POINT_copy(P, T); + if (!EC_POINT_copy(P, T)) + goto err; for (j = 0; j < 37; j++) { /* - * It would be faster to use - * ec_GFp_simple_points_make_affine and make multiple - * points affine at the same time. + * It would be faster to use EC_POINTs_make_affine and + * make multiple points affine at the same time. */ - ec_GFp_simple_make_affine(group, P, ctx); - ecp_nistz256_bignum_to_field_elem(preComputedTable[j] - [k].X, &P->X); - ecp_nistz256_bignum_to_field_elem(preComputedTable[j] - [k].Y, &P->Y); - for (i = 0; i < 7; i++) - ec_GFp_simple_dbl(group, P, P, ctx); + if (!EC_POINT_make_affine(group, P, ctx)) + goto err; + if (!ecp_nistz256_bignum_to_field_elem(preComputedTable[j][k].X, + &P->X) || + !ecp_nistz256_bignum_to_field_elem(preComputedTable[j][k].Y, + &P->Y)) { + ECerr(EC_F_ECP_NISTZ256_MULT_PRECOMPUTE, + EC_R_COORDINATES_OUT_OF_RANGE); + goto err; + } + for (i = 0; i < 7; i++) { + if (!EC_POINT_dbl(group, P, P, ctx)) + goto err; + } } - ec_GFp_simple_add(group, T, T, generator, ctx); + if (!EC_POINT_add(group, T, T, generator, ctx)) + goto err; } pre_comp->group = group; @@ -849,6 +877,8 @@ static int ecp_nistz256_mult_precompute(EC_GROUP *group, BN_CTX *ctx) err: if (ctx != NULL) BN_CTX_end(ctx); + BN_CTX_free(new_ctx); + if (pre_comp) ecp_nistz256_pre_comp_free(pre_comp); if (precomp_storage) @@ -1102,6 +1132,9 @@ static int ecp_nistz256_points_mul(const EC_GROUP *group, const EC_PRE_COMP *pre_comp = NULL; const EC_POINT *generator = NULL; unsigned int index = 0; + BN_CTX *new_ctx = NULL; + const BIGNUM **new_scalars = NULL; + const EC_POINT **new_points = NULL; const unsigned int window_size = 7; const unsigned int mask = (1 << (window_size + 1)) - 1; unsigned int wvalue; @@ -1115,6 +1148,7 @@ static int ecp_nistz256_points_mul(const EC_GROUP *group, ECerr(EC_F_ECP_NISTZ256_POINTS_MUL, EC_R_INCOMPATIBLE_OBJECTS); return 0; } + if ((scalar == NULL) && (num == 0)) return EC_POINT_set_to_infinity(group, r); @@ -1125,13 +1159,13 @@ static int ecp_nistz256_points_mul(const EC_GROUP *group, } } - /* Need 256 bits for space for all coordinates. */ - bn_wexpand(&r->X, P256_LIMBS); - bn_wexpand(&r->Y, P256_LIMBS); - bn_wexpand(&r->Z, P256_LIMBS); - r->X.top = P256_LIMBS; - r->Y.top = P256_LIMBS; - r->Z.top = P256_LIMBS; + if (ctx == NULL) { + ctx = new_ctx = BN_CTX_new(); + if (ctx == NULL) + goto err; + } + + BN_CTX_start(ctx); if (scalar) { generator = EC_GROUP_get0_generator(group); @@ -1156,8 +1190,10 @@ static int ecp_nistz256_points_mul(const EC_GROUP *group, goto err; if (!ecp_nistz256_set_from_affine - (pre_comp_generator, group, pre_comp->precomp[0], ctx)) + (pre_comp_generator, group, pre_comp->precomp[0], ctx)) { + EC_POINT_free(pre_comp_generator); goto err; + } if (0 == EC_POINT_cmp(group, generator, pre_comp_generator, ctx)) preComputedTable = (const PRECOMP256_ROW *)pre_comp->precomp; @@ -1255,20 +1291,16 @@ static int ecp_nistz256_points_mul(const EC_GROUP *group, * Without a precomputed table for the generator, it has to be * handled like a normal point. */ - const BIGNUM **new_scalars; - const EC_POINT **new_points; - new_scalars = OPENSSL_malloc((num + 1) * sizeof(BIGNUM *)); if (!new_scalars) { ECerr(EC_F_ECP_NISTZ256_POINTS_MUL, ERR_R_MALLOC_FAILURE); - return 0; + goto err; } new_points = OPENSSL_malloc((num + 1) * sizeof(EC_POINT *)); if (!new_points) { - OPENSSL_free(new_scalars); ECerr(EC_F_ECP_NISTZ256_POINTS_MUL, ERR_R_MALLOC_FAILURE); - return 0; + goto err; } memcpy(new_scalars, scalars, num * sizeof(BIGNUM *)); @@ -1286,27 +1318,31 @@ static int ecp_nistz256_points_mul(const EC_GROUP *group, if (p_is_infinity) out = &p.p; - ecp_nistz256_windowed_mul(group, out, scalars, points, num, ctx); + if (!ecp_nistz256_windowed_mul(group, out, scalars, points, num, ctx)) + goto err; if (!p_is_infinity) ecp_nistz256_point_add(&p.p, &p.p, out); } - if (no_precomp_for_generator) { - OPENSSL_free(points); - OPENSSL_free(scalars); + /* Not constant-time, but we're only operating on the public output. */ + if (!ecp_nistz256_set_words(&r->X, p.p.X) || + !ecp_nistz256_set_words(&r->Y, p.p.Y) || + !ecp_nistz256_set_words(&r->Z, p.p.Z)) { + goto err; } - - memcpy(r->X.d, p.p.X, sizeof(p.p.X)); - memcpy(r->Y.d, p.p.Y, sizeof(p.p.Y)); - memcpy(r->Z.d, p.p.Z, sizeof(p.p.Z)); - bn_correct_top(&r->X); - bn_correct_top(&r->Y); - bn_correct_top(&r->Z); + r->Z_is_one = is_one(p.p.Z) & 1; ret = 1; - err: +err: + if (ctx) + BN_CTX_end(ctx); + BN_CTX_free(new_ctx); + if (new_points) + OPENSSL_free(new_points); + if (new_scalars) + OPENSSL_free(new_scalars); return ret; } @@ -1319,6 +1355,7 @@ static int ecp_nistz256_get_affine(const EC_GROUP *group, BN_ULONG x_aff[P256_LIMBS]; BN_ULONG y_aff[P256_LIMBS]; BN_ULONG point_x[P256_LIMBS], point_y[P256_LIMBS], point_z[P256_LIMBS]; + BN_ULONG x_ret[P256_LIMBS], y_ret[P256_LIMBS]; if (EC_POINT_is_at_infinity(group, point)) { ECerr(EC_F_ECP_NISTZ256_GET_AFFINE, EC_R_POINT_AT_INFINITY); @@ -1337,19 +1374,17 @@ static int ecp_nistz256_get_affine(const EC_GROUP *group, ecp_nistz256_mul_mont(x_aff, z_inv2, point_x); if (x != NULL) { - bn_wexpand(x, P256_LIMBS); - x->top = P256_LIMBS; - ecp_nistz256_from_mont(x->d, x_aff); - bn_correct_top(x); + ecp_nistz256_from_mont(x_ret, x_aff); + if (!ecp_nistz256_set_words(x, x_ret)) + return 0; } if (y != NULL) { ecp_nistz256_mul_mont(z_inv3, z_inv3, z_inv2); ecp_nistz256_mul_mont(y_aff, z_inv3, point_y); - bn_wexpand(y, P256_LIMBS); - y->top = P256_LIMBS; - ecp_nistz256_from_mont(y->d, y_aff); - bn_correct_top(y); + ecp_nistz256_from_mont(y_ret, y_aff); + if (!ecp_nistz256_set_words(y, y_ret)) + return 0; } return 1; diff --git a/deps/openssl/openssl/crypto/ec/ecp_oct.c b/deps/openssl/openssl/crypto/ec/ecp_oct.c index e5cec8be82663d..1bc3f39ad15ff9 100644 --- a/deps/openssl/openssl/crypto/ec/ecp_oct.c +++ b/deps/openssl/openssl/crypto/ec/ecp_oct.c @@ -413,7 +413,7 @@ int ec_GFp_simple_oct2point(const EC_GROUP *group, EC_POINT *point, } /* test required by X9.62 */ - if (!EC_POINT_is_on_curve(group, point, ctx)) { + if (EC_POINT_is_on_curve(group, point, ctx) <= 0) { ECerr(EC_F_EC_GFP_SIMPLE_OCT2POINT, EC_R_POINT_IS_NOT_ON_CURVE); goto err; } diff --git a/deps/openssl/openssl/crypto/ec/ectest.c b/deps/openssl/openssl/crypto/ec/ectest.c index a18b32761a2e39..fede530bc1391b 100644 --- a/deps/openssl/openssl/crypto/ec/ectest.c +++ b/deps/openssl/openssl/crypto/ec/ectest.c @@ -412,7 +412,7 @@ static void prime_field_tests(void) ABORT; if (!EC_POINT_set_compressed_coordinates_GFp(group, Q, x, 1, ctx)) ABORT; - if (!EC_POINT_is_on_curve(group, Q, ctx)) { + if (EC_POINT_is_on_curve(group, Q, ctx) <= 0) { if (!EC_POINT_get_affine_coordinates_GFp(group, Q, x, y, ctx)) ABORT; fprintf(stderr, "Point is not on curve: x = 0x"); @@ -544,7 +544,7 @@ static void prime_field_tests(void) ABORT; if (!EC_POINT_set_affine_coordinates_GFp(group, P, x, y, ctx)) ABORT; - if (!EC_POINT_is_on_curve(group, P, ctx)) + if (EC_POINT_is_on_curve(group, P, ctx) <= 0) ABORT; if (!BN_hex2bn(&z, "0100000000000000000001F4C8F927AED3CA752257")) ABORT; @@ -593,7 +593,7 @@ static void prime_field_tests(void) ABORT; if (!EC_POINT_set_compressed_coordinates_GFp(group, P, x, 1, ctx)) ABORT; - if (!EC_POINT_is_on_curve(group, P, ctx)) + if (EC_POINT_is_on_curve(group, P, ctx) <= 0) ABORT; if (!BN_hex2bn(&z, "FFFFFFFFFFFFFFFFFFFFFFFF99DEF836146BC9B1B4D22831")) ABORT; @@ -646,7 +646,7 @@ static void prime_field_tests(void) ABORT; if (!EC_POINT_set_compressed_coordinates_GFp(group, P, x, 0, ctx)) ABORT; - if (!EC_POINT_is_on_curve(group, P, ctx)) + if (EC_POINT_is_on_curve(group, P, ctx) <= 0) ABORT; if (!BN_hex2bn (&z, "FFFFFFFFFFFFFFFFFFFFFFFFFFFF16A2E0B8F03E13DD29455C5C2A3D")) @@ -705,7 +705,7 @@ static void prime_field_tests(void) ABORT; if (!EC_POINT_set_compressed_coordinates_GFp(group, P, x, 1, ctx)) ABORT; - if (!EC_POINT_is_on_curve(group, P, ctx)) + if (EC_POINT_is_on_curve(group, P, ctx) <= 0) ABORT; if (!BN_hex2bn(&z, "FFFFFFFF00000000FFFFFFFFFFFFFFFFBCE6FAADA7179E" "84F3B9CAC2FC632551")) @@ -761,7 +761,7 @@ static void prime_field_tests(void) ABORT; if (!EC_POINT_set_compressed_coordinates_GFp(group, P, x, 1, ctx)) ABORT; - if (!EC_POINT_is_on_curve(group, P, ctx)) + if (EC_POINT_is_on_curve(group, P, ctx) <= 0) ABORT; if (!BN_hex2bn(&z, "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF" "FFC7634D81F4372DDF581A0DB248B0A77AECEC196ACCC52973")) @@ -820,7 +820,7 @@ static void prime_field_tests(void) ABORT; if (!EC_POINT_set_compressed_coordinates_GFp(group, P, x, 0, ctx)) ABORT; - if (!EC_POINT_is_on_curve(group, P, ctx)) + if (EC_POINT_is_on_curve(group, P, ctx) <= 0) ABORT; if (!BN_hex2bn(&z, "1FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF" "FFFFFFFFFFFFFFFFFFFFA51868783BF2F966B7FCC0148F709A5D03BB5" @@ -864,7 +864,7 @@ static void prime_field_tests(void) ABORT; if (!EC_POINT_dbl(group, P, P, ctx)) ABORT; - if (!EC_POINT_is_on_curve(group, P, ctx)) + if (EC_POINT_is_on_curve(group, P, ctx) <= 0) ABORT; if (!EC_POINT_invert(group, Q, ctx)) ABORT; /* P = -2Q */ @@ -1008,7 +1008,7 @@ static void prime_field_tests(void) # define CHAR2_CURVE_TEST_INTERNAL(_name, _p, _a, _b, _x, _y, _y_bit, _order, _cof, _degree, _variable) \ if (!BN_hex2bn(&x, _x)) ABORT; \ if (!EC_POINT_set_compressed_coordinates_GF2m(group, P, x, _y_bit, ctx)) ABORT; \ - if (!EC_POINT_is_on_curve(group, P, ctx)) ABORT; \ + if (EC_POINT_is_on_curve(group, P, ctx) <= 0) ABORT; \ if (!BN_hex2bn(&z, _order)) ABORT; \ if (!BN_hex2bn(&cof, _cof)) ABORT; \ if (!EC_GROUP_set_generator(group, P, z, cof)) ABORT; \ @@ -1026,7 +1026,7 @@ static void prime_field_tests(void) if (!BN_hex2bn(&x, _x)) ABORT; \ if (!BN_hex2bn(&y, _y)) ABORT; \ if (!EC_POINT_set_affine_coordinates_GF2m(group, P, x, y, ctx)) ABORT; \ - if (!EC_POINT_is_on_curve(group, P, ctx)) ABORT; \ + if (EC_POINT_is_on_curve(group, P, ctx) <= 0) ABORT; \ if (!BN_hex2bn(&z, _order)) ABORT; \ if (!BN_hex2bn(&cof, _cof)) ABORT; \ if (!EC_GROUP_set_generator(group, P, z, cof)) ABORT; \ @@ -1157,7 +1157,7 @@ static void char2_field_tests(void) if (!EC_POINT_set_affine_coordinates_GF2m(group, Q, x, y, ctx)) ABORT; # endif - if (!EC_POINT_is_on_curve(group, Q, ctx)) { + if (EC_POINT_is_on_curve(group, Q, ctx) <= 0) { /* Change test based on whether binary point compression is enabled or not. */ # ifdef OPENSSL_EC_BIN_PT_COMP if (!EC_POINT_get_affine_coordinates_GF2m(group, Q, x, y, ctx)) @@ -1378,7 +1378,7 @@ static void char2_field_tests(void) ABORT; if (!EC_POINT_dbl(group, P, P, ctx)) ABORT; - if (!EC_POINT_is_on_curve(group, P, ctx)) + if (EC_POINT_is_on_curve(group, P, ctx) <= 0) ABORT; if (!EC_POINT_invert(group, Q, ctx)) ABORT; /* P = -2Q */ diff --git a/deps/openssl/openssl/crypto/ecdh/Makefile b/deps/openssl/openssl/crypto/ecdh/Makefile index df1b03adb1dd05..1b31ba1f0b3fea 100644 --- a/deps/openssl/openssl/crypto/ecdh/Makefile +++ b/deps/openssl/openssl/crypto/ecdh/Makefile @@ -62,6 +62,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/ecdsa/Makefile b/deps/openssl/openssl/crypto/ecdsa/Makefile index e89e0c010c6b93..4ce00e8f930851 100644 --- a/deps/openssl/openssl/crypto/ecdsa/Makefile +++ b/deps/openssl/openssl/crypto/ecdsa/Makefile @@ -62,6 +62,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/ecdsa/ecdsatest.c b/deps/openssl/openssl/crypto/ecdsa/ecdsatest.c index b2d78f3d55e4a2..0f301f86d9eae9 100644 --- a/deps/openssl/openssl/crypto/ecdsa/ecdsatest.c +++ b/deps/openssl/openssl/crypto/ecdsa/ecdsatest.c @@ -296,8 +296,8 @@ int test_builtin(BIO *out) int nid, ret = 0; /* fill digest values with some random data */ - if (!RAND_pseudo_bytes(digest, 20) || - !RAND_pseudo_bytes(wrong_digest, 20)) { + if (RAND_pseudo_bytes(digest, 20) <= 0 || + RAND_pseudo_bytes(wrong_digest, 20) <= 0) { BIO_printf(out, "ERROR: unable to get random data\n"); goto builtin_err; } diff --git a/deps/openssl/openssl/crypto/engine/Makefile b/deps/openssl/openssl/crypto/engine/Makefile index 2ee6c72362d197..426388e9b1748d 100644 --- a/deps/openssl/openssl/crypto/engine/Makefile +++ b/deps/openssl/openssl/crypto/engine/Makefile @@ -71,6 +71,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/err/Makefile b/deps/openssl/openssl/crypto/err/Makefile index 862b23ba1768b6..b6f3ef1778d169 100644 --- a/deps/openssl/openssl/crypto/err/Makefile +++ b/deps/openssl/openssl/crypto/err/Makefile @@ -61,6 +61,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/evp/Makefile b/deps/openssl/openssl/crypto/evp/Makefile index c9afca7cb2d917..aaaad986e0e856 100644 --- a/deps/openssl/openssl/crypto/evp/Makefile +++ b/deps/openssl/openssl/crypto/evp/Makefile @@ -86,6 +86,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/evp/bio_ok.c b/deps/openssl/openssl/crypto/evp/bio_ok.c index a4550349be1118..5c32e35e17b9cf 100644 --- a/deps/openssl/openssl/crypto/evp/bio_ok.c +++ b/deps/openssl/openssl/crypto/evp/bio_ok.c @@ -491,7 +491,8 @@ static int sig_out(BIO *b) * FIXME: there's absolutely no guarantee this makes any sense at all, * particularly now EVP_MD_CTX has been restructured. */ - RAND_pseudo_bytes(md->md_data, md->digest->md_size); + if (RAND_pseudo_bytes(md->md_data, md->digest->md_size) < 0) + goto berr; memcpy(&(ctx->buf[ctx->buf_len]), md->md_data, md->digest->md_size); longswap(&(ctx->buf[ctx->buf_len]), md->digest->md_size); ctx->buf_len += md->digest->md_size; diff --git a/deps/openssl/openssl/crypto/evp/e_aes.c b/deps/openssl/openssl/crypto/evp/e_aes.c index 8161b26325745e..33cbed87f47319 100644 --- a/deps/openssl/openssl/crypto/evp/e_aes.c +++ b/deps/openssl/openssl/crypto/evp/e_aes.c @@ -50,6 +50,7 @@ #include #ifndef OPENSSL_NO_AES +#include # include # include # include @@ -1227,7 +1228,7 @@ static int aes_gcm_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr) case EVP_CTRL_AEAD_TLS1_AAD: /* Save the AAD for later use */ - if (arg != 13) + if (arg != EVP_AEAD_TLS1_AAD_LEN) return 0; memcpy(c->buf, ptr, arg); gctx->tls_aad_len = arg; @@ -1455,7 +1456,7 @@ static int aes_gcm_tls_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, /* Retrieve tag */ CRYPTO_gcm128_tag(&gctx->gcm, ctx->buf, EVP_GCM_TLS_TAG_LEN); /* If tag mismatch wipe buffer */ - if (memcmp(ctx->buf, in + len, EVP_GCM_TLS_TAG_LEN)) { + if (CRYPTO_memcmp(ctx->buf, in + len, EVP_GCM_TLS_TAG_LEN)) { OPENSSL_cleanse(out, len); goto err; } @@ -1895,7 +1896,7 @@ static int aes_ccm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, !CRYPTO_ccm128_decrypt(ccm, in, out, len)) { unsigned char tag[16]; if (CRYPTO_ccm128_tag(ccm, tag, cctx->M)) { - if (!memcmp(tag, ctx->buf, cctx->M)) + if (!CRYPTO_memcmp(tag, ctx->buf, cctx->M)) rv = len; } } diff --git a/deps/openssl/openssl/crypto/evp/e_aes_cbc_hmac_sha1.c b/deps/openssl/openssl/crypto/evp/e_aes_cbc_hmac_sha1.c index e0127a9bb2e7ac..8330964ee16b00 100644 --- a/deps/openssl/openssl/crypto/evp/e_aes_cbc_hmac_sha1.c +++ b/deps/openssl/openssl/crypto/evp/e_aes_cbc_hmac_sha1.c @@ -94,7 +94,7 @@ typedef struct { defined(_M_AMD64) || defined(_M_X64) || \ defined(__INTEL__) ) -extern unsigned int OPENSSL_ia32cap_P[3]; +extern unsigned int OPENSSL_ia32cap_P[]; # define AESNI_CAPABLE (1<<(57-32)) int aesni_set_encrypt_key(const unsigned char *userKey, int bits, @@ -845,7 +845,12 @@ static int aesni_cbc_hmac_sha1_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, case EVP_CTRL_AEAD_TLS1_AAD: { unsigned char *p = ptr; - unsigned int len = p[arg - 2] << 8 | p[arg - 1]; + unsigned int len; + + if (arg != EVP_AEAD_TLS1_AAD_LEN) + return -1; + + len = p[arg - 2] << 8 | p[arg - 1]; if (ctx->encrypt) { key->payload_length = len; @@ -862,8 +867,6 @@ static int aesni_cbc_hmac_sha1_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, AES_BLOCK_SIZE) & -AES_BLOCK_SIZE) - len); } else { - if (arg > 13) - arg = 13; memcpy(key->aux.tls_aad, ptr, arg); key->payload_length = arg; diff --git a/deps/openssl/openssl/crypto/evp/e_aes_cbc_hmac_sha256.c b/deps/openssl/openssl/crypto/evp/e_aes_cbc_hmac_sha256.c index 30398c7ca43bd7..b1c586e6fd96b8 100644 --- a/deps/openssl/openssl/crypto/evp/e_aes_cbc_hmac_sha256.c +++ b/deps/openssl/openssl/crypto/evp/e_aes_cbc_hmac_sha256.c @@ -94,7 +94,7 @@ typedef struct { defined(_M_AMD64) || defined(_M_X64) || \ defined(__INTEL__) ) -extern unsigned int OPENSSL_ia32cap_P[3]; +extern unsigned int OPENSSL_ia32cap_P[]; # define AESNI_CAPABLE (1<<(57-32)) int aesni_set_encrypt_key(const unsigned char *userKey, int bits, @@ -813,6 +813,11 @@ static int aesni_cbc_hmac_sha256_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, unsigned char *p = ptr; unsigned int len = p[arg - 2] << 8 | p[arg - 1]; + if (arg != EVP_AEAD_TLS1_AAD_LEN) + return -1; + + len = p[arg - 2] << 8 | p[arg - 1]; + if (ctx->encrypt) { key->payload_length = len; if ((key->aux.tls_ver = @@ -828,8 +833,6 @@ static int aesni_cbc_hmac_sha256_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, AES_BLOCK_SIZE) & -AES_BLOCK_SIZE) - len); } else { - if (arg > 13) - arg = 13; memcpy(key->aux.tls_aad, ptr, arg); key->payload_length = arg; diff --git a/deps/openssl/openssl/crypto/evp/e_des3.c b/deps/openssl/openssl/crypto/evp/e_des3.c index 301d93e13dff46..96f272eb8046f6 100644 --- a/deps/openssl/openssl/crypto/evp/e_des3.c +++ b/deps/openssl/openssl/crypto/evp/e_des3.c @@ -447,7 +447,8 @@ static int des_ede3_wrap(EVP_CIPHER_CTX *ctx, unsigned char *out, memcpy(out + inl + 8, sha1tmp, 8); OPENSSL_cleanse(sha1tmp, SHA_DIGEST_LENGTH); /* Generate random IV */ - RAND_bytes(ctx->iv, 8); + if (RAND_bytes(ctx->iv, 8) <= 0) + return -1; memcpy(out, ctx->iv, 8); /* Encrypt everything after IV in place */ des_ede_cbc_cipher(ctx, out + 8, out + 8, inl + 8); diff --git a/deps/openssl/openssl/crypto/evp/e_rc4_hmac_md5.c b/deps/openssl/openssl/crypto/evp/e_rc4_hmac_md5.c index 80735d345adfd0..2da11178294dd4 100644 --- a/deps/openssl/openssl/crypto/evp/e_rc4_hmac_md5.c +++ b/deps/openssl/openssl/crypto/evp/e_rc4_hmac_md5.c @@ -54,6 +54,7 @@ #if !defined(OPENSSL_NO_RC4) && !defined(OPENSSL_NO_MD5) +# include # include # include # include @@ -210,7 +211,7 @@ static int rc4_hmac_md5_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, MD5_Update(&key->md, mac, MD5_DIGEST_LENGTH); MD5_Final(mac, &key->md); - if (memcmp(out + plen, mac, MD5_DIGEST_LENGTH)) + if (CRYPTO_memcmp(out + plen, mac, MD5_DIGEST_LENGTH)) return 0; } else { MD5_Update(&key->md, out + md5_off, len - md5_off); @@ -258,7 +259,12 @@ static int rc4_hmac_md5_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, case EVP_CTRL_AEAD_TLS1_AAD: { unsigned char *p = ptr; - unsigned int len = p[arg - 2] << 8 | p[arg - 1]; + unsigned int len; + + if (arg != EVP_AEAD_TLS1_AAD_LEN) + return -1; + + len = p[arg - 2] << 8 | p[arg - 1]; if (!ctx->encrypt) { len -= MD5_DIGEST_LENGTH; diff --git a/deps/openssl/openssl/crypto/evp/encode.c b/deps/openssl/openssl/crypto/evp/encode.c index d1d8a07c14adc8..c361d1f0126994 100644 --- a/deps/openssl/openssl/crypto/evp/encode.c +++ b/deps/openssl/openssl/crypto/evp/encode.c @@ -137,7 +137,7 @@ void EVP_EncodeUpdate(EVP_ENCODE_CTX *ctx, unsigned char *out, int *outl, unsigned int total = 0; *outl = 0; - if (inl == 0) + if (inl <= 0) return; OPENSSL_assert(ctx->length <= (int)sizeof(ctx->enc_data)); if ((ctx->num + inl) < ctx->length) { @@ -248,7 +248,7 @@ int EVP_DecodeUpdate(EVP_ENCODE_CTX *ctx, unsigned char *out, int *outl, /* We parse the input data */ for (i = 0; i < inl; i++) { - /* If the current line is > 80 characters, scream alot */ + /* If the current line is > 80 characters, scream a lot */ if (ln >= 80) { rv = -1; goto end; diff --git a/deps/openssl/openssl/crypto/evp/evp.h b/deps/openssl/openssl/crypto/evp/evp.h index 47abbac4a24cfe..39ab7937d25672 100644 --- a/deps/openssl/openssl/crypto/evp/evp.h +++ b/deps/openssl/openssl/crypto/evp/evp.h @@ -103,7 +103,6 @@ # define EVP_PKS_RSA 0x0100 # define EVP_PKS_DSA 0x0200 # define EVP_PKS_EC 0x0400 -# define EVP_PKT_EXP 0x1000 /* <= 512 bit key */ # define EVP_PKEY_NONE NID_undef # define EVP_PKEY_RSA NID_rsaEncryption @@ -424,6 +423,9 @@ struct evp_cipher_st { # define EVP_CTRL_TLS1_1_MULTIBLOCK_DECRYPT 0x1b # define EVP_CTRL_TLS1_1_MULTIBLOCK_MAX_BUFSIZE 0x1c +/* RFC 5246 defines additional data to be 13 bytes in length */ +# define EVP_AEAD_TLS1_AAD_LEN 13 + typedef struct { unsigned char *out; const unsigned char *inp; @@ -1121,6 +1123,19 @@ void EVP_PKEY_asn1_set_free(EVP_PKEY_ASN1_METHOD *ameth, void EVP_PKEY_asn1_set_ctrl(EVP_PKEY_ASN1_METHOD *ameth, int (*pkey_ctrl) (EVP_PKEY *pkey, int op, long arg1, void *arg2)); +void EVP_PKEY_asn1_set_item(EVP_PKEY_ASN1_METHOD *ameth, + int (*item_verify) (EVP_MD_CTX *ctx, + const ASN1_ITEM *it, + void *asn, + X509_ALGOR *a, + ASN1_BIT_STRING *sig, + EVP_PKEY *pkey), + int (*item_sign) (EVP_MD_CTX *ctx, + const ASN1_ITEM *it, + void *asn, + X509_ALGOR *alg1, + X509_ALGOR *alg2, + ASN1_BIT_STRING *sig)); # define EVP_PKEY_OP_UNDEFINED 0 # define EVP_PKEY_OP_PARAMGEN (1<<1) diff --git a/deps/openssl/openssl/crypto/evp/p_seal.c b/deps/openssl/openssl/crypto/evp/p_seal.c index caabbf406f40df..ba9dfff2157c1c 100644 --- a/deps/openssl/openssl/crypto/evp/p_seal.c +++ b/deps/openssl/openssl/crypto/evp/p_seal.c @@ -82,8 +82,9 @@ int EVP_SealInit(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *type, return 1; if (EVP_CIPHER_CTX_rand_key(ctx, key) <= 0) return 0; - if (EVP_CIPHER_CTX_iv_length(ctx)) - RAND_pseudo_bytes(iv, EVP_CIPHER_CTX_iv_length(ctx)); + if (EVP_CIPHER_CTX_iv_length(ctx) + && RAND_bytes(iv, EVP_CIPHER_CTX_iv_length(ctx)) <= 0) + return 0; if (!EVP_EncryptInit_ex(ctx, NULL, NULL, key, iv)) return 0; diff --git a/deps/openssl/openssl/crypto/hmac/Makefile b/deps/openssl/openssl/crypto/hmac/Makefile index 0e91709f64ca31..52e39e586075e2 100644 --- a/deps/openssl/openssl/crypto/hmac/Makefile +++ b/deps/openssl/openssl/crypto/hmac/Makefile @@ -61,6 +61,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/hmac/hmac.c b/deps/openssl/openssl/crypto/hmac/hmac.c index 1fc9e2c3fa93ee..15a9a21299157d 100644 --- a/deps/openssl/openssl/crypto/hmac/hmac.c +++ b/deps/openssl/openssl/crypto/hmac/hmac.c @@ -101,8 +101,14 @@ int HMAC_Init_ex(HMAC_CTX *ctx, const void *key, int len, if (md != NULL) { reset = 1; ctx->md = md; - } else + } else if (ctx->md) { md = ctx->md; + } else { + return 0; + } + + if (!ctx->key_init && key == NULL) + return 0; if (key != NULL) { reset = 1; @@ -117,13 +123,15 @@ int HMAC_Init_ex(HMAC_CTX *ctx, const void *key, int len, &ctx->key_length)) goto err; } else { - OPENSSL_assert(len >= 0 && len <= (int)sizeof(ctx->key)); + if (len < 0 || len > (int)sizeof(ctx->key)) + return 0; memcpy(ctx->key, key, len); ctx->key_length = len; } if (ctx->key_length != HMAC_MAX_MD_CBLOCK) memset(&ctx->key[ctx->key_length], 0, HMAC_MAX_MD_CBLOCK - ctx->key_length); + ctx->key_init = 1; } if (reset) { @@ -161,6 +169,9 @@ int HMAC_Update(HMAC_CTX *ctx, const unsigned char *data, size_t len) if (FIPS_mode() && !ctx->i_ctx.engine) return FIPS_hmac_update(ctx, data, len); #endif + if (!ctx->key_init) + return 0; + return EVP_DigestUpdate(&ctx->md_ctx, data, len); } @@ -173,6 +184,9 @@ int HMAC_Final(HMAC_CTX *ctx, unsigned char *md, unsigned int *len) return FIPS_hmac_final(ctx, md, len); #endif + if (!ctx->key_init) + goto err; + if (!EVP_DigestFinal_ex(&ctx->md_ctx, buf, &i)) goto err; if (!EVP_MD_CTX_copy_ex(&ctx->md_ctx, &ctx->o_ctx)) @@ -191,6 +205,8 @@ void HMAC_CTX_init(HMAC_CTX *ctx) EVP_MD_CTX_init(&ctx->i_ctx); EVP_MD_CTX_init(&ctx->o_ctx); EVP_MD_CTX_init(&ctx->md_ctx); + ctx->key_init = 0; + ctx->md = NULL; } int HMAC_CTX_copy(HMAC_CTX *dctx, HMAC_CTX *sctx) @@ -201,8 +217,11 @@ int HMAC_CTX_copy(HMAC_CTX *dctx, HMAC_CTX *sctx) goto err; if (!EVP_MD_CTX_copy(&dctx->md_ctx, &sctx->md_ctx)) goto err; - memcpy(dctx->key, sctx->key, HMAC_MAX_MD_CBLOCK); - dctx->key_length = sctx->key_length; + dctx->key_init = sctx->key_init; + if (sctx->key_init) { + memcpy(dctx->key, sctx->key, HMAC_MAX_MD_CBLOCK); + dctx->key_length = sctx->key_length; + } dctx->md = sctx->md; return 1; err: @@ -242,6 +261,7 @@ unsigned char *HMAC(const EVP_MD *evp_md, const void *key, int key_len, HMAC_CTX_cleanup(&c); return md; err: + HMAC_CTX_cleanup(&c); return NULL; } diff --git a/deps/openssl/openssl/crypto/hmac/hmac.h b/deps/openssl/openssl/crypto/hmac/hmac.h index b8b55cda7d73d9..f8e9f5e4f3c261 100644 --- a/deps/openssl/openssl/crypto/hmac/hmac.h +++ b/deps/openssl/openssl/crypto/hmac/hmac.h @@ -79,6 +79,7 @@ typedef struct hmac_ctx_st { EVP_MD_CTX o_ctx; unsigned int key_length; unsigned char key[HMAC_MAX_MD_CBLOCK]; + int key_init; } HMAC_CTX; # define HMAC_size(e) (EVP_MD_size((e)->md)) diff --git a/deps/openssl/openssl/crypto/hmac/hmactest.c b/deps/openssl/openssl/crypto/hmac/hmactest.c index 3d130a03ed069c..86b6c2529fe29c 100644 --- a/deps/openssl/openssl/crypto/hmac/hmactest.c +++ b/deps/openssl/openssl/crypto/hmac/hmactest.c @@ -85,7 +85,7 @@ static struct test_st { unsigned char data[64]; int data_len; unsigned char *digest; -} test[4] = { +} test[8] = { { "", 0, "More text test vectors to stuff up EBCDIC machines :-)", 54, (unsigned char *)"e9139d1e6ee064ef8cf514fc7dc83e86", @@ -113,10 +113,27 @@ static struct test_st { 0xdd, 0xdd, 0xdd, 0xdd, 0xdd, 0xdd }, 50, (unsigned char *)"56be34521d144c88dbb8c733f0e8b3f6", }, + { + "", 0, "My test data", 12, + (unsigned char *)"61afdecb95429ef494d61fdee15990cabf0826fc" + }, + { + "", 0, "My test data", 12, + (unsigned char *)"2274b195d90ce8e03406f4b526a47e0787a88a65479938f1a5baa3ce0f079776" + }, + { + "123456", 6, "My test data", 12, + (unsigned char *)"bab53058ae861a7f191abe2d0145cbb123776a6369ee3f9d79ce455667e411dd" + }, + { + "12345", 5, "My test data again", 12, + (unsigned char *)"7dbe8c764c068e3bcd6e6b0fbcd5e6fc197b15bb" + } }; # endif -static char *pt(unsigned char *md); +static char *pt(unsigned char *md, unsigned int len); + int main(int argc, char *argv[]) { # ifndef OPENSSL_NO_MD5 @@ -124,6 +141,9 @@ int main(int argc, char *argv[]) char *p; # endif int err = 0; + HMAC_CTX ctx, ctx2; + unsigned char buf[EVP_MAX_MD_SIZE]; + unsigned int len; # ifdef OPENSSL_NO_MD5 printf("test skipped: MD5 disabled\n"); @@ -139,27 +159,167 @@ int main(int argc, char *argv[]) for (i = 0; i < 4; i++) { p = pt(HMAC(EVP_md5(), test[i].key, test[i].key_len, - test[i].data, test[i].data_len, NULL, NULL)); + test[i].data, test[i].data_len, NULL, NULL), + MD5_DIGEST_LENGTH); if (strcmp(p, (char *)test[i].digest) != 0) { - printf("error calculating HMAC on %d entry'\n", i); + printf("Error calculating HMAC on %d entry'\n", i); printf("got %s instead of %s\n", p, test[i].digest); err++; } else printf("test %d ok\n", i); } # endif /* OPENSSL_NO_MD5 */ + +/* test4 */ + HMAC_CTX_init(&ctx); + if (HMAC_Init_ex(&ctx, NULL, 0, NULL, NULL)) { + printf("Should fail to initialise HMAC with empty MD and key (test 4)\n"); + err++; + goto test5; + } + if (HMAC_Update(&ctx, test[4].data, test[4].data_len)) { + printf("Should fail HMAC_Update with ctx not set up (test 4)\n"); + err++; + goto test5; + } + if (HMAC_Init_ex(&ctx, NULL, 0, EVP_sha1(), NULL)) { + printf("Should fail to initialise HMAC with empty key (test 4)\n"); + err++; + goto test5; + } + if (HMAC_Update(&ctx, test[4].data, test[4].data_len)) { + printf("Should fail HMAC_Update with ctx not set up (test 4)\n"); + err++; + goto test5; + } + printf("test 4 ok\n"); +test5: + HMAC_CTX_init(&ctx); + if (HMAC_Init_ex(&ctx, test[4].key, test[4].key_len, NULL, NULL)) { + printf("Should fail to initialise HMAC with empty MD (test 5)\n"); + err++; + goto test6; + } + if (HMAC_Update(&ctx, test[4].data, test[4].data_len)) { + printf("Should fail HMAC_Update with ctx not set up (test 5)\n"); + err++; + goto test6; + } + if (HMAC_Init_ex(&ctx, test[4].key, -1, EVP_sha1(), NULL)) { + printf("Should fail to initialise HMAC with invalid key len(test 5)\n"); + err++; + goto test6; + } + if (!HMAC_Init_ex(&ctx, test[4].key, test[4].key_len, EVP_sha1(), NULL)) { + printf("Failed to initialise HMAC (test 5)\n"); + err++; + goto test6; + } + if (!HMAC_Update(&ctx, test[4].data, test[4].data_len)) { + printf("Error updating HMAC with data (test 5)\n"); + err++; + goto test6; + } + if (!HMAC_Final(&ctx, buf, &len)) { + printf("Error finalising data (test 5)\n"); + err++; + goto test6; + } + p = pt(buf, len); + if (strcmp(p, (char *)test[4].digest) != 0) { + printf("Error calculating interim HMAC on test 5\n"); + printf("got %s instead of %s\n", p, test[4].digest); + err++; + goto test6; + } + if (!HMAC_Init_ex(&ctx, NULL, 0, EVP_sha256(), NULL)) { + printf("Failed to reinitialise HMAC (test 5)\n"); + err++; + goto test6; + } + if (!HMAC_Update(&ctx, test[5].data, test[5].data_len)) { + printf("Error updating HMAC with data (sha256) (test 5)\n"); + err++; + goto test6; + } + if (!HMAC_Final(&ctx, buf, &len)) { + printf("Error finalising data (sha256) (test 5)\n"); + err++; + goto test6; + } + p = pt(buf, len); + if (strcmp(p, (char *)test[5].digest) != 0) { + printf("Error calculating 2nd interim HMAC on test 5\n"); + printf("got %s instead of %s\n", p, test[5].digest); + err++; + goto test6; + } + if (!HMAC_Init_ex(&ctx, test[6].key, test[6].key_len, NULL, NULL)) { + printf("Failed to reinitialise HMAC with key (test 5)\n"); + err++; + goto test6; + } + if (!HMAC_Update(&ctx, test[6].data, test[6].data_len)) { + printf("Error updating HMAC with data (new key) (test 5)\n"); + err++; + goto test6; + } + if (!HMAC_Final(&ctx, buf, &len)) { + printf("Error finalising data (new key) (test 5)\n"); + err++; + goto test6; + } + p = pt(buf, len); + if (strcmp(p, (char *)test[6].digest) != 0) { + printf("error calculating HMAC on test 5\n"); + printf("got %s instead of %s\n", p, test[6].digest); + err++; + } else { + printf("test 5 ok\n"); + } +test6: + HMAC_CTX_init(&ctx); + if (!HMAC_Init_ex(&ctx, test[7].key, test[7].key_len, EVP_sha1(), NULL)) { + printf("Failed to initialise HMAC (test 6)\n"); + err++; + goto end; + } + if (!HMAC_Update(&ctx, test[7].data, test[7].data_len)) { + printf("Error updating HMAC with data (test 6)\n"); + err++; + goto end; + } + if (!HMAC_CTX_copy(&ctx2, &ctx)) { + printf("Failed to copy HMAC_CTX (test 6)\n"); + err++; + goto end; + } + if (!HMAC_Final(&ctx2, buf, &len)) { + printf("Error finalising data (test 6)\n"); + err++; + goto end; + } + p = pt(buf, len); + if (strcmp(p, (char *)test[7].digest) != 0) { + printf("Error calculating HMAC on test 6\n"); + printf("got %s instead of %s\n", p, test[7].digest); + err++; + } else { + printf("test 6 ok\n"); + } +end: EXIT(err); return (0); } # ifndef OPENSSL_NO_MD5 -static char *pt(unsigned char *md) +static char *pt(unsigned char *md, unsigned int len) { - int i; + unsigned int i; static char buf[80]; - for (i = 0; i < MD5_DIGEST_LENGTH; i++) + for (i = 0; i < len; i++) sprintf(&(buf[i * 2]), "%02x", md[i]); return (buf); } diff --git a/deps/openssl/openssl/crypto/idea/Makefile b/deps/openssl/openssl/crypto/idea/Makefile index 8af0acdad97249..3dc23e48dd4269 100644 --- a/deps/openssl/openssl/crypto/idea/Makefile +++ b/deps/openssl/openssl/crypto/idea/Makefile @@ -61,6 +61,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/jpake/Makefile b/deps/openssl/openssl/crypto/jpake/Makefile index 110c49ce0b4a84..5193fd98353300 100644 --- a/deps/openssl/openssl/crypto/jpake/Makefile +++ b/deps/openssl/openssl/crypto/jpake/Makefile @@ -32,6 +32,8 @@ install: chmod 644 $(INSTALL_PREFIX)$(INSTALLTOP)/include/openssl/$$i ); \ done; +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/krb5/Makefile b/deps/openssl/openssl/crypto/krb5/Makefile index 14077390d6923d..8b9a01a296c12e 100644 --- a/deps/openssl/openssl/crypto/krb5/Makefile +++ b/deps/openssl/openssl/crypto/krb5/Makefile @@ -62,6 +62,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/lhash/Makefile b/deps/openssl/openssl/crypto/lhash/Makefile index 82bddac4745030..c7f4365f0ab9bf 100644 --- a/deps/openssl/openssl/crypto/lhash/Makefile +++ b/deps/openssl/openssl/crypto/lhash/Makefile @@ -61,6 +61,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/md2/Makefile b/deps/openssl/openssl/crypto/md2/Makefile index 17f878aeb7d464..b63011085f8940 100644 --- a/deps/openssl/openssl/crypto/md2/Makefile +++ b/deps/openssl/openssl/crypto/md2/Makefile @@ -61,6 +61,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/md4/Makefile b/deps/openssl/openssl/crypto/md4/Makefile index e6f1e4478c048d..3ee436176ba683 100644 --- a/deps/openssl/openssl/crypto/md4/Makefile +++ b/deps/openssl/openssl/crypto/md4/Makefile @@ -62,6 +62,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/md5/Makefile b/deps/openssl/openssl/crypto/md5/Makefile index 390e5f1c7dc7f1..f5240da74cd791 100644 --- a/deps/openssl/openssl/crypto/md5/Makefile +++ b/deps/openssl/openssl/crypto/md5/Makefile @@ -79,6 +79,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/md5/asm/md5-sparcv9.pl b/deps/openssl/openssl/crypto/md5/asm/md5-sparcv9.pl index a4b0157ed15a65..407da3c1b0df24 100644 --- a/deps/openssl/openssl/crypto/md5/asm/md5-sparcv9.pl +++ b/deps/openssl/openssl/crypto/md5/asm/md5-sparcv9.pl @@ -235,7 +235,7 @@ sub R3 { ldd [%o1 + 0x20], %f16 ldd [%o1 + 0x28], %f18 ldd [%o1 + 0x30], %f20 - subcc %o2, 1, %o2 ! done yet? + subcc %o2, 1, %o2 ! done yet? ldd [%o1 + 0x38], %f22 add %o1, 0x40, %o1 prefetch [%o1 + 63], 20 diff --git a/deps/openssl/openssl/crypto/mdc2/Makefile b/deps/openssl/openssl/crypto/mdc2/Makefile index 141553149d5470..c2d0c5b7cd580f 100644 --- a/deps/openssl/openssl/crypto/mdc2/Makefile +++ b/deps/openssl/openssl/crypto/mdc2/Makefile @@ -61,6 +61,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/mem.c b/deps/openssl/openssl/crypto/mem.c index 2ce3e894877df1..fdad49b76ec00a 100644 --- a/deps/openssl/openssl/crypto/mem.c +++ b/deps/openssl/openssl/crypto/mem.c @@ -365,6 +365,9 @@ char *CRYPTO_strdup(const char *str, const char *file, int line) { char *ret = CRYPTO_malloc(strlen(str) + 1, file, line); + if (ret == NULL) + return NULL; + strcpy(ret, str); return ret; } diff --git a/deps/openssl/openssl/crypto/modes/Makefile b/deps/openssl/openssl/crypto/modes/Makefile index cbcbfad4b117ca..a7863d98be2f5c 100644 --- a/deps/openssl/openssl/crypto/modes/Makefile +++ b/deps/openssl/openssl/crypto/modes/Makefile @@ -95,6 +95,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/modes/asm/ghash-armv4.pl b/deps/openssl/openssl/crypto/modes/asm/ghash-armv4.pl index 9b15bbac51a5bd..77fbf34465db48 100644 --- a/deps/openssl/openssl/crypto/modes/asm/ghash-armv4.pl +++ b/deps/openssl/openssl/crypto/modes/asm/ghash-armv4.pl @@ -47,7 +47,7 @@ # # Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software # Polynomial Multiplication on ARM Processors using the NEON Engine. -# +# # http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf # ==================================================================== @@ -450,7 +450,7 @@ sub clmul64x64 { veor $t2,$t2,$t1 @ vshl.i64 $t1,$Xl,#63 veor $t2, $t2, $t1 @ - veor $Xl#hi,$Xl#hi,$t2#lo @ + veor $Xl#hi,$Xl#hi,$t2#lo @ veor $Xh#lo,$Xh#lo,$t2#hi vshr.u64 $t2,$Xl,#1 @ 2nd phase diff --git a/deps/openssl/openssl/crypto/modes/asm/ghash-x86.pl b/deps/openssl/openssl/crypto/modes/asm/ghash-x86.pl index 62b0e65ef9c39e..23a5527b30af3b 100644 --- a/deps/openssl/openssl/crypto/modes/asm/ghash-x86.pl +++ b/deps/openssl/openssl/crypto/modes/asm/ghash-x86.pl @@ -905,7 +905,7 @@ sub reduction_alg9 { # 17/11 times faster than Intel version &psllq ($Xi,57); # &movdqa ($T1,$Xi); # &pslldq ($Xi,8); - &psrldq ($T1,8); # + &psrldq ($T1,8); # &pxor ($Xi,$T2); &pxor ($Xhi,$T1); # @@ -1075,7 +1075,7 @@ sub reduction_alg9 { # 17/11 times faster than Intel version &psllq ($Xi,57); # &movdqa ($T1,$Xi); # &pslldq ($Xi,8); - &psrldq ($T1,8); # + &psrldq ($T1,8); # &pxor ($Xi,$T2); &pxor ($Xhi,$T1); # &pshufd ($T1,$Xhn,0b01001110); diff --git a/deps/openssl/openssl/crypto/modes/asm/ghash-x86_64.pl b/deps/openssl/openssl/crypto/modes/asm/ghash-x86_64.pl index 1b9eb29ed6c028..6e656ca13b8029 100644 --- a/deps/openssl/openssl/crypto/modes/asm/ghash-x86_64.pl +++ b/deps/openssl/openssl/crypto/modes/asm/ghash-x86_64.pl @@ -460,7 +460,7 @@ sub reduction_alg9 { # 17/11 times faster than Intel version psllq \$57,$Xi # movdqa $Xi,$T1 # pslldq \$8,$Xi - psrldq \$8,$T1 # + psrldq \$8,$T1 # pxor $T2,$Xi pxor $T1,$Xhi # @@ -574,7 +574,7 @@ sub reduction_alg9 { # 17/11 times faster than Intel version &clmul64x64_T2 ($Xhi,$Xi,$Hkey,$T2); $code.=<<___ if (0 || (&reduction_alg9($Xhi,$Xi)&&0)); # experimental alternative. special thing about is that there - # no dependency between the two multiplications... + # no dependency between the two multiplications... mov \$`0xE1<<1`,%eax mov \$0xA040608020C0E000,%r10 # ((7..0)·0xE0)&0xff mov \$0x07,%r11d @@ -749,7 +749,7 @@ sub reduction_alg9 { # 17/11 times faster than Intel version movdqa $T2,$T1 # pslldq \$8,$T2 pclmulqdq \$0x00,$Hkey2,$Xln - psrldq \$8,$T1 # + psrldq \$8,$T1 # pxor $T2,$Xi pxor $T1,$Xhi # movdqu 0($inp),$T1 @@ -885,7 +885,7 @@ sub reduction_alg9 { # 17/11 times faster than Intel version psllq \$57,$Xi # movdqa $Xi,$T1 # pslldq \$8,$Xi - psrldq \$8,$T1 # + psrldq \$8,$T1 # pxor $T2,$Xi pshufd \$0b01001110,$Xhn,$Xmn pxor $T1,$Xhi # diff --git a/deps/openssl/openssl/crypto/modes/asm/ghashv8-armx.pl b/deps/openssl/openssl/crypto/modes/asm/ghashv8-armx.pl index 54a1ac4db8d3b2..0b9cd7359fbaa3 100644 --- a/deps/openssl/openssl/crypto/modes/asm/ghashv8-armx.pl +++ b/deps/openssl/openssl/crypto/modes/asm/ghashv8-armx.pl @@ -16,12 +16,17 @@ # other assembly modules. Just like aesv8-armx.pl this module # supports both AArch32 and AArch64 execution modes. # +# July 2014 +# +# Implement 2x aggregated reduction [see ghash-x86.pl for background +# information]. +# # Current performance in cycles per processed byte: # # PMULL[2] 32-bit NEON(*) -# Apple A7 1.76 5.62 -# Cortex-A53 1.45 8.39 -# Cortex-A57 2.22 7.61 +# Apple A7 0.92 5.62 +# Cortex-A53 1.01 8.39 +# Cortex-A57 1.17 7.61 # # (*) presented for reference/comparison purposes; @@ -37,7 +42,7 @@ { my ($Xl,$Xm,$Xh,$IN)=map("q$_",(0..3)); -my ($t0,$t1,$t2,$t3,$H,$Hhl)=map("q$_",(8..14)); +my ($t0,$t1,$t2,$xC2,$H,$Hhl,$H2)=map("q$_",(8..14)); $code=<<___; #include "arm_arch.h" @@ -47,114 +52,277 @@ $code.=".arch armv8-a+crypto\n" if ($flavour =~ /64/); $code.=".fpu neon\n.code 32\n" if ($flavour !~ /64/); +################################################################################ +# void gcm_init_v8(u128 Htable[16],const u64 H[2]); +# +# input: 128-bit H - secret parameter E(K,0^128) +# output: precomputed table filled with degrees of twisted H; +# H is twisted to handle reverse bitness of GHASH; +# only few of 16 slots of Htable[16] are used; +# data is opaque to outside world (which allows to +# optimize the code independently); +# $code.=<<___; .global gcm_init_v8 .type gcm_init_v8,%function .align 4 gcm_init_v8: - vld1.64 {$t1},[x1] @ load H - vmov.i8 $t0,#0xe1 + vld1.64 {$t1},[x1] @ load input H + vmov.i8 $xC2,#0xe1 + vshl.i64 $xC2,$xC2,#57 @ 0xc2.0 vext.8 $IN,$t1,$t1,#8 - vshl.i64 $t0,$t0,#57 - vshr.u64 $t2,$t0,#63 - vext.8 $t0,$t2,$t0,#8 @ t0=0xc2....01 + vshr.u64 $t2,$xC2,#63 vdup.32 $t1,${t1}[1] - vshr.u64 $t3,$IN,#63 + vext.8 $t0,$t2,$xC2,#8 @ t0=0xc2....01 + vshr.u64 $t2,$IN,#63 vshr.s32 $t1,$t1,#31 @ broadcast carry bit - vand $t3,$t3,$t0 + vand $t2,$t2,$t0 vshl.i64 $IN,$IN,#1 - vext.8 $t3,$t3,$t3,#8 + vext.8 $t2,$t2,$t2,#8 vand $t0,$t0,$t1 - vorr $IN,$IN,$t3 @ H<<<=1 - veor $IN,$IN,$t0 @ twisted H - vst1.64 {$IN},[x0] + vorr $IN,$IN,$t2 @ H<<<=1 + veor $H,$IN,$t0 @ twisted H + vst1.64 {$H},[x0],#16 @ store Htable[0] + + @ calculate H^2 + vext.8 $t0,$H,$H,#8 @ Karatsuba pre-processing + vpmull.p64 $Xl,$H,$H + veor $t0,$t0,$H + vpmull2.p64 $Xh,$H,$H + vpmull.p64 $Xm,$t0,$t0 + + vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing + veor $t2,$Xl,$Xh + veor $Xm,$Xm,$t1 + veor $Xm,$Xm,$t2 + vpmull.p64 $t2,$Xl,$xC2 @ 1st phase + + vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result + vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl + veor $Xl,$Xm,$t2 + + vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase + vpmull.p64 $Xl,$Xl,$xC2 + veor $t2,$t2,$Xh + veor $H2,$Xl,$t2 + + vext.8 $t1,$H2,$H2,#8 @ Karatsuba pre-processing + veor $t1,$t1,$H2 + vext.8 $Hhl,$t0,$t1,#8 @ pack Karatsuba pre-processed + vst1.64 {$Hhl-$H2},[x0] @ store Htable[1..2] ret .size gcm_init_v8,.-gcm_init_v8 - +___ +################################################################################ +# void gcm_gmult_v8(u64 Xi[2],const u128 Htable[16]); +# +# input: Xi - current hash value; +# Htable - table precomputed in gcm_init_v8; +# output: Xi - next hash value Xi; +# +$code.=<<___; .global gcm_gmult_v8 .type gcm_gmult_v8,%function .align 4 gcm_gmult_v8: vld1.64 {$t1},[$Xi] @ load Xi - vmov.i8 $t3,#0xe1 - vld1.64 {$H},[$Htbl] @ load twisted H - vshl.u64 $t3,$t3,#57 + vmov.i8 $xC2,#0xe1 + vld1.64 {$H-$Hhl},[$Htbl] @ load twisted H, ... + vshl.u64 $xC2,$xC2,#57 #ifndef __ARMEB__ vrev64.8 $t1,$t1 #endif - vext.8 $Hhl,$H,$H,#8 - mov $len,#0 vext.8 $IN,$t1,$t1,#8 - mov $inc,#0 - veor $Hhl,$Hhl,$H @ Karatsuba pre-processing - mov $inp,$Xi - b .Lgmult_v8 -.size gcm_gmult_v8,.-gcm_gmult_v8 + vpmull.p64 $Xl,$H,$IN @ H.lo·Xi.lo + veor $t1,$t1,$IN @ Karatsuba pre-processing + vpmull2.p64 $Xh,$H,$IN @ H.hi·Xi.hi + vpmull.p64 $Xm,$Hhl,$t1 @ (H.lo+H.hi)·(Xi.lo+Xi.hi) + + vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing + veor $t2,$Xl,$Xh + veor $Xm,$Xm,$t1 + veor $Xm,$Xm,$t2 + vpmull.p64 $t2,$Xl,$xC2 @ 1st phase of reduction + + vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result + vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl + veor $Xl,$Xm,$t2 + + vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction + vpmull.p64 $Xl,$Xl,$xC2 + veor $t2,$t2,$Xh + veor $Xl,$Xl,$t2 + +#ifndef __ARMEB__ + vrev64.8 $Xl,$Xl +#endif + vext.8 $Xl,$Xl,$Xl,#8 + vst1.64 {$Xl},[$Xi] @ write out Xi + + ret +.size gcm_gmult_v8,.-gcm_gmult_v8 +___ +################################################################################ +# void gcm_ghash_v8(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len); +# +# input: table precomputed in gcm_init_v8; +# current hash value Xi; +# pointer to input data; +# length of input data in bytes, but divisible by block size; +# output: next hash value Xi; +# +$code.=<<___; .global gcm_ghash_v8 .type gcm_ghash_v8,%function .align 4 gcm_ghash_v8: +___ +$code.=<<___ if ($flavour !~ /64/); + vstmdb sp!,{d8-d15} @ 32-bit ABI says so +___ +$code.=<<___; vld1.64 {$Xl},[$Xi] @ load [rotated] Xi - subs $len,$len,#16 - vmov.i8 $t3,#0xe1 - mov $inc,#16 - vld1.64 {$H},[$Htbl] @ load twisted H - cclr $inc,eq - vext.8 $Xl,$Xl,$Xl,#8 - vshl.u64 $t3,$t3,#57 - vld1.64 {$t1},[$inp],$inc @ load [rotated] inp - vext.8 $Hhl,$H,$H,#8 + @ "[rotated]" means that + @ loaded value would have + @ to be rotated in order to + @ make it appear as in + @ alorithm specification + subs $len,$len,#32 @ see if $len is 32 or larger + mov $inc,#16 @ $inc is used as post- + @ increment for input pointer; + @ as loop is modulo-scheduled + @ $inc is zeroed just in time + @ to preclude oversteping + @ inp[len], which means that + @ last block[s] are actually + @ loaded twice, but last + @ copy is not processed + vld1.64 {$H-$Hhl},[$Htbl],#32 @ load twisted H, ..., H^2 + vmov.i8 $xC2,#0xe1 + vld1.64 {$H2},[$Htbl] + cclr $inc,eq @ is it time to zero $inc? + vext.8 $Xl,$Xl,$Xl,#8 @ rotate Xi + vld1.64 {$t0},[$inp],#16 @ load [rotated] I[0] + vshl.u64 $xC2,$xC2,#57 @ compose 0xc2.0 constant #ifndef __ARMEB__ + vrev64.8 $t0,$t0 vrev64.8 $Xl,$Xl +#endif + vext.8 $IN,$t0,$t0,#8 @ rotate I[0] + b.lo .Lodd_tail_v8 @ $len was less than 32 +___ +{ my ($Xln,$Xmn,$Xhn,$In) = map("q$_",(4..7)); + ####### + # Xi+2 =[H*(Ii+1 + Xi+1)] mod P = + # [(H*Ii+1) + (H*Xi+1)] mod P = + # [(H*Ii+1) + H^2*(Ii+Xi)] mod P + # +$code.=<<___; + vld1.64 {$t1},[$inp],$inc @ load [rotated] I[1] +#ifndef __ARMEB__ vrev64.8 $t1,$t1 #endif - veor $Hhl,$Hhl,$H @ Karatsuba pre-processing - vext.8 $IN,$t1,$t1,#8 - b .Loop_v8 + vext.8 $In,$t1,$t1,#8 + veor $IN,$IN,$Xl @ I[i]^=Xi + vpmull.p64 $Xln,$H,$In @ H·Ii+1 + veor $t1,$t1,$In @ Karatsuba pre-processing + vpmull2.p64 $Xhn,$H,$In + b .Loop_mod2x_v8 .align 4 -.Loop_v8: +.Loop_mod2x_v8: + vext.8 $t2,$IN,$IN,#8 + subs $len,$len,#32 @ is there more data? + vpmull.p64 $Xl,$H2,$IN @ H^2.lo·Xi.lo + cclr $inc,lo @ is it time to zero $inc? + + vpmull.p64 $Xmn,$Hhl,$t1 + veor $t2,$t2,$IN @ Karatsuba pre-processing + vpmull2.p64 $Xh,$H2,$IN @ H^2.hi·Xi.hi + veor $Xl,$Xl,$Xln @ accumulate + vpmull2.p64 $Xm,$Hhl,$t2 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi) + vld1.64 {$t0},[$inp],$inc @ load [rotated] I[i+2] + + veor $Xh,$Xh,$Xhn + cclr $inc,eq @ is it time to zero $inc? + veor $Xm,$Xm,$Xmn + + vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing + veor $t2,$Xl,$Xh + veor $Xm,$Xm,$t1 + vld1.64 {$t1},[$inp],$inc @ load [rotated] I[i+3] +#ifndef __ARMEB__ + vrev64.8 $t0,$t0 +#endif + veor $Xm,$Xm,$t2 + vpmull.p64 $t2,$Xl,$xC2 @ 1st phase of reduction + +#ifndef __ARMEB__ + vrev64.8 $t1,$t1 +#endif + vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result + vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl + vext.8 $In,$t1,$t1,#8 + vext.8 $IN,$t0,$t0,#8 + veor $Xl,$Xm,$t2 + vpmull.p64 $Xln,$H,$In @ H·Ii+1 + veor $IN,$IN,$Xh @ accumulate $IN early + + vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction + vpmull.p64 $Xl,$Xl,$xC2 + veor $IN,$IN,$t2 + veor $t1,$t1,$In @ Karatsuba pre-processing + veor $IN,$IN,$Xl + vpmull2.p64 $Xhn,$H,$In + b.hs .Loop_mod2x_v8 @ there was at least 32 more bytes + + veor $Xh,$Xh,$t2 + vext.8 $IN,$t0,$t0,#8 @ re-construct $IN + adds $len,$len,#32 @ re-construct $len + veor $Xl,$Xl,$Xh @ re-construct $Xl + b.eq .Ldone_v8 @ is $len zero? +___ +} +$code.=<<___; +.Lodd_tail_v8: vext.8 $t2,$Xl,$Xl,#8 veor $IN,$IN,$Xl @ inp^=Xi - veor $t1,$t1,$t2 @ $t1 is rotated inp^Xi + veor $t1,$t0,$t2 @ $t1 is rotated inp^Xi -.Lgmult_v8: vpmull.p64 $Xl,$H,$IN @ H.lo·Xi.lo veor $t1,$t1,$IN @ Karatsuba pre-processing vpmull2.p64 $Xh,$H,$IN @ H.hi·Xi.hi - subs $len,$len,#16 vpmull.p64 $Xm,$Hhl,$t1 @ (H.lo+H.hi)·(Xi.lo+Xi.hi) - cclr $inc,eq vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing veor $t2,$Xl,$Xh veor $Xm,$Xm,$t1 - vld1.64 {$t1},[$inp],$inc @ load [rotated] inp veor $Xm,$Xm,$t2 - vpmull.p64 $t2,$Xl,$t3 @ 1st phase + vpmull.p64 $t2,$Xl,$xC2 @ 1st phase of reduction vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl -#ifndef __ARMEB__ - vrev64.8 $t1,$t1 -#endif veor $Xl,$Xm,$t2 - vext.8 $IN,$t1,$t1,#8 - vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase - vpmull.p64 $Xl,$Xl,$t3 + vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction + vpmull.p64 $Xl,$Xl,$xC2 veor $t2,$t2,$Xh veor $Xl,$Xl,$t2 - b.hs .Loop_v8 +.Ldone_v8: #ifndef __ARMEB__ vrev64.8 $Xl,$Xl #endif vext.8 $Xl,$Xl,$Xl,#8 vst1.64 {$Xl},[$Xi] @ write out Xi +___ +$code.=<<___ if ($flavour !~ /64/); + vldmia sp!,{d8-d15} @ 32-bit ABI says so +___ +$code.=<<___; ret .size gcm_ghash_v8,.-gcm_ghash_v8 ___ @@ -222,7 +390,7 @@ foreach(split("\n",$code)) { s/\b[wx]([0-9]+)\b/r$1/go; # new->old registers s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go; # new->old registers - s/\/\/\s?/@ /o; # new->old style commentary + s/\/\/\s?/@ /o; # new->old style commentary # fix up remainig new-style suffixes s/\],#[0-9]+/]!/o; @@ -234,7 +402,7 @@ s/^(\s+)b\./$1b/o or s/^(\s+)ret/$1bx\tlr/o; - print $_,"\n"; + print $_,"\n"; } } diff --git a/deps/openssl/openssl/crypto/modes/gcm128.c b/deps/openssl/openssl/crypto/modes/gcm128.c index 24a84a7ae7ec14..e299131c1382ce 100644 --- a/deps/openssl/openssl/crypto/modes/gcm128.c +++ b/deps/openssl/openssl/crypto/modes/gcm128.c @@ -694,7 +694,7 @@ static void gcm_gmult_1bit(u64 Xi[2], const u64 H[2]) defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64)) # define GHASH_ASM_X86_OR_64 # define GCM_FUNCREF_4BIT -extern unsigned int OPENSSL_ia32cap_P[2]; +extern unsigned int OPENSSL_ia32cap_P[]; void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]); void gcm_gmult_clmul(u64 Xi[2], const u128 Htable[16]); @@ -1704,7 +1704,7 @@ int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag, ctx->Xi.u[1] ^= ctx->EK0.u[1]; if (tag && len <= sizeof(ctx->Xi)) - return memcmp(ctx->Xi.c, tag, len); + return CRYPTO_memcmp(ctx->Xi.c, tag, len); else return -1; } diff --git a/deps/openssl/openssl/crypto/modes/modes_lcl.h b/deps/openssl/openssl/crypto/modes/modes_lcl.h index 900f54ca2b87ef..fe14ec7002f0e0 100644 --- a/deps/openssl/openssl/crypto/modes/modes_lcl.h +++ b/deps/openssl/openssl/crypto/modes/modes_lcl.h @@ -38,36 +38,36 @@ typedef unsigned char u8; #if !defined(PEDANTIC) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) # if defined(__GNUC__) && __GNUC__>=2 # if defined(__x86_64) || defined(__x86_64__) -# define BSWAP8(x) ({ u64 ret=(x); \ +# define BSWAP8(x) ({ u64 ret_=(x); \ asm ("bswapq %0" \ - : "+r"(ret)); ret; }) -# define BSWAP4(x) ({ u32 ret=(x); \ + : "+r"(ret_)); ret_; }) +# define BSWAP4(x) ({ u32 ret_=(x); \ asm ("bswapl %0" \ - : "+r"(ret)); ret; }) + : "+r"(ret_)); ret_; }) # elif (defined(__i386) || defined(__i386__)) && !defined(I386_ONLY) -# define BSWAP8(x) ({ u32 lo=(u64)(x)>>32,hi=(x); \ +# define BSWAP8(x) ({ u32 lo_=(u64)(x)>>32,hi_=(x); \ asm ("bswapl %0; bswapl %1" \ - : "+r"(hi),"+r"(lo)); \ - (u64)hi<<32|lo; }) -# define BSWAP4(x) ({ u32 ret=(x); \ + : "+r"(hi_),"+r"(lo_)); \ + (u64)hi_<<32|lo_; }) +# define BSWAP4(x) ({ u32 ret_=(x); \ asm ("bswapl %0" \ - : "+r"(ret)); ret; }) + : "+r"(ret_)); ret_; }) # elif defined(__aarch64__) -# define BSWAP8(x) ({ u64 ret; \ +# define BSWAP8(x) ({ u64 ret_; \ asm ("rev %0,%1" \ - : "=r"(ret) : "r"(x)); ret; }) -# define BSWAP4(x) ({ u32 ret; \ + : "=r"(ret_) : "r"(x)); ret_; }) +# define BSWAP4(x) ({ u32 ret_; \ asm ("rev %w0,%w1" \ - : "=r"(ret) : "r"(x)); ret; }) + : "=r"(ret_) : "r"(x)); ret_; }) # elif (defined(__arm__) || defined(__arm)) && !defined(STRICT_ALIGNMENT) -# define BSWAP8(x) ({ u32 lo=(u64)(x)>>32,hi=(x); \ +# define BSWAP8(x) ({ u32 lo_=(u64)(x)>>32,hi_=(x); \ asm ("rev %0,%0; rev %1,%1" \ - : "+r"(hi),"+r"(lo)); \ - (u64)hi<<32|lo; }) -# define BSWAP4(x) ({ u32 ret; \ + : "+r"(hi_),"+r"(lo_)); \ + (u64)hi_<<32|lo_; }) +# define BSWAP4(x) ({ u32 ret_; \ asm ("rev %0,%1" \ - : "=r"(ret) : "r"((u32)(x))); \ - ret; }) + : "=r"(ret_) : "r"((u32)(x))); \ + ret_; }) # endif # elif defined(_MSC_VER) # if _MSC_VER>=1300 diff --git a/deps/openssl/openssl/crypto/objects/Makefile b/deps/openssl/openssl/crypto/objects/Makefile index a8aedbd422870c..f93d2f9d26473d 100644 --- a/deps/openssl/openssl/crypto/objects/Makefile +++ b/deps/openssl/openssl/crypto/objects/Makefile @@ -74,6 +74,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: obj_dat.h obj_mac.h obj_xref.h depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/objects/o_names.c b/deps/openssl/openssl/crypto/objects/o_names.c index c6774f4578db41..24859926ace6d9 100644 --- a/deps/openssl/openssl/crypto/objects/o_names.c +++ b/deps/openssl/openssl/crypto/objects/o_names.c @@ -313,7 +313,7 @@ void OBJ_NAME_do_all_sorted(int type, d.names = OPENSSL_malloc(lh_OBJ_NAME_num_items(names_lh) * sizeof *d.names); /* Really should return an error if !d.names...but its a void function! */ - if(d.names) { + if (d.names) { d.n = 0; OBJ_NAME_do_all(type, do_all_sorted_fn, &d); diff --git a/deps/openssl/openssl/crypto/objects/obj_dat.c b/deps/openssl/openssl/crypto/objects/obj_dat.c index 5cd755d77d63bd..aca382a6e9f3c9 100644 --- a/deps/openssl/openssl/crypto/objects/obj_dat.c +++ b/deps/openssl/openssl/crypto/objects/obj_dat.c @@ -400,6 +400,8 @@ static int obj_cmp(const ASN1_OBJECT *const *ap, const unsigned int *bp) j = (a->length - b->length); if (j) return (j); + if (a->length == 0) + return 0; return (memcmp(a->data, b->data, a->length)); } @@ -415,6 +417,9 @@ int OBJ_obj2nid(const ASN1_OBJECT *a) if (a->nid != 0) return (a->nid); + if (a->length == 0) + return NID_undef; + if (added != NULL) { ad.type = ADDED_DATA; ad.obj = (ASN1_OBJECT *)a; /* XXX: ugly but harmless */ diff --git a/deps/openssl/openssl/crypto/objects/objects.README b/deps/openssl/openssl/crypto/objects/objects.README index 4d745508d830d2..cb1d216ce8a7cd 100644 --- a/deps/openssl/openssl/crypto/objects/objects.README +++ b/deps/openssl/openssl/crypto/objects/objects.README @@ -8,9 +8,9 @@ The basic syntax for adding an object is as follows: 1 2 3 4 : shortName : Long Name - If the long name doesn't contain spaces, or no short name - exists, the long name is used as basis for the base name - in C. Otherwise, the short name is used. + If Long Name contains only word characters and hyphen-minus + (0x2D) or full stop (0x2E) then Long Name is used as basis + for the base name in C. Otherwise, the shortName is used. The base name (let's call it 'base') will then be used to create the C macros SN_base, LN_base, NID_base and OBJ_base. @@ -22,7 +22,7 @@ Then there are some extra commands: !Alias foo 1 2 3 4 - This juts makes a name foo for an OID. The C macro + This just makes a name foo for an OID. The C macro OBJ_foo will be created as a result. !Cname foo diff --git a/deps/openssl/openssl/crypto/objects/objects.pl b/deps/openssl/openssl/crypto/objects/objects.pl index d0ed459d3dc424..389dc348373a65 100644 --- a/deps/openssl/openssl/crypto/objects/objects.pl +++ b/deps/openssl/openssl/crypto/objects/objects.pl @@ -67,7 +67,7 @@ $myoid = &process_oid($myoid); } - if ($Cname eq "" && !($myln =~ / /)) + if ($Cname eq "" && ($myln =~ /^[_A-Za-z][\w.-]*$/ )) { $Cname = $myln; $Cname =~ s/\./_/g; diff --git a/deps/openssl/openssl/crypto/objects/objects.txt b/deps/openssl/openssl/crypto/objects/objects.txt index 52a1b9fdb5015e..b57aabb226501b 100644 --- a/deps/openssl/openssl/crypto/objects/objects.txt +++ b/deps/openssl/openssl/crypto/objects/objects.txt @@ -1315,7 +1315,7 @@ ISO-US 10046 2 1 : dhpublicnumber : X9.42 DH 1 3 36 3 3 2 8 1 1 11 : brainpoolP384r1 1 3 36 3 3 2 8 1 1 12 : brainpoolP384t1 1 3 36 3 3 2 8 1 1 13 : brainpoolP512r1 -1 3 36 3 3 2 8 1 1 14 : brainpoolP512t1 +1 3 36 3 3 2 8 1 1 14 : brainpoolP512t1 # ECDH schemes from RFC5753 !Alias x9-63-scheme 1 3 133 16 840 63 0 diff --git a/deps/openssl/openssl/crypto/ocsp/Makefile b/deps/openssl/openssl/crypto/ocsp/Makefile index 60c414cf4da605..96a1b156b9fb90 100644 --- a/deps/openssl/openssl/crypto/ocsp/Makefile +++ b/deps/openssl/openssl/crypto/ocsp/Makefile @@ -64,6 +64,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/ocsp/ocsp_ext.c b/deps/openssl/openssl/crypto/ocsp/ocsp_ext.c index 849cb2f7627645..c19648c7329b3b 100644 --- a/deps/openssl/openssl/crypto/ocsp/ocsp_ext.c +++ b/deps/openssl/openssl/crypto/ocsp/ocsp_ext.c @@ -361,8 +361,8 @@ static int ocsp_add1_nonce(STACK_OF(X509_EXTENSION) **exts, ASN1_put_object(&tmpval, 0, len, V_ASN1_OCTET_STRING, V_ASN1_UNIVERSAL); if (val) memcpy(tmpval, val, len); - else - RAND_pseudo_bytes(tmpval, len); + else if (RAND_pseudo_bytes(tmpval, len) < 0) + goto err; if (!X509V3_add1_i2d(exts, NID_id_pkix_OCSP_Nonce, &os, 0, X509V3_ADD_REPLACE)) goto err; diff --git a/deps/openssl/openssl/crypto/ocsp/ocsp_vfy.c b/deps/openssl/openssl/crypto/ocsp/ocsp_vfy.c index 6c0ccb565dbdf9..d4a257c33bb593 100644 --- a/deps/openssl/openssl/crypto/ocsp/ocsp_vfy.c +++ b/deps/openssl/openssl/crypto/ocsp/ocsp_vfy.c @@ -83,6 +83,7 @@ int OCSP_basic_verify(OCSP_BASICRESP *bs, STACK_OF(X509) *certs, { X509 *signer, *x; STACK_OF(X509) *chain = NULL; + STACK_OF(X509) *untrusted = NULL; X509_STORE_CTX ctx; int i, ret = 0; ret = ocsp_find_signer(&signer, bs, certs, st, flags); @@ -107,10 +108,20 @@ int OCSP_basic_verify(OCSP_BASICRESP *bs, STACK_OF(X509) *certs, } if (!(flags & OCSP_NOVERIFY)) { int init_res; - if (flags & OCSP_NOCHAIN) - init_res = X509_STORE_CTX_init(&ctx, st, signer, NULL); - else - init_res = X509_STORE_CTX_init(&ctx, st, signer, bs->certs); + if (flags & OCSP_NOCHAIN) { + untrusted = NULL; + } else if (bs->certs && certs) { + untrusted = sk_X509_dup(bs->certs); + for (i = 0; i < sk_X509_num(certs); i++) { + if (!sk_X509_push(untrusted, sk_X509_value(certs, i))) { + OCSPerr(OCSP_F_OCSP_BASIC_VERIFY, ERR_R_MALLOC_FAILURE); + goto end; + } + } + } else { + untrusted = bs->certs; + } + init_res = X509_STORE_CTX_init(&ctx, st, signer, untrusted); if (!init_res) { ret = -1; OCSPerr(OCSP_F_OCSP_BASIC_VERIFY, ERR_R_X509_LIB); @@ -161,6 +172,8 @@ int OCSP_basic_verify(OCSP_BASICRESP *bs, STACK_OF(X509) *certs, end: if (chain) sk_X509_pop_free(chain, X509_free); + if (bs->certs && certs) + sk_X509_free(untrusted); return ret; } diff --git a/deps/openssl/openssl/crypto/opensslv.h b/deps/openssl/openssl/crypto/opensslv.h index 4f20b97a8f12f1..e6f991cfb82c73 100644 --- a/deps/openssl/openssl/crypto/opensslv.h +++ b/deps/openssl/openssl/crypto/opensslv.h @@ -30,11 +30,11 @@ extern "C" { * (Prior to 0.9.5a beta1, a different scheme was used: MMNNFFRBB for * major minor fix final patch/beta) */ -# define OPENSSL_VERSION_NUMBER 0x1000201fL +# define OPENSSL_VERSION_NUMBER 0x1000202fL # ifdef OPENSSL_FIPS -# define OPENSSL_VERSION_TEXT "OpenSSL 1.0.2a-fips 19 Mar 2015" +# define OPENSSL_VERSION_TEXT "OpenSSL 1.0.2b-fips 11 Jun 2015" # else -# define OPENSSL_VERSION_TEXT "OpenSSL 1.0.2a 19 Mar 2015" +# define OPENSSL_VERSION_TEXT "OpenSSL 1.0.2b 11 Jun 2015" # endif # define OPENSSL_VERSION_PTEXT " part of " OPENSSL_VERSION_TEXT diff --git a/deps/openssl/openssl/crypto/pem/Makefile b/deps/openssl/openssl/crypto/pem/Makefile index 7691f83f6e1d40..65de60e2a12ea1 100644 --- a/deps/openssl/openssl/crypto/pem/Makefile +++ b/deps/openssl/openssl/crypto/pem/Makefile @@ -64,6 +64,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/pem/pem_pk8.c b/deps/openssl/openssl/crypto/pem/pem_pk8.c index b98c76c4a9914d..5747c7366e3d0a 100644 --- a/deps/openssl/openssl/crypto/pem/pem_pk8.c +++ b/deps/openssl/openssl/crypto/pem/pem_pk8.c @@ -138,6 +138,8 @@ static int do_pk8pkey(BIO *bp, EVP_PKEY *x, int isder, int nid, if (kstr == buf) OPENSSL_cleanse(buf, klen); PKCS8_PRIV_KEY_INFO_free(p8inf); + if (p8 == NULL) + return 0; if (isder) ret = i2d_PKCS8_bio(bp, p8); else diff --git a/deps/openssl/openssl/crypto/perlasm/sparcv9_modes.pl b/deps/openssl/openssl/crypto/perlasm/sparcv9_modes.pl index 3bf12508a2b442..eb267a57ed8151 100644 --- a/deps/openssl/openssl/crypto/perlasm/sparcv9_modes.pl +++ b/deps/openssl/openssl/crypto/perlasm/sparcv9_modes.pl @@ -105,7 +105,7 @@ sub alg_cbc_encrypt_implement { brnz,pn $ooff, 2f sub $len, 1, $len - + std %f0, [$out + 0] std %f2, [$out + 8] brnz,pt $len, .L${bits}_cbc_enc_loop @@ -212,7 +212,7 @@ sub alg_cbc_encrypt_implement { call _${alg}${bits}_encrypt_1x add $inp, 16, $inp sub $len, 1, $len - + stda %f0, [$out]0xe2 ! ASI_BLK_INIT, T4-specific add $out, 8, $out stda %f2, [$out]0xe2 ! ASI_BLK_INIT, T4-specific @@ -326,7 +326,7 @@ sub alg_cbc_decrypt_implement { brnz,pn $ooff, 2f sub $len, 1, $len - + std %f0, [$out + 0] std %f2, [$out + 8] brnz,pt $len, .L${bits}_cbc_dec_loop2x @@ -432,7 +432,7 @@ sub alg_cbc_decrypt_implement { brnz,pn $ooff, 2f sub $len, 2, $len - + std %f0, [$out + 0] std %f2, [$out + 8] std %f4, [$out + 16] @@ -688,7 +688,7 @@ sub alg_ctr32_implement { brnz,pn $ooff, 2f sub $len, 1, $len - + std %f0, [$out + 0] std %f2, [$out + 8] brnz,pt $len, .L${bits}_ctr32_loop2x @@ -777,7 +777,7 @@ sub alg_ctr32_implement { brnz,pn $ooff, 2f sub $len, 2, $len - + std %f0, [$out + 0] std %f2, [$out + 8] std %f4, [$out + 16] @@ -1009,7 +1009,7 @@ sub alg_xts_implement { brnz,pn $ooff, 2f sub $len, 1, $len - + std %f0, [$out + 0] std %f2, [$out + 8] brnz,pt $len, .L${bits}_xts_${dir}loop2x @@ -1120,7 +1120,7 @@ sub alg_xts_implement { brnz,pn $ooff, 2f sub $len, 2, $len - + std %f0, [$out + 0] std %f2, [$out + 8] std %f4, [$out + 16] diff --git a/deps/openssl/openssl/crypto/perlasm/x86_64-xlate.pl b/deps/openssl/openssl/crypto/perlasm/x86_64-xlate.pl index 75d5fe4dc40d3f..9c70b8c2c6e9d7 100755 --- a/deps/openssl/openssl/crypto/perlasm/x86_64-xlate.pl +++ b/deps/openssl/openssl/crypto/perlasm/x86_64-xlate.pl @@ -272,7 +272,7 @@ } else { %szmap = ( b=>"BYTE$PTR", w=>"WORD$PTR", l=>"DWORD$PTR", d=>"DWORD$PTR", - q=>"QWORD$PTR", o=>"OWORD$PTR", + q=>"QWORD$PTR", o=>"OWORD$PTR", x=>"XMMWORD$PTR", y=>"YMMWORD$PTR", z=>"ZMMWORD$PTR" ); $self->{label} =~ s/\./\$/g; diff --git a/deps/openssl/openssl/crypto/pkcs12/Makefile b/deps/openssl/openssl/crypto/pkcs12/Makefile index 3a7498fe7ad99a..be5f8c5d21dfbd 100644 --- a/deps/openssl/openssl/crypto/pkcs12/Makefile +++ b/deps/openssl/openssl/crypto/pkcs12/Makefile @@ -67,6 +67,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/pkcs12/p12_mutl.c b/deps/openssl/openssl/crypto/pkcs12/p12_mutl.c index 256b210cceedfe..5ab4bf290e142e 100644 --- a/deps/openssl/openssl/crypto/pkcs12/p12_mutl.c +++ b/deps/openssl/openssl/crypto/pkcs12/p12_mutl.c @@ -60,6 +60,7 @@ #ifndef OPENSSL_NO_HMAC # include # include "cryptlib.h" +# include # include # include # include @@ -123,7 +124,7 @@ int PKCS12_verify_mac(PKCS12 *p12, const char *pass, int passlen) return 0; } if ((maclen != (unsigned int)p12->mac->dinfo->digest->length) - || memcmp(mac, p12->mac->dinfo->digest->data, maclen)) + || CRYPTO_memcmp(mac, p12->mac->dinfo->digest->data, maclen)) return 0; return 1; } diff --git a/deps/openssl/openssl/crypto/pkcs7/Makefile b/deps/openssl/openssl/crypto/pkcs7/Makefile index effe05fc043d3c..decf5e0203e86e 100644 --- a/deps/openssl/openssl/crypto/pkcs7/Makefile +++ b/deps/openssl/openssl/crypto/pkcs7/Makefile @@ -68,6 +68,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/pkcs7/pk7_doit.c b/deps/openssl/openssl/crypto/pkcs7/pk7_doit.c index 31a1b983f1d9d1..c8d7db01bd730c 100644 --- a/deps/openssl/openssl/crypto/pkcs7/pk7_doit.c +++ b/deps/openssl/openssl/crypto/pkcs7/pk7_doit.c @@ -445,6 +445,12 @@ BIO *PKCS7_dataDecode(PKCS7 *p7, EVP_PKEY *pkey, BIO *in_bio, X509 *pcert) switch (i) { case NID_pkcs7_signed: + /* + * p7->d.sign->contents is a PKCS7 structure consisting of a contentType + * field and optional content. + * data_body is NULL if that structure has no (=detached) content + * or if the contentType is wrong (i.e., not "data"). + */ data_body = PKCS7_get_octet_string(p7->d.sign->contents); if (!PKCS7_is_detached(p7) && data_body == NULL) { PKCS7err(PKCS7_F_PKCS7_DATADECODE, @@ -456,6 +462,7 @@ BIO *PKCS7_dataDecode(PKCS7 *p7, EVP_PKEY *pkey, BIO *in_bio, X509 *pcert) case NID_pkcs7_signedAndEnveloped: rsk = p7->d.signed_and_enveloped->recipientinfo; md_sk = p7->d.signed_and_enveloped->md_algs; + /* data_body is NULL if the optional EncryptedContent is missing. */ data_body = p7->d.signed_and_enveloped->enc_data->enc_data; enc_alg = p7->d.signed_and_enveloped->enc_data->algorithm; evp_cipher = EVP_get_cipherbyobj(enc_alg->algorithm); @@ -468,6 +475,7 @@ BIO *PKCS7_dataDecode(PKCS7 *p7, EVP_PKEY *pkey, BIO *in_bio, X509 *pcert) case NID_pkcs7_enveloped: rsk = p7->d.enveloped->recipientinfo; enc_alg = p7->d.enveloped->enc_data->algorithm; + /* data_body is NULL if the optional EncryptedContent is missing. */ data_body = p7->d.enveloped->enc_data->enc_data; evp_cipher = EVP_get_cipherbyobj(enc_alg->algorithm); if (evp_cipher == NULL) { @@ -481,6 +489,12 @@ BIO *PKCS7_dataDecode(PKCS7 *p7, EVP_PKEY *pkey, BIO *in_bio, X509 *pcert) goto err; } + /* Detached content must be supplied via in_bio instead. */ + if (data_body == NULL && in_bio == NULL) { + PKCS7err(PKCS7_F_PKCS7_DATADECODE, PKCS7_R_NO_CONTENT); + goto err; + } + /* We will be checking the signature */ if (md_sk != NULL) { for (i = 0; i < sk_X509_ALGOR_num(md_sk); i++) { @@ -623,7 +637,7 @@ BIO *PKCS7_dataDecode(PKCS7 *p7, EVP_PKEY *pkey, BIO *in_bio, X509 *pcert) etmp = NULL; } #if 1 - if (PKCS7_is_detached(p7) || (in_bio != NULL)) { + if (in_bio != NULL) { bio = in_bio; } else { # if 0 diff --git a/deps/openssl/openssl/crypto/pqueue/Makefile b/deps/openssl/openssl/crypto/pqueue/Makefile index fb36a0c876e3c1..a59b5a9395b224 100644 --- a/deps/openssl/openssl/crypto/pqueue/Makefile +++ b/deps/openssl/openssl/crypto/pqueue/Makefile @@ -61,6 +61,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/rand/Makefile b/deps/openssl/openssl/crypto/rand/Makefile index 27694aa6649893..df44369a082377 100644 --- a/deps/openssl/openssl/crypto/rand/Makefile +++ b/deps/openssl/openssl/crypto/rand/Makefile @@ -63,6 +63,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/rand/rand_os2.c b/deps/openssl/openssl/crypto/rand/rand_os2.c index 02148d5bf9412c..706ab1e8172b16 100644 --- a/deps/openssl/openssl/crypto/rand/rand_os2.c +++ b/deps/openssl/openssl/crypto/rand/rand_os2.c @@ -149,7 +149,7 @@ int RAND_poll(void) if (DosQuerySysState) { char *buffer = OPENSSL_malloc(256 * 1024); - if(!buffer) + if (!buffer) return 0; if (DosQuerySysState(0x1F, 0, 0, 0, buffer, 256 * 1024) == 0) { diff --git a/deps/openssl/openssl/crypto/rc2/Makefile b/deps/openssl/openssl/crypto/rc2/Makefile index 8a9d49ab5eb8b2..b3727a4a6d4755 100644 --- a/deps/openssl/openssl/crypto/rc2/Makefile +++ b/deps/openssl/openssl/crypto/rc2/Makefile @@ -61,6 +61,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/rc4/Makefile b/deps/openssl/openssl/crypto/rc4/Makefile index 76860aeb4bf369..7434ff737e6613 100644 --- a/deps/openssl/openssl/crypto/rc4/Makefile +++ b/deps/openssl/openssl/crypto/rc4/Makefile @@ -89,6 +89,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/rc5/Makefile b/deps/openssl/openssl/crypto/rc5/Makefile index 8a8b00eb89e603..6ca0037c681b84 100644 --- a/deps/openssl/openssl/crypto/rc5/Makefile +++ b/deps/openssl/openssl/crypto/rc5/Makefile @@ -69,6 +69,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/ripemd/Makefile b/deps/openssl/openssl/crypto/ripemd/Makefile index 25140b2a73e212..1c3f094bb0882d 100644 --- a/deps/openssl/openssl/crypto/ripemd/Makefile +++ b/deps/openssl/openssl/crypto/ripemd/Makefile @@ -69,6 +69,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/rsa/Makefile b/deps/openssl/openssl/crypto/rsa/Makefile index af487b6004fb3b..e292e84db3c9e7 100644 --- a/deps/openssl/openssl/crypto/rsa/Makefile +++ b/deps/openssl/openssl/crypto/rsa/Makefile @@ -67,6 +67,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/rsa/rsa_pmeth.c b/deps/openssl/openssl/crypto/rsa/rsa_pmeth.c index ddda0ddc4fe5d3..203635595f4cce 100644 --- a/deps/openssl/openssl/crypto/rsa/rsa_pmeth.c +++ b/deps/openssl/openssl/crypto/rsa/rsa_pmeth.c @@ -254,8 +254,14 @@ static int pkey_rsa_sign(EVP_PKEY_CTX *ctx, unsigned char *sig, return ret; ret = sltmp; } else if (rctx->pad_mode == RSA_X931_PADDING) { - if (!setup_tbuf(rctx, ctx)) + if ((size_t)EVP_PKEY_size(ctx->pkey) < tbslen + 1) { + RSAerr(RSA_F_PKEY_RSA_SIGN, RSA_R_KEY_SIZE_TOO_SMALL); + return -1; + } + if (!setup_tbuf(rctx, ctx)) { + RSAerr(RSA_F_PKEY_RSA_SIGN, ERR_R_MALLOC_FAILURE); return -1; + } memcpy(rctx->tbuf, tbs, tbslen); rctx->tbuf[tbslen] = RSA_X931_hash_id(EVP_MD_type(rctx->md)); ret = RSA_private_encrypt(tbslen + 1, rctx->tbuf, diff --git a/deps/openssl/openssl/crypto/seed/Makefile b/deps/openssl/openssl/crypto/seed/Makefile index 4bc55e4916491b..70d3d45a2be816 100644 --- a/deps/openssl/openssl/crypto/seed/Makefile +++ b/deps/openssl/openssl/crypto/seed/Makefile @@ -62,6 +62,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/sha/Makefile b/deps/openssl/openssl/crypto/sha/Makefile index a8c0cf78509d58..de6cdde58a9ca1 100644 --- a/deps/openssl/openssl/crypto/sha/Makefile +++ b/deps/openssl/openssl/crypto/sha/Makefile @@ -124,6 +124,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/sha/asm/sha1-armv8.pl b/deps/openssl/openssl/crypto/sha/asm/sha1-armv8.pl index deb1238d361e45..c04432a5439429 100644 --- a/deps/openssl/openssl/crypto/sha/asm/sha1-armv8.pl +++ b/deps/openssl/openssl/crypto/sha/asm/sha1-armv8.pl @@ -14,10 +14,14 @@ # # hardware-assisted software(*) # Apple A7 2.31 4.13 (+14%) -# Cortex-A53 2.19 8.73 (+108%) +# Cortex-A53 2.24 8.03 (+97%) # Cortex-A57 2.35 7.88 (+74%) +# Denver 2.13 3.97 (+0%)(**) +# X-Gene 8.80 (+200%) # # (*) Software results are presented mostly for reference purposes. +# (**) Keep in mind that Denver relies on binary translation, which +# optimizes compiler output at run-time. $flavour = shift; open STDOUT,">".shift; diff --git a/deps/openssl/openssl/crypto/sha/asm/sha1-mb-x86_64.pl b/deps/openssl/openssl/crypto/sha/asm/sha1-mb-x86_64.pl index a46af6af3dc1c7..a8ee075eaaa0a0 100644 --- a/deps/openssl/openssl/crypto/sha/asm/sha1-mb-x86_64.pl +++ b/deps/openssl/openssl/crypto/sha/asm/sha1-mb-x86_64.pl @@ -87,7 +87,7 @@ if (1) { # Atom-specific optimization aiming to eliminate pshufb with high - # registers [and thus get rid of 48 cycles accumulated penalty] + # registers [and thus get rid of 48 cycles accumulated penalty] @Xi=map("%xmm$_",(0..4)); ($tx,$t0,$t1,$t2,$t3)=map("%xmm$_",(5..9)); @V=($A,$B,$C,$D,$E)=map("%xmm$_",(10..14)); @@ -118,7 +118,7 @@ sub BODY_00_19 { # ... # $i==13: 14,15,15,15, # $i==14: 15 -# +# # Then at $i==15 Xupdate is applied one iteration in advance... $code.=<<___ if ($i==0); movd (@ptr[0]),@Xi[0] @@ -1541,7 +1541,7 @@ sub sha1op38 { my $instr = shift; my %opcodelet = ( "sha1nexte" => 0xc8, - "sha1msg1" => 0xc9, + "sha1msg1" => 0xc9, "sha1msg2" => 0xca ); if (defined($opcodelet{$instr}) && @_[0] =~ /%xmm([0-9]+),\s*%xmm([0-9]+)/) { diff --git a/deps/openssl/openssl/crypto/sha/asm/sha1-sparcv9.pl b/deps/openssl/openssl/crypto/sha/asm/sha1-sparcv9.pl index 719cafd29004f7..b5efcde5c13962 100644 --- a/deps/openssl/openssl/crypto/sha/asm/sha1-sparcv9.pl +++ b/deps/openssl/openssl/crypto/sha/asm/sha1-sparcv9.pl @@ -220,7 +220,7 @@ sub BODY_40_59 { ldd [%o1 + 0x20], %f16 ldd [%o1 + 0x28], %f18 ldd [%o1 + 0x30], %f20 - subcc %o2, 1, %o2 ! done yet? + subcc %o2, 1, %o2 ! done yet? ldd [%o1 + 0x38], %f22 add %o1, 0x40, %o1 prefetch [%o1 + 63], 20 diff --git a/deps/openssl/openssl/crypto/sha/asm/sha1-x86_64.pl b/deps/openssl/openssl/crypto/sha/asm/sha1-x86_64.pl index 628b549817f11b..9bb6b498190fdf 100755 --- a/deps/openssl/openssl/crypto/sha/asm/sha1-x86_64.pl +++ b/deps/openssl/openssl/crypto/sha/asm/sha1-x86_64.pl @@ -254,7 +254,7 @@ sub BODY_40_59 { jz .Lialu ___ $code.=<<___ if ($shaext); - test \$`1<<29`,%r10d # check SHA bit + test \$`1<<29`,%r10d # check SHA bit jnz _shaext_shortcut ___ $code.=<<___ if ($avx>1); @@ -2039,7 +2039,7 @@ sub sha1op38 { my $instr = shift; my %opcodelet = ( "sha1nexte" => 0xc8, - "sha1msg1" => 0xc9, + "sha1msg1" => 0xc9, "sha1msg2" => 0xca ); if (defined($opcodelet{$instr}) && @_[0] =~ /%xmm([0-9]+),\s*%xmm([0-9]+)/) { diff --git a/deps/openssl/openssl/crypto/sha/asm/sha256-586.pl b/deps/openssl/openssl/crypto/sha/asm/sha256-586.pl index f171358a8a4c0f..6462e45ba75bee 100644 --- a/deps/openssl/openssl/crypto/sha/asm/sha256-586.pl +++ b/deps/openssl/openssl/crypto/sha/asm/sha256-586.pl @@ -40,7 +40,7 @@ # # Performance in clock cycles per processed byte (less is better): # -# gcc icc x86 asm(*) SIMD x86_64 asm(**) +# gcc icc x86 asm(*) SIMD x86_64 asm(**) # Pentium 46 57 40/38 - - # PIII 36 33 27/24 - - # P4 41 38 28 - 17.3 @@ -375,7 +375,7 @@ () &xor ($AH[1],"ecx"); # magic &mov (&DWP(8,"esp"),"ecx"); &mov (&DWP(12,"esp"),"ebx"); - &mov ($E,&DWP(16,"esi")); + &mov ($E,&DWP(16,"esi")); &mov ("ebx",&DWP(20,"esi")); &mov ("ecx",&DWP(24,"esi")); &mov ("esi",&DWP(28,"esi")); diff --git a/deps/openssl/openssl/crypto/sha/asm/sha256-armv4.pl b/deps/openssl/openssl/crypto/sha/asm/sha256-armv4.pl index f14c9c3cb5a190..4fee74d832d100 100644 --- a/deps/openssl/openssl/crypto/sha/asm/sha256-armv4.pl +++ b/deps/openssl/openssl/crypto/sha/asm/sha256-armv4.pl @@ -5,6 +5,8 @@ # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. +# +# Permission to use under GPL terms is granted. # ==================================================================== # SHA256 block procedure for ARMv4. May 2007. @@ -151,10 +153,24 @@ sub BODY_16_XX { } $code=<<___; -#include "arm_arch.h" +#ifndef __KERNEL__ +# include "arm_arch.h" +#else +# define __ARM_ARCH__ __LINUX_ARM_ARCH__ +# define __ARM_MAX_ARCH__ 7 +#endif .text +#if __ARM_ARCH__<7 .code 32 +#else +.syntax unified +# ifdef __thumb2__ +.thumb +# else +.code 32 +# endif +#endif .type K256,%object .align 5 @@ -177,7 +193,7 @@ sub BODY_16_XX { .word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .size K256,.-K256 .word 0 @ terminator -#if __ARM_MAX_ARCH__>=7 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) .LOPENSSL_armcap: .word OPENSSL_armcap_P-sha256_block_data_order #endif @@ -186,9 +202,12 @@ sub BODY_16_XX { .global sha256_block_data_order .type sha256_block_data_order,%function sha256_block_data_order: +#if __ARM_ARCH__<7 sub r3,pc,#8 @ sha256_block_data_order - add $len,$inp,$len,lsl#6 @ len to point at the end of inp -#if __ARM_MAX_ARCH__>=7 +#else + adr r3,sha256_block_data_order +#endif +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) ldr r12,.LOPENSSL_armcap ldr r12,[r3,r12] @ OPENSSL_armcap_P tst r12,#ARMV8_SHA256 @@ -196,6 +215,7 @@ sub BODY_16_XX { tst r12,#ARMV7_NEON bne .LNEON #endif + add $len,$inp,$len,lsl#6 @ len to point at the end of inp stmdb sp!,{$ctx,$inp,$len,r4-r11,lr} ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H} sub $Ktbl,r3,#256+32 @ K256 @@ -213,6 +233,9 @@ sub BODY_16_XX { $code.=".Lrounds_16_xx:\n"; for (;$i<32;$i++) { &BODY_16_XX($i,@V); unshift(@V,pop(@V)); } $code.=<<___; +#if __ARM_ARCH__>=7 + ite eq @ Thumb2 thing, sanity check in ARM +#endif ldreq $t3,[sp,#16*4] @ pull ctx bne .Lrounds_16_xx @@ -429,16 +452,19 @@ () .arch armv7-a .fpu neon +.global sha256_block_data_order_neon .type sha256_block_data_order_neon,%function .align 4 sha256_block_data_order_neon: .LNEON: stmdb sp!,{r4-r12,lr} + sub $H,sp,#16*4+16 + adr $Ktbl,K256 + bic $H,$H,#15 @ align for 128-bit stores mov $t2,sp - sub sp,sp,#16*4+16 @ alloca - sub $Ktbl,r3,#256+32 @ K256 - bic sp,sp,#15 @ align for 128-bit stores + mov sp,$H @ alloca + add $len,$inp,$len,lsl#6 @ len to point at the end of inp vld1.8 {@X[0]},[$inp]! vld1.8 {@X[1]},[$inp]! @@ -490,11 +516,13 @@ () ldr $t0,[sp,#72] sub $Ktbl,$Ktbl,#256 @ rewind $Ktbl teq $inp,$t0 + it eq subeq $inp,$inp,#64 @ avoid SEGV vld1.8 {@X[0]},[$inp]! @ load next input block vld1.8 {@X[1]},[$inp]! vld1.8 {@X[2]},[$inp]! vld1.8 {@X[3]},[$inp]! + it ne strne $inp,[sp,#68] mov $Xfer,sp ___ @@ -526,10 +554,12 @@ () str $D,[$t1],#4 stmia $t1,{$E-$H} + ittte ne movne $Xfer,sp ldrne $t1,[sp,#0] eorne $t2,$t2,$t2 ldreq sp,[sp,#76] @ restore original sp + itt ne eorne $t3,$B,$C bne .L_00_48 @@ -548,13 +578,26 @@ () my $Ktbl="r3"; $code.=<<___; -#if __ARM_MAX_ARCH__>=7 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) + +# ifdef __thumb2__ +# define INST(a,b,c,d) .byte c,d|0xc,a,b +# else +# define INST(a,b,c,d) .byte a,b,c,d +# endif + .type sha256_block_data_order_armv8,%function .align 5 sha256_block_data_order_armv8: .LARMv8: vld1.32 {$ABCD,$EFGH},[$ctx] - sub $Ktbl,r3,#sha256_block_data_order-K256 +# ifdef __thumb2__ + adr $Ktbl,.LARMv8 + sub $Ktbl,$Ktbl,#.LARMv8-K256 +# else + adrl $Ktbl,K256 +# endif + add $len,$inp,$len,lsl#6 @ len to point at the end of inp .Loop_v8: vld1.8 {@MSG[0]-@MSG[1]},[$inp]! @@ -607,6 +650,7 @@ () vadd.i32 $ABCD,$ABCD,$ABCD_SAVE vadd.i32 $EFGH,$EFGH,$EFGH_SAVE + it ne bne .Loop_v8 vst1.32 {$ABCD,$EFGH},[$ctx] @@ -619,11 +663,19 @@ () $code.=<<___; .asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by " .align 2 -#if __ARM_MAX_ARCH__>=7 +#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) .comm OPENSSL_armcap_P,4,4 #endif ___ +open SELF,$0; +while() { + next if (/^#!/); + last if (!s/^#/@/ and !/^$/); + print; +} +close SELF; + { my %opcode = ( "sha256h" => 0xf3000c40, "sha256h2" => 0xf3100c40, "sha256su0" => 0xf3ba03c0, "sha256su1" => 0xf3200c40 ); @@ -638,7 +690,7 @@ () # since ARMv7 instructions are always encoded little-endian. # correct solution is to use .inst directive, but older # assemblers don't implement it:-( - sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s", + sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s", $word&0xff,($word>>8)&0xff, ($word>>16)&0xff,($word>>24)&0xff, $mnemonic,$arg; diff --git a/deps/openssl/openssl/crypto/sha/asm/sha256-mb-x86_64.pl b/deps/openssl/openssl/crypto/sha/asm/sha256-mb-x86_64.pl index e3acf09e685d7b..adf2ddccd18b0c 100644 --- a/deps/openssl/openssl/crypto/sha/asm/sha256-mb-x86_64.pl +++ b/deps/openssl/openssl/crypto/sha/asm/sha256-mb-x86_64.pl @@ -28,7 +28,7 @@ # (iii) "this" is for n=8, when we gather twice as much data, result # for n=4 is 20.3+4.44=24.7; # (iv) presented improvement coefficients are asymptotic limits and -# in real-life application are somewhat lower, e.g. for 2KB +# in real-life application are somewhat lower, e.g. for 2KB # fragments they range from 75% to 130% (on Haswell); $flavour = shift; @@ -1528,7 +1528,7 @@ sub sha256op38 { my $instr = shift; my %opcodelet = ( "sha256rnds2" => 0xcb, - "sha256msg1" => 0xcc, + "sha256msg1" => 0xcc, "sha256msg2" => 0xcd ); if (defined($opcodelet{$instr}) && @_[0] =~ /%xmm([0-9]+),\s*%xmm([0-9]+)/) { diff --git a/deps/openssl/openssl/crypto/sha/asm/sha512-586.pl b/deps/openssl/openssl/crypto/sha/asm/sha512-586.pl index a006e49a7ae6c0..e96ec00314a486 100644 --- a/deps/openssl/openssl/crypto/sha/asm/sha512-586.pl +++ b/deps/openssl/openssl/crypto/sha/asm/sha512-586.pl @@ -373,7 +373,7 @@ sub BODY_00_15_x86 { &set_label("16_79_sse2",16); for ($j=0;$j<2;$j++) { # 2x unroll - #&movq ("mm7",&QWP(8*(9+16-1),"esp")); # prefetched in BODY_00_15 + #&movq ("mm7",&QWP(8*(9+16-1),"esp")); # prefetched in BODY_00_15 &movq ("mm5",&QWP(8*(9+16-14),"esp")); &movq ("mm1","mm7"); &psrlq ("mm7",1); @@ -669,7 +669,7 @@ sub BODY_00_15_ssse3 { # "phase-less" copy of BODY_00_15_sse2 &movq (&QWP(48,"esi"),"mm6"); &movq (&QWP(56,"esi"),"mm7"); - &cmp ("edi","eax") # are we done yet? + &cmp ("edi","eax") # are we done yet? &jb (&label("loop_ssse3")); &mov ("esp",&DWP(64+12,$frame)); # restore sp diff --git a/deps/openssl/openssl/crypto/sha/asm/sha512-armv8.pl b/deps/openssl/openssl/crypto/sha/asm/sha512-armv8.pl index a77174bc72fe03..f7b36b986a611d 100644 --- a/deps/openssl/openssl/crypto/sha/asm/sha512-armv8.pl +++ b/deps/openssl/openssl/crypto/sha/asm/sha512-armv8.pl @@ -14,9 +14,11 @@ # # SHA256-hw SHA256(*) SHA512 # Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**)) -# Cortex-A53 2.38 15.6 (+110%) 10.1 (+190%(***)) +# Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***)) # Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***)) -# +# Denver 2.01 10.5 (+26%) 6.70 (+8%) +# X-Gene 20.0 (+100%) 12.8 (+300%(***)) +# # (*) Software SHA256 results are of lesser relevance, presented # mostly for informational purposes. # (**) The result is a trade-off: it's possible to improve it by @@ -25,7 +27,7 @@ # (***) Super-impressive coefficients over gcc-generated code are # indication of some compiler "pathology", most notably code # generated with -mgeneral-regs-only is significanty faster -# and lags behind assembly only by 50-90%. +# and the gap is only 40-90%. $flavour=shift; $output=shift; diff --git a/deps/openssl/openssl/crypto/sha/asm/sha512-x86_64.pl b/deps/openssl/openssl/crypto/sha/asm/sha512-x86_64.pl index 288f132d8d2cc7..b7b44b4411362d 100755 --- a/deps/openssl/openssl/crypto/sha/asm/sha512-x86_64.pl +++ b/deps/openssl/openssl/crypto/sha/asm/sha512-x86_64.pl @@ -1774,7 +1774,7 @@ () ###################################################################### # AVX2+BMI code path # -my $a5=$SZ==4?"%esi":"%rsi"; # zap $inp +my $a5=$SZ==4?"%esi":"%rsi"; # zap $inp my $PUSH8=8*2*$SZ; use integer; @@ -2375,7 +2375,7 @@ sub sha256op38 { my $instr = shift; my %opcodelet = ( "sha256rnds2" => 0xcb, - "sha256msg1" => 0xcc, + "sha256msg1" => 0xcc, "sha256msg2" => 0xcd ); if (defined($opcodelet{$instr}) && @_[0] =~ /%xmm([0-7]),\s*%xmm([0-7])/) { diff --git a/deps/openssl/openssl/crypto/srp/Makefile b/deps/openssl/openssl/crypto/srp/Makefile index ddf674864f98a8..414af7bc66b1fd 100644 --- a/deps/openssl/openssl/crypto/srp/Makefile +++ b/deps/openssl/openssl/crypto/srp/Makefile @@ -64,6 +64,8 @@ srptest: top srptest.c $(LIB) lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/srp/srp_vfy.c b/deps/openssl/openssl/crypto/srp/srp_vfy.c index 701b5cd01156d5..50f75d7e4c9f16 100644 --- a/deps/openssl/openssl/crypto/srp/srp_vfy.c +++ b/deps/openssl/openssl/crypto/srp/srp_vfy.c @@ -497,7 +497,8 @@ SRP_user_pwd *SRP_VBASE_get_by_user(SRP_VBASE *vb, char *username) if (!SRP_user_pwd_set_ids(user, username, NULL)) goto err; - RAND_pseudo_bytes(digv, SHA_DIGEST_LENGTH); + if (RAND_pseudo_bytes(digv, SHA_DIGEST_LENGTH) < 0) + goto err; EVP_MD_CTX_init(&ctxt); EVP_DigestInit_ex(&ctxt, EVP_sha1(), NULL); EVP_DigestUpdate(&ctxt, vb->seed_key, strlen(vb->seed_key)); @@ -549,7 +550,8 @@ char *SRP_create_verifier(const char *user, const char *pass, char **salt, } if (*salt == NULL) { - RAND_pseudo_bytes(tmp2, SRP_RANDOM_SALT_LEN); + if (RAND_pseudo_bytes(tmp2, SRP_RANDOM_SALT_LEN) < 0) + goto err; s = BN_bin2bn(tmp2, SRP_RANDOM_SALT_LEN, NULL); } else { @@ -609,7 +611,8 @@ int SRP_create_verifier_BN(const char *user, const char *pass, BIGNUM **salt, srp_bn_print(g); if (*salt == NULL) { - RAND_pseudo_bytes(tmp2, SRP_RANDOM_SALT_LEN); + if (RAND_pseudo_bytes(tmp2, SRP_RANDOM_SALT_LEN) < 0) + goto err; *salt = BN_bin2bn(tmp2, SRP_RANDOM_SALT_LEN, NULL); } diff --git a/deps/openssl/openssl/crypto/stack/Makefile b/deps/openssl/openssl/crypto/stack/Makefile index 5327692ac895ce..b069c93237aece 100644 --- a/deps/openssl/openssl/crypto/stack/Makefile +++ b/deps/openssl/openssl/crypto/stack/Makefile @@ -61,6 +61,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/store/Makefile b/deps/openssl/openssl/crypto/store/Makefile index 0dcfd7857a3771..5bc7ca71f03cde 100644 --- a/deps/openssl/openssl/crypto/store/Makefile +++ b/deps/openssl/openssl/crypto/store/Makefile @@ -63,6 +63,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/threads/th-lock.c b/deps/openssl/openssl/crypto/threads/th-lock.c index 28884c2d44fa92..cc8cf2581d6440 100644 --- a/deps/openssl/openssl/crypto/threads/th-lock.c +++ b/deps/openssl/openssl/crypto/threads/th-lock.c @@ -117,7 +117,7 @@ void CRYPTO_thread_setup(void) int i; lock_cs = OPENSSL_malloc(CRYPTO_num_locks() * sizeof(HANDLE)); - if(!lock_cs) { + if (!lock_cs) { /* Nothing we can do about this...void function! */ return; } @@ -172,7 +172,7 @@ void CRYPTO_thread_setup(void) # else lock_cs = OPENSSL_malloc(CRYPTO_num_locks() * sizeof(rwlock_t)); # endif - if(!lock_cs) { + if (!lock_cs) { /* Nothing we can do about this...void function! */ return; } @@ -260,7 +260,7 @@ void CRYPTO_thread_setup(void) char filename[20]; lock_cs = OPENSSL_malloc(CRYPTO_num_locks() * sizeof(usema_t *)); - if(!lock_cs) { + if (!lock_cs) { /* Nothing we can do about this...void function! */ return; } @@ -328,11 +328,11 @@ void CRYPTO_thread_setup(void) lock_cs = OPENSSL_malloc(CRYPTO_num_locks() * sizeof(pthread_mutex_t)); lock_count = OPENSSL_malloc(CRYPTO_num_locks() * sizeof(long)); - if(!lock_cs || !lock_count) { + if (!lock_cs || !lock_count) { /* Nothing we can do about this...void function! */ - if(lock_cs) + if (lock_cs) OPENSSL_free(lock_cs); - if(lock_count) + if (lock_count) OPENSSL_free(lock_count); return; } diff --git a/deps/openssl/openssl/crypto/ts/Makefile b/deps/openssl/openssl/crypto/ts/Makefile index c18234555be300..cf991efe464199 100644 --- a/deps/openssl/openssl/crypto/ts/Makefile +++ b/deps/openssl/openssl/crypto/ts/Makefile @@ -73,6 +73,8 @@ tags: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/txt_db/Makefile b/deps/openssl/openssl/crypto/txt_db/Makefile index e6f30331d8dd2f..4f70b199a5d483 100644 --- a/deps/openssl/openssl/crypto/txt_db/Makefile +++ b/deps/openssl/openssl/crypto/txt_db/Makefile @@ -61,6 +61,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by top Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/ui/Makefile b/deps/openssl/openssl/crypto/ui/Makefile index a685659fb4c884..b28fcca6d9653b 100644 --- a/deps/openssl/openssl/crypto/ui/Makefile +++ b/deps/openssl/openssl/crypto/ui/Makefile @@ -65,6 +65,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/whrlpool/Makefile b/deps/openssl/openssl/crypto/whrlpool/Makefile index f4d46e4d17b79e..befd6d6f36efb0 100644 --- a/deps/openssl/openssl/crypto/whrlpool/Makefile +++ b/deps/openssl/openssl/crypto/whrlpool/Makefile @@ -74,6 +74,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/x509/Makefile b/deps/openssl/openssl/crypto/x509/Makefile index cfbb59c37d0e64..01aa3bf3854fdc 100644 --- a/deps/openssl/openssl/crypto/x509/Makefile +++ b/deps/openssl/openssl/crypto/x509/Makefile @@ -71,6 +71,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) diff --git a/deps/openssl/openssl/crypto/x509/x509_lu.c b/deps/openssl/openssl/crypto/x509/x509_lu.c index ff1fa975fd7c0b..b0d653903ff566 100644 --- a/deps/openssl/openssl/crypto/x509/x509_lu.c +++ b/deps/openssl/openssl/crypto/x509/x509_lu.c @@ -216,6 +216,8 @@ X509_STORE *X509_STORE_new(void) static void cleanup(X509_OBJECT *a) { + if (!a) + return; if (a->type == X509_LU_X509) { X509_free(a->data.x509); } else if (a->type == X509_LU_CRL) { diff --git a/deps/openssl/openssl/crypto/x509/x509_vfy.c b/deps/openssl/openssl/crypto/x509/x509_vfy.c index f3e9c56b091715..8ce41f9c9a8ef4 100644 --- a/deps/openssl/openssl/crypto/x509/x509_vfy.c +++ b/deps/openssl/openssl/crypto/x509/x509_vfy.c @@ -1788,47 +1788,84 @@ int X509_cmp_time(const ASN1_TIME *ctm, time_t *cmp_time) ASN1_TIME atm; long offset; char buff1[24], buff2[24], *p; - int i, j; + int i, j, remaining; p = buff1; - i = ctm->length; + remaining = ctm->length; str = (char *)ctm->data; + /* + * Note that the following (historical) code allows much more slack in the + * time format than RFC5280. In RFC5280, the representation is fixed: + * UTCTime: YYMMDDHHMMSSZ + * GeneralizedTime: YYYYMMDDHHMMSSZ + */ if (ctm->type == V_ASN1_UTCTIME) { - if ((i < 11) || (i > 17)) + /* YYMMDDHHMM[SS]Z or YYMMDDHHMM[SS](+-)hhmm */ + int min_length = sizeof("YYMMDDHHMMZ") - 1; + int max_length = sizeof("YYMMDDHHMMSS+hhmm") - 1; + if (remaining < min_length || remaining > max_length) return 0; memcpy(p, str, 10); p += 10; str += 10; + remaining -= 10; } else { - if (i < 13) + /* YYYYMMDDHHMM[SS[.fff]]Z or YYYYMMDDHHMM[SS[.f[f[f]]]](+-)hhmm */ + int min_length = sizeof("YYYYMMDDHHMMZ") - 1; + int max_length = sizeof("YYYYMMDDHHMMSS.fff+hhmm") - 1; + if (remaining < min_length || remaining > max_length) return 0; memcpy(p, str, 12); p += 12; str += 12; + remaining -= 12; } if ((*str == 'Z') || (*str == '-') || (*str == '+')) { *(p++) = '0'; *(p++) = '0'; } else { + /* SS (seconds) */ + if (remaining < 2) + return 0; *(p++) = *(str++); *(p++) = *(str++); - /* Skip any fractional seconds... */ - if (*str == '.') { + remaining -= 2; + /* + * Skip any (up to three) fractional seconds... + * TODO(emilia): in RFC5280, fractional seconds are forbidden. + * Can we just kill them altogether? + */ + if (remaining && *str == '.') { str++; - while ((*str >= '0') && (*str <= '9')) - str++; + remaining--; + for (i = 0; i < 3 && remaining; i++, str++, remaining--) { + if (*str < '0' || *str > '9') + break; + } } } *(p++) = 'Z'; *(p++) = '\0'; - if (*str == 'Z') + /* We now need either a terminating 'Z' or an offset. */ + if (!remaining) + return 0; + if (*str == 'Z') { + if (remaining != 1) + return 0; offset = 0; - else { + } else { + /* (+-)HHMM */ if ((*str != '+') && (*str != '-')) return 0; + /* Historical behaviour: the (+-)hhmm offset is forbidden in RFC5280. */ + if (remaining != 5) + return 0; + if (str[1] < '0' || str[1] > '9' || str[2] < '0' || str[2] > '9' || + str[3] < '0' || str[3] > '9' || str[4] < '0' || str[4] > '9') + return 0; offset = ((str[1] - '0') * 10 + (str[2] - '0')) * 60; offset += (str[3] - '0') * 10 + (str[4] - '0'); if (*str == '-') @@ -2206,6 +2243,8 @@ X509_STORE_CTX *X509_STORE_CTX_new(void) void X509_STORE_CTX_free(X509_STORE_CTX *ctx) { + if (!ctx) + return; X509_STORE_CTX_cleanup(ctx); OPENSSL_free(ctx); } diff --git a/deps/openssl/openssl/crypto/x509/x509_vfy.h b/deps/openssl/openssl/crypto/x509/x509_vfy.h index 4b236c06143b69..bd8613c62ba429 100644 --- a/deps/openssl/openssl/crypto/x509/x509_vfy.h +++ b/deps/openssl/openssl/crypto/x509/x509_vfy.h @@ -434,7 +434,7 @@ void X509_STORE_CTX_set_depth(X509_STORE_CTX *ctx, int depth); # define X509_V_FLAG_PARTIAL_CHAIN 0x80000 /* * If the initial chain is not trusted, do not attempt to build an alternative - * chain. Alternate chain checking was introduced in 1.1.0. Setting this flag + * chain. Alternate chain checking was introduced in 1.0.2b. Setting this flag * will force the behaviour to match that of previous versions. */ # define X509_V_FLAG_NO_ALT_CHAINS 0x100000 diff --git a/deps/openssl/openssl/crypto/x509/x509_vpm.c b/deps/openssl/openssl/crypto/x509/x509_vpm.c index 322239401e004b..1ea0c69f5743d1 100644 --- a/deps/openssl/openssl/crypto/x509/x509_vpm.c +++ b/deps/openssl/openssl/crypto/x509/x509_vpm.c @@ -172,16 +172,17 @@ X509_VERIFY_PARAM *X509_VERIFY_PARAM_new(void) { X509_VERIFY_PARAM *param; X509_VERIFY_PARAM_ID *paramid; - param = OPENSSL_malloc(sizeof(X509_VERIFY_PARAM)); + + param = OPENSSL_malloc(sizeof *param); if (!param) return NULL; - paramid = OPENSSL_malloc(sizeof(X509_VERIFY_PARAM)); + paramid = OPENSSL_malloc(sizeof *paramid); if (!paramid) { OPENSSL_free(param); return NULL; } - memset(param, 0, sizeof(X509_VERIFY_PARAM)); - memset(paramid, 0, sizeof(X509_VERIFY_PARAM_ID)); + memset(param, 0, sizeof *param); + memset(paramid, 0, sizeof *paramid); param->id = paramid; x509_verify_param_zero(param); return param; @@ -189,6 +190,8 @@ X509_VERIFY_PARAM *X509_VERIFY_PARAM_new(void) void X509_VERIFY_PARAM_free(X509_VERIFY_PARAM *param) { + if (param == NULL) + return; x509_verify_param_zero(param); OPENSSL_free(param->id); OPENSSL_free(param); diff --git a/deps/openssl/openssl/crypto/x509/x509type.c b/deps/openssl/openssl/crypto/x509/x509type.c index 033175257a7738..9219f753bf9327 100644 --- a/deps/openssl/openssl/crypto/x509/x509type.c +++ b/deps/openssl/openssl/crypto/x509/x509type.c @@ -121,9 +121,6 @@ int X509_certificate_type(X509 *x, EVP_PKEY *pkey) } } - /* /8 because it's 1024 bits we look for, not bytes */ - if (EVP_PKEY_size(pk) <= 1024 / 8) - ret |= EVP_PKT_EXP; if (pkey == NULL) EVP_PKEY_free(pk); return (ret); diff --git a/deps/openssl/openssl/crypto/x509v3/Makefile b/deps/openssl/openssl/crypto/x509v3/Makefile index cdbfd524039215..9791b77a0765df 100644 --- a/deps/openssl/openssl/crypto/x509v3/Makefile +++ b/deps/openssl/openssl/crypto/x509v3/Makefile @@ -71,6 +71,8 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff +update: depend + depend: @[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile... $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) @@ -535,26 +537,18 @@ v3_purp.o: ../../include/openssl/x509_vfy.h ../../include/openssl/x509v3.h v3_purp.o: ../cryptlib.h v3_purp.c v3_scts.o: ../../e_os.h ../../include/openssl/asn1.h v3_scts.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h -v3_scts.o: ../../include/openssl/comp.h ../../include/openssl/conf.h -v3_scts.o: ../../include/openssl/crypto.h ../../include/openssl/dsa.h -v3_scts.o: ../../include/openssl/dtls1.h ../../include/openssl/e_os2.h -v3_scts.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h -v3_scts.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h -v3_scts.o: ../../include/openssl/evp.h ../../include/openssl/hmac.h -v3_scts.o: ../../include/openssl/kssl.h ../../include/openssl/lhash.h -v3_scts.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h -v3_scts.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h -v3_scts.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pem.h -v3_scts.o: ../../include/openssl/pem2.h ../../include/openssl/pkcs7.h -v3_scts.o: ../../include/openssl/pqueue.h ../../include/openssl/rsa.h -v3_scts.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h -v3_scts.o: ../../include/openssl/srtp.h ../../include/openssl/ssl.h -v3_scts.o: ../../include/openssl/ssl2.h ../../include/openssl/ssl23.h -v3_scts.o: ../../include/openssl/ssl3.h ../../include/openssl/stack.h -v3_scts.o: ../../include/openssl/symhacks.h ../../include/openssl/tls1.h -v3_scts.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h -v3_scts.o: ../../include/openssl/x509v3.h ../../ssl/ssl_locl.h ../cryptlib.h -v3_scts.o: v3_scts.c +v3_scts.o: ../../include/openssl/conf.h ../../include/openssl/crypto.h +v3_scts.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h +v3_scts.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h +v3_scts.o: ../../include/openssl/err.h ../../include/openssl/evp.h +v3_scts.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h +v3_scts.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h +v3_scts.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h +v3_scts.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h +v3_scts.o: ../../include/openssl/sha.h ../../include/openssl/stack.h +v3_scts.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h +v3_scts.o: ../../include/openssl/x509_vfy.h ../../include/openssl/x509v3.h +v3_scts.o: ../cryptlib.h v3_scts.c v3_skey.o: ../../e_os.h ../../include/openssl/asn1.h v3_skey.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h v3_skey.o: ../../include/openssl/conf.h ../../include/openssl/crypto.h diff --git a/deps/openssl/openssl/crypto/x509v3/v3_alt.c b/deps/openssl/openssl/crypto/x509v3/v3_alt.c index 807867b918c627..22ec202846ec23 100644 --- a/deps/openssl/openssl/crypto/x509v3/v3_alt.c +++ b/deps/openssl/openssl/crypto/x509v3/v3_alt.c @@ -584,24 +584,26 @@ static int do_othername(GENERAL_NAME *gen, char *value, X509V3_CTX *ctx) static int do_dirname(GENERAL_NAME *gen, char *value, X509V3_CTX *ctx) { - int ret; - STACK_OF(CONF_VALUE) *sk; - X509_NAME *nm; + int ret = 0; + STACK_OF(CONF_VALUE) *sk = NULL; + X509_NAME *nm = NULL; if (!(nm = X509_NAME_new())) - return 0; + goto err; sk = X509V3_get_section(ctx, value); if (!sk) { X509V3err(X509V3_F_DO_DIRNAME, X509V3_R_SECTION_NOT_FOUND); ERR_add_error_data(2, "section=", value); - X509_NAME_free(nm); - return 0; + goto err; } /* FIXME: should allow other character types... */ ret = X509V3_NAME_from_section(nm, sk, MBSTRING_ASC); if (!ret) - X509_NAME_free(nm); + goto err; gen->d.dirn = nm; - X509V3_section_free(ctx, sk); +err: + if (ret == 0) + X509_NAME_free(nm); + X509V3_section_free(ctx, sk); return ret; } diff --git a/deps/openssl/openssl/crypto/x509v3/v3_cpols.c b/deps/openssl/openssl/crypto/x509v3/v3_cpols.c index dca6ab2ec934f6..0febc1b3edc1cf 100644 --- a/deps/openssl/openssl/crypto/x509v3/v3_cpols.c +++ b/deps/openssl/openssl/crypto/x509v3/v3_cpols.c @@ -230,11 +230,11 @@ static POLICYINFO *policy_section(X509V3_CTX *ctx, goto merr; if (!sk_POLICYQUALINFO_push(pol->qualifiers, qual)) goto merr; - if(!(qual->pqualid = OBJ_nid2obj(NID_id_qt_cps))) { + if (!(qual->pqualid = OBJ_nid2obj(NID_id_qt_cps))) { X509V3err(X509V3_F_POLICY_SECTION, ERR_R_INTERNAL_ERROR); goto err; } - if(!(qual->d.cpsuri = M_ASN1_IA5STRING_new())) + if (!(qual->d.cpsuri = M_ASN1_IA5STRING_new())) goto merr; if (!ASN1_STRING_set(qual->d.cpsuri, cnf->value, strlen(cnf->value))) @@ -294,7 +294,7 @@ static POLICYQUALINFO *notice_section(X509V3_CTX *ctx, POLICYQUALINFO *qual; if (!(qual = POLICYQUALINFO_new())) goto merr; - if(!(qual->pqualid = OBJ_nid2obj(NID_id_qt_unotice))) { + if (!(qual->pqualid = OBJ_nid2obj(NID_id_qt_unotice))) { X509V3err(X509V3_F_NOTICE_SECTION, ERR_R_INTERNAL_ERROR); goto err; } @@ -304,7 +304,7 @@ static POLICYQUALINFO *notice_section(X509V3_CTX *ctx, for (i = 0; i < sk_CONF_VALUE_num(unot); i++) { cnf = sk_CONF_VALUE_value(unot, i); if (!strcmp(cnf->name, "explicitText")) { - if(!(not->exptext = M_ASN1_VISIBLESTRING_new())) + if (!(not->exptext = M_ASN1_VISIBLESTRING_new())) goto merr; if (!ASN1_STRING_set(not->exptext, cnf->value, strlen(cnf->value))) diff --git a/deps/openssl/openssl/crypto/x509v3/v3_scts.c b/deps/openssl/openssl/crypto/x509v3/v3_scts.c index 9a4c3eba0bdd5f..6e0b8d6844c8ce 100644 --- a/deps/openssl/openssl/crypto/x509v3/v3_scts.c +++ b/deps/openssl/openssl/crypto/x509v3/v3_scts.c @@ -60,7 +60,16 @@ #include "cryptlib.h" #include #include -#include "../ssl/ssl_locl.h" + +/* Signature and hash algorithms from RFC 5246 */ +#define TLSEXT_hash_sha256 4 + +#define TLSEXT_signature_rsa 1 +#define TLSEXT_signature_ecdsa 3 + + +#define n2s(c,s) ((s=(((unsigned int)(c[0]))<< 8)| \ + (((unsigned int)(c[1])) )),c+=2) #if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__) # define SCT_TIMESTAMP unsigned __int64 diff --git a/deps/openssl/openssl/crypto/x509v3/v3_utl.c b/deps/openssl/openssl/crypto/x509v3/v3_utl.c index ed6099e120b261..bdd7b95f4570b4 100644 --- a/deps/openssl/openssl/crypto/x509v3/v3_utl.c +++ b/deps/openssl/openssl/crypto/x509v3/v3_utl.c @@ -285,6 +285,10 @@ STACK_OF(CONF_VALUE) *X509V3_parse_list(const char *line) int state; /* We are going to modify the line so copy it first */ linebuf = BUF_strdup(line); + if (linebuf == NULL) { + X509V3err(X509V3_F_X509V3_PARSE_LIST, ERR_R_MALLOC_FAILURE); + goto err; + } state = HDR_NAME; ntmp = NULL; /* Go through all characters */ @@ -807,7 +811,7 @@ static const unsigned char *valid_star(const unsigned char *p, size_t len, */ if (p[i] == '*') { int atstart = (state & LABEL_START); - int atend = (i == len - 1 || p[i + i] == '.'); + int atend = (i == len - 1 || p[i + 1] == '.'); /*- * At most one wildcard per pattern. * No wildcards in IDNA labels. diff --git a/deps/openssl/openssl/demos/bio/README b/deps/openssl/openssl/demos/bio/README index a8467cda6429cd..a36bb48a5df910 100644 --- a/deps/openssl/openssl/demos/bio/README +++ b/deps/openssl/openssl/demos/bio/README @@ -4,3 +4,4 @@ to simplify socket programming. The client-conf, server-conf, client-arg and client-conf include examples of how to use the SSL_CONF API for configuration file or command line processing. + diff --git a/deps/openssl/openssl/demos/bio/accept.cnf b/deps/openssl/openssl/demos/bio/accept.cnf index f92cedf312c2aa..e4acea75f344bd 100644 --- a/deps/openssl/openssl/demos/bio/accept.cnf +++ b/deps/openssl/openssl/demos/bio/accept.cnf @@ -8,6 +8,6 @@ Curves = P-521:P-384:P-256 # Automatic curve selection ECDHParameters = Automatic # Restricted signature algorithms -SignatureAlgorithms = RSA+SHA512:ECDSA+SHA512 +SignatureAlgorithms = RSA+SHA512:ECDSA+SHA512 Certificate=server.pem PrivateKey=server.pem diff --git a/deps/openssl/openssl/demos/bio/connect.cnf b/deps/openssl/openssl/demos/bio/connect.cnf index ab764403a4742d..4dee03c373885e 100644 --- a/deps/openssl/openssl/demos/bio/connect.cnf +++ b/deps/openssl/openssl/demos/bio/connect.cnf @@ -6,4 +6,4 @@ Connect = localhost:4433 # Only support 3 curves Curves = P-521:P-384:P-256 # Restricted signature algorithms -SignatureAlgorithms = RSA+SHA512:ECDSA+SHA512 +SignatureAlgorithms = RSA+SHA512:ECDSA+SHA512 diff --git a/deps/openssl/openssl/demos/easy_tls/easy-tls.c b/deps/openssl/openssl/demos/easy_tls/easy-tls.c index acc688aaf4b77a..5682e91a479302 100644 --- a/deps/openssl/openssl/demos/easy_tls/easy-tls.c +++ b/deps/openssl/openssl/demos/easy_tls/easy-tls.c @@ -761,7 +761,8 @@ SSL_CTX *tls_create_ctx(struct tls_create_ctx_args a, void *apparg) if (tls_dhe1024 == NULL) { int i; - RAND_bytes((unsigned char *)&i, sizeof i); + if (RAND_bytes((unsigned char *)&i, sizeof i) <= 0) + goto err_return; /* * make sure that i is non-negative -- pick one of the provided * seeds diff --git a/deps/openssl/openssl/doc/apps/ciphers.pod b/deps/openssl/openssl/doc/apps/ciphers.pod index e9280bc5021c87..1c26e3b3da36ab 100644 --- a/deps/openssl/openssl/doc/apps/ciphers.pod +++ b/deps/openssl/openssl/doc/apps/ciphers.pod @@ -365,10 +365,8 @@ e.g. DES-CBC3-SHA. In these cases, RSA authentication is used. SSL_RSA_WITH_DES_CBC_SHA DES-CBC-SHA SSL_RSA_WITH_3DES_EDE_CBC_SHA DES-CBC3-SHA - SSL_DH_DSS_EXPORT_WITH_DES40_CBC_SHA EXP-DH-DSS-DES-CBC-SHA SSL_DH_DSS_WITH_DES_CBC_SHA DH-DSS-DES-CBC-SHA SSL_DH_DSS_WITH_3DES_EDE_CBC_SHA DH-DSS-DES-CBC3-SHA - SSL_DH_RSA_EXPORT_WITH_DES40_CBC_SHA EXP-DH-RSA-DES-CBC-SHA SSL_DH_RSA_WITH_DES_CBC_SHA DH-RSA-DES-CBC-SHA SSL_DH_RSA_WITH_3DES_EDE_CBC_SHA DH-RSA-DES-CBC3-SHA SSL_DHE_DSS_EXPORT_WITH_DES40_CBC_SHA EXP-EDH-DSS-DES-CBC-SHA diff --git a/deps/openssl/openssl/doc/apps/cms.pod b/deps/openssl/openssl/doc/apps/cms.pod index e7a61bf017157c..4eaedbcd34c4b9 100644 --- a/deps/openssl/openssl/doc/apps/cms.pod +++ b/deps/openssl/openssl/doc/apps/cms.pod @@ -35,6 +35,7 @@ B B [B<-print>] [B<-CAfile file>] [B<-CApath dir>] +[B<-no_alt_chains>] [B<-md digest>] [B<-[cipher]>] [B<-nointern>] @@ -419,7 +420,7 @@ portion of a message so they may be included manually. If signing then many S/MIME mail clients check the signers certificate's email address matches that specified in the From: address. -=item B<-purpose, -ignore_critical, -issuer_checks, -crl_check, -crl_check_all, -policy_check, -extended_crl, -x509_strict, -policy -check_ss_sig> +=item B<-purpose, -ignore_critical, -issuer_checks, -crl_check, -crl_check_all, -policy_check, -extended_crl, -x509_strict, -policy -check_ss_sig -no_alt_chains> Set various certificate chain valiadition option. See the L|verify(1)> manual page for details. @@ -650,9 +651,11 @@ The B option was first added in OpenSSL 1.1.0 The use of B<-recip> to specify the recipient when encrypting mail was first added to OpenSSL 1.1.0 -Support for RSA-OAEP and RSA-PSS was first added to OpenSSL 1.1.0. +Support for RSA-OAEP and RSA-PSS was first added to OpenSSL 1.1.0. The use of non-RSA keys with B<-encrypt> and B<-decrypt> was first added to OpenSSL 1.1.0. +The -no_alt_chains options was first added to OpenSSL 1.0.2b. + =cut diff --git a/deps/openssl/openssl/doc/apps/config.pod b/deps/openssl/openssl/doc/apps/config.pod index d5cce54f44a892..e12591528c0cd8 100644 --- a/deps/openssl/openssl/doc/apps/config.pod +++ b/deps/openssl/openssl/doc/apps/config.pod @@ -277,6 +277,59 @@ priority and B used if neither is defined: # The above value is used if TEMP isn't in the environment tmpfile=${ENV::TEMP}/tmp.filename +Simple OpenSSL library configuration example to enter FIPS mode: + + # Default appname: should match "appname" parameter (if any) + # supplied to CONF_modules_load_file et al. + openssl_conf = openssl_conf_section + + [openssl_conf_section] + # Configuration module list + alg_section = evp_sect + + [evp_sect] + # Set to "yes" to enter FIPS mode if supported + fips_mode = yes + +Note: in the above example you will get an error in non FIPS capable versions +of OpenSSL. + +More complex OpenSSL library configuration. Add OID and don't enter FIPS mode: + + # Default appname: should match "appname" parameter (if any) + # supplied to CONF_modules_load_file et al. + openssl_conf = openssl_conf_section + + [openssl_conf_section] + # Configuration module list + alg_section = evp_sect + oid_section = new_oids + + [evp_sect] + # This will have no effect as FIPS mode is off by default. + # Set to "yes" to enter FIPS mode, if supported + fips_mode = no + + [new_oids] + # New OID, just short name + newoid1 = 1.2.3.4.1 + # New OID shortname and long name + newoid2 = New OID 2 long name, 1.2.3.4.2 + +The above examples can be used with with any application supporting library +configuration if "openssl_conf" is modified to match the appropriate "appname". + +For example if the second sample file above is saved to "example.cnf" then +the command line: + + OPENSSL_CONF=example.cnf openssl asn1parse -genstr OID:1.2.3.4.1 + +will output: + + 0:d=0 hl=2 l= 4 prim: OBJECT :newoid1 + +showing that the OID "newoid1" has been added as "1.2.3.4.1". + =head1 BUGS Currently there is no way to include characters using the octal B<\nnn> diff --git a/deps/openssl/openssl/doc/apps/dhparam.pod b/deps/openssl/openssl/doc/apps/dhparam.pod index 6e27cf5c1516a0..1cd4c76663c5b3 100644 --- a/deps/openssl/openssl/doc/apps/dhparam.pod +++ b/deps/openssl/openssl/doc/apps/dhparam.pod @@ -71,8 +71,10 @@ check if the parameters are valid primes and generator. =item B<-2>, B<-5> -The generator to use, either 2 or 5. 2 is the default. If present then the -input file is ignored and parameters are generated instead. +The generator to use, either 2 or 5. If present then the +input file is ignored and parameters are generated instead. If not +present but B is present, parameters are generated with the +default generator 2. =item B<-rand> I @@ -85,9 +87,10 @@ all others. =item I this option specifies that a parameter set should be generated of size -I. It must be the last option. If not present then a value of 512 -is used. If this option is present then the input file is ignored and -parameters are generated instead. +I. It must be the last option. If this option is present then +the input file is ignored and parameters are generated instead. If +this option is not present but a generator (B<-2> or B<-5>) is +present, parameters are generated with a default length of 2048 bits. =item B<-noout> diff --git a/deps/openssl/openssl/doc/apps/ocsp.pod b/deps/openssl/openssl/doc/apps/ocsp.pod index 2372b373cdc1cd..4639502a0fb1e5 100644 --- a/deps/openssl/openssl/doc/apps/ocsp.pod +++ b/deps/openssl/openssl/doc/apps/ocsp.pod @@ -29,6 +29,7 @@ B B [B<-path>] [B<-CApath dir>] [B<-CAfile file>] +[B<-no_alt_chains>]] [B<-VAfile file>] [B<-validity_period n>] [B<-status_age n>] @@ -143,6 +144,10 @@ connection timeout to the OCSP responder in seconds file or pathname containing trusted CA certificates. These are used to verify the signature on the OCSP response. +=item B<-no_alt_chains> + +See L|verify(1)> manual page for details. + =item B<-verify_other file> file containing additional certificates to search when attempting to locate @@ -379,3 +384,9 @@ second file. openssl ocsp -index demoCA/index.txt -rsigner rcert.pem -CA demoCA/cacert.pem -reqin req.der -respout resp.der + +=head1 HISTORY + +The -no_alt_chains options was first added to OpenSSL 1.0.2b. + +=cut diff --git a/deps/openssl/openssl/doc/apps/s_client.pod b/deps/openssl/openssl/doc/apps/s_client.pod index 826b7cc893a3e9..84d0527069418d 100644 --- a/deps/openssl/openssl/doc/apps/s_client.pod +++ b/deps/openssl/openssl/doc/apps/s_client.pod @@ -19,6 +19,7 @@ B B [B<-pass arg>] [B<-CApath directory>] [B<-CAfile filename>] +[B<-no_alt_chains>] [B<-reconnect>] [B<-pause>] [B<-showcerts>] @@ -120,7 +121,7 @@ also used when building the client certificate chain. A file containing trusted certificates to use during server authentication and to use when attempting to build the client certificate chain. -=item B<-purpose, -ignore_critical, -issuer_checks, -crl_check, -crl_check_all, -policy_check, -extended_crl, -x509_strict, -policy -check_ss_sig> +=item B<-purpose, -ignore_critical, -issuer_checks, -crl_check, -crl_check_all, -policy_check, -extended_crl, -x509_strict, -policy -check_ss_sig -no_alt_chains> Set various certificate chain valiadition option. See the L|verify(1)> manual page for details. @@ -270,7 +271,7 @@ all others. =item B<-serverinfo types> -a list of comma-separated TLS Extension Types (numbers between 0 and +a list of comma-separated TLS Extension Types (numbers between 0 and 65535). Each type will be sent as an empty ClientHello TLS Extension. The server's response (if any) will be encoded and displayed as a PEM file. @@ -361,4 +362,8 @@ information whenever a session is renegotiated. L, L, L +=head1 HISTORY + +The -no_alt_chains options was first added to OpenSSL 1.0.2b. + =cut diff --git a/deps/openssl/openssl/doc/apps/s_server.pod b/deps/openssl/openssl/doc/apps/s_server.pod index b37f410fb9ce61..baca7792446f2e 100644 --- a/deps/openssl/openssl/doc/apps/s_server.pod +++ b/deps/openssl/openssl/doc/apps/s_server.pod @@ -33,6 +33,7 @@ B B [B<-state>] [B<-CApath directory>] [B<-CAfile filename>] +[B<-no_alt_chains>] [B<-nocert>] [B<-cipher cipherlist>] [B<-serverpref>] @@ -174,6 +175,10 @@ and to use when attempting to build the server certificate chain. The list is also used in the list of acceptable client CAs passed to the client when a certificate is requested. +=item B<-no_alt_chains> + +See the L|verify(1)> manual page for details. + =item B<-state> prints out the SSL session states. @@ -406,4 +411,8 @@ unknown cipher suites a client says it supports. L, L, L +=head1 HISTORY + +The -no_alt_chains options was first added to OpenSSL 1.0.2b. + =cut diff --git a/deps/openssl/openssl/doc/apps/smime.pod b/deps/openssl/openssl/doc/apps/smime.pod index d39a59a90d8cba..d5618c8ff0dfd6 100644 --- a/deps/openssl/openssl/doc/apps/smime.pod +++ b/deps/openssl/openssl/doc/apps/smime.pod @@ -15,6 +15,7 @@ B B [B<-pk7out>] [B<-[cipher]>] [B<-in file>] +[B<-no_alt_chains>] [B<-certfile file>] [B<-signer file>] [B<-recip file>] @@ -259,7 +260,7 @@ portion of a message so they may be included manually. If signing then many S/MIME mail clients check the signers certificate's email address matches that specified in the From: address. -=item B<-purpose, -ignore_critical, -issuer_checks, -crl_check, -crl_check_all, -policy_check, -extended_crl, -x509_strict, -policy -check_ss_sig> +=item B<-purpose, -ignore_critical, -issuer_checks, -crl_check, -crl_check_all, -policy_check, -extended_crl, -x509_strict, -policy -check_ss_sig -no_alt_chains> Set various options of certificate chain verification. See L|verify(1)> manual page for details. @@ -441,5 +442,6 @@ structures may cause parsing errors. The use of multiple B<-signer> options and the B<-resign> command were first added in OpenSSL 1.0.0 +The -no_alt_chains options was first added to OpenSSL 1.0.2b. =cut diff --git a/deps/openssl/openssl/doc/apps/verify.pod b/deps/openssl/openssl/doc/apps/verify.pod index df0153435629fd..df1b86dfe5400d 100644 --- a/deps/openssl/openssl/doc/apps/verify.pod +++ b/deps/openssl/openssl/doc/apps/verify.pod @@ -25,6 +25,7 @@ B B [B<-extended_crl>] [B<-use_deltas>] [B<-policy_print>] +[B<-no_alt_chains>] [B<-untrusted file>] [B<-help>] [B<-issuer_checks>] @@ -124,6 +125,14 @@ Set policy variable inhibit-any-policy (see RFC5280). Set policy variable inhibit-policy-mapping (see RFC5280). +=item B<-no_alt_chains> + +When building a certificate chain, if the first certificate chain found is not +trusted, then OpenSSL will continue to check to see if an alternative chain can +be found that is trusted. With this option that behaviour is suppressed so that +only the first chain found is ever used. Using this option will force the +behaviour to match that of previous OpenSSL versions. + =item B<-policy_print> Print out diagnostics related to policy processing. @@ -425,4 +434,8 @@ B<20 X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY> error codes. L +=head1 HISTORY + +The -no_alt_chains options was first added to OpenSSL 1.0.2b. + =cut diff --git a/deps/openssl/openssl/doc/crypto/BN_CTX_new.pod b/deps/openssl/openssl/doc/crypto/BN_CTX_new.pod index 9e72c18ad55eee..bbedbb17782cca 100644 --- a/deps/openssl/openssl/doc/crypto/BN_CTX_new.pod +++ b/deps/openssl/openssl/doc/crypto/BN_CTX_new.pod @@ -25,7 +25,7 @@ is rather expensive when used in conjunction with repeated subroutine calls, the B structure is used. BN_CTX_new() allocates and initializes a B -structure. +structure. BN_CTX_free() frees the components of the B, and if it was created by BN_CTX_new(), also the structure itself. diff --git a/deps/openssl/openssl/doc/crypto/BN_rand.pod b/deps/openssl/openssl/doc/crypto/BN_rand.pod index d6b975ccf64385..bd6bc8632394ed 100644 --- a/deps/openssl/openssl/doc/crypto/BN_rand.pod +++ b/deps/openssl/openssl/doc/crypto/BN_rand.pod @@ -24,7 +24,8 @@ most significant bit of the random number can be zero. If B is 0, it is set to 1, and if B is 1, the two most significant bits of the number will be set to 1, so that the product of two such random numbers will always have 2*B length. If B is true, the -number will be odd. +number will be odd. The value of B must be zero or greater. If B is +1 then B cannot also be 1. BN_pseudo_rand() does the same, but pseudo-random numbers generated by this function are not necessarily unpredictable. They can be used for diff --git a/deps/openssl/openssl/doc/crypto/BN_set_bit.pod b/deps/openssl/openssl/doc/crypto/BN_set_bit.pod index b7c47b9b01583e..a32cca2cee6b11 100644 --- a/deps/openssl/openssl/doc/crypto/BN_set_bit.pod +++ b/deps/openssl/openssl/doc/crypto/BN_set_bit.pod @@ -37,12 +37,12 @@ BN_mask_bits() truncates B to an B bit number shorter than B bits. BN_lshift() shifts B left by B bits and places the result in -B (C). BN_lshift1() shifts B left by one and places -the result in B (C). +B (C). Note that B must be non-negative. BN_lshift1() shifts +B left by one and places the result in B (C). BN_rshift() shifts B right by B bits and places the result in -B (C). BN_rshift1() shifts B right by one and places -the result in B (C). +B (C). Note that B must be non-negative. BN_rshift1() shifts +B right by one and places the result in B (C). For the shift functions, B and B may be the same variable. diff --git a/deps/openssl/openssl/doc/crypto/CMS_get0_SignerInfos.pod b/deps/openssl/openssl/doc/crypto/CMS_get0_SignerInfos.pod index 346a0c3834c4ad..b46c0e07ab3dac 100644 --- a/deps/openssl/openssl/doc/crypto/CMS_get0_SignerInfos.pod +++ b/deps/openssl/openssl/doc/crypto/CMS_get0_SignerInfos.pod @@ -25,7 +25,7 @@ associated with a specific CMS_SignerInfo structure B. Either the keyidentifier will be set in B or B issuer name and serial number in B and B. -CMS_SignerInfo_get0_signature() retrieves the signature associated with +CMS_SignerInfo_get0_signature() retrieves the signature associated with B in a pointer to an ASN1_OCTET_STRING structure. This pointer returned corresponds to the internal signature value if B so it may be read or modified. diff --git a/deps/openssl/openssl/doc/crypto/EC_GROUP_copy.pod b/deps/openssl/openssl/doc/crypto/EC_GROUP_copy.pod index a38c742059f9ef..954af469d5aa8c 100644 --- a/deps/openssl/openssl/doc/crypto/EC_GROUP_copy.pod +++ b/deps/openssl/openssl/doc/crypto/EC_GROUP_copy.pod @@ -43,7 +43,7 @@ EC_GROUP_copy, EC_GROUP_dup, EC_GROUP_method_of, EC_GROUP_set_generator, EC_GROU int EC_GROUP_get_basis_type(const EC_GROUP *); int EC_GROUP_get_trinomial_basis(const EC_GROUP *, unsigned int *k); - int EC_GROUP_get_pentanomial_basis(const EC_GROUP *, unsigned int *k1, + int EC_GROUP_get_pentanomial_basis(const EC_GROUP *, unsigned int *k1, unsigned int *k2, unsigned int *k3); =head1 DESCRIPTION @@ -75,10 +75,10 @@ EC_GROUP_get_asn1_flag and EC_GROUP_set_asn1_flag get and set the status of the the curve_name must also be set. The point_coversion_form for a curve controls how EC_POINT data is encoded as ASN1 as defined in X9.62 (ECDSA). -point_conversion_form_t is an enum defined as follows: +point_conversion_form_t is an enum defined as follows: typedef enum { - /** the point is encoded as z||x, where the octet z specifies + /** the point is encoded as z||x, where the octet z specifies * which solution of the quadratic equation y is */ POINT_CONVERSION_COMPRESSED = 2, /** the point is encoded as z||x||y, where z is the octet 0x02 */ @@ -88,13 +88,13 @@ point_conversion_form_t is an enum defined as follows: POINT_CONVERSION_HYBRID = 6 } point_conversion_form_t; - + For POINT_CONVERSION_UNCOMPRESSED the point is encoded as an octet signifying the UNCOMPRESSED form has been used followed by the octets for x, followed by the octets for y. For any given x co-ordinate for a point on a curve it is possible to derive two possible y values. For POINT_CONVERSION_COMPRESSED the point is encoded as an octet signifying that the COMPRESSED form has been used AND which of -the two possible solutions for y has been used, followed by the octets for x. +the two possible solutions for y has been used, followed by the octets for x. For POINT_CONVERSION_HYBRID the point is encoded as an octet signifying the HYBRID form has been used AND which of the two possible solutions for y has been used, followed by the octets for x, followed by the octets for y. diff --git a/deps/openssl/openssl/doc/crypto/EC_GROUP_new.pod b/deps/openssl/openssl/doc/crypto/EC_GROUP_new.pod index 18fa6f50b046db..ff55bf33a3c9cc 100644 --- a/deps/openssl/openssl/doc/crypto/EC_GROUP_new.pod +++ b/deps/openssl/openssl/doc/crypto/EC_GROUP_new.pod @@ -42,7 +42,7 @@ use a trinomial or a pentanomial for this parameter. A new curve can be constructed by calling EC_GROUP_new, using the implementation provided by B (see L). It is then necessary to call either EC_GROUP_set_curve_GFp or -EC_GROUP_set_curve_GF2m as appropriate to create a curve defined over Fp or over F2^m respectively. +EC_GROUP_set_curve_GF2m as appropriate to create a curve defined over Fp or over F2^m respectively. EC_GROUP_set_curve_GFp sets the curve parameters B

, B and B for a curve over Fp stored in B. EC_group_get_curve_GFp obtains the previously set curve parameters. @@ -64,7 +64,7 @@ provided. The return value is the total number of curves available (whether that not). Passing a NULL B, or setting B to 0 will do nothing other than return the total number of curves available. The EC_builtin_curve structure is defined as follows: - typedef struct { + typedef struct { int nid; const char *comment; } EC_builtin_curve; diff --git a/deps/openssl/openssl/doc/crypto/EC_KEY_new.pod b/deps/openssl/openssl/doc/crypto/EC_KEY_new.pod index f5830c5d7904e7..e859689bcb5079 100644 --- a/deps/openssl/openssl/doc/crypto/EC_KEY_new.pod +++ b/deps/openssl/openssl/doc/crypto/EC_KEY_new.pod @@ -26,7 +26,7 @@ EC_KEY_new, EC_KEY_get_flags, EC_KEY_set_flags, EC_KEY_clear_flags, EC_KEY_new_b int EC_KEY_set_public_key(EC_KEY *key, const EC_POINT *pub); point_conversion_form_t EC_KEY_get_conv_form(const EC_KEY *key); void EC_KEY_set_conv_form(EC_KEY *eckey, point_conversion_form_t cform); - void *EC_KEY_get_key_method_data(EC_KEY *key, + void *EC_KEY_get_key_method_data(EC_KEY *key, void *(*dup_func)(void *), void (*free_func)(void *), void (*clear_free_func)(void *)); void EC_KEY_insert_key_method_data(EC_KEY *key, void *data, void *(*dup_func)(void *), void (*free_func)(void *), void (*clear_free_func)(void *)); @@ -42,7 +42,7 @@ An EC_KEY represents a public key and (optionaly) an associated private key. A n The reference count for the newly created EC_KEY is initially set to 1. A curve can be associated with the EC_KEY by calling EC_KEY_set_group. -Alternatively a new EC_KEY can be constructed by calling EC_KEY_new_by_curve_name and supplying the nid of the associated curve. Refer to L for a description of curve names. This function simply wraps calls to EC_KEY_new and +Alternatively a new EC_KEY can be constructed by calling EC_KEY_new_by_curve_name and supplying the nid of the associated curve. Refer to L for a description of curve names. This function simply wraps calls to EC_KEY_new and EC_GROUP_new_by_curve_name. Calling EC_KEY_free decrements the reference count for the EC_KEY object, and if it has dropped to zero then frees the memory associated diff --git a/deps/openssl/openssl/doc/crypto/ERR_remove_state.pod b/deps/openssl/openssl/doc/crypto/ERR_remove_state.pod index 7a59d437391671..a4d38c17fd6bbb 100644 --- a/deps/openssl/openssl/doc/crypto/ERR_remove_state.pod +++ b/deps/openssl/openssl/doc/crypto/ERR_remove_state.pod @@ -40,6 +40,6 @@ L ERR_remove_state() is available in all versions of SSLeay and OpenSSL. It was deprecated in OpenSSL 1.0.0 when ERR_remove_thread_state was introduced -and thread IDs were introduced to identify threads instead of 'unsigned long'. +and thread IDs were introduced to identify threads instead of 'unsigned long'. =cut diff --git a/deps/openssl/openssl/doc/crypto/EVP_BytesToKey.pod b/deps/openssl/openssl/doc/crypto/EVP_BytesToKey.pod index 5d6059528e6d43..a9b6bb0c731fb5 100644 --- a/deps/openssl/openssl/doc/crypto/EVP_BytesToKey.pod +++ b/deps/openssl/openssl/doc/crypto/EVP_BytesToKey.pod @@ -36,8 +36,8 @@ If the total key and IV length is less than the digest length and B is used then the derivation algorithm is compatible with PKCS#5 v1.5 otherwise a non standard extension is used to derive the extra data. -Newer applications should use more standard algorithms such as PBKDF2 as -defined in PKCS#5v2.1 for key derivation. +Newer applications should use a more modern algorithm such as PBKDF2 as +defined in PKCS#5v2.1 and provided by PKCS5_PBKDF2_HMAC. =head1 KEY DERIVATION ALGORITHM diff --git a/deps/openssl/openssl/doc/crypto/EVP_EncryptInit.pod b/deps/openssl/openssl/doc/crypto/EVP_EncryptInit.pod index 27af7a9cabc7f1..fb6036f959ba0c 100644 --- a/deps/openssl/openssl/doc/crypto/EVP_EncryptInit.pod +++ b/deps/openssl/openssl/doc/crypto/EVP_EncryptInit.pod @@ -24,7 +24,7 @@ EVP_idea_ecb, EVP_idea_cfb, EVP_idea_ofb, EVP_idea_cbc, EVP_rc2_cbc, EVP_rc2_ecb, EVP_rc2_cfb, EVP_rc2_ofb, EVP_rc2_40_cbc, EVP_rc2_64_cbc, EVP_bf_cbc, EVP_bf_ecb, EVP_bf_cfb, EVP_bf_ofb, EVP_cast5_cbc, EVP_cast5_ecb, EVP_cast5_cfb, EVP_cast5_ofb, EVP_rc5_32_12_16_cbc, -EVP_rc5_32_12_16_ecb, EVP_rc5_32_12_16_cfb, EVP_rc5_32_12_16_ofb, +EVP_rc5_32_12_16_ecb, EVP_rc5_32_12_16_cfb, EVP_rc5_32_12_16_ofb, EVP_aes_128_gcm, EVP_aes_192_gcm, EVP_aes_256_gcm, EVP_aes_128_ccm, EVP_aes_192_ccm, EVP_aes_256_ccm - EVP cipher routines @@ -367,7 +367,7 @@ For GCM mode ciphers the behaviour of the EVP interface is subtly altered and several GCM specific ctrl operations are supported. To specify any additional authenticated data (AAD) a call to EVP_CipherUpdate(), -EVP_EncryptUpdate() or EVP_DecryptUpdate() should be made with the output +EVP_EncryptUpdate() or EVP_DecryptUpdate() should be made with the output parameter B set to B. When decrypting the return value of EVP_DecryptFinal() or EVP_CipherFinal() @@ -381,7 +381,7 @@ The following ctrls are supported in GCM mode: Sets the GCM IV length: this call can only be made before specifying an IV. If not called a default IV length is used (96 bits for AES). - + EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_GCM_GET_TAG, taglen, tag); Writes B bytes of the tag value to the buffer indicated by B. @@ -392,7 +392,7 @@ processed (e.g. after an EVP_EncryptFinal() call). Sets the expected tag to B bytes from B. This call is only legal when decrypting data and must be made B any data is processed (e.g. -before any EVP_DecryptUpdate() call). +before any EVP_DecryptUpdate() call). See L below for an example of the use of GCM mode. @@ -402,14 +402,14 @@ The behaviour of CCM mode ciphers is similar to CCM mode but with a few additional requirements and different ctrl values. Like GCM mode any additional authenticated data (AAD) is passed by calling -EVP_CipherUpdate(), EVP_EncryptUpdate() or EVP_DecryptUpdate() with the output +EVP_CipherUpdate(), EVP_EncryptUpdate() or EVP_DecryptUpdate() with the output parameter B set to B. Additionally the total plaintext or ciphertext length B be passed to EVP_CipherUpdate(), EVP_EncryptUpdate() or -EVP_DecryptUpdate() with the output and input parameters (B and B) +EVP_DecryptUpdate() with the output and input parameters (B and B) set to B and the length passed in the B parameter. The following ctrls are supported in CCM mode: - + EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_CCM_SET_TAG, taglen, tag); This call is made to set the expected B tag value when decrypting or diff --git a/deps/openssl/openssl/doc/crypto/X509_VERIFY_PARAM_set_flags.pod b/deps/openssl/openssl/doc/crypto/X509_VERIFY_PARAM_set_flags.pod index 347d48dfec0ab7..44792f91a11d9e 100644 --- a/deps/openssl/openssl/doc/crypto/X509_VERIFY_PARAM_set_flags.pod +++ b/deps/openssl/openssl/doc/crypto/X509_VERIFY_PARAM_set_flags.pod @@ -197,6 +197,12 @@ verification. If this flag is set then additional status codes will be sent to the verification callback and it B be prepared to handle such cases without assuming they are hard errors. +The B flag suppresses checking for alternative +chains. By default, when building a certificate chain, if the first certificate +chain found is not trusted, then OpenSSL will continue to check to see if an +alternative chain can be found that is trusted. With this flag set the behaviour +will match that of OpenSSL versions prior to 1.0.2b. + =head1 NOTES The above functions should be used to manipulate verification parameters @@ -233,6 +239,6 @@ L =head1 HISTORY -TBA +The B flag was added in OpenSSL 1.0.2b =cut diff --git a/deps/openssl/openssl/doc/crypto/d2i_ECPKParameters.pod b/deps/openssl/openssl/doc/crypto/d2i_ECPKParameters.pod index 55b83d6c8857de..704b4ab35286a8 100644 --- a/deps/openssl/openssl/doc/crypto/d2i_ECPKParameters.pod +++ b/deps/openssl/openssl/doc/crypto/d2i_ECPKParameters.pod @@ -25,7 +25,7 @@ d2i_ECPKParameters, i2d_ECPKParameters, d2i_ECPKParameters_bio, i2d_ECPKParamete The ECPKParameters encode and decode routines encode and parse the public parameters for an B structure, which represents a curve. -d2i_ECPKParameters() attempts to decode B bytes at B<*in>. If +d2i_ECPKParameters() attempts to decode B bytes at B<*in>. If successful a pointer to the B structure is returned. If an error occurred then B is returned. If B is not B then the returned structure is written to B<*px>. If B<*px> is not B @@ -38,7 +38,7 @@ i2d_ECPKParameters() encodes the structure pointed to by B into DER format. If B is not B is writes the DER encoded data to the buffer at B<*out>, and increments it to point after the data just written. If the return value is negative an error occurred, otherwise it -returns the length of the encoded data. +returns the length of the encoded data. If B<*out> is B memory will be allocated for a buffer and the encoded data written to it. In this case B<*out> is not incremented and it points to @@ -73,7 +73,7 @@ i2d_ECPKParameters() returns the number of bytes successfully encoded or a negat value if an error occurs. i2d_ECPKParameters_bio(), i2d_ECPKParameters_fp(), ECPKParameters_print and ECPKParameters_print_fp -return 1 for success and 0 if an error occurs. +return 1 for success and 0 if an error occurs. =head1 SEE ALSO diff --git a/deps/openssl/openssl/doc/crypto/ec.pod b/deps/openssl/openssl/doc/crypto/ec.pod index 484bbcf8f9b7fd..7d57ba8ea07113 100644 --- a/deps/openssl/openssl/doc/crypto/ec.pod +++ b/deps/openssl/openssl/doc/crypto/ec.pod @@ -104,7 +104,7 @@ ec - Elliptic Curve functions int EC_GROUP_get_basis_type(const EC_GROUP *); int EC_GROUP_get_trinomial_basis(const EC_GROUP *, unsigned int *k); - int EC_GROUP_get_pentanomial_basis(const EC_GROUP *, unsigned int *k1, + int EC_GROUP_get_pentanomial_basis(const EC_GROUP *, unsigned int *k1, unsigned int *k2, unsigned int *k3); EC_GROUP *d2i_ECPKParameters(EC_GROUP **, const unsigned char **in, long len); int i2d_ECPKParameters(const EC_GROUP *, unsigned char **out); @@ -136,7 +136,7 @@ ec - Elliptic Curve functions void EC_KEY_set_enc_flags(EC_KEY *eckey, unsigned int flags); point_conversion_form_t EC_KEY_get_conv_form(const EC_KEY *key); void EC_KEY_set_conv_form(EC_KEY *eckey, point_conversion_form_t cform); - void *EC_KEY_get_key_method_data(EC_KEY *key, + void *EC_KEY_get_key_method_data(EC_KEY *key, void *(*dup_func)(void *), void (*free_func)(void *), void (*clear_free_func)(void *)); void EC_KEY_insert_key_method_data(EC_KEY *key, void *data, void *(*dup_func)(void *), void (*free_func)(void *), void (*clear_free_func)(void *)); diff --git a/deps/openssl/openssl/doc/crypto/pem.pod b/deps/openssl/openssl/doc/crypto/pem.pod index 21e9fe3b98a908..763eb6f533925e 100644 --- a/deps/openssl/openssl/doc/crypto/pem.pod +++ b/deps/openssl/openssl/doc/crypto/pem.pod @@ -2,7 +2,29 @@ =head1 NAME -PEM, PEM_read_bio_PrivateKey, PEM_read_PrivateKey, PEM_write_bio_PrivateKey, PEM_write_PrivateKey, PEM_write_bio_PKCS8PrivateKey, PEM_write_PKCS8PrivateKey, PEM_write_bio_PKCS8PrivateKey_nid, PEM_write_PKCS8PrivateKey_nid, PEM_read_bio_PUBKEY, PEM_read_PUBKEY, PEM_write_bio_PUBKEY, PEM_write_PUBKEY, PEM_read_bio_RSAPrivateKey, PEM_read_RSAPrivateKey, PEM_write_bio_RSAPrivateKey, PEM_write_RSAPrivateKey, PEM_read_bio_RSAPublicKey, PEM_read_RSAPublicKey, PEM_write_bio_RSAPublicKey, PEM_write_RSAPublicKey, PEM_read_bio_RSA_PUBKEY, PEM_read_RSA_PUBKEY, PEM_write_bio_RSA_PUBKEY, PEM_write_RSA_PUBKEY, PEM_read_bio_DSAPrivateKey, PEM_read_DSAPrivateKey, PEM_write_bio_DSAPrivateKey, PEM_write_DSAPrivateKey, PEM_read_bio_DSA_PUBKEY, PEM_read_DSA_PUBKEY, PEM_write_bio_DSA_PUBKEY, PEM_write_DSA_PUBKEY, PEM_read_bio_DSAparams, PEM_read_DSAparams, PEM_write_bio_DSAparams, PEM_write_DSAparams, PEM_read_bio_DHparams, PEM_read_DHparams, PEM_write_bio_DHparams, PEM_write_DHparams, PEM_read_bio_X509, PEM_read_X509, PEM_write_bio_X509, PEM_write_X509, PEM_read_bio_X509_AUX, PEM_read_X509_AUX, PEM_write_bio_X509_AUX, PEM_write_X509_AUX, PEM_read_bio_X509_REQ, PEM_read_X509_REQ, PEM_write_bio_X509_REQ, PEM_write_X509_REQ, PEM_write_bio_X509_REQ_NEW, PEM_write_X509_REQ_NEW, PEM_read_bio_X509_CRL, PEM_read_X509_CRL, PEM_write_bio_X509_CRL, PEM_write_X509_CRL, PEM_read_bio_PKCS7, PEM_read_PKCS7, PEM_write_bio_PKCS7, PEM_write_PKCS7, PEM_read_bio_NETSCAPE_CERT_SEQUENCE, PEM_read_NETSCAPE_CERT_SEQUENCE, PEM_write_bio_NETSCAPE_CERT_SEQUENCE, PEM_write_NETSCAPE_CERT_SEQUENCE - PEM routines +PEM, PEM_read_bio_PrivateKey, PEM_read_PrivateKey, PEM_write_bio_PrivateKey, +PEM_write_PrivateKey, PEM_write_bio_PKCS8PrivateKey, PEM_write_PKCS8PrivateKey, +PEM_write_bio_PKCS8PrivateKey_nid, PEM_write_PKCS8PrivateKey_nid, +PEM_read_bio_PUBKEY, PEM_read_PUBKEY, PEM_write_bio_PUBKEY, PEM_write_PUBKEY, +PEM_read_bio_RSAPrivateKey, PEM_read_RSAPrivateKey, +PEM_write_bio_RSAPrivateKey, PEM_write_RSAPrivateKey, +PEM_read_bio_RSAPublicKey, PEM_read_RSAPublicKey, PEM_write_bio_RSAPublicKey, +PEM_write_RSAPublicKey, PEM_read_bio_RSA_PUBKEY, PEM_read_RSA_PUBKEY, +PEM_write_bio_RSA_PUBKEY, PEM_write_RSA_PUBKEY, PEM_read_bio_DSAPrivateKey, +PEM_read_DSAPrivateKey, PEM_write_bio_DSAPrivateKey, PEM_write_DSAPrivateKey, +PEM_read_bio_DSA_PUBKEY, PEM_read_DSA_PUBKEY, PEM_write_bio_DSA_PUBKEY, +PEM_write_DSA_PUBKEY, PEM_read_bio_DSAparams, PEM_read_DSAparams, +PEM_write_bio_DSAparams, PEM_write_DSAparams, PEM_read_bio_DHparams, +PEM_read_DHparams, PEM_write_bio_DHparams, PEM_write_DHparams, +PEM_read_bio_X509, PEM_read_X509, PEM_write_bio_X509, PEM_write_X509, +PEM_read_bio_X509_AUX, PEM_read_X509_AUX, PEM_write_bio_X509_AUX, +PEM_write_X509_AUX, PEM_read_bio_X509_REQ, PEM_read_X509_REQ, +PEM_write_bio_X509_REQ, PEM_write_X509_REQ, PEM_write_bio_X509_REQ_NEW, +PEM_write_X509_REQ_NEW, PEM_read_bio_X509_CRL, PEM_read_X509_CRL, +PEM_write_bio_X509_CRL, PEM_write_X509_CRL, PEM_read_bio_PKCS7, PEM_read_PKCS7, +PEM_write_bio_PKCS7, PEM_write_PKCS7, PEM_read_bio_NETSCAPE_CERT_SEQUENCE, +PEM_read_NETSCAPE_CERT_SEQUENCE, PEM_write_bio_NETSCAPE_CERT_SEQUENCE, +PEM_write_NETSCAPE_CERT_SEQUENCE - PEM routines =head1 SYNOPSIS @@ -239,7 +261,8 @@ SubjectPublicKeyInfo structure and an error occurs if the public key is not DSA. The B functions process DSA parameters using a DSA -structure. The parameters are encoded using a foobar structure. +structure. The parameters are encoded using a Dss-Parms structure +as defined in RFC2459. The B functions process DH parameters using a DH structure. The parameters are encoded using a PKCS#3 DHparameter diff --git a/deps/openssl/openssl/doc/crypto/sha.pod b/deps/openssl/openssl/doc/crypto/sha.pod index 6eef846c1634a7..0c9dbf2f3d2489 100644 --- a/deps/openssl/openssl/doc/crypto/sha.pod +++ b/deps/openssl/openssl/doc/crypto/sha.pod @@ -81,7 +81,7 @@ used only when backward compatibility is required. =head1 RETURN VALUES SHA1(), SHA224(), SHA256(), SHA384() and SHA512() return a pointer to the hash -value. +value. SHA1_Init(), SHA1_Update() and SHA1_Final() and equivalent SHA224, SHA256, SHA384 and SHA512 functions return 1 for success, 0 otherwise. diff --git a/deps/openssl/openssl/doc/ssl/SSL_COMP_add_compression_method.pod b/deps/openssl/openssl/doc/ssl/SSL_COMP_add_compression_method.pod index f4d191c9b6bdc5..2bb440379f89e7 100644 --- a/deps/openssl/openssl/doc/ssl/SSL_COMP_add_compression_method.pod +++ b/deps/openssl/openssl/doc/ssl/SSL_COMP_add_compression_method.pod @@ -2,7 +2,7 @@ =head1 NAME -SSL_COMP_add_compression_method - handle SSL/TLS integrated compression methods +SSL_COMP_add_compression_method, SSL_COMP_free_compression_methods - handle SSL/TLS integrated compression methods =head1 SYNOPSIS @@ -10,6 +10,8 @@ SSL_COMP_add_compression_method - handle SSL/TLS integrated compression methods int SSL_COMP_add_compression_method(int id, COMP_METHOD *cm); + +void SSL_COMP_free_compression_methods(void); + =head1 DESCRIPTION SSL_COMP_add_compression_method() adds the compression method B with @@ -17,6 +19,10 @@ the identifier B to the list of available compression methods. This list is globally maintained for all SSL operations within this application. It cannot be set for specific SSL_CTX or SSL objects. +SSL_COMP_free_compression_methods() frees the internal table of +compression methods that were built internally, and possibly +augmented by adding SSL_COMP_add_compression_method(). + =head1 NOTES The TLS standard (or SSLv3) allows the integration of compression methods @@ -38,8 +44,8 @@ its own compression methods and will unconditionally activate compression when a matching identifier is found. There is no way to restrict the list of compression methods supported on a per connection basis. -The OpenSSL library has the compression methods B and (when -especially enabled during compilation) B available. +If enabled during compilation, the OpenSSL library will have the +COMP_zlib() compression method available. =head1 WARNINGS diff --git a/deps/openssl/openssl/doc/ssl/SSL_CONF_CTX_set1_prefix.pod b/deps/openssl/openssl/doc/ssl/SSL_CONF_CTX_set1_prefix.pod index 4978c785a6dc90..76990188d154a2 100644 --- a/deps/openssl/openssl/doc/ssl/SSL_CONF_CTX_set1_prefix.pod +++ b/deps/openssl/openssl/doc/ssl/SSL_CONF_CTX_set1_prefix.pod @@ -20,7 +20,7 @@ to B. If B is B it is restored to the default value. Command prefixes alter the commands recognised by subsequent SSL_CTX_cmd() calls. For example for files, if the prefix "SSL" is set then command names such as "SSLProtocol", "SSLOptions" etc. are recognised instead of "Protocol" -and "Options". Similarly for command lines if the prefix is "--ssl-" then +and "Options". Similarly for command lines if the prefix is "--ssl-" then "--ssl-no_tls1_2" is recognised instead of "-no_tls1_2". If the B flag is set then prefix checks are case diff --git a/deps/openssl/openssl/doc/ssl/SSL_CONF_cmd.pod b/deps/openssl/openssl/doc/ssl/SSL_CONF_cmd.pod index 1b72fd57c36820..2bf1a60e9013fd 100644 --- a/deps/openssl/openssl/doc/ssl/SSL_CONF_cmd.pod +++ b/deps/openssl/openssl/doc/ssl/SSL_CONF_cmd.pod @@ -74,7 +74,7 @@ B). Curve names are case sensitive. =item B<-named_curve> -This sets the temporary curve used for ephemeral ECDH modes. Only used by +This sets the temporary curve used for ephemeral ECDH modes. Only used by servers The B argument is a curve name or the special value B which @@ -85,7 +85,7 @@ can be either the B name (e.g. B) or an OpenSSL OID name =item B<-cipher> Sets the cipher suite list to B. Note: syntax checking of B is -currently not performed unless a B or B structure is +currently not performed unless a B or B structure is associated with B. =item B<-cert> @@ -111,7 +111,7 @@ operations are permitted. =item B<-no_ssl2>, B<-no_ssl3>, B<-no_tls1>, B<-no_tls1_1>, B<-no_tls1_2> -Disables protocol support for SSLv2, SSLv3, TLS 1.0, TLS 1.1 or TLS 1.2 +Disables protocol support for SSLv2, SSLv3, TLS 1.0, TLS 1.1 or TLS 1.2 by setting the corresponding options B, B, B, B and B respectively. @@ -177,7 +177,7 @@ Note: the command prefix (if set) alters the recognised B values. =item B Sets the cipher suite list to B. Note: syntax checking of B is -currently not performed unless an B or B structure is +currently not performed unless an B or B structure is associated with B. =item B @@ -244,7 +244,7 @@ B). Curve names are case sensitive. =item B -This sets the temporary curve used for ephemeral ECDH modes. Only used by +This sets the temporary curve used for ephemeral ECDH modes. Only used by servers The B argument is a curve name or the special value B which @@ -342,7 +342,7 @@ or values which cannot be overridden. For example if an application calls: SSL_CONF_cmd(ctx, "Protocol", "-SSLv2"); SSL_CONF_cmd(ctx, userparam, uservalue); -it will disable SSLv2 support by default but the user can override it. If +it will disable SSLv2 support by default but the user can override it. If however the call sequence is: SSL_CONF_cmd(ctx, userparam, uservalue); @@ -372,7 +372,7 @@ can be checked instead. If -3 is returned a required argument is missing and an error is indicated. If 0 is returned some other error occurred and this can be reported back to the user. -The function SSL_CONF_cmd_value_type() can be used by applications to +The function SSL_CONF_cmd_value_type() can be used by applications to check for the existence of a command or to perform additional syntax checking or translation of the command value. For example if the return value is B an application could translate a relative diff --git a/deps/openssl/openssl/doc/ssl/SSL_CTX_set1_curves.pod b/deps/openssl/openssl/doc/ssl/SSL_CTX_set1_curves.pod index cc983f31c2749e..18d0c9ac394e53 100644 --- a/deps/openssl/openssl/doc/ssl/SSL_CTX_set1_curves.pod +++ b/deps/openssl/openssl/doc/ssl/SSL_CTX_set1_curves.pod @@ -27,7 +27,7 @@ SSL_CTX_set_ecdh_auto, SSL_set_ecdh_auto - EC supported curve functions SSL_CTX_set1_curves() sets the supported curves for B to B curves in the array B. The array consist of all NIDs of curves in preference order. For a TLS client the curves are used directly in the -supported curves extension. For a TLS server the curves are used to +supported curves extension. For a TLS server the curves are used to determine the set of shared curves. SSL_CTX_set1_curves_list() sets the supported curves for B to @@ -38,7 +38,7 @@ SSL_set1_curves() and SSL_set1_curves_list() are similar except they set supported curves for the SSL structure B. SSL_get1_curves() returns the set of supported curves sent by a client -in the supported curves extension. It returns the total number of +in the supported curves extension. It returns the total number of supported curves. The B parameter can be B to simply return the number of curves for memory allocation purposes. The B array is in the form of a set of curve NIDs in preference @@ -53,7 +53,7 @@ so B is normally set to zero. If the value B is out of range, NID_undef is returned. SSL_CTX_set_ecdh_auto() and SSL_set_ecdh_auto() set automatic curve -selection for server B or B to B. If B is 1 then +selection for server B or B to B. If B is 1 then the highest preference curve is automatically used for ECDH temporary keys used during key exchange. diff --git a/deps/openssl/openssl/doc/ssl/SSL_CTX_set1_verify_cert_store.pod b/deps/openssl/openssl/doc/ssl/SSL_CTX_set1_verify_cert_store.pod index e70debfc703f0a..493cca48194008 100644 --- a/deps/openssl/openssl/doc/ssl/SSL_CTX_set1_verify_cert_store.pod +++ b/deps/openssl/openssl/doc/ssl/SSL_CTX_set1_verify_cert_store.pod @@ -54,7 +54,7 @@ any client certificate chain. The chain store is used to build the certificate chain. If the mode B is set or a certificate chain is -configured already (for example using the functions such as +configured already (for example using the functions such as L or L) then automatic chain building is disabled. diff --git a/deps/openssl/openssl/doc/ssl/SSL_CTX_set_cert_store.pod b/deps/openssl/openssl/doc/ssl/SSL_CTX_set_cert_store.pod index d85f47a3289007..846416e0694772 100644 --- a/deps/openssl/openssl/doc/ssl/SSL_CTX_set_cert_store.pod +++ b/deps/openssl/openssl/doc/ssl/SSL_CTX_set_cert_store.pod @@ -46,7 +46,7 @@ X509_STORE object and its handling becomes available. The X509_STORE structure used by an SSL_CTX is used for verifying peer certificates and building certificate chains, it is also shared by -every child SSL structure. Applications wanting finer control can use +every child SSL structure. Applications wanting finer control can use functions such as SSL_CTX_set1_verify_cert_store() instead. =head1 RETURN VALUES diff --git a/deps/openssl/openssl/doc/ssl/SSL_CTX_set_custom_cli_ext.pod b/deps/openssl/openssl/doc/ssl/SSL_CTX_set_custom_cli_ext.pod index 60cf16bd6aa04f..3fceef9258a310 100644 --- a/deps/openssl/openssl/doc/ssl/SSL_CTX_set_custom_cli_ext.pod +++ b/deps/openssl/openssl/doc/ssl/SSL_CTX_set_custom_cli_ext.pod @@ -39,11 +39,11 @@ SSL_CTX_add_client_custom_ext, SSL_CTX_add_server_custom_ext - custom TLS extens =head1 DESCRIPTION -SSL_CTX_add_client_custom_ext() adds a custom extension for a TLS client +SSL_CTX_add_client_custom_ext() adds a custom extension for a TLS client with extension type B and callbacks B, B and B. -SSL_CTX_add_server_custom_ext() adds a custom extension for a TLS server +SSL_CTX_add_server_custom_ext() adds a custom extension for a TLS server with extension type B and callbacks B, B and B. @@ -55,7 +55,7 @@ internally by OpenSSL and 0 otherwise. =head1 EXTENSION CALLBACKS -The callback B is called to send custom extension data to be +The callback B is called to send custom extension data to be included in ClientHello for TLS clients or ServerHello for servers. The B parameter is set to the extension type which will be added and B to the value set when the extension handler was added. diff --git a/deps/openssl/openssl/doc/ssl/SSL_CTX_set_tmp_dh_callback.pod b/deps/openssl/openssl/doc/ssl/SSL_CTX_set_tmp_dh_callback.pod index 7a27eef50b173d..b754c16a86e66e 100644 --- a/deps/openssl/openssl/doc/ssl/SSL_CTX_set_tmp_dh_callback.pod +++ b/deps/openssl/openssl/doc/ssl/SSL_CTX_set_tmp_dh_callback.pod @@ -61,12 +61,12 @@ negotiation is being saved. If "strong" primes were used to generate the DH parameters, it is not strictly necessary to generate a new key for each handshake but it does improve forward -secrecy. If it is not assured, that "strong" primes were used (see especially -the section about DSA parameters below), SSL_OP_SINGLE_DH_USE must be used -in order to prevent small subgroup attacks. Always using SSL_OP_SINGLE_DH_USE -has an impact on the computer time needed during negotiation, but it is not -very large, so application authors/users should consider to always enable -this option. +secrecy. If it is not assured that "strong" primes were used, +SSL_OP_SINGLE_DH_USE must be used in order to prevent small subgroup +attacks. Always using SSL_OP_SINGLE_DH_USE has an impact on the +computer time needed during negotiation, but it is not very large, so +application authors/users should consider always enabling this option. +The option is required to implement perfect forward secrecy (PFS). As generating DH parameters is extremely time consuming, an application should not generate the parameters on the fly but supply the parameters. @@ -74,82 +74,62 @@ DH parameters can be reused, as the actual key is newly generated during the negotiation. The risk in reusing DH parameters is that an attacker may specialize on a very often used DH group. Applications should therefore generate their own DH parameters during the installation process using the -openssl L application. In order to reduce the computer -time needed for this generation, it is possible to use DSA parameters -instead (see L), but in this case SSL_OP_SINGLE_DH_USE -is mandatory. +openssl L application. This application +guarantees that "strong" primes are used. -Application authors may compile in DH parameters. Files dh512.pem, -dh1024.pem, dh2048.pem, and dh4096.pem in the 'apps' directory of current +Files dh2048.pem, and dh4096.pem in the 'apps' directory of the current version of the OpenSSL distribution contain the 'SKIP' DH parameters, which use safe primes and were generated verifiably pseudo-randomly. These files can be converted into C code using the B<-C> option of the -L application. -Authors may also generate their own set of parameters using -L, but a user may not be sure how the parameters were -generated. The generation of DH parameters during installation is therefore -recommended. +L application. Generation of custom DH +parameters during installation should still be preferred to stop an +attacker from specializing on a commonly used group. Files dh1024.pem +and dh512.pem contain old parameters that must not be used by +applications. An application may either directly specify the DH parameters or -can supply the DH parameters via a callback function. The callback approach -has the advantage, that the callback may supply DH parameters for different -key lengths. +can supply the DH parameters via a callback function. -The B is called with the B needed and -the B information. The B flag is set, when the -ephemeral DH key exchange is performed with an export cipher. +Previous versions of the callback used B and B +parameters to control parameter generation for export and non-export +cipher suites. Modern servers that do not support export ciphersuites +are advised to either use SSL_CTX_set_tmp_dh() in combination with +SSL_OP_SINGLE_DH_USE, or alternatively, use the callback but ignore +B and B and simply supply at least 2048-bit +parameters in the callback. =head1 EXAMPLES -Handle DH parameters for key lengths of 512 and 1024 bits. (Error handling +Setup DH parameters with a key length of 2048 bits. (Error handling partly left out.) - ... - /* Set up ephemeral DH stuff */ - DH *dh_512 = NULL; - DH *dh_1024 = NULL; - FILE *paramfile; + Command-line parameter generation: + $ openssl dhparam -out dh_param_2048.pem 2048 + + Code for setting up parameters during server initialization: ... - /* "openssl dhparam -out dh_param_512.pem -2 512" */ - paramfile = fopen("dh_param_512.pem", "r"); + SSL_CTX ctx = SSL_CTX_new(); + ... + + /* Set up ephemeral DH parameters. */ + DH *dh_2048 = NULL; + FILE *paramfile; + paramfile = fopen("dh_param_2048.pem", "r"); if (paramfile) { - dh_512 = PEM_read_DHparams(paramfile, NULL, NULL, NULL); + dh_2048 = PEM_read_DHparams(paramfile, NULL, NULL, NULL); fclose(paramfile); + } else { + /* Error. */ } - /* "openssl dhparam -out dh_param_1024.pem -2 1024" */ - paramfile = fopen("dh_param_1024.pem", "r"); - if (paramfile) { - dh_1024 = PEM_read_DHparams(paramfile, NULL, NULL, NULL); - fclose(paramfile); + if (dh_2048 == NULL) { + /* Error. */ } - ... - - /* "openssl dhparam -C -2 512" etc... */ - DH *get_dh512() { ... } - DH *get_dh1024() { ... } - - DH *tmp_dh_callback(SSL *s, int is_export, int keylength) - { - DH *dh_tmp=NULL; - - switch (keylength) { - case 512: - if (!dh_512) - dh_512 = get_dh512(); - dh_tmp = dh_512; - break; - case 1024: - if (!dh_1024) - dh_1024 = get_dh1024(); - dh_tmp = dh_1024; - break; - default: - /* Generating a key on the fly is very costly, so use what is there */ - setup_dh_parameters_like_above(); - } - return(dh_tmp); + if (SSL_CTX_set_tmp_dh(ctx, dh_2048) != 1) { + /* Error. */ } + SSL_CTX_set_options(ctx, SSL_OP_SINGLE_DH_USE); + ... =head1 RETURN VALUES diff --git a/deps/openssl/openssl/doc/ssl/SSL_CTX_use_certificate.pod b/deps/openssl/openssl/doc/ssl/SSL_CTX_use_certificate.pod index 9072b874fb24b3..80321b8580e3f8 100644 --- a/deps/openssl/openssl/doc/ssl/SSL_CTX_use_certificate.pod +++ b/deps/openssl/openssl/doc/ssl/SSL_CTX_use_certificate.pod @@ -123,7 +123,7 @@ SSL_CTX_use_certificate_chain_file() adds the first certificate found in the file to the certificate store. The other certificates are added to the store of chain certificates using L. Note: versions of OpenSSL before 1.0.2 only had a single certificate chain store for all certificate types, OpenSSL 1.0.2 and later -have a separate chain store for each type. SSL_CTX_use_certificate_chain_file() +have a separate chain store for each type. SSL_CTX_use_certificate_chain_file() should be used instead of the SSL_CTX_use_certificate_file() function in order to allow the use of complete certificate chains even when no trusted CA storage is used or when the CA issuing the certificate shall not be added to diff --git a/deps/openssl/openssl/doc/ssl/SSL_CTX_use_serverinfo.pod b/deps/openssl/openssl/doc/ssl/SSL_CTX_use_serverinfo.pod index e3aa7f92c203c4..da7935c83d0a93 100644 --- a/deps/openssl/openssl/doc/ssl/SSL_CTX_use_serverinfo.pod +++ b/deps/openssl/openssl/doc/ssl/SSL_CTX_use_serverinfo.pod @@ -20,8 +20,8 @@ A "serverinfo" extension is returned in response to an empty ClientHello Extension. SSL_CTX_use_serverinfo() loads one or more serverinfo extensions from -a byte array into B. The extensions must be concatenated into a -sequence of bytes. Each extension must consist of a 2-byte Extension Type, +a byte array into B. The extensions must be concatenated into a +sequence of bytes. Each extension must consist of a 2-byte Extension Type, a 2-byte length, and then length bytes of extension_data. SSL_CTX_use_serverinfo_file() loads one or more serverinfo extensions from diff --git a/deps/openssl/openssl/e_os2.h b/deps/openssl/openssl/e_os2.h index 613607f8387e54..7be9989ac3a101 100644 --- a/deps/openssl/openssl/e_os2.h +++ b/deps/openssl/openssl/e_os2.h @@ -109,6 +109,12 @@ extern "C" { # undef OPENSSL_SYS_UNIX # define OPENSSL_SYS_WIN32 # endif +# if defined(_WIN64) || defined(OPENSSL_SYSNAME_WIN64) +# undef OPENSSL_SYS_UNIX +# if !defined(OPENSSL_SYS_WIN64) +# define OPENSSL_SYS_WIN64 +# endif +# endif # if defined(OPENSSL_SYSNAME_WINNT) # undef OPENSSL_SYS_UNIX # define OPENSSL_SYS_WINNT @@ -121,7 +127,7 @@ extern "C" { # endif /* Anything that tries to look like Microsoft is "Windows" */ -# if defined(OPENSSL_SYS_WIN32) || defined(OPENSSL_SYS_WINNT) || defined(OPENSSL_SYS_WINCE) +# if defined(OPENSSL_SYS_WIN32) || defined(OPENSSL_SYS_WIN64) || defined(OPENSSL_SYS_WINNT) || defined(OPENSSL_SYS_WINCE) # undef OPENSSL_SYS_UNIX # define OPENSSL_SYS_WINDOWS # ifndef OPENSSL_SYS_MSDOS diff --git a/deps/openssl/openssl/engines/Makefile b/deps/openssl/openssl/engines/Makefile index da6c8750b5ae44..2058ff405afeaf 100644 --- a/deps/openssl/openssl/engines/Makefile +++ b/deps/openssl/openssl/engines/Makefile @@ -146,12 +146,15 @@ lint: lint -DLINT $(INCLUDES) $(SRC)>fluff @target=lint; $(RECURSIVE_MAKE) -depend: - @if [ -z "$(THIS)" ]; then \ - $(MAKE) -f $(TOP)/Makefile reflect THIS=$@; \ - fi - @[ -z "$(THIS)" ] || $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) +update: local_depend + @if [ -z "$(THIS)" ]; then $(MAKE) -f $(TOP)/Makefile reflect THIS=$@; fi + @[ -z "$(THIS)" ] || (set -e; target=update; $(RECURSIVE_MAKE) ) + +depend: local_depend + @if [ -z "$(THIS)" ]; then $(MAKE) -f $(TOP)/Makefile reflect THIS=$@; fi @[ -z "$(THIS)" ] || (set -e; target=depend; $(RECURSIVE_MAKE) ) +local_depend: + @[ -z "$(THIS)" ] || $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) dclean: $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKEFILE) >Makefile.new diff --git a/deps/openssl/openssl/engines/ccgost/Makefile b/deps/openssl/openssl/engines/ccgost/Makefile index 2f36580836e569..17e1efbdff3014 100644 --- a/deps/openssl/openssl/engines/ccgost/Makefile +++ b/deps/openssl/openssl/engines/ccgost/Makefile @@ -66,12 +66,13 @@ links: tests: -depend: - @if [ -z "$(THIS)" ]; then \ - $(MAKE) -f $(TOP)/Makefile reflect THIS=$@; \ - else \ - $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC); \ - fi +update: local_depend + @if [ -z "$(THIS)" ]; then $(MAKE) -f $(TOP)/Makefile reflect THIS=$@; fi + +depend: local_depend + @if [ -z "$(THIS)" ]; then $(MAKE) -f $(TOP)/Makefile reflect THIS=$@; fi +local_depend: + @[ -z "$(THIS)" ] || $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) files: $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO @@ -261,8 +262,9 @@ gost_sign.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h gost_sign.o: ../../include/openssl/dsa.h ../../include/openssl/e_os2.h gost_sign.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h gost_sign.o: ../../include/openssl/ecdsa.h ../../include/openssl/engine.h -gost_sign.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h -gost_sign.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h +gost_sign.o: ../../include/openssl/err.h ../../include/openssl/evp.h +gost_sign.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h +gost_sign.o: ../../include/openssl/objects.h gost_sign.o: ../../include/openssl/opensslconf.h gost_sign.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h gost_sign.o: ../../include/openssl/pkcs7.h ../../include/openssl/rand.h diff --git a/deps/openssl/openssl/engines/ccgost/e_gost_err.c b/deps/openssl/openssl/engines/ccgost/e_gost_err.c index 3201b648fb2afc..80ef58f8ca3cd2 100644 --- a/deps/openssl/openssl/engines/ccgost/e_gost_err.c +++ b/deps/openssl/openssl/engines/ccgost/e_gost_err.c @@ -1,6 +1,6 @@ /* e_gost_err.c */ /* ==================================================================== - * Copyright (c) 1999-2009 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2015 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -90,6 +90,7 @@ static ERR_STRING_DATA GOST_str_functs[] = { {ERR_FUNC(GOST_F_GOST_IMIT_CTRL), "GOST_IMIT_CTRL"}, {ERR_FUNC(GOST_F_GOST_IMIT_FINAL), "GOST_IMIT_FINAL"}, {ERR_FUNC(GOST_F_GOST_IMIT_UPDATE), "GOST_IMIT_UPDATE"}, + {ERR_FUNC(GOST_F_GOST_SIGN_KEYGEN), "GOST_SIGN_KEYGEN"}, {ERR_FUNC(GOST_F_PARAM_COPY_GOST01), "PARAM_COPY_GOST01"}, {ERR_FUNC(GOST_F_PARAM_COPY_GOST94), "PARAM_COPY_GOST94"}, {ERR_FUNC(GOST_F_PKEY_GOST01CP_DECRYPT), "PKEY_GOST01CP_DECRYPT"}, diff --git a/deps/openssl/openssl/engines/ccgost/e_gost_err.h b/deps/openssl/openssl/engines/ccgost/e_gost_err.h index 92be558452f923..a2018ec4de9d6e 100644 --- a/deps/openssl/openssl/engines/ccgost/e_gost_err.h +++ b/deps/openssl/openssl/engines/ccgost/e_gost_err.h @@ -90,6 +90,7 @@ void ERR_GOST_error(int function, int reason, char *file, int line); # define GOST_F_GOST_IMIT_CTRL 114 # define GOST_F_GOST_IMIT_FINAL 140 # define GOST_F_GOST_IMIT_UPDATE 115 +# define GOST_F_GOST_SIGN_KEYGEN 142 # define GOST_F_PARAM_COPY_GOST01 116 # define GOST_F_PARAM_COPY_GOST94 117 # define GOST_F_PKEY_GOST01CP_DECRYPT 118 diff --git a/deps/openssl/openssl/engines/ccgost/gost2001.c b/deps/openssl/openssl/engines/ccgost/gost2001.c index 2b96694821e20a..9536295430d068 100644 --- a/deps/openssl/openssl/engines/ccgost/gost2001.c +++ b/deps/openssl/openssl/engines/ccgost/gost2001.c @@ -41,6 +41,11 @@ int fill_GOST2001_params(EC_KEY *eckey, int nid) BN_CTX *ctx = BN_CTX_new(); int ok = 0; + if(!ctx) { + GOSTerr(GOST_F_FILL_GOST2001_PARAMS, ERR_R_MALLOC_FAILURE); + goto err; + } + BN_CTX_start(ctx); p = BN_CTX_get(ctx); a = BN_CTX_get(ctx); @@ -48,6 +53,10 @@ int fill_GOST2001_params(EC_KEY *eckey, int nid) x = BN_CTX_get(ctx); y = BN_CTX_get(ctx); q = BN_CTX_get(ctx); + if(!p || !a || !b || !x || !y || !q) { + GOSTerr(GOST_F_FILL_GOST2001_PARAMS, ERR_R_MALLOC_FAILURE); + goto err; + } while (params->nid != NID_undef && params->nid != nid) params++; if (params->nid == NID_undef) { @@ -55,18 +64,33 @@ int fill_GOST2001_params(EC_KEY *eckey, int nid) GOST_R_UNSUPPORTED_PARAMETER_SET); goto err; } - BN_hex2bn(&p, params->p); - BN_hex2bn(&a, params->a); - BN_hex2bn(&b, params->b); + if(!BN_hex2bn(&p, params->p) + || !BN_hex2bn(&a, params->a) + || !BN_hex2bn(&b, params->b)) { + GOSTerr(GOST_F_FILL_GOST2001_PARAMS, + ERR_R_INTERNAL_ERROR); + goto err; + } grp = EC_GROUP_new_curve_GFp(p, a, b, ctx); + if(!grp) { + GOSTerr(GOST_F_FILL_GOST2001_PARAMS, ERR_R_MALLOC_FAILURE); + goto err; + } P = EC_POINT_new(grp); + if(!P) { + GOSTerr(GOST_F_FILL_GOST2001_PARAMS, ERR_R_MALLOC_FAILURE); + goto err; + } - BN_hex2bn(&x, params->x); - BN_hex2bn(&y, params->y); - EC_POINT_set_affine_coordinates_GFp(grp, P, x, y, ctx); - BN_hex2bn(&q, params->q); + if(!BN_hex2bn(&x, params->x) + || !BN_hex2bn(&y, params->y) + || !EC_POINT_set_affine_coordinates_GFp(grp, P, x, y, ctx) + || !BN_hex2bn(&q, params->q)) { + GOSTerr(GOST_F_FILL_GOST2001_PARAMS, ERR_R_INTERNAL_ERROR); + goto err; + } #ifdef DEBUG_KEYS fprintf(stderr, "Set params index %d oid %s\nq=", (params - R3410_2001_paramset), OBJ_nid2sn(params->nid)); @@ -74,16 +98,23 @@ int fill_GOST2001_params(EC_KEY *eckey, int nid) fprintf(stderr, "\n"); #endif - EC_GROUP_set_generator(grp, P, q, NULL); + if(!EC_GROUP_set_generator(grp, P, q, NULL)) { + GOSTerr(GOST_F_FILL_GOST2001_PARAMS, ERR_R_INTERNAL_ERROR); + goto err; + } EC_GROUP_set_curve_name(grp, params->nid); - - EC_KEY_set_group(eckey, grp); + if(!EC_KEY_set_group(eckey, grp)) { + GOSTerr(GOST_F_FILL_GOST2001_PARAMS, ERR_R_INTERNAL_ERROR); + goto err; + } ok = 1; err: - EC_POINT_free(P); - EC_GROUP_free(grp); - BN_CTX_end(ctx); - BN_CTX_free(ctx); + if (P) EC_POINT_free(P); + if (grp) EC_GROUP_free(grp); + if (ctx) { + BN_CTX_end(ctx); + BN_CTX_free(ctx); + } return ok; } @@ -94,7 +125,7 @@ int fill_GOST2001_params(EC_KEY *eckey, int nid) */ DSA_SIG *gost2001_do_sign(const unsigned char *dgst, int dlen, EC_KEY *eckey) { - DSA_SIG *newsig = NULL; + DSA_SIG *newsig = NULL, *ret = NULL; BIGNUM *md = hashsum2bn(dgst); BIGNUM *order = NULL; const EC_GROUP *group; @@ -103,6 +134,10 @@ DSA_SIG *gost2001_do_sign(const unsigned char *dgst, int dlen, EC_KEY *eckey) NULL, *e = NULL; EC_POINT *C = NULL; BN_CTX *ctx = BN_CTX_new(); + if(!ctx || !md) { + GOSTerr(GOST_F_GOST2001_DO_SIGN, ERR_R_MALLOC_FAILURE); + goto err; + } BN_CTX_start(ctx); OPENSSL_assert(dlen == 32); newsig = DSA_SIG_new(); @@ -111,11 +146,25 @@ DSA_SIG *gost2001_do_sign(const unsigned char *dgst, int dlen, EC_KEY *eckey) goto err; } group = EC_KEY_get0_group(eckey); + if(!group) { + GOSTerr(GOST_F_GOST2001_DO_SIGN, ERR_R_INTERNAL_ERROR); + goto err; + } order = BN_CTX_get(ctx); - EC_GROUP_get_order(group, order, ctx); + if(!order || !EC_GROUP_get_order(group, order, ctx)) { + GOSTerr(GOST_F_GOST2001_DO_SIGN, ERR_R_INTERNAL_ERROR); + goto err; + } priv_key = EC_KEY_get0_private_key(eckey); + if(!priv_key) { + GOSTerr(GOST_F_GOST2001_DO_SIGN, ERR_R_INTERNAL_ERROR); + goto err; + } e = BN_CTX_get(ctx); - BN_mod(e, md, order, ctx); + if(!e || !BN_mod(e, md, order, ctx)) { + GOSTerr(GOST_F_GOST2001_DO_SIGN, ERR_R_INTERNAL_ERROR); + goto err; + } #ifdef DEBUG_SIGN fprintf(stderr, "digest as bignum="); BN_print_fp(stderr, md); @@ -128,55 +177,80 @@ DSA_SIG *gost2001_do_sign(const unsigned char *dgst, int dlen, EC_KEY *eckey) } k = BN_CTX_get(ctx); C = EC_POINT_new(group); + if(!k || !C) { + GOSTerr(GOST_F_GOST2001_DO_SIGN, ERR_R_MALLOC_FAILURE); + goto err; + } do { do { if (!BN_rand_range(k, order)) { GOSTerr(GOST_F_GOST2001_DO_SIGN, GOST_R_RANDOM_NUMBER_GENERATOR_FAILED); - DSA_SIG_free(newsig); - newsig = NULL; goto err; } if (!EC_POINT_mul(group, C, k, NULL, NULL, ctx)) { GOSTerr(GOST_F_GOST2001_DO_SIGN, ERR_R_EC_LIB); - DSA_SIG_free(newsig); - newsig = NULL; goto err; } if (!X) X = BN_CTX_get(ctx); + if (!r) + r = BN_CTX_get(ctx); + if (!X || !r) { + GOSTerr(GOST_F_GOST2001_DO_SIGN, ERR_R_MALLOC_FAILURE); + goto err; + } if (!EC_POINT_get_affine_coordinates_GFp(group, C, X, NULL, ctx)) { GOSTerr(GOST_F_GOST2001_DO_SIGN, ERR_R_EC_LIB); - DSA_SIG_free(newsig); - newsig = NULL; goto err; } - if (!r) - r = BN_CTX_get(ctx); - BN_nnmod(r, X, order, ctx); + + if(!BN_nnmod(r, X, order, ctx)) { + GOSTerr(GOST_F_GOST2001_DO_SIGN, ERR_R_INTERNAL_ERROR); + goto err; + } } while (BN_is_zero(r)); /* s = (r*priv_key+k*e) mod order */ if (!tmp) tmp = BN_CTX_get(ctx); - BN_mod_mul(tmp, priv_key, r, order, ctx); if (!tmp2) tmp2 = BN_CTX_get(ctx); - BN_mod_mul(tmp2, k, e, order, ctx); if (!s) s = BN_CTX_get(ctx); - BN_mod_add(s, tmp, tmp2, order, ctx); + if (!tmp || !tmp2 || !s) { + GOSTerr(GOST_F_GOST2001_DO_SIGN, ERR_R_MALLOC_FAILURE); + goto err; + } + + if(!BN_mod_mul(tmp, priv_key, r, order, ctx) + || !BN_mod_mul(tmp2, k, e, order, ctx) + || !BN_mod_add(s, tmp, tmp2, order, ctx)) { + GOSTerr(GOST_F_GOST2001_DO_SIGN, ERR_R_INTERNAL_ERROR); + goto err; + } } while (BN_is_zero(s)); newsig->s = BN_dup(s); newsig->r = BN_dup(r); + if(!newsig->s || !newsig->r) { + GOSTerr(GOST_F_GOST2001_DO_SIGN, ERR_R_MALLOC_FAILURE); + goto err; + } + + ret = newsig; err: - BN_CTX_end(ctx); - BN_CTX_free(ctx); - EC_POINT_free(C); - BN_free(md); - return newsig; + if(ctx) { + BN_CTX_end(ctx); + BN_CTX_free(ctx); + } + if (C) EC_POINT_free(C); + if (md) BN_free(md); + if (!ret && newsig) { + DSA_SIG_free(newsig); + } + return ret; } /* @@ -196,6 +270,11 @@ int gost2001_do_verify(const unsigned char *dgst, int dgst_len, const EC_POINT *pub_key = NULL; int ok = 0; + if(!ctx || !group) { + GOSTerr(GOST_F_GOST2001_DO_VERIFY, ERR_R_INTERNAL_ERROR); + goto err; + } + BN_CTX_start(ctx); order = BN_CTX_get(ctx); e = BN_CTX_get(ctx); @@ -205,9 +284,17 @@ int gost2001_do_verify(const unsigned char *dgst, int dgst_len, X = BN_CTX_get(ctx); R = BN_CTX_get(ctx); v = BN_CTX_get(ctx); + if(!order || !e || !z1 || !z2 || !tmp || !X || !R || !v) { + GOSTerr(GOST_F_GOST2001_DO_VERIFY, ERR_R_MALLOC_FAILURE); + goto err; + } - EC_GROUP_get_order(group, order, ctx); pub_key = EC_KEY_get0_public_key(ec); + if(!pub_key || !EC_GROUP_get_order(group, order, ctx)) { + GOSTerr(GOST_F_GOST2001_DO_VERIFY, ERR_R_INTERNAL_ERROR); + goto err; + } + if (BN_is_zero(sig->s) || BN_is_zero(sig->r) || (BN_cmp(sig->s, order) >= 1) || (BN_cmp(sig->r, order) >= 1)) { GOSTerr(GOST_F_GOST2001_DO_VERIFY, @@ -217,19 +304,28 @@ int gost2001_do_verify(const unsigned char *dgst, int dgst_len, } md = hashsum2bn(dgst); - BN_mod(e, md, order, ctx); + if(!md || !BN_mod(e, md, order, ctx)) { + GOSTerr(GOST_F_GOST2001_DO_VERIFY, ERR_R_INTERNAL_ERROR); + goto err; + } #ifdef DEBUG_SIGN fprintf(stderr, "digest as bignum: "); BN_print_fp(stderr, md); fprintf(stderr, "\ndigest mod q: "); BN_print_fp(stderr, e); #endif - if (BN_is_zero(e)) - BN_one(e); + if (BN_is_zero(e) && !BN_one(e)) { + GOSTerr(GOST_F_GOST2001_DO_VERIFY, ERR_R_INTERNAL_ERROR); + goto err; + } v = BN_mod_inverse(v, e, order, ctx); - BN_mod_mul(z1, sig->s, v, order, ctx); - BN_sub(tmp, order, sig->r); - BN_mod_mul(z2, tmp, v, order, ctx); + if(!v + || !BN_mod_mul(z1, sig->s, v, order, ctx) + || !BN_sub(tmp, order, sig->r) + || !BN_mod_mul(z2, tmp, v, order, ctx)) { + GOSTerr(GOST_F_GOST2001_DO_VERIFY, ERR_R_INTERNAL_ERROR); + goto err; + } #ifdef DEBUG_SIGN fprintf(stderr, "\nInverted digest value: "); BN_print_fp(stderr, v); @@ -239,6 +335,10 @@ int gost2001_do_verify(const unsigned char *dgst, int dgst_len, BN_print_fp(stderr, z2); #endif C = EC_POINT_new(group); + if (!C) { + GOSTerr(GOST_F_GOST2001_DO_VERIFY, ERR_R_MALLOC_FAILURE); + goto err; + } if (!EC_POINT_mul(group, C, z1, pub_key, z2, ctx)) { GOSTerr(GOST_F_GOST2001_DO_VERIFY, ERR_R_EC_LIB); goto err; @@ -247,7 +347,10 @@ int gost2001_do_verify(const unsigned char *dgst, int dgst_len, GOSTerr(GOST_F_GOST2001_DO_VERIFY, ERR_R_EC_LIB); goto err; } - BN_mod(R, X, order, ctx); + if(!BN_mod(R, X, order, ctx)) { + GOSTerr(GOST_F_GOST2001_DO_VERIFY, ERR_R_INTERNAL_ERROR); + goto err; + } #ifdef DEBUG_SIGN fprintf(stderr, "\nX="); BN_print_fp(stderr, X); @@ -261,10 +364,12 @@ int gost2001_do_verify(const unsigned char *dgst, int dgst_len, ok = 1; } err: - EC_POINT_free(C); - BN_CTX_end(ctx); - BN_CTX_free(ctx); - BN_free(md); + if (C) EC_POINT_free(C); + if (ctx) { + BN_CTX_end(ctx); + BN_CTX_free(ctx); + } + if (md) BN_free(md); return ok; } @@ -287,6 +392,10 @@ int gost2001_compute_public(EC_KEY *ec) return 0; } ctx = BN_CTX_new(); + if(!ctx) { + GOSTerr(GOST_F_GOST2001_COMPUTE_PUBLIC, ERR_R_MALLOC_FAILURE); + goto err; + } BN_CTX_start(ctx); if (!(priv_key = EC_KEY_get0_private_key(ec))) { GOSTerr(GOST_F_GOST2001_COMPUTE_PUBLIC, ERR_R_EC_LIB); @@ -294,6 +403,10 @@ int gost2001_compute_public(EC_KEY *ec) } pub_key = EC_POINT_new(group); + if(!pub_key) { + GOSTerr(GOST_F_GOST2001_COMPUTE_PUBLIC, ERR_R_MALLOC_FAILURE); + goto err; + } if (!EC_POINT_mul(group, pub_key, priv_key, NULL, NULL, ctx)) { GOSTerr(GOST_F_GOST2001_COMPUTE_PUBLIC, ERR_R_EC_LIB); goto err; @@ -304,9 +417,11 @@ int gost2001_compute_public(EC_KEY *ec) } ok = 256; err: - BN_CTX_end(ctx); - EC_POINT_free(pub_key); - BN_CTX_free(ctx); + if (pub_key) EC_POINT_free(pub_key); + if (ctx) { + BN_CTX_end(ctx); + BN_CTX_free(ctx); + } return ok; } @@ -320,7 +435,13 @@ int gost2001_keygen(EC_KEY *ec) { BIGNUM *order = BN_new(), *d = BN_new(); const EC_GROUP *group = EC_KEY_get0_group(ec); - EC_GROUP_get_order(group, order, NULL); + + if(!group || !EC_GROUP_get_order(group, order, NULL)) { + GOSTerr(GOST_F_GOST2001_KEYGEN, ERR_R_INTERNAL_ERROR); + BN_free(d); + BN_free(order); + return 0; + } do { if (!BN_rand_range(d, order)) { @@ -332,7 +453,13 @@ int gost2001_keygen(EC_KEY *ec) } } while (BN_is_zero(d)); - EC_KEY_set_private_key(ec, d); + + if(!EC_KEY_set_private_key(ec, d)) { + GOSTerr(GOST_F_GOST2001_KEYGEN, ERR_R_INTERNAL_ERROR); + BN_free(d); + BN_free(order); + return 0; + } BN_free(d); BN_free(order); return gost2001_compute_public(ec); diff --git a/deps/openssl/openssl/engines/ccgost/gost94_keyx.c b/deps/openssl/openssl/engines/ccgost/gost94_keyx.c index 85f4bc89982d3f..ce57f17cbf32f1 100644 --- a/deps/openssl/openssl/engines/ccgost/gost94_keyx.c +++ b/deps/openssl/openssl/engines/ccgost/gost94_keyx.c @@ -104,6 +104,7 @@ int pkey_GOST94cp_encrypt(EVP_PKEY_CTX *ctx, unsigned char *out, struct gost_pmeth_data *data = EVP_PKEY_CTX_get_data(ctx); gost_ctx cctx; int key_is_ephemeral = 1; + int tmp_outlen; EVP_PKEY *mykey = EVP_PKEY_CTX_get0_peerkey(ctx); /* Do not use vizir cipher parameters with cryptopro */ @@ -174,12 +175,13 @@ int pkey_GOST94cp_encrypt(EVP_PKEY_CTX *ctx, unsigned char *out, } ASN1_OBJECT_free(gkt->key_agreement_info->cipher); gkt->key_agreement_info->cipher = OBJ_nid2obj(param->nid); - *outlen = i2d_GOST_KEY_TRANSPORT(gkt, out ? &out : NULL); - if (*outlen <= 0) { + tmp_outlen = i2d_GOST_KEY_TRANSPORT(gkt, out ? &out : NULL); + if (tmp_outlen <= 0) { GOSTerr(GOST_F_PKEY_GOST94CP_ENCRYPT, GOST_R_ERROR_PACKING_KEY_TRANSPORT_INFO); goto err; } + *outlen = tmp_outlen; if (!key_is_ephemeral) { /* Set control "public key from client certificate used" */ if (EVP_PKEY_CTX_ctrl(ctx, -1, -1, EVP_PKEY_CTRL_PEER_KEY, 3, NULL) <= diff --git a/deps/openssl/openssl/engines/ccgost/gost_ameth.c b/deps/openssl/openssl/engines/ccgost/gost_ameth.c index 713a0face51977..b7c5354c1ae293 100644 --- a/deps/openssl/openssl/engines/ccgost/gost_ameth.c +++ b/deps/openssl/openssl/engines/ccgost/gost_ameth.c @@ -115,7 +115,10 @@ static int decode_gost_algor_params(EVP_PKEY *pkey, X509_ALGOR *palg) } param_nid = OBJ_obj2nid(gkp->key_params); GOST_KEY_PARAMS_free(gkp); - EVP_PKEY_set_type(pkey, pkey_nid); + if(!EVP_PKEY_set_type(pkey, pkey_nid)) { + GOSTerr(GOST_F_DECODE_GOST_ALGOR_PARAMS, ERR_R_INTERNAL_ERROR); + return 0; + } switch (pkey_nid) { case NID_id_GostR3410_94: { @@ -552,9 +555,19 @@ static int param_copy_gost01(EVP_PKEY *to, const EVP_PKEY *from) } if (!eto) { eto = EC_KEY_new(); - EVP_PKEY_assign(to, EVP_PKEY_base_id(from), eto); + if(!eto) { + GOSTerr(GOST_F_PARAM_COPY_GOST01, ERR_R_MALLOC_FAILURE); + return 0; + } + if(!EVP_PKEY_assign(to, EVP_PKEY_base_id(from), eto)) { + GOSTerr(GOST_F_PARAM_COPY_GOST01, ERR_R_INTERNAL_ERROR); + return 0; + } + } + if(!EC_KEY_set_group(eto, EC_KEY_get0_group(efrom))) { + GOSTerr(GOST_F_PARAM_COPY_GOST01, ERR_R_INTERNAL_ERROR); + return 0; } - EC_KEY_set_group(eto, EC_KEY_get0_group(efrom)); if (EC_KEY_get0_private_key(eto)) { gost2001_compute_public(eto); } @@ -729,8 +742,21 @@ static int pub_encode_gost01(X509_PUBKEY *pub, const EVP_PKEY *pk) } X = BN_new(); Y = BN_new(); - EC_POINT_get_affine_coordinates_GFp(EC_KEY_get0_group(ec), - pub_key, X, Y, NULL); + if(!X || !Y) { + GOSTerr(GOST_F_PUB_ENCODE_GOST01, ERR_R_MALLOC_FAILURE); + if(X) BN_free(X); + if(Y) BN_free(Y); + BN_free(order); + return 0; + } + if(!EC_POINT_get_affine_coordinates_GFp(EC_KEY_get0_group(ec), + pub_key, X, Y, NULL)) { + GOSTerr(GOST_F_PUB_ENCODE_GOST01, ERR_R_INTERNAL_ERROR); + BN_free(X); + BN_free(Y); + BN_free(order); + return 0; + } data_len = 2 * BN_num_bytes(order); BN_free(order); databuf = OPENSSL_malloc(data_len); diff --git a/deps/openssl/openssl/engines/ccgost/gost_pmeth.c b/deps/openssl/openssl/engines/ccgost/gost_pmeth.c index a2c7cf27d8efef..4a79a85cfc6203 100644 --- a/deps/openssl/openssl/engines/ccgost/gost_pmeth.c +++ b/deps/openssl/openssl/engines/ccgost/gost_pmeth.c @@ -510,7 +510,7 @@ static int pkey_gost_mac_ctrl_str(EVP_PKEY_CTX *ctx, long keylen; int ret; unsigned char *keybuf = string_to_hex(value, &keylen); - if (keylen != 32) { + if (!keybuf || keylen != 32) { GOSTerr(GOST_F_PKEY_GOST_MAC_CTRL_STR, GOST_R_INVALID_MAC_KEY_LENGTH); OPENSSL_free(keybuf); diff --git a/deps/openssl/openssl/engines/ccgost/gost_sign.c b/deps/openssl/openssl/engines/ccgost/gost_sign.c index 0116e47400b758..07ad921ab530fd 100644 --- a/deps/openssl/openssl/engines/ccgost/gost_sign.c +++ b/deps/openssl/openssl/engines/ccgost/gost_sign.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "gost_params.h" #include "gost_lcl.h" @@ -52,11 +53,16 @@ void dump_dsa_sig(const char *message, DSA_SIG *sig) DSA_SIG *gost_do_sign(const unsigned char *dgst, int dlen, DSA *dsa) { BIGNUM *k = NULL, *tmp = NULL, *tmp2 = NULL; - DSA_SIG *newsig = DSA_SIG_new(); + DSA_SIG *newsig = NULL, *ret = NULL; BIGNUM *md = hashsum2bn(dgst); /* check if H(M) mod q is zero */ BN_CTX *ctx = BN_CTX_new(); + if(!ctx) { + GOSTerr(GOST_F_GOST_DO_SIGN, ERR_R_MALLOC_FAILURE); + goto err; + } BN_CTX_start(ctx); + newsig = DSA_SIG_new(); if (!newsig) { GOSTerr(GOST_F_GOST_DO_SIGN, GOST_R_NO_MEMORY); goto err; @@ -64,6 +70,10 @@ DSA_SIG *gost_do_sign(const unsigned char *dgst, int dlen, DSA *dsa) tmp = BN_CTX_get(ctx); k = BN_CTX_get(ctx); tmp2 = BN_CTX_get(ctx); + if(!tmp || !k || !tmp2) { + GOSTerr(GOST_F_GOST_DO_SIGN, ERR_R_MALLOC_FAILURE); + goto err; + } BN_mod(tmp, md, dsa->q, ctx); if (BN_is_zero(tmp)) { BN_one(md); @@ -76,24 +86,41 @@ DSA_SIG *gost_do_sign(const unsigned char *dgst, int dlen, DSA *dsa) BN_rand_range(k, dsa->q); /* generate r = (a^x mod p) mod q */ BN_mod_exp(tmp, dsa->g, k, dsa->p, ctx); - if (!(newsig->r)) + if (!(newsig->r)) { newsig->r = BN_new(); + if(!newsig->r) { + GOSTerr(GOST_F_GOST_DO_SIGN, ERR_R_MALLOC_FAILURE); + goto err; + } + } BN_mod(newsig->r, tmp, dsa->q, ctx); } while (BN_is_zero(newsig->r)); /* generate s = (xr + k(Hm)) mod q */ BN_mod_mul(tmp, dsa->priv_key, newsig->r, dsa->q, ctx); BN_mod_mul(tmp2, k, md, dsa->q, ctx); - if (!newsig->s) + if (!newsig->s) { newsig->s = BN_new(); + if(!newsig->s) { + GOSTerr(GOST_F_GOST_DO_SIGN, ERR_R_MALLOC_FAILURE); + goto err; + } + } BN_mod_add(newsig->s, tmp, tmp2, dsa->q, ctx); } while (BN_is_zero(newsig->s)); + + ret = newsig; err: BN_free(md); - BN_CTX_end(ctx); - BN_CTX_free(ctx); - return newsig; + if(ctx) { + BN_CTX_end(ctx); + BN_CTX_free(ctx); + } + if(!ret && newsig) { + DSA_SIG_free(newsig); + } + return ret; } /* @@ -135,17 +162,21 @@ int pack_sign_cp(DSA_SIG *s, int order, unsigned char *sig, size_t *siglen) int gost_do_verify(const unsigned char *dgst, int dgst_len, DSA_SIG *sig, DSA *dsa) { - BIGNUM *md, *tmp = NULL; + BIGNUM *md = NULL, *tmp = NULL; BIGNUM *q2 = NULL; BIGNUM *u = NULL, *v = NULL, *z1 = NULL, *z2 = NULL; BIGNUM *tmp2 = NULL, *tmp3 = NULL; - int ok; + int ok = 0; BN_CTX *ctx = BN_CTX_new(); + if(!ctx) { + GOSTerr(GOST_F_GOST_DO_VERIFY, ERR_R_MALLOC_FAILURE); + goto err; + } BN_CTX_start(ctx); if (BN_cmp(sig->s, dsa->q) >= 1 || BN_cmp(sig->r, dsa->q) >= 1) { GOSTerr(GOST_F_GOST_DO_VERIFY, GOST_R_SIGNATURE_PARTS_GREATER_THAN_Q); - return 0; + goto err; } md = hashsum2bn(dgst); @@ -157,6 +188,10 @@ int gost_do_verify(const unsigned char *dgst, int dgst_len, tmp2 = BN_CTX_get(ctx); tmp3 = BN_CTX_get(ctx); u = BN_CTX_get(ctx); + if(!tmp || !v || !q2 || !z1 || !z2 || !tmp2 || !tmp3 || !u) { + GOSTerr(GOST_F_GOST_DO_VERIFY, ERR_R_MALLOC_FAILURE); + goto err; + } BN_mod(tmp, md, dsa->q, ctx); if (BN_is_zero(tmp)) { @@ -172,15 +207,18 @@ int gost_do_verify(const unsigned char *dgst, int dgst_len, BN_mod_exp(tmp2, dsa->pub_key, z2, dsa->p, ctx); BN_mod_mul(tmp3, tmp, tmp2, dsa->p, ctx); BN_mod(u, tmp3, dsa->q, ctx); - ok = BN_cmp(u, sig->r); + ok = (BN_cmp(u, sig->r) == 0); - BN_free(md); - BN_CTX_end(ctx); - BN_CTX_free(ctx); - if (ok != 0) { + if (!ok) { GOSTerr(GOST_F_GOST_DO_VERIFY, GOST_R_SIGNATURE_MISMATCH); } - return (ok == 0); +err: + if(md) BN_free(md); + if(ctx) { + BN_CTX_end(ctx); + BN_CTX_free(ctx); + } + return ok; } /* @@ -190,13 +228,24 @@ int gost_do_verify(const unsigned char *dgst, int dgst_len, int gost94_compute_public(DSA *dsa) { /* Now fill algorithm parameters with correct values */ - BN_CTX *ctx = BN_CTX_new(); + BN_CTX *ctx; if (!dsa->g) { GOSTerr(GOST_F_GOST94_COMPUTE_PUBLIC, GOST_R_KEY_IS_NOT_INITALIZED); return 0; } - /* Compute public key y = a^x mod p */ + ctx = BN_CTX_new(); + if(!ctx) { + GOSTerr(GOST_F_GOST94_COMPUTE_PUBLIC, ERR_R_MALLOC_FAILURE); + return 0; + } + dsa->pub_key = BN_new(); + if(!dsa->pub_key) { + GOSTerr(GOST_F_GOST94_COMPUTE_PUBLIC, ERR_R_MALLOC_FAILURE); + BN_CTX_free(ctx); + return 0; + } + /* Compute public key y = a^x mod p */ BN_mod_exp(dsa->pub_key, dsa->g, dsa->priv_key, dsa->p, ctx); BN_CTX_free(ctx); return 1; @@ -243,6 +292,10 @@ int fill_GOST94_params(DSA *dsa, int nid) int gost_sign_keygen(DSA *dsa) { dsa->priv_key = BN_new(); + if(!dsa->priv_key) { + GOSTerr(GOST_F_GOST_SIGN_KEYGEN, ERR_R_MALLOC_FAILURE); + return 0; + } BN_rand_range(dsa->priv_key, dsa->q); return gost94_compute_public(dsa); } diff --git a/deps/openssl/openssl/engines/e_sureware.c b/deps/openssl/openssl/engines/e_sureware.c index 1005dfc903d5e6..8a23763f75d920 100644 --- a/deps/openssl/openssl/engines/e_sureware.c +++ b/deps/openssl/openssl/engines/e_sureware.c @@ -712,10 +712,12 @@ static EVP_PKEY *sureware_load_public(ENGINE *e, const char *key_id, /* set public big nums */ rsatmp->e = BN_new(); rsatmp->n = BN_new(); + if(!rsatmp->e || !rsatmp->n) + goto err; bn_expand2(rsatmp->e, el / sizeof(BN_ULONG)); bn_expand2(rsatmp->n, el / sizeof(BN_ULONG)); - if (!rsatmp->e || rsatmp->e->dmax != (int)(el / sizeof(BN_ULONG)) || - !rsatmp->n || rsatmp->n->dmax != (int)(el / sizeof(BN_ULONG))) + if (rsatmp->e->dmax != (int)(el / sizeof(BN_ULONG)) || + rsatmp->n->dmax != (int)(el / sizeof(BN_ULONG))) goto err; ret = p_surewarehk_Load_Rsa_Pubkey(msg, key_id, el, (unsigned long *)rsatmp->n->d, @@ -752,15 +754,16 @@ static EVP_PKEY *sureware_load_public(ENGINE *e, const char *key_id, dsatmp->p = BN_new(); dsatmp->q = BN_new(); dsatmp->g = BN_new(); + if(!dsatmp->pub_key || !dsatmp->p || !dsatmp->q || !dsatmp->g) + goto err; bn_expand2(dsatmp->pub_key, el / sizeof(BN_ULONG)); bn_expand2(dsatmp->p, el / sizeof(BN_ULONG)); bn_expand2(dsatmp->q, 20 / sizeof(BN_ULONG)); bn_expand2(dsatmp->g, el / sizeof(BN_ULONG)); - if (!dsatmp->pub_key - || dsatmp->pub_key->dmax != (int)(el / sizeof(BN_ULONG)) - || !dsatmp->p || dsatmp->p->dmax != (int)(el / sizeof(BN_ULONG)) - || !dsatmp->q || dsatmp->q->dmax != 20 / sizeof(BN_ULONG) - || !dsatmp->g || dsatmp->g->dmax != (int)(el / sizeof(BN_ULONG))) + if (dsatmp->pub_key->dmax != (int)(el / sizeof(BN_ULONG)) + || dsatmp->p->dmax != (int)(el / sizeof(BN_ULONG)) + || dsatmp->q->dmax != 20 / sizeof(BN_ULONG) + || dsatmp->g->dmax != (int)(el / sizeof(BN_ULONG))) goto err; ret = p_surewarehk_Load_Dsa_Pubkey(msg, key_id, el, @@ -1038,10 +1041,12 @@ static DSA_SIG *surewarehk_dsa_do_sign(const unsigned char *from, int flen, } psign->r = BN_new(); psign->s = BN_new(); + if(!psign->r || !psign->s) + goto err; bn_expand2(psign->r, 20 / sizeof(BN_ULONG)); bn_expand2(psign->s, 20 / sizeof(BN_ULONG)); - if (!psign->r || psign->r->dmax != 20 / sizeof(BN_ULONG) || - !psign->s || psign->s->dmax != 20 / sizeof(BN_ULONG)) + if (psign->r->dmax != 20 / sizeof(BN_ULONG) || + psign->s->dmax != 20 / sizeof(BN_ULONG)) goto err; ret = p_surewarehk_Dsa_Sign(msg, flen, from, (unsigned long *)psign->r->d, @@ -1070,9 +1075,9 @@ static int surewarehk_modexp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, char msg[64] = "ENGINE_modexp"; if (!p_surewarehk_Mod_Exp) { SUREWAREerr(SUREWARE_F_SUREWAREHK_MODEXP, ENGINE_R_NOT_INITIALISED); - } else { + } else if (r) { bn_expand2(r, m->top); - if (r && r->dmax == m->top) { + if (r->dmax == m->top) { /* do it */ ret = p_surewarehk_Mod_Exp(msg, m->top * sizeof(BN_ULONG), diff --git a/deps/openssl/openssl/openssl.spec b/deps/openssl/openssl/openssl.spec index 909f2bfabf37ee..01f06171ccd337 100644 --- a/deps/openssl/openssl/openssl.spec +++ b/deps/openssl/openssl/openssl.spec @@ -6,7 +6,7 @@ Release: 1 Summary: Secure Sockets Layer and cryptography libraries and tools Name: openssl -Version: 1.0.2a +Version: 1.0.2b Source0: ftp://ftp.openssl.org/source/%{name}-%{version}.tar.gz License: OpenSSL Group: System Environment/Libraries diff --git a/deps/openssl/openssl/ssl/Makefile b/deps/openssl/openssl/ssl/Makefile index a7bd4ee143c936..42f1af5c8e932e 100644 --- a/deps/openssl/openssl/ssl/Makefile +++ b/deps/openssl/openssl/ssl/Makefile @@ -89,12 +89,13 @@ tests: lint: lint -DLINT $(INCLUDES) $(SRC)>fluff -depend: - @if [ -z "$(THIS)" ]; then \ - $(MAKE) -f $(TOP)/Makefile reflect THIS=$@; \ - else \ - $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC); \ - fi +update: local_depend + @if [ -z "$(THIS)" ]; then $(MAKE) -f $(TOP)/Makefile reflect THIS=$@; fi + +depend: local_depend + @if [ -z "$(THIS)" ]; then $(MAKE) -f $(TOP)/Makefile reflect THIS=$@; fi +local_depend: + @[ -z "$(THIS)" ] || $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC) dclean: $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKEFILE) >Makefile.new @@ -486,26 +487,27 @@ s2_pkt.o: ../include/openssl/ssl3.h ../include/openssl/stack.h s2_pkt.o: ../include/openssl/symhacks.h ../include/openssl/tls1.h s2_pkt.o: ../include/openssl/x509.h ../include/openssl/x509_vfy.h s2_pkt.c s2_pkt.o: ssl_locl.h -s2_srvr.o: ../e_os.h ../include/openssl/asn1.h ../include/openssl/bio.h -s2_srvr.o: ../include/openssl/buffer.h ../include/openssl/comp.h -s2_srvr.o: ../include/openssl/crypto.h ../include/openssl/dsa.h -s2_srvr.o: ../include/openssl/dtls1.h ../include/openssl/e_os2.h -s2_srvr.o: ../include/openssl/ec.h ../include/openssl/ecdh.h -s2_srvr.o: ../include/openssl/ecdsa.h ../include/openssl/err.h -s2_srvr.o: ../include/openssl/evp.h ../include/openssl/hmac.h -s2_srvr.o: ../include/openssl/kssl.h ../include/openssl/lhash.h -s2_srvr.o: ../include/openssl/obj_mac.h ../include/openssl/objects.h -s2_srvr.o: ../include/openssl/opensslconf.h ../include/openssl/opensslv.h -s2_srvr.o: ../include/openssl/ossl_typ.h ../include/openssl/pem.h -s2_srvr.o: ../include/openssl/pem2.h ../include/openssl/pkcs7.h -s2_srvr.o: ../include/openssl/pqueue.h ../include/openssl/rand.h -s2_srvr.o: ../include/openssl/rsa.h ../include/openssl/safestack.h -s2_srvr.o: ../include/openssl/sha.h ../include/openssl/srtp.h -s2_srvr.o: ../include/openssl/ssl.h ../include/openssl/ssl2.h -s2_srvr.o: ../include/openssl/ssl23.h ../include/openssl/ssl3.h -s2_srvr.o: ../include/openssl/stack.h ../include/openssl/symhacks.h -s2_srvr.o: ../include/openssl/tls1.h ../include/openssl/x509.h -s2_srvr.o: ../include/openssl/x509_vfy.h s2_srvr.c ssl_locl.h +s2_srvr.o: ../crypto/constant_time_locl.h ../e_os.h ../include/openssl/asn1.h +s2_srvr.o: ../include/openssl/bio.h ../include/openssl/buffer.h +s2_srvr.o: ../include/openssl/comp.h ../include/openssl/crypto.h +s2_srvr.o: ../include/openssl/dsa.h ../include/openssl/dtls1.h +s2_srvr.o: ../include/openssl/e_os2.h ../include/openssl/ec.h +s2_srvr.o: ../include/openssl/ecdh.h ../include/openssl/ecdsa.h +s2_srvr.o: ../include/openssl/err.h ../include/openssl/evp.h +s2_srvr.o: ../include/openssl/hmac.h ../include/openssl/kssl.h +s2_srvr.o: ../include/openssl/lhash.h ../include/openssl/obj_mac.h +s2_srvr.o: ../include/openssl/objects.h ../include/openssl/opensslconf.h +s2_srvr.o: ../include/openssl/opensslv.h ../include/openssl/ossl_typ.h +s2_srvr.o: ../include/openssl/pem.h ../include/openssl/pem2.h +s2_srvr.o: ../include/openssl/pkcs7.h ../include/openssl/pqueue.h +s2_srvr.o: ../include/openssl/rand.h ../include/openssl/rsa.h +s2_srvr.o: ../include/openssl/safestack.h ../include/openssl/sha.h +s2_srvr.o: ../include/openssl/srtp.h ../include/openssl/ssl.h +s2_srvr.o: ../include/openssl/ssl2.h ../include/openssl/ssl23.h +s2_srvr.o: ../include/openssl/ssl3.h ../include/openssl/stack.h +s2_srvr.o: ../include/openssl/symhacks.h ../include/openssl/tls1.h +s2_srvr.o: ../include/openssl/x509.h ../include/openssl/x509_vfy.h s2_srvr.c +s2_srvr.o: ssl_locl.h s3_both.o: ../e_os.h ../include/openssl/asn1.h ../include/openssl/bio.h s3_both.o: ../include/openssl/buffer.h ../include/openssl/comp.h s3_both.o: ../include/openssl/crypto.h ../include/openssl/dsa.h diff --git a/deps/openssl/openssl/ssl/d1_both.c b/deps/openssl/openssl/ssl/d1_both.c index 21048003bcd356..b4ee7abe27dcb7 100644 --- a/deps/openssl/openssl/ssl/d1_both.c +++ b/deps/openssl/openssl/ssl/d1_both.c @@ -489,6 +489,12 @@ long dtls1_get_message(SSL *s, int st1, int stn, int mt, long max, int *ok) return i; } + if (mt >= 0 && s->s3->tmp.message_type != mt) { + al = SSL_AD_UNEXPECTED_MESSAGE; + SSLerr(SSL_F_DTLS1_GET_MESSAGE, SSL_R_UNEXPECTED_MESSAGE); + goto f_err; + } + p = (unsigned char *)s->init_buf->data; msg_len = msg_hdr->msg_len; @@ -873,6 +879,20 @@ dtls1_get_message_fragment(SSL *s, int st1, int stn, long max, int *ok) /* parse the message fragment header */ dtls1_get_message_header(wire, &msg_hdr); + len = msg_hdr.msg_len; + frag_off = msg_hdr.frag_off; + frag_len = msg_hdr.frag_len; + + /* + * We must have at least frag_len bytes left in the record to be read. + * Fragments must not span records. + */ + if (frag_len > s->s3->rrec.length) { + al = SSL3_AD_ILLEGAL_PARAMETER; + SSLerr(SSL_F_DTLS1_GET_MESSAGE_FRAGMENT, SSL_R_BAD_LENGTH); + goto f_err; + } + /* * if this is a future (or stale) message it gets buffered * (or dropped)--no further processing at this time @@ -883,10 +903,6 @@ dtls1_get_message_fragment(SSL *s, int st1, int stn, long max, int *ok) && !(s->d1->listen && msg_hdr.seq == 1)) return dtls1_process_out_of_seq_message(s, &msg_hdr, ok); - len = msg_hdr.msg_len; - frag_off = msg_hdr.frag_off; - frag_len = msg_hdr.frag_len; - if (frag_len && frag_len < len) return dtls1_reassemble_fragment(s, &msg_hdr, ok); @@ -917,17 +933,16 @@ dtls1_get_message_fragment(SSL *s, int st1, int stn, long max, int *ok) if ((al = dtls1_preprocess_fragment(s, &msg_hdr, max))) goto f_err; - /* XDTLS: ressurect this when restart is in place */ - s->state = stn; - if (frag_len > 0) { unsigned char *p = (unsigned char *)s->init_buf->data + DTLS1_HM_HEADER_LENGTH; i = s->method->ssl_read_bytes(s, SSL3_RT_HANDSHAKE, &p[frag_off], frag_len, 0); + /* - * XDTLS: fix this--message fragments cannot span multiple packets + * This shouldn't ever fail due to NBIO because we already checked + * that we have enough data in the record */ if (i <= 0) { s->rwstate = SSL_READING; @@ -948,6 +963,7 @@ dtls1_get_message_fragment(SSL *s, int st1, int stn, long max, int *ok) } *ok = 1; + s->state = stn; /* * Note that s->init_num is *not* used as current offset in @@ -1420,7 +1436,10 @@ int dtls1_process_heartbeat(SSL *s) memcpy(bp, pl, payload); bp += payload; /* Random padding */ - RAND_pseudo_bytes(bp, padding); + if (RAND_pseudo_bytes(bp, padding) < 0) { + OPENSSL_free(buffer); + return -1; + } r = dtls1_write_bytes(s, TLS1_RT_HEARTBEAT, buffer, write_length); @@ -1454,7 +1473,7 @@ int dtls1_process_heartbeat(SSL *s) int dtls1_heartbeat(SSL *s) { unsigned char *buf, *p; - int ret; + int ret = -1; unsigned int payload = 18; /* Sequence number + random bytes */ unsigned int padding = 16; /* Use minimum padding */ @@ -1502,10 +1521,12 @@ int dtls1_heartbeat(SSL *s) /* Sequence number */ s2n(s->tlsext_hb_seq, p); /* 16 random bytes */ - RAND_pseudo_bytes(p, 16); + if (RAND_pseudo_bytes(p, 16) < 0) + goto err; p += 16; /* Random padding */ - RAND_pseudo_bytes(p, padding); + if (RAND_pseudo_bytes(p, padding) < 0) + goto err; ret = dtls1_write_bytes(s, TLS1_RT_HEARTBEAT, buf, 3 + payload + padding); if (ret >= 0) { @@ -1518,6 +1539,7 @@ int dtls1_heartbeat(SSL *s) s->tlsext_hb_pending = 1; } +err: OPENSSL_free(buf); return ret; diff --git a/deps/openssl/openssl/ssl/d1_clnt.c b/deps/openssl/openssl/ssl/d1_clnt.c index 1858263e19c1ae..4c2ccbf5ae085f 100644 --- a/deps/openssl/openssl/ssl/d1_clnt.c +++ b/deps/openssl/openssl/ssl/d1_clnt.c @@ -228,6 +228,7 @@ int dtls1_connect(SSL *s) (s->version & 0xff00) != (DTLS1_BAD_VER & 0xff00)) { SSLerr(SSL_F_DTLS1_CONNECT, ERR_R_INTERNAL_ERROR); ret = -1; + s->state = SSL_ST_ERR; goto end; } @@ -237,10 +238,12 @@ int dtls1_connect(SSL *s) if (s->init_buf == NULL) { if ((buf = BUF_MEM_new()) == NULL) { ret = -1; + s->state = SSL_ST_ERR; goto end; } if (!BUF_MEM_grow(buf, SSL3_RT_MAX_PLAIN_LENGTH)) { ret = -1; + s->state = SSL_ST_ERR; goto end; } s->init_buf = buf; @@ -249,12 +252,14 @@ int dtls1_connect(SSL *s) if (!ssl3_setup_buffers(s)) { ret = -1; + s->state = SSL_ST_ERR; goto end; } /* setup buffing BIO */ if (!ssl_init_wbio_buffer(s, 0)) { ret = -1; + s->state = SSL_ST_ERR; goto end; } @@ -433,6 +438,7 @@ int dtls1_connect(SSL *s) */ if (!ssl3_check_cert_and_algorithm(s)) { ret = -1; + s->state = SSL_ST_ERR; goto end; } break; @@ -564,6 +570,7 @@ int dtls1_connect(SSL *s) #endif if (!s->method->ssl3_enc->setup_key_block(s)) { ret = -1; + s->state = SSL_ST_ERR; goto end; } @@ -571,6 +578,7 @@ int dtls1_connect(SSL *s) SSL3_CHANGE_CIPHER_CLIENT_WRITE)) { ret = -1; + s->state = SSL_ST_ERR; goto end; } #ifndef OPENSSL_NO_SCTP @@ -751,6 +759,7 @@ int dtls1_connect(SSL *s) goto end; /* break; */ + case SSL_ST_ERR: default: SSLerr(SSL_F_DTLS1_CONNECT, SSL_R_UNKNOWN_STATE); ret = -1; @@ -842,5 +851,6 @@ static int dtls1_get_hello_verify(SSL *s) f_err: ssl3_send_alert(s, SSL3_AL_FATAL, al); + s->state = SSL_ST_ERR; return -1; } diff --git a/deps/openssl/openssl/ssl/d1_pkt.c b/deps/openssl/openssl/ssl/d1_pkt.c index 940ca692790271..fe30ec7d004235 100644 --- a/deps/openssl/openssl/ssl/d1_pkt.c +++ b/deps/openssl/openssl/ssl/d1_pkt.c @@ -1069,7 +1069,7 @@ int dtls1_read_bytes(SSL *s, int type, unsigned char *buf, int len, int peek) (s->d1->handshake_fragment[3] != 0)) { al = SSL_AD_DECODE_ERROR; SSLerr(SSL_F_DTLS1_READ_BYTES, SSL_R_BAD_HELLO_REQUEST); - goto err; + goto f_err; } /* diff --git a/deps/openssl/openssl/ssl/d1_srvr.c b/deps/openssl/openssl/ssl/d1_srvr.c index eafa0127b7c758..655333a25210d7 100644 --- a/deps/openssl/openssl/ssl/d1_srvr.c +++ b/deps/openssl/openssl/ssl/d1_srvr.c @@ -240,11 +240,13 @@ int dtls1_accept(SSL *s) if (s->init_buf == NULL) { if ((buf = BUF_MEM_new()) == NULL) { ret = -1; + s->state = SSL_ST_ERR; goto end; } if (!BUF_MEM_grow(buf, SSL3_RT_MAX_PLAIN_LENGTH)) { BUF_MEM_free(buf); ret = -1; + s->state = SSL_ST_ERR; goto end; } s->init_buf = buf; @@ -252,6 +254,7 @@ int dtls1_accept(SSL *s) if (!ssl3_setup_buffers(s)) { ret = -1; + s->state = SSL_ST_ERR; goto end; } @@ -273,6 +276,7 @@ int dtls1_accept(SSL *s) #endif if (!ssl_init_wbio_buffer(s, 1)) { ret = -1; + s->state = SSL_ST_ERR; goto end; } @@ -486,7 +490,7 @@ int dtls1_accept(SSL *s) #ifndef OPENSSL_NO_PSK || ((alg_k & SSL_kPSK) && s->ctx->psk_identity_hint) #endif - || (alg_k & (SSL_kEDH | SSL_kDHr | SSL_kDHd)) + || (alg_k & SSL_kDHE) || (alg_k & SSL_kEECDH) || ((alg_k & SSL_kRSA) && (s->cert->pkeys[SSL_PKEY_RSA_ENC].privatekey == NULL @@ -661,11 +665,14 @@ int dtls1_accept(SSL *s) */ if (!s->s3->handshake_buffer) { SSLerr(SSL_F_DTLS1_ACCEPT, ERR_R_INTERNAL_ERROR); + s->state = SSL_ST_ERR; return -1; } s->s3->flags |= TLS1_FLAGS_KEEP_HANDSHAKE; - if (!ssl3_digest_cached_records(s)) + if (!ssl3_digest_cached_records(s)) { + s->state = SSL_ST_ERR; return -1; + } } else { s->state = SSL3_ST_SR_CERT_VRFY_A; s->init_num = 0; @@ -688,15 +695,6 @@ int dtls1_accept(SSL *s) case SSL3_ST_SR_CERT_VRFY_A: case SSL3_ST_SR_CERT_VRFY_B: - /* - * This *should* be the first time we enable CCS, but be - * extra careful about surrounding code changes. We need - * to set this here because we don't know if we're - * expecting a CertificateVerify or not. - */ - if (!s->s3->change_cipher_spec) - s->d1->change_cipher_spec_ok = 1; - /* we should decide if we expected this one */ ret = ssl3_get_cert_verify(s); if (ret <= 0) goto end; @@ -713,11 +711,10 @@ int dtls1_accept(SSL *s) case SSL3_ST_SR_FINISHED_A: case SSL3_ST_SR_FINISHED_B: /* - * Enable CCS for resumed handshakes. - * In a full handshake, we end up here through - * SSL3_ST_SR_CERT_VRFY_B, so change_cipher_spec_ok was - * already set. Receiving a CCS clears the flag, so make - * sure not to re-enable it to ban duplicates. + * Enable CCS. Receiving a CCS clears the flag, so make + * sure not to re-enable it to ban duplicates. This *should* be the + * first time we have received one - but we check anyway to be + * cautious. * s->s3->change_cipher_spec is set when a CCS is * processed in d1_pkt.c, and remains set until * the client's Finished message is read. @@ -767,6 +764,7 @@ int dtls1_accept(SSL *s) s->session->cipher = s->s3->tmp.new_cipher; if (!s->method->ssl3_enc->setup_key_block(s)) { ret = -1; + s->state = SSL_ST_ERR; goto end; } @@ -795,6 +793,7 @@ int dtls1_accept(SSL *s) SSL3_CHANGE_CIPHER_SERVER_WRITE)) { ret = -1; + s->state = SSL_ST_ERR; goto end; } @@ -875,6 +874,7 @@ int dtls1_accept(SSL *s) goto end; /* break; */ + case SSL_ST_ERR: default: SSLerr(SSL_F_DTLS1_ACCEPT, SSL_R_UNKNOWN_STATE); ret = -1; @@ -933,6 +933,7 @@ int dtls1_send_hello_verify_request(SSL *s) &(s->d1->cookie_len)) == 0) { SSLerr(SSL_F_DTLS1_SEND_HELLO_VERIFY_REQUEST, ERR_R_INTERNAL_ERROR); + s->state = SSL_ST_ERR; return 0; } diff --git a/deps/openssl/openssl/ssl/s2_pkt.c b/deps/openssl/openssl/ssl/s2_pkt.c index 614b9a35d2ea9f..7a6188813431f3 100644 --- a/deps/openssl/openssl/ssl/s2_pkt.c +++ b/deps/openssl/openssl/ssl/s2_pkt.c @@ -576,6 +576,20 @@ static int n_do_ssl_write(SSL *s, const unsigned char *buf, unsigned int len) s->s2->padding = p; s->s2->mac_data = &(s->s2->wbuf[3]); s->s2->wact_data = &(s->s2->wbuf[3 + mac_size]); + + /* + * It would be clearer to write this as follows: + * if (mac_size + len + p > SSL2_MAX_RECORD_LENGTH_2_BYTE_HEADER) + * However |len| is user input that could in theory be very large. We + * know |mac_size| and |p| are small, so to avoid any possibility of + * overflow we write it like this. + * + * In theory this should never fail because the logic above should have + * modified |len| if it is too big. But we are being cautious. + */ + if (len > (SSL2_MAX_RECORD_LENGTH_2_BYTE_HEADER - (mac_size + p))) { + return -1; + } /* we copy the data into s->s2->wbuf */ memcpy(s->s2->wact_data, buf, len); if (p) diff --git a/deps/openssl/openssl/ssl/s2_srvr.c b/deps/openssl/openssl/ssl/s2_srvr.c index 19bb48c9cd4647..4289272b73d3dc 100644 --- a/deps/openssl/openssl/ssl/s2_srvr.c +++ b/deps/openssl/openssl/ssl/s2_srvr.c @@ -111,6 +111,7 @@ #include "ssl_locl.h" #ifndef OPENSSL_NO_SSL2 +#include "../crypto/constant_time_locl.h" # include # include # include @@ -372,12 +373,15 @@ int ssl2_accept(SSL *s) static int get_client_master_key(SSL *s) { int is_export, i, n, keya; - unsigned int ek; + unsigned int num_encrypted_key_bytes, key_length; unsigned long len; unsigned char *p; const SSL_CIPHER *cp; const EVP_CIPHER *c; const EVP_MD *md; + unsigned char rand_premaster_secret[SSL_MAX_MASTER_KEY_LENGTH]; + unsigned char decrypt_good; + size_t j; p = (unsigned char *)s->init_buf->data; if (s->state == SSL2_ST_GET_CLIENT_MASTER_KEY_A) { @@ -465,12 +469,6 @@ static int get_client_master_key(SSL *s) return (0); } - if (s->session->cipher->algorithm2 & SSL2_CF_8_BYTE_ENC) { - is_export = 1; - ek = 8; - } else - ek = 5; - /* * The format of the CLIENT-MASTER-KEY message is * 1 byte message type @@ -484,12 +482,27 @@ static int get_client_master_key(SSL *s) * * If the cipher is an export cipher, then the encrypted key bytes * are a fixed portion of the total key (5 or 8 bytes). The size of - * this portion is in |ek|. If the cipher is not an export cipher, - * then the entire key material is encrypted (i.e., clear key length - * must be zero). + * this portion is in |num_encrypted_key_bytes|. If the cipher is not an + * export cipher, then the entire key material is encrypted (i.e., clear + * key length must be zero). */ - if ((!is_export && s->s2->tmp.clear != 0) || - (is_export && s->s2->tmp.clear + ek != (unsigned int)EVP_CIPHER_key_length(c))) { + key_length = (unsigned int)EVP_CIPHER_key_length(c); + if (key_length > SSL_MAX_MASTER_KEY_LENGTH) { + ssl2_return_error(s, SSL2_PE_UNDEFINED_ERROR); + SSLerr(SSL_F_GET_CLIENT_MASTER_KEY, ERR_R_INTERNAL_ERROR); + return -1; + } + + if (s->session->cipher->algorithm2 & SSL2_CF_8_BYTE_ENC) { + is_export = 1; + num_encrypted_key_bytes = 8; + } else if (is_export) { + num_encrypted_key_bytes = 5; + } else { + num_encrypted_key_bytes = key_length; + } + + if (s->s2->tmp.clear + num_encrypted_key_bytes != key_length) { ssl2_return_error(s, SSL2_PE_UNDEFINED_ERROR); SSLerr(SSL_F_GET_CLIENT_MASTER_KEY,SSL_R_BAD_LENGTH); return -1; @@ -499,64 +512,49 @@ static int get_client_master_key(SSL *s) * Decryption can't be expanding, so if we don't have enough encrypted * bytes to fit the key in the buffer, stop now. */ - if ((is_export && s->s2->tmp.enc < ek) || - (!is_export && s->s2->tmp.enc < (unsigned int)EVP_CIPHER_key_length(c))) { + if (s->s2->tmp.enc < num_encrypted_key_bytes) { ssl2_return_error(s,SSL2_PE_UNDEFINED_ERROR); SSLerr(SSL_F_GET_CLIENT_MASTER_KEY,SSL_R_LENGTH_TOO_SHORT); return -1; } + /* + * We must not leak whether a decryption failure occurs because of + * Bleichenbacher's attack on PKCS #1 v1.5 RSA padding (see RFC 2246, + * section 7.4.7.1). The code follows that advice of the TLS RFC and + * generates a random premaster secret for the case that the decrypt + * fails. See https://tools.ietf.org/html/rfc5246#section-7.4.7.1 + */ + + /* + * should be RAND_bytes, but we cannot work around a failure. + */ + if (RAND_pseudo_bytes(rand_premaster_secret, + (int)num_encrypted_key_bytes) <= 0) + return 0; + i = ssl_rsa_private_decrypt(s->cert, s->s2->tmp.enc, &(p[s->s2->tmp.clear]), &(p[s->s2->tmp.clear]), (s->s2->ssl2_rollback) ? RSA_SSLV23_PADDING : RSA_PKCS1_PADDING); - - /* bad decrypt */ -# if 1 + ERR_clear_error(); /* * If a bad decrypt, continue with protocol but with a random master * secret (Bleichenbacher attack) */ - if ((i < 0) || ((!is_export && i != EVP_CIPHER_key_length(c)) - || (is_export && i != (int)ek))) { - ERR_clear_error(); - if (is_export) - i = ek; - else - i = EVP_CIPHER_key_length(c); - if (RAND_pseudo_bytes(&p[s->s2->tmp.clear], i) <= 0) - return 0; - } -# else - if (i < 0) { - error = 1; - SSLerr(SSL_F_GET_CLIENT_MASTER_KEY, SSL_R_BAD_RSA_DECRYPT); - } - /* incorrect number of key bytes for non export cipher */ - else if ((!is_export && (i != EVP_CIPHER_key_length(c))) - || (is_export && ((i != ek) || (s->s2->tmp.clear + i != - EVP_CIPHER_key_length(c))))) { - error = 1; - SSLerr(SSL_F_GET_CLIENT_MASTER_KEY, SSL_R_WRONG_NUMBER_OF_KEY_BITS); - } - if (error) { - ssl2_return_error(s, SSL2_PE_UNDEFINED_ERROR); - return (-1); + decrypt_good = constant_time_eq_int_8(i, (int)num_encrypted_key_bytes); + for (j = 0; j < num_encrypted_key_bytes; j++) { + p[s->s2->tmp.clear + j] = + constant_time_select_8(decrypt_good, p[s->s2->tmp.clear + j], + rand_premaster_secret[j]); } -# endif - if (is_export) - i = EVP_CIPHER_key_length(c); + s->session->master_key_length = (int)key_length; + memcpy(s->session->master_key, p, key_length); + OPENSSL_cleanse(p, key_length); - if (i > SSL_MAX_MASTER_KEY_LENGTH) { - ssl2_return_error(s, SSL2_PE_UNDEFINED_ERROR); - SSLerr(SSL_F_GET_CLIENT_MASTER_KEY, ERR_R_INTERNAL_ERROR); - return -1; - } - s->session->master_key_length = i; - memcpy(s->session->master_key, p, (unsigned int)i); - return (1); + return 1; } static int get_client_hello(SSL *s) diff --git a/deps/openssl/openssl/ssl/s3_both.c b/deps/openssl/openssl/ssl/s3_both.c index c92fd721e2aad6..019e21cd02741c 100644 --- a/deps/openssl/openssl/ssl/s3_both.c +++ b/deps/openssl/openssl/ssl/s3_both.c @@ -168,7 +168,7 @@ int ssl3_send_finished(SSL *s, int a, int b, const char *sender, int slen) i = s->method->ssl3_enc->final_finish_mac(s, sender, slen, s->s3->tmp.finish_md); - if (i == 0) + if (i <= 0) return 0; s->s3->tmp.finish_md_len = i; memcpy(p, s->s3->tmp.finish_md, i); diff --git a/deps/openssl/openssl/ssl/s3_cbc.c b/deps/openssl/openssl/ssl/s3_cbc.c index f31dc046f3bf15..a0edcef90ada1b 100644 --- a/deps/openssl/openssl/ssl/s3_cbc.c +++ b/deps/openssl/openssl/ssl/s3_cbc.c @@ -149,7 +149,7 @@ int tls1_cbc_remove_padding(const SSL *s, */ if ((s->options & SSL_OP_TLS_BLOCK_PADDING_BUG) && !s->expand) { /* First packet is even in size, so check */ - if ((memcmp(s->s3->read_sequence, "\0\0\0\0\0\0\0\0", 8) == 0) && + if ((CRYPTO_memcmp(s->s3->read_sequence, "\0\0\0\0\0\0\0\0", 8) == 0) && !(padding_length & 1)) { s->s3->flags |= TLS1_FLAGS_TLS_PADDING_BUG; } @@ -639,12 +639,22 @@ void ssl3_cbc_digest_record(const EVP_MD_CTX *ctx, if (k > 0) { if (is_sslv3) { + unsigned overhang; + /* * The SSLv3 header is larger than a single block. overhang is * the number of bytes beyond a single block that the header - * consumes: either 7 bytes (SHA1) or 11 bytes (MD5). + * consumes: either 7 bytes (SHA1) or 11 bytes (MD5). There are no + * ciphersuites in SSLv3 that are not SHA1 or MD5 based and + * therefore we can be confident that the header_length will be + * greater than |md_block_size|. However we add a sanity check just + * in case */ - unsigned overhang = header_length - md_block_size; + if (header_length <= md_block_size) { + /* Should never happen */ + return; + } + overhang = header_length - md_block_size; md_transform(md_state.c, header); memcpy(first_block, header + md_block_size, overhang); memcpy(first_block + overhang, data, md_block_size - overhang); diff --git a/deps/openssl/openssl/ssl/s3_clnt.c b/deps/openssl/openssl/ssl/s3_clnt.c index 91053d59eab396..2346ce50c07015 100644 --- a/deps/openssl/openssl/ssl/s3_clnt.c +++ b/deps/openssl/openssl/ssl/s3_clnt.c @@ -168,6 +168,9 @@ #endif static int ca_dn_cmp(const X509_NAME *const *a, const X509_NAME *const *b); +#ifndef OPENSSL_NO_TLSEXT +static int ssl3_check_finished(SSL *s); +#endif #ifndef OPENSSL_NO_SSL3_METHOD static const SSL_METHOD *ssl3_get_client_method(int ver) @@ -235,6 +238,7 @@ int ssl3_connect(SSL *s) if ((s->version & 0xff00) != 0x0300) { SSLerr(SSL_F_SSL3_CONNECT, ERR_R_INTERNAL_ERROR); + s->state = SSL_ST_ERR; ret = -1; goto end; } @@ -245,10 +249,12 @@ int ssl3_connect(SSL *s) if (s->init_buf == NULL) { if ((buf = BUF_MEM_new()) == NULL) { ret = -1; + s->state = SSL_ST_ERR; goto end; } if (!BUF_MEM_grow(buf, SSL3_RT_MAX_PLAIN_LENGTH)) { ret = -1; + s->state = SSL_ST_ERR; goto end; } s->init_buf = buf; @@ -263,6 +269,7 @@ int ssl3_connect(SSL *s) /* setup buffing BIO */ if (!ssl_init_wbio_buffer(s, 0)) { ret = -1; + s->state = SSL_ST_ERR; goto end; } @@ -317,12 +324,24 @@ int ssl3_connect(SSL *s) break; case SSL3_ST_CR_CERT_A: case SSL3_ST_CR_CERT_B: +#ifndef OPENSSL_NO_TLSEXT + /* Noop (ret = 0) for everything but EAP-FAST. */ + ret = ssl3_check_finished(s); + if (ret < 0) + goto end; + if (ret == 1) { + s->hit = 1; + s->state = SSL3_ST_CR_FINISHED_A; + s->init_num = 0; + break; + } +#endif /* Check if it is anon DH/ECDH, SRP auth */ /* or PSK */ if (! (s->s3->tmp. new_cipher->algorithm_auth & (SSL_aNULL | SSL_aSRP)) -&& !(s->s3->tmp.new_cipher->algorithm_mkey & SSL_kPSK)) { + && !(s->s3->tmp.new_cipher->algorithm_mkey & SSL_kPSK)) { ret = ssl3_get_server_certificate(s); if (ret <= 0) goto end; @@ -358,6 +377,7 @@ int ssl3_connect(SSL *s) */ if (!ssl3_check_cert_and_algorithm(s)) { ret = -1; + s->state = SSL_ST_ERR; goto end; } break; @@ -381,6 +401,7 @@ int ssl3_connect(SSL *s) if ((ret = SRP_Calc_A_param(s)) <= 0) { SSLerr(SSL_F_SSL3_CONNECT, SSL_R_SRP_A_CALC); ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_INTERNAL_ERROR); + s->state = SSL_ST_ERR; goto end; } } @@ -472,6 +493,7 @@ int ssl3_connect(SSL *s) #endif if (!s->method->ssl3_enc->setup_key_block(s)) { ret = -1; + s->state = SSL_ST_ERR; goto end; } @@ -479,6 +501,7 @@ int ssl3_connect(SSL *s) SSL3_CHANGE_CIPHER_CLIENT_WRITE)) { ret = -1; + s->state = SSL_ST_ERR; goto end; } @@ -553,7 +576,8 @@ int ssl3_connect(SSL *s) case SSL3_ST_CR_FINISHED_A: case SSL3_ST_CR_FINISHED_B: - s->s3->flags |= SSL3_FLAGS_CCS_OK; + if (!s->s3->change_cipher_spec) + s->s3->flags |= SSL3_FLAGS_CCS_OK; ret = ssl3_get_finished(s, SSL3_ST_CR_FINISHED_A, SSL3_ST_CR_FINISHED_B); if (ret <= 0) @@ -612,6 +636,7 @@ int ssl3_connect(SSL *s) goto end; /* break; */ + case SSL_ST_ERR: default: SSLerr(SSL_F_SSL3_CONNECT, SSL_R_UNKNOWN_STATE); ret = -1; @@ -659,9 +684,17 @@ int ssl3_client_hello(SSL *s) buf = (unsigned char *)s->init_buf->data; if (s->state == SSL3_ST_CW_CLNT_HELLO_A) { SSL_SESSION *sess = s->session; - if ((sess == NULL) || - (sess->ssl_version != s->version) || - !sess->session_id_length || (sess->not_resumable)) { + if ((sess == NULL) || (sess->ssl_version != s->version) || +#ifdef OPENSSL_NO_TLSEXT + !sess->session_id_length || +#else + /* + * In the case of EAP-FAST, we can have a pre-shared + * "ticket" without a session ID. + */ + (!sess->session_id_length && !sess->tlsext_tick) || +#endif + (sess->not_resumable)) { if (!ssl_get_new_session(s, 0)) goto err; } @@ -853,6 +886,7 @@ int ssl3_client_hello(SSL *s) /* SSL3_ST_CW_CLNT_HELLO_B */ return ssl_do_write(s); err: + s->state = SSL_ST_ERR; return (-1); } @@ -924,7 +958,7 @@ int ssl3_get_server_hello(SSL *s) al = SSL_AD_PROTOCOL_VERSION; goto f_err; } - s->version = s->method->version; + s->session->ssl_version = s->version = s->method->version; } if ((p[0] != (s->version >> 8)) || (p[1] != (s->version & 0xff))) { @@ -952,10 +986,19 @@ int ssl3_get_server_hello(SSL *s) } #ifndef OPENSSL_NO_TLSEXT /* - * check if we want to resume the session based on external pre-shared - * secret + * Check if we can resume the session based on external pre-shared secret. + * EAP-FAST (RFC 4851) supports two types of session resumption. + * Resumption based on server-side state works with session IDs. + * Resumption based on pre-shared Protected Access Credentials (PACs) + * works by overriding the SessionTicket extension at the application + * layer, and does not send a session ID. (We do not know whether EAP-FAST + * servers would honour the session ID.) Therefore, the session ID alone + * is not a reliable indicator of session resumption, so we first check if + * we can resume, and later peek at the next handshake message to see if the + * server wants to resume. */ - if (s->version >= TLS1_VERSION && s->tls_session_secret_cb) { + if (s->version >= TLS1_VERSION && s->tls_session_secret_cb && + s->session->tlsext_tick) { SSL_CIPHER *pref_cipher = NULL; s->session->master_key_length = sizeof(s->session->master_key); if (s->tls_session_secret_cb(s, s->session->master_key, @@ -964,12 +1007,15 @@ int ssl3_get_server_hello(SSL *s) s->tls_session_secret_cb_arg)) { s->session->cipher = pref_cipher ? pref_cipher : ssl_get_cipher_by_char(s, p + j); - s->hit = 1; + } else { + SSLerr(SSL_F_SSL3_GET_SERVER_HELLO, ERR_R_INTERNAL_ERROR); + al = SSL_AD_INTERNAL_ERROR; + goto f_err; } } #endif /* OPENSSL_NO_TLSEXT */ - if (!s->hit && j != 0 && j == s->session->session_id_length + if (j != 0 && j == s->session->session_id_length && memcmp(p, s->session->session_id, j) == 0) { if (s->sid_ctx_length != s->session->sid_ctx_length || memcmp(s->session->sid_ctx, s->sid_ctx, s->sid_ctx_length)) { @@ -980,12 +1026,13 @@ int ssl3_get_server_hello(SSL *s) goto f_err; } s->hit = 1; - } - /* a miss or crap from the other end */ - if (!s->hit) { + } else { /* - * If we were trying for session-id reuse, make a new SSL_SESSION so - * we don't stuff up other people + * If we were trying for session-id reuse but the server + * didn't echo the ID, make a new SSL_SESSION. + * In the case of EAP-FAST and PAC, we do not send a session ID, + * so the PAC-based session secret is always preserved. It'll be + * overwritten if the server refuses resumption. */ if (s->session->session_id_length > 0) { if (!ssl_get_new_session(s, 0)) { @@ -1113,6 +1160,7 @@ int ssl3_get_server_hello(SSL *s) f_err: ssl3_send_alert(s, SSL3_AL_FATAL, al); err: + s->state = SSL_ST_ERR; return (-1); } @@ -1298,8 +1346,10 @@ int ssl3_get_server_certificate(SSL *s) if (0) { f_err: ssl3_send_alert(s, SSL3_AL_FATAL, al); - } err: + s->state = SSL_ST_ERR; + } + EVP_PKEY_free(pkey); X509_free(x); sk_X509_pop_free(sk, X509_free); @@ -1621,6 +1671,13 @@ int ssl3_get_key_exchange(SSL *s) SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, ERR_R_INTERNAL_ERROR); goto err; } + + if (EVP_PKEY_bits(pkey) <= SSL_C_EXPORT_PKEYLENGTH(s->s3->tmp.new_cipher)) { + al = SSL_AD_UNEXPECTED_MESSAGE; + SSLerr(SSL_F_SSL3_GET_KEY_EXCHANGE, SSL_R_UNEXPECTED_MESSAGE); + goto f_err; + } + s->session->sess_cert->peer_rsa_tmp = rsa; rsa = NULL; } @@ -1965,6 +2022,7 @@ int ssl3_get_key_exchange(SSL *s) EC_KEY_free(ecdh); #endif EVP_MD_CTX_cleanup(&md_ctx); + s->state = SSL_ST_ERR; return (-1); } @@ -2140,7 +2198,10 @@ int ssl3_get_certificate_request(SSL *s) ca_sk = NULL; ret = 1; + goto done; err: + s->state = SSL_ST_ERR; + done: if (ca_sk != NULL) sk_X509_NAME_pop_free(ca_sk, X509_NAME_free); return (ret); @@ -2175,6 +2236,38 @@ int ssl3_get_new_session_ticket(SSL *s) } p = d = (unsigned char *)s->init_msg; + + if (s->session->session_id_length > 0) { + int i = s->session_ctx->session_cache_mode; + SSL_SESSION *new_sess; + /* + * We reused an existing session, so we need to replace it with a new + * one + */ + if (i & SSL_SESS_CACHE_CLIENT) { + /* + * Remove the old session from the cache + */ + if (i & SSL_SESS_CACHE_NO_INTERNAL_STORE) { + if (s->session_ctx->remove_session_cb != NULL) + s->session_ctx->remove_session_cb(s->session_ctx, + s->session); + } else { + /* We carry on if this fails */ + SSL_CTX_remove_session(s->session_ctx, s->session); + } + } + + if ((new_sess = ssl_session_dup(s->session, 0)) == 0) { + al = SSL_AD_INTERNAL_ERROR; + SSLerr(SSL_F_SSL3_GET_NEW_SESSION_TICKET, ERR_R_MALLOC_FAILURE); + goto f_err; + } + + SSL_SESSION_free(s->session); + s->session = new_sess; + } + n2l(p, s->session->tlsext_tick_lifetime_hint); n2s(p, ticklen); /* ticket_lifetime_hint + ticket_length + ticket */ @@ -2217,6 +2310,7 @@ int ssl3_get_new_session_ticket(SSL *s) f_err: ssl3_send_alert(s, SSL3_AL_FATAL, al); err: + s->state = SSL_ST_ERR; return (-1); } @@ -2277,6 +2371,7 @@ int ssl3_get_cert_status(SSL *s) return 1; f_err: ssl3_send_alert(s, SSL3_AL_FATAL, al); + s->state = SSL_ST_ERR; return (-1); } #endif @@ -2298,12 +2393,32 @@ int ssl3_get_server_done(SSL *s) /* should contain no data */ ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_DECODE_ERROR); SSLerr(SSL_F_SSL3_GET_SERVER_DONE, SSL_R_LENGTH_MISMATCH); + s->state = SSL_ST_ERR; return -1; } ret = 1; return (ret); } +#ifndef OPENSSL_NO_DH +static DH *get_server_static_dh_key(SESS_CERT *scert) +{ + DH *dh_srvr = NULL; + EVP_PKEY *spkey = NULL; + int idx = scert->peer_cert_type; + + if (idx >= 0) + spkey = X509_get_pubkey(scert->peer_pkeys[idx].x509); + if (spkey) { + dh_srvr = EVP_PKEY_get1_DH(spkey); + EVP_PKEY_free(spkey); + } + if (dh_srvr == NULL) + SSLerr(SSL_F_GET_SERVER_STATIC_DH_KEY, ERR_R_INTERNAL_ERROR); + return dh_srvr; +} +#endif + int ssl3_send_client_key_exchange(SSL *s) { unsigned char *p; @@ -2546,25 +2661,14 @@ int ssl3_send_client_key_exchange(SSL *s) goto err; } - if (scert->peer_dh_tmp != NULL) + if (scert->peer_dh_tmp != NULL) { dh_srvr = scert->peer_dh_tmp; - else { - /* we get them from the cert */ - int idx = scert->peer_cert_type; - EVP_PKEY *spkey = NULL; - dh_srvr = NULL; - if (idx >= 0) - spkey = X509_get_pubkey(scert->peer_pkeys[idx].x509); - if (spkey) { - dh_srvr = EVP_PKEY_get1_DH(spkey); - EVP_PKEY_free(spkey); - } - if (dh_srvr == NULL) { - SSLerr(SSL_F_SSL3_SEND_CLIENT_KEY_EXCHANGE, - ERR_R_INTERNAL_ERROR); + } else { + dh_srvr = get_server_static_dh_key(scert); + if (dh_srvr == NULL) goto err; - } } + if (s->s3->flags & TLS1_FLAGS_SKIP_CERT_VERIFY) { /* Use client certificate key */ EVP_PKEY *clkey = s->cert->key->privatekey; @@ -2624,8 +2728,6 @@ int ssl3_send_client_key_exchange(SSL *s) } DH_free(dh_clnt); - - /* perhaps clean things up a bit EAY EAY EAY EAY */ } #endif @@ -2847,7 +2949,10 @@ int ssl3_send_client_key_exchange(SSL *s) EVP_PKEY_encrypt_init(pkey_ctx); /* Generate session key */ - RAND_bytes(premaster_secret, 32); + if (RAND_bytes(premaster_secret, 32) <= 0) { + EVP_PKEY_CTX_free(pkey_ctx); + goto err; + } /* * If we have client certificate, use its secret as peer key */ @@ -3061,6 +3166,7 @@ int ssl3_send_client_key_exchange(SSL *s) EC_KEY_free(clnt_ecdh); EVP_PKEY_free(srvr_pub_pkey); #endif + s->state = SSL_ST_ERR; return (-1); } @@ -3189,6 +3295,7 @@ int ssl3_send_client_verify(SSL *s) err: EVP_MD_CTX_cleanup(&mctx); EVP_PKEY_CTX_free(pctx); + s->state = SSL_ST_ERR; return (-1); } @@ -3252,6 +3359,7 @@ int ssl3_send_client_certificate(SSL *s) } if (i == 0) { ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_INTERNAL_ERROR); + s->state = SSL_ST_ERR; return 0; } s->rwstate = SSL_NOTHING; @@ -3312,6 +3420,7 @@ int ssl3_send_client_certificate(SSL *s) 2) ? NULL : s->cert->key)) { SSLerr(SSL_F_SSL3_SEND_CLIENT_CERTIFICATE, ERR_R_INTERNAL_ERROR); ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_INTERNAL_ERROR); + s->state = SSL_ST_ERR; return 0; } } @@ -3326,6 +3435,7 @@ int ssl3_check_cert_and_algorithm(SSL *s) int i, idx; long alg_k, alg_a; EVP_PKEY *pkey = NULL; + int pkey_bits; SESS_CERT *sc; #ifndef OPENSSL_NO_RSA RSA *rsa; @@ -3333,6 +3443,7 @@ int ssl3_check_cert_and_algorithm(SSL *s) #ifndef OPENSSL_NO_DH DH *dh; #endif + int al = SSL_AD_HANDSHAKE_FAILURE; alg_k = s->s3->tmp.new_cipher->algorithm_mkey; alg_a = s->s3->tmp.new_cipher->algorithm_auth; @@ -3375,6 +3486,7 @@ int ssl3_check_cert_and_algorithm(SSL *s) } #endif pkey = X509_get_pubkey(sc->peer_pkeys[idx].x509); + pkey_bits = EVP_PKEY_bits(pkey); i = X509_certificate_type(sc->peer_pkeys[idx].x509, pkey); EVP_PKEY_free(pkey); @@ -3392,40 +3504,82 @@ int ssl3_check_cert_and_algorithm(SSL *s) } #endif #ifndef OPENSSL_NO_RSA - if ((alg_k & SSL_kRSA) && - !(has_bits(i, EVP_PK_RSA | EVP_PKT_ENC) || (rsa != NULL))) { - SSLerr(SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM, - SSL_R_MISSING_RSA_ENCRYPTING_CERT); - goto f_err; + if (alg_k & SSL_kRSA) { + if (!SSL_C_IS_EXPORT(s->s3->tmp.new_cipher) && + !has_bits(i, EVP_PK_RSA | EVP_PKT_ENC)) { + SSLerr(SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM, + SSL_R_MISSING_RSA_ENCRYPTING_CERT); + goto f_err; + } else if (SSL_C_IS_EXPORT(s->s3->tmp.new_cipher)) { + if (pkey_bits <= SSL_C_EXPORT_PKEYLENGTH(s->s3->tmp.new_cipher)) { + if (!has_bits(i, EVP_PK_RSA | EVP_PKT_ENC)) { + SSLerr(SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM, + SSL_R_MISSING_RSA_ENCRYPTING_CERT); + goto f_err; + } + if (rsa != NULL) { + /* server key exchange is not allowed. */ + al = SSL_AD_INTERNAL_ERROR; + SSLerr(SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM, ERR_R_INTERNAL_ERROR); + goto f_err; + } + } + } } #endif #ifndef OPENSSL_NO_DH - if ((alg_k & SSL_kEDH) && - !(has_bits(i, EVP_PK_DH | EVP_PKT_EXCH) || (dh != NULL))) { - SSLerr(SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM, SSL_R_MISSING_DH_KEY); + if ((alg_k & SSL_kEDH) && dh == NULL) { + al = SSL_AD_INTERNAL_ERROR; + SSLerr(SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM, ERR_R_INTERNAL_ERROR); goto f_err; - } else if ((alg_k & SSL_kDHr) && !SSL_USE_SIGALGS(s) && + } + if ((alg_k & SSL_kDHr) && !SSL_USE_SIGALGS(s) && !has_bits(i, EVP_PK_DH | EVP_PKS_RSA)) { SSLerr(SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM, SSL_R_MISSING_DH_RSA_CERT); goto f_err; } # ifndef OPENSSL_NO_DSA - else if ((alg_k & SSL_kDHd) && !SSL_USE_SIGALGS(s) && - !has_bits(i, EVP_PK_DH | EVP_PKS_DSA)) { + if ((alg_k & SSL_kDHd) && !SSL_USE_SIGALGS(s) && + !has_bits(i, EVP_PK_DH | EVP_PKS_DSA)) { SSLerr(SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM, SSL_R_MISSING_DH_DSA_CERT); goto f_err; } # endif -#endif - if (SSL_C_IS_EXPORT(s->s3->tmp.new_cipher) && !has_bits(i, EVP_PKT_EXP)) { + if (alg_k & (SSL_kDHE | SSL_kDHr | SSL_kDHd)) { + int dh_size; + if (alg_k & SSL_kDHE) { + dh_size = BN_num_bits(dh->p); + } else { + DH *dh_srvr = get_server_static_dh_key(sc); + if (dh_srvr == NULL) + goto f_err; + dh_size = BN_num_bits(dh_srvr->p); + DH_free(dh_srvr); + } + + if ((!SSL_C_IS_EXPORT(s->s3->tmp.new_cipher) && dh_size < 768) + || (SSL_C_IS_EXPORT(s->s3->tmp.new_cipher) && dh_size < 512)) { + SSLerr(SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM, SSL_R_DH_KEY_TOO_SMALL); + goto f_err; + } + } +#endif /* !OPENSSL_NO_DH */ + + if (SSL_C_IS_EXPORT(s->s3->tmp.new_cipher) && + pkey_bits > SSL_C_EXPORT_PKEYLENGTH(s->s3->tmp.new_cipher)) { #ifndef OPENSSL_NO_RSA if (alg_k & SSL_kRSA) { - if (rsa == NULL - || RSA_size(rsa) * 8 > + if (rsa == NULL) { + SSLerr(SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM, + SSL_R_MISSING_EXPORT_TMP_RSA_KEY); + goto f_err; + } else if (BN_num_bits(rsa->n) > SSL_C_EXPORT_PKEYLENGTH(s->s3->tmp.new_cipher)) { + /* We have a temporary RSA key but it's too large. */ + al = SSL_AD_EXPORT_RESTRICTION; SSLerr(SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM, SSL_R_MISSING_EXPORT_TMP_RSA_KEY); goto f_err; @@ -3433,14 +3587,21 @@ int ssl3_check_cert_and_algorithm(SSL *s) } else #endif #ifndef OPENSSL_NO_DH - if (alg_k & (SSL_kEDH | SSL_kDHr | SSL_kDHd)) { - if (dh == NULL - || DH_size(dh) * 8 > + if (alg_k & SSL_kDHE) { + if (BN_num_bits(dh->p) > SSL_C_EXPORT_PKEYLENGTH(s->s3->tmp.new_cipher)) { + /* We have a temporary DH key but it's too large. */ + al = SSL_AD_EXPORT_RESTRICTION; SSLerr(SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM, SSL_R_MISSING_EXPORT_TMP_DH_KEY); goto f_err; } + } else if (alg_k & (SSL_kDHr | SSL_kDHd)) { + /* The cert should have had an export DH key. */ + al = SSL_AD_EXPORT_RESTRICTION; + SSLerr(SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM, + SSL_R_MISSING_EXPORT_TMP_DH_KEY); + goto f_err; } else #endif { @@ -3451,12 +3612,62 @@ int ssl3_check_cert_and_algorithm(SSL *s) } return (1); f_err: - ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_HANDSHAKE_FAILURE); + ssl3_send_alert(s, SSL3_AL_FATAL, al); err: return (0); } -#if !defined(OPENSSL_NO_TLSEXT) && !defined(OPENSSL_NO_NEXTPROTONEG) +#ifndef OPENSSL_NO_TLSEXT +/* + * Normally, we can tell if the server is resuming the session from + * the session ID. EAP-FAST (RFC 4851), however, relies on the next server + * message after the ServerHello to determine if the server is resuming. + * Therefore, we allow EAP-FAST to peek ahead. + * ssl3_check_finished returns 1 if we are resuming from an external + * pre-shared secret, we have a "ticket" and the next server handshake message + * is Finished; and 0 otherwise. It returns -1 upon an error. + */ +static int ssl3_check_finished(SSL *s) +{ + int ok = 0; + + if (s->version < TLS1_VERSION || !s->tls_session_secret_cb || + !s->session->tlsext_tick) + return 0; + + /* Need to permit this temporarily, in case the next message is Finished. */ + s->s3->flags |= SSL3_FLAGS_CCS_OK; + /* + * This function is called when we might get a Certificate message instead, + * so permit appropriate message length. + * We ignore the return value as we're only interested in the message type + * and not its length. + */ + s->method->ssl_get_message(s, + SSL3_ST_CR_CERT_A, + SSL3_ST_CR_CERT_B, + -1, s->max_cert_list, &ok); + s->s3->flags &= ~SSL3_FLAGS_CCS_OK; + + if (!ok) + return -1; + + s->s3->tmp.reuse_message = 1; + + if (s->s3->tmp.message_type == SSL3_MT_FINISHED) + return 1; + + /* If we're not done, then the CCS arrived early and we should bail. */ + if (s->s3->change_cipher_spec) { + SSLerr(SSL_F_SSL3_CHECK_FINISHED, SSL_R_CCS_RECEIVED_EARLY); + ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_UNEXPECTED_MESSAGE); + return -1; + } + + return 0; +} + +# ifndef OPENSSL_NO_NEXTPROTONEG int ssl3_send_next_proto(SSL *s) { unsigned int len, padding_len; @@ -3479,8 +3690,8 @@ int ssl3_send_next_proto(SSL *s) return ssl3_do_write(s, SSL3_RT_HANDSHAKE); } -#endif /* !OPENSSL_NO_TLSEXT && - * !OPENSSL_NO_NEXTPROTONEG */ +#endif /* !OPENSSL_NO_NEXTPROTONEG */ +#endif /* !OPENSSL_NO_TLSEXT */ int ssl_do_client_cert_cb(SSL *s, X509 **px509, EVP_PKEY **ppkey) { diff --git a/deps/openssl/openssl/ssl/s3_lib.c b/deps/openssl/openssl/ssl/s3_lib.c index 28129f68d9e694..5db349a23a6a59 100644 --- a/deps/openssl/openssl/ssl/s3_lib.c +++ b/deps/openssl/openssl/ssl/s3_lib.c @@ -330,7 +330,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { /* The DH ciphers */ /* Cipher 0B */ { - 1, + 0, SSL3_TXT_DH_DSS_DES_40_CBC_SHA, SSL3_CK_DH_DSS_DES_40_CBC_SHA, SSL_kDHd, @@ -378,7 +378,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = { /* Cipher 0E */ { - 1, + 0, SSL3_TXT_DH_RSA_DES_40_CBC_SHA, SSL3_CK_DH_RSA_DES_40_CBC_SHA, SSL_kDHr, diff --git a/deps/openssl/openssl/ssl/s3_pkt.c b/deps/openssl/openssl/ssl/s3_pkt.c index 221ae039e99eae..603c285ac49940 100644 --- a/deps/openssl/openssl/ssl/s3_pkt.c +++ b/deps/openssl/openssl/ssl/s3_pkt.c @@ -361,11 +361,22 @@ static int ssl3_get_record(SSL *s) if (version != s->version) { SSLerr(SSL_F_SSL3_GET_RECORD, SSL_R_WRONG_VERSION_NUMBER); if ((s->version & 0xFF00) == (version & 0xFF00) - && !s->enc_write_ctx && !s->write_hash) + && !s->enc_write_ctx && !s->write_hash) { + if (rr->type == SSL3_RT_ALERT) { + /* + * The record is using an incorrect version number, but + * what we've got appears to be an alert. We haven't + * read the body yet to check whether its a fatal or + * not - but chances are it is. We probably shouldn't + * send a fatal alert back. We'll just end. + */ + goto err; + } /* * Send back error using their minor version number :-) */ s->version = (unsigned short)version; + } al = SSL_AD_PROTOCOL_VERSION; goto f_err; } @@ -708,7 +719,7 @@ int ssl3_write_bytes(SSL *s, int type, const void *buf_, int len) packlen *= 4; wb->buf = OPENSSL_malloc(packlen); - if(!wb->buf) { + if (!wb->buf) { SSLerr(SSL_F_SSL3_WRITE_BYTES, ERR_R_MALLOC_FAILURE); return -1; } diff --git a/deps/openssl/openssl/ssl/s3_srvr.c b/deps/openssl/openssl/ssl/s3_srvr.c index c016139b1ded86..8885694c02b345 100644 --- a/deps/openssl/openssl/ssl/s3_srvr.c +++ b/deps/openssl/openssl/ssl/s3_srvr.c @@ -266,6 +266,7 @@ int ssl3_accept(SSL *s) if ((s->version >> 8) != 3) { SSLerr(SSL_F_SSL3_ACCEPT, ERR_R_INTERNAL_ERROR); + s->state = SSL_ST_ERR; return -1; } s->type = SSL_ST_ACCEPT; @@ -273,11 +274,13 @@ int ssl3_accept(SSL *s) if (s->init_buf == NULL) { if ((buf = BUF_MEM_new()) == NULL) { ret = -1; + s->state = SSL_ST_ERR; goto end; } if (!BUF_MEM_grow(buf, SSL3_RT_MAX_PLAIN_LENGTH)) { BUF_MEM_free(buf); ret = -1; + s->state = SSL_ST_ERR; goto end; } s->init_buf = buf; @@ -285,6 +288,7 @@ int ssl3_accept(SSL *s) if (!ssl3_setup_buffers(s)) { ret = -1; + s->state = SSL_ST_ERR; goto end; } @@ -303,6 +307,7 @@ int ssl3_accept(SSL *s) */ if (!ssl_init_wbio_buffer(s, 1)) { ret = -1; + s->state = SSL_ST_ERR; goto end; } @@ -320,6 +325,7 @@ int ssl3_accept(SSL *s) SSL_R_UNSAFE_LEGACY_RENEGOTIATION_DISABLED); ssl3_send_alert(s, SSL3_AL_FATAL, SSL_AD_HANDSHAKE_FAILURE); ret = -1; + s->state = SSL_ST_ERR; goto end; } else { /* @@ -379,6 +385,7 @@ int ssl3_accept(SSL *s) SSLerr(SSL_F_SSL3_ACCEPT, SSL_R_CLIENTHELLO_TLSEXT); ret = SSL_TLSEXT_ERR_ALERT_FATAL; ret = -1; + s->state = SSL_ST_ERR; goto end; } } @@ -529,9 +536,12 @@ int ssl3_accept(SSL *s) skip = 1; s->s3->tmp.cert_request = 0; s->state = SSL3_ST_SW_SRVR_DONE_A; - if (s->s3->handshake_buffer) - if (!ssl3_digest_cached_records(s)) + if (s->s3->handshake_buffer) { + if (!ssl3_digest_cached_records(s)) { + s->state = SSL_ST_ERR; return -1; + } + } } else { s->s3->tmp.cert_request = 1; ret = ssl3_send_certificate_request(s); @@ -621,11 +631,14 @@ int ssl3_accept(SSL *s) */ if (!s->s3->handshake_buffer) { SSLerr(SSL_F_SSL3_ACCEPT, ERR_R_INTERNAL_ERROR); + s->state = SSL_ST_ERR; return -1; } s->s3->flags |= TLS1_FLAGS_KEEP_HANDSHAKE; - if (!ssl3_digest_cached_records(s)) + if (!ssl3_digest_cached_records(s)) { + s->state = SSL_ST_ERR; return -1; + } } else { int offset = 0; int dgst_num; @@ -639,9 +652,12 @@ int ssl3_accept(SSL *s) * CertificateVerify should be generalized. But it is next * step */ - if (s->s3->handshake_buffer) - if (!ssl3_digest_cached_records(s)) + if (s->s3->handshake_buffer) { + if (!ssl3_digest_cached_records(s)) { + s->state = SSL_ST_ERR; return -1; + } + } for (dgst_num = 0; dgst_num < SSL_MAX_DIGEST; dgst_num++) if (s->s3->handshake_dgst[dgst_num]) { int dgst_size; @@ -657,6 +673,7 @@ int ssl3_accept(SSL *s) dgst_size = EVP_MD_CTX_size(s->s3->handshake_dgst[dgst_num]); if (dgst_size < 0) { + s->state = SSL_ST_ERR; ret = -1; goto end; } @@ -667,15 +684,6 @@ int ssl3_accept(SSL *s) case SSL3_ST_SR_CERT_VRFY_A: case SSL3_ST_SR_CERT_VRFY_B: - /* - * This *should* be the first time we enable CCS, but be - * extra careful about surrounding code changes. We need - * to set this here because we don't know if we're - * expecting a CertificateVerify or not. - */ - if (!s->s3->change_cipher_spec) - s->s3->flags |= SSL3_FLAGS_CCS_OK; - /* we should decide if we expected this one */ ret = ssl3_get_cert_verify(s); if (ret <= 0) goto end; @@ -695,11 +703,10 @@ int ssl3_accept(SSL *s) case SSL3_ST_SR_NEXT_PROTO_A: case SSL3_ST_SR_NEXT_PROTO_B: /* - * Enable CCS for resumed handshakes with NPN. - * In a full handshake with NPN, we end up here through - * SSL3_ST_SR_CERT_VRFY_B, where SSL3_FLAGS_CCS_OK was - * already set. Receiving a CCS clears the flag, so make - * sure not to re-enable it to ban duplicates. + * Enable CCS for NPN. Receiving a CCS clears the flag, so make + * sure not to re-enable it to ban duplicates. This *should* be the + * first time we have received one - but we check anyway to be + * cautious. * s->s3->change_cipher_spec is set when a CCS is * processed in s3_pkt.c, and remains set until * the client's Finished message is read. @@ -718,10 +725,8 @@ int ssl3_accept(SSL *s) case SSL3_ST_SR_FINISHED_A: case SSL3_ST_SR_FINISHED_B: /* - * Enable CCS for resumed handshakes without NPN. - * In a full handshake, we end up here through - * SSL3_ST_SR_CERT_VRFY_B, where SSL3_FLAGS_CCS_OK was - * already set. Receiving a CCS clears the flag, so make + * Enable CCS for handshakes without NPN. In NPN the CCS flag has + * already been set. Receiving a CCS clears the flag, so make * sure not to re-enable it to ban duplicates. * s->s3->change_cipher_spec is set when a CCS is * processed in s3_pkt.c, and remains set until @@ -771,6 +776,7 @@ int ssl3_accept(SSL *s) s->session->cipher = s->s3->tmp.new_cipher; if (!s->method->ssl3_enc->setup_key_block(s)) { ret = -1; + s->state = SSL_ST_ERR; goto end; } @@ -787,6 +793,7 @@ int ssl3_accept(SSL *s) SSL3_CHANGE_CIPHER_SERVER_WRITE)) { ret = -1; + s->state = SSL_ST_ERR; goto end; } @@ -849,6 +856,7 @@ int ssl3_accept(SSL *s) goto end; /* break; */ + case SSL_ST_ERR: default: SSLerr(SSL_F_SSL3_ACCEPT, SSL_R_UNKNOWN_STATE); ret = -1; @@ -930,6 +938,16 @@ int ssl3_get_client_hello(SSL *s) s->first_packet = 0; d = p = (unsigned char *)s->init_msg; + /* + * 2 bytes for client version, SSL3_RANDOM_SIZE bytes for random, 1 byte + * for session id length + */ + if (n < 2 + SSL3_RANDOM_SIZE + 1) { + al = SSL_AD_DECODE_ERROR; + SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_LENGTH_TOO_SHORT); + goto f_err; + } + /* * use version from inside client hello, not from record header (may * differ: see RFC 2246, Appendix E, second paragraph) @@ -962,6 +980,12 @@ int ssl3_get_client_hello(SSL *s) unsigned int session_length, cookie_length; session_length = *(p + SSL3_RANDOM_SIZE); + + if (p + SSL3_RANDOM_SIZE + session_length + 1 >= d + n) { + al = SSL_AD_DECODE_ERROR; + SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_LENGTH_TOO_SHORT); + goto f_err; + } cookie_length = *(p + SSL3_RANDOM_SIZE + session_length + 1); if (cookie_length == 0) @@ -975,6 +999,12 @@ int ssl3_get_client_hello(SSL *s) /* get the session-id */ j = *(p++); + if (p + j > d + n) { + al = SSL_AD_DECODE_ERROR; + SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_LENGTH_TOO_SHORT); + goto f_err; + } + s->hit = 0; /* * Versions before 0.9.7 always allow clients to resume sessions in @@ -1019,8 +1049,19 @@ int ssl3_get_client_hello(SSL *s) if (SSL_IS_DTLS(s)) { /* cookie stuff */ + if (p + 1 > d + n) { + al = SSL_AD_DECODE_ERROR; + SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_LENGTH_TOO_SHORT); + goto f_err; + } cookie_len = *(p++); + if (p + cookie_len > d + n) { + al = SSL_AD_DECODE_ERROR; + SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_LENGTH_TOO_SHORT); + goto f_err; + } + /* * The ClientHello may contain a cookie even if the * HelloVerify message has not been sent--make sure that it @@ -1086,27 +1127,33 @@ int ssl3_get_client_hello(SSL *s) } } + if (p + 2 > d + n) { + al = SSL_AD_DECODE_ERROR; + SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_LENGTH_TOO_SHORT); + goto f_err; + } n2s(p, i); - if ((i == 0) && (j != 0)) { - /* we need a cipher if we are not resuming a session */ + + if (i == 0) { al = SSL_AD_ILLEGAL_PARAMETER; SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_NO_CIPHERS_SPECIFIED); goto f_err; } - if ((p + i) >= (d + n)) { + + /* i bytes of cipher data + 1 byte for compression length later */ + if ((p + i + 1) > (d + n)) { /* not enough data */ al = SSL_AD_DECODE_ERROR; SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_LENGTH_MISMATCH); goto f_err; } - if ((i > 0) && (ssl_bytes_to_cipher_list(s, p, i, &(ciphers)) - == NULL)) { + if (ssl_bytes_to_cipher_list(s, p, i, &(ciphers)) == NULL) { goto err; } p += i; /* If it is a hit, check that the cipher is in the list */ - if ((s->hit) && (i > 0)) { + if (s->hit) { j = 0; id = s->session->cipher->id; @@ -1335,8 +1382,8 @@ int ssl3_get_client_hello(SSL *s) sk_SSL_CIPHER_free(s->session->ciphers); s->session->ciphers = ciphers; if (ciphers == NULL) { - al = SSL_AD_ILLEGAL_PARAMETER; - SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, SSL_R_NO_CIPHERS_PASSED); + al = SSL_AD_INTERNAL_ERROR; + SSLerr(SSL_F_SSL3_GET_CLIENT_HELLO, ERR_R_INTERNAL_ERROR); goto f_err; } ciphers = NULL; @@ -1424,8 +1471,10 @@ int ssl3_get_client_hello(SSL *s) if (0) { f_err: ssl3_send_alert(s, SSL3_AL_FATAL, al); - } err: + s->state = SSL_ST_ERR; + } + if (ciphers != NULL) sk_SSL_CIPHER_free(ciphers); return ret < 0 ? -1 : ret; @@ -1443,8 +1492,10 @@ int ssl3_send_server_hello(SSL *s) buf = (unsigned char *)s->init_buf->data; #ifdef OPENSSL_NO_TLSEXT p = s->s3->server_random; - if (ssl_fill_hello_random(s, 1, p, SSL3_RANDOM_SIZE) <= 0) + if (ssl_fill_hello_random(s, 1, p, SSL3_RANDOM_SIZE) <= 0) { + s->state = SSL_ST_ERR; return -1; + } #endif /* Do the message type and length last */ d = p = ssl_handshake_start(s); @@ -1479,6 +1530,7 @@ int ssl3_send_server_hello(SSL *s) sl = s->session->session_id_length; if (sl > (int)sizeof(s->session->session_id)) { SSLerr(SSL_F_SSL3_SEND_SERVER_HELLO, ERR_R_INTERNAL_ERROR); + s->state = SSL_ST_ERR; return -1; } *(p++) = sl; @@ -1501,6 +1553,7 @@ int ssl3_send_server_hello(SSL *s) #ifndef OPENSSL_NO_TLSEXT if (ssl_prepare_serverhello_tlsext(s) <= 0) { SSLerr(SSL_F_SSL3_SEND_SERVER_HELLO, SSL_R_SERVERHELLO_TLSEXT); + s->state = SSL_ST_ERR; return -1; } if ((p = @@ -1508,6 +1561,7 @@ int ssl3_send_server_hello(SSL *s) &al)) == NULL) { ssl3_send_alert(s, SSL3_AL_FATAL, al); SSLerr(SSL_F_SSL3_SEND_SERVER_HELLO, ERR_R_INTERNAL_ERROR); + s->state = SSL_ST_ERR; return -1; } #endif @@ -1970,6 +2024,7 @@ int ssl3_send_server_key_exchange(SSL *s) BN_CTX_free(bn_ctx); #endif EVP_MD_CTX_cleanup(&md_ctx); + s->state = SSL_ST_ERR; return (-1); } @@ -2063,6 +2118,7 @@ int ssl3_send_certificate_request(SSL *s) /* SSL3_ST_SW_CERT_REQ_B */ return ssl_do_write(s); err: + s->state = SSL_ST_ERR; return (-1); } @@ -2355,6 +2411,7 @@ int ssl3_get_client_key_exchange(SSL *s) int padl, outl; krb5_timestamp authtime = 0; krb5_ticket_times ttimes; + int kerr = 0; EVP_CIPHER_CTX_init(&ciph_ctx); @@ -2458,23 +2515,27 @@ int ssl3_get_client_key_exchange(SSL *s) { SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, SSL_R_DECRYPTION_FAILED); - goto err; + kerr = 1; + goto kclean; } if (outl > SSL_MAX_MASTER_KEY_LENGTH) { SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, SSL_R_DATA_LENGTH_TOO_LONG); - goto err; + kerr = 1; + goto kclean; } if (!EVP_DecryptFinal_ex(&ciph_ctx, &(pms[outl]), &padl)) { SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, SSL_R_DECRYPTION_FAILED); - goto err; + kerr = 1; + goto kclean; } outl += padl; if (outl > SSL_MAX_MASTER_KEY_LENGTH) { SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, SSL_R_DATA_LENGTH_TOO_LONG); - goto err; + kerr = 1; + goto kclean; } if (!((pms[0] == (s->client_version >> 8)) && (pms[1] == (s->client_version & 0xff)))) { @@ -2491,7 +2552,8 @@ int ssl3_get_client_key_exchange(SSL *s) if (!(s->options & SSL_OP_TLS_ROLLBACK_BUG)) { SSLerr(SSL_F_SSL3_GET_CLIENT_KEY_EXCHANGE, SSL_AD_DECODE_ERROR); - goto err; + kerr = 1; + goto kclean; } } @@ -2517,6 +2579,11 @@ int ssl3_get_client_key_exchange(SSL *s) * kssl_ctx = kssl_ctx_free(kssl_ctx); * if (s->kssl_ctx) s->kssl_ctx = NULL; */ + + kclean: + OPENSSL_cleanse(pms, sizeof(pms)); + if (kerr) + goto err; } else #endif /* OPENSSL_NO_KRB5 */ @@ -2835,6 +2902,7 @@ int ssl3_get_client_key_exchange(SSL *s) s-> session->master_key, premaster_secret, 32); + OPENSSL_cleanse(premaster_secret, sizeof(premaster_secret)); /* Check if pubkey from client certificate was used */ if (EVP_PKEY_CTX_ctrl (pkey_ctx, -1, -1, EVP_PKEY_CTRL_PEER_KEY, 2, NULL) > 0) @@ -2867,6 +2935,7 @@ int ssl3_get_client_key_exchange(SSL *s) EC_KEY_free(srvr_ecdh); BN_CTX_free(bn_ctx); #endif + s->state = SSL_ST_ERR; return (-1); } @@ -2882,39 +2951,31 @@ int ssl3_get_cert_verify(SSL *s) EVP_MD_CTX mctx; EVP_MD_CTX_init(&mctx); + /* + * We should only process a CertificateVerify message if we have received + * a Certificate from the client. If so then |s->session->peer| will be non + * NULL. In some instances a CertificateVerify message is not required even + * if the peer has sent a Certificate (e.g. such as in the case of static + * DH). In that case the ClientKeyExchange processing will skip the + * CertificateVerify state so we should not arrive here. + */ + if (s->session->peer == NULL) { + ret = 1; + goto end; + } + n = s->method->ssl_get_message(s, SSL3_ST_SR_CERT_VRFY_A, SSL3_ST_SR_CERT_VRFY_B, - -1, SSL3_RT_MAX_PLAIN_LENGTH, &ok); + SSL3_MT_CERTIFICATE_VERIFY, + SSL3_RT_MAX_PLAIN_LENGTH, &ok); if (!ok) return ((int)n); - if (s->session->peer != NULL) { - peer = s->session->peer; - pkey = X509_get_pubkey(peer); - type = X509_certificate_type(peer, pkey); - } else { - peer = NULL; - pkey = NULL; - } - - if (s->s3->tmp.message_type != SSL3_MT_CERTIFICATE_VERIFY) { - s->s3->tmp.reuse_message = 1; - if (peer != NULL) { - al = SSL_AD_UNEXPECTED_MESSAGE; - SSLerr(SSL_F_SSL3_GET_CERT_VERIFY, SSL_R_MISSING_VERIFY_MESSAGE); - goto f_err; - } - ret = 1; - goto end; - } - - if (peer == NULL) { - SSLerr(SSL_F_SSL3_GET_CERT_VERIFY, SSL_R_NO_CLIENT_CERT_RECEIVED); - al = SSL_AD_UNEXPECTED_MESSAGE; - goto f_err; - } + peer = s->session->peer; + pkey = X509_get_pubkey(peer); + type = X509_certificate_type(peer, pkey); if (!(type & EVP_PKT_SIGN)) { SSLerr(SSL_F_SSL3_GET_CERT_VERIFY, @@ -2923,12 +2984,6 @@ int ssl3_get_cert_verify(SSL *s) goto f_err; } - if (s->s3->change_cipher_spec) { - SSLerr(SSL_F_SSL3_GET_CERT_VERIFY, SSL_R_CCS_RECEIVED_EARLY); - al = SSL_AD_UNEXPECTED_MESSAGE; - goto f_err; - } - /* we now have a signature that we need to verify */ p = (unsigned char *)s->init_msg; /* Check for broken implementations of GOST ciphersuites */ @@ -3069,6 +3124,7 @@ int ssl3_get_cert_verify(SSL *s) if (0) { f_err: ssl3_send_alert(s, SSL3_AL_FATAL, al); + s->state = SSL_ST_ERR; } end: if (s->s3->handshake_buffer) { @@ -3227,8 +3283,10 @@ int ssl3_get_client_certificate(SSL *s) if (0) { f_err: ssl3_send_alert(s, SSL3_AL_FATAL, al); - } err: + s->state = SSL_ST_ERR; + } + if (x != NULL) X509_free(x); if (sk != NULL) @@ -3248,12 +3306,14 @@ int ssl3_send_server_certificate(SSL *s) (s->s3->tmp.new_cipher->algorithm_mkey & SSL_kKRB5)) { SSLerr(SSL_F_SSL3_SEND_SERVER_CERTIFICATE, ERR_R_INTERNAL_ERROR); + s->state = SSL_ST_ERR; return (0); } } if (!ssl3_output_cert_chain(s, cpk)) { SSLerr(SSL_F_SSL3_SEND_SERVER_CERTIFICATE, ERR_R_INTERNAL_ERROR); + s->state = SSL_ST_ERR; return (0); } s->state = SSL3_ST_SW_CERT_B; @@ -3287,11 +3347,15 @@ int ssl3_send_newsession_ticket(SSL *s) * Some length values are 16 bits, so forget it if session is too * long */ - if (slen_full == 0 || slen_full > 0xFF00) + if (slen_full == 0 || slen_full > 0xFF00) { + s->state = SSL_ST_ERR; return -1; + } senc = OPENSSL_malloc(slen_full); - if (!senc) + if (!senc) { + s->state = SSL_ST_ERR; return -1; + } EVP_CIPHER_CTX_init(&ctx); HMAC_CTX_init(&hctx); @@ -3391,10 +3455,10 @@ int ssl3_send_newsession_ticket(SSL *s) /* Now write out lengths: p points to end of data written */ /* Total length */ len = p - ssl_handshake_start(s); - ssl_set_handshake_header(s, SSL3_MT_NEWSESSION_TICKET, len); /* Skip ticket lifetime hint */ p = ssl_handshake_start(s) + 4; s2n(len - 6, p); + ssl_set_handshake_header(s, SSL3_MT_NEWSESSION_TICKET, len); s->state = SSL3_ST_SW_SESSION_TICKET_B; OPENSSL_free(senc); } @@ -3406,6 +3470,7 @@ int ssl3_send_newsession_ticket(SSL *s) OPENSSL_free(senc); EVP_CIPHER_CTX_cleanup(&ctx); HMAC_CTX_cleanup(&hctx); + s->state = SSL_ST_ERR; return -1; } @@ -3419,8 +3484,10 @@ int ssl3_send_cert_status(SSL *s) * 1 (ocsp response type) + 3 (ocsp response length) * + (ocsp response) */ - if (!BUF_MEM_grow(s->init_buf, 8 + s->tlsext_ocsp_resplen)) + if (!BUF_MEM_grow(s->init_buf, 8 + s->tlsext_ocsp_resplen)) { + s->state = SSL_ST_ERR; return -1; + } p = (unsigned char *)s->init_buf->data; @@ -3463,6 +3530,7 @@ int ssl3_get_next_proto(SSL *s) if (!s->s3->next_proto_neg_seen) { SSLerr(SSL_F_SSL3_GET_NEXT_PROTO, SSL_R_GOT_NEXT_PROTO_WITHOUT_EXTENSION); + s->state = SSL_ST_ERR; return -1; } @@ -3482,11 +3550,14 @@ int ssl3_get_next_proto(SSL *s) */ if (!s->s3->change_cipher_spec) { SSLerr(SSL_F_SSL3_GET_NEXT_PROTO, SSL_R_GOT_NEXT_PROTO_BEFORE_A_CCS); + s->state = SSL_ST_ERR; return -1; } - if (n < 2) + if (n < 2) { + s->state = SSL_ST_ERR; return 0; /* The body must be > 1 bytes long */ + } p = (unsigned char *)s->init_msg; @@ -3498,15 +3569,20 @@ int ssl3_get_next_proto(SSL *s) * uint8 padding[padding_len]; */ proto_len = p[0]; - if (proto_len + 2 > s->init_num) + if (proto_len + 2 > s->init_num) { + s->state = SSL_ST_ERR; return 0; + } padding_len = p[proto_len + 1]; - if (proto_len + padding_len + 2 != s->init_num) + if (proto_len + padding_len + 2 != s->init_num) { + s->state = SSL_ST_ERR; return 0; + } s->next_proto_negotiated = OPENSSL_malloc(proto_len); if (!s->next_proto_negotiated) { SSLerr(SSL_F_SSL3_GET_NEXT_PROTO, ERR_R_MALLOC_FAILURE); + s->state = SSL_ST_ERR; return 0; } memcpy(s->next_proto_negotiated, p + 1, proto_len); diff --git a/deps/openssl/openssl/ssl/ssl.h b/deps/openssl/openssl/ssl/ssl.h index a6d845dc994fc1..6fe1a2474d43a8 100644 --- a/deps/openssl/openssl/ssl/ssl.h +++ b/deps/openssl/openssl/ssl/ssl.h @@ -1727,6 +1727,7 @@ extern "C" { # define SSL_ST_BEFORE 0x4000 # define SSL_ST_OK 0x03 # define SSL_ST_RENEGOTIATE (0x04|SSL_ST_INIT) +# define SSL_ST_ERR 0x05 # define SSL_CB_LOOP 0x01 # define SSL_CB_EXIT 0x02 @@ -2640,6 +2641,7 @@ void ERR_load_SSL_strings(void); # define SSL_F_GET_CLIENT_MASTER_KEY 107 # define SSL_F_GET_SERVER_FINISHED 108 # define SSL_F_GET_SERVER_HELLO 109 +# define SSL_F_GET_SERVER_STATIC_DH_KEY 340 # define SSL_F_GET_SERVER_VERIFY 110 # define SSL_F_I2D_SSL_SESSION 111 # define SSL_F_READ_N 112 @@ -2670,6 +2672,7 @@ void ERR_load_SSL_strings(void); # define SSL_F_SSL3_CHANGE_CIPHER_STATE 129 # define SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM 130 # define SSL_F_SSL3_CHECK_CLIENT_HELLO 304 +# define SSL_F_SSL3_CHECK_FINISHED 339 # define SSL_F_SSL3_CLIENT_HELLO 131 # define SSL_F_SSL3_CONNECT 132 # define SSL_F_SSL3_CTRL 213 @@ -2784,6 +2787,7 @@ void ERR_load_SSL_strings(void); # define SSL_F_SSL_RSA_PUBLIC_ENCRYPT 188 # define SSL_F_SSL_SCAN_CLIENTHELLO_TLSEXT 320 # define SSL_F_SSL_SCAN_SERVERHELLO_TLSEXT 321 +# define SSL_F_SSL_SESSION_DUP 348 # define SSL_F_SSL_SESSION_NEW 189 # define SSL_F_SSL_SESSION_PRINT_FP 190 # define SSL_F_SSL_SESSION_SET1_ID_CONTEXT 312 @@ -2904,6 +2908,7 @@ void ERR_load_SSL_strings(void); # define SSL_R_DATA_LENGTH_TOO_LONG 146 # define SSL_R_DECRYPTION_FAILED 147 # define SSL_R_DECRYPTION_FAILED_OR_BAD_RECORD_MAC 281 +# define SSL_R_DH_KEY_TOO_SMALL 372 # define SSL_R_DH_PUBLIC_VALUE_LENGTH_IS_WRONG 148 # define SSL_R_DIGEST_CHECK_FAILED 149 # define SSL_R_DTLS_MESSAGE_TOO_BIG 334 diff --git a/deps/openssl/openssl/ssl/ssl_err.c b/deps/openssl/openssl/ssl/ssl_err.c index ab3aa233748d84..1a6030e6238dee 100644 --- a/deps/openssl/openssl/ssl/ssl_err.c +++ b/deps/openssl/openssl/ssl/ssl_err.c @@ -1,6 +1,6 @@ /* ssl/ssl_err.c */ /* ==================================================================== - * Copyright (c) 1999-2014 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2015 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -119,6 +119,7 @@ static ERR_STRING_DATA SSL_str_functs[] = { {ERR_FUNC(SSL_F_GET_CLIENT_MASTER_KEY), "GET_CLIENT_MASTER_KEY"}, {ERR_FUNC(SSL_F_GET_SERVER_FINISHED), "GET_SERVER_FINISHED"}, {ERR_FUNC(SSL_F_GET_SERVER_HELLO), "GET_SERVER_HELLO"}, + {ERR_FUNC(SSL_F_GET_SERVER_STATIC_DH_KEY), "GET_SERVER_STATIC_DH_KEY"}, {ERR_FUNC(SSL_F_GET_SERVER_VERIFY), "GET_SERVER_VERIFY"}, {ERR_FUNC(SSL_F_I2D_SSL_SESSION), "i2d_SSL_SESSION"}, {ERR_FUNC(SSL_F_READ_N), "READ_N"}, @@ -151,6 +152,7 @@ static ERR_STRING_DATA SSL_str_functs[] = { {ERR_FUNC(SSL_F_SSL3_CHECK_CERT_AND_ALGORITHM), "ssl3_check_cert_and_algorithm"}, {ERR_FUNC(SSL_F_SSL3_CHECK_CLIENT_HELLO), "ssl3_check_client_hello"}, + {ERR_FUNC(SSL_F_SSL3_CHECK_FINISHED), "SSL3_CHECK_FINISHED"}, {ERR_FUNC(SSL_F_SSL3_CLIENT_HELLO), "ssl3_client_hello"}, {ERR_FUNC(SSL_F_SSL3_CONNECT), "ssl3_connect"}, {ERR_FUNC(SSL_F_SSL3_CTRL), "ssl3_ctrl"}, @@ -310,6 +312,7 @@ static ERR_STRING_DATA SSL_str_functs[] = { "SSL_SCAN_CLIENTHELLO_TLSEXT"}, {ERR_FUNC(SSL_F_SSL_SCAN_SERVERHELLO_TLSEXT), "SSL_SCAN_SERVERHELLO_TLSEXT"}, + {ERR_FUNC(SSL_F_SSL_SESSION_DUP), "ssl_session_dup"}, {ERR_FUNC(SSL_F_SSL_SESSION_NEW), "SSL_SESSION_new"}, {ERR_FUNC(SSL_F_SSL_SESSION_PRINT_FP), "SSL_SESSION_print_fp"}, {ERR_FUNC(SSL_F_SSL_SESSION_SET1_ID_CONTEXT), @@ -458,6 +461,7 @@ static ERR_STRING_DATA SSL_str_reasons[] = { {ERR_REASON(SSL_R_DECRYPTION_FAILED), "decryption failed"}, {ERR_REASON(SSL_R_DECRYPTION_FAILED_OR_BAD_RECORD_MAC), "decryption failed or bad record mac"}, + {ERR_REASON(SSL_R_DH_KEY_TOO_SMALL), "dh key too small"}, {ERR_REASON(SSL_R_DH_PUBLIC_VALUE_LENGTH_IS_WRONG), "dh public value length is wrong"}, {ERR_REASON(SSL_R_DIGEST_CHECK_FAILED), "digest check failed"}, diff --git a/deps/openssl/openssl/ssl/ssl_lib.c b/deps/openssl/openssl/ssl/ssl_lib.c index e9ad2bc81beb67..c0931e787728ed 100644 --- a/deps/openssl/openssl/ssl/ssl_lib.c +++ b/deps/openssl/openssl/ssl/ssl_lib.c @@ -1510,9 +1510,13 @@ STACK_OF(SSL_CIPHER) *ssl_bytes_to_cipher_list(SSL *s, unsigned char *p, SSL_R_ERROR_IN_RECEIVED_CIPHER_LIST); return (NULL); } - if ((skp == NULL) || (*skp == NULL)) + if ((skp == NULL) || (*skp == NULL)) { sk = sk_SSL_CIPHER_new_null(); /* change perhaps later */ - else { + if(sk == NULL) { + SSLerr(SSL_F_SSL_BYTES_TO_CIPHER_LIST, ERR_R_MALLOC_FAILURE); + return NULL; + } + } else { sk = *skp; sk_SSL_CIPHER_zero(sk); } @@ -2326,7 +2330,7 @@ void ssl_set_cert_masks(CERT *c, const SSL_CIPHER *cipher) if (dh_dsa_export) emask_k |= SSL_kDHd; - if (emask_k & (SSL_kDHr | SSL_kDHd)) + if (mask_k & (SSL_kDHr | SSL_kDHd)) mask_a |= SSL_aDH; if (rsa_enc || rsa_sign) { @@ -2832,6 +2836,12 @@ const char *SSL_get_version(const SSL *s) return ("SSLv3"); else if (s->version == SSL2_VERSION) return ("SSLv2"); + else if (s->version == DTLS1_BAD_VER) + return ("DTLSv0.9"); + else if (s->version == DTLS1_VERSION) + return ("DTLSv1"); + else if (s->version == DTLS1_2_VERSION) + return ("DTLSv1.2"); else return ("unknown"); } diff --git a/deps/openssl/openssl/ssl/ssl_locl.h b/deps/openssl/openssl/ssl/ssl_locl.h index 79b85b9ed947de..6c2c551e5e341e 100644 --- a/deps/openssl/openssl/ssl/ssl_locl.h +++ b/deps/openssl/openssl/ssl/ssl_locl.h @@ -1058,6 +1058,7 @@ int ssl_set_peer_cert_type(SESS_CERT *c, int type); int ssl_get_new_session(SSL *s, int session); int ssl_get_prev_session(SSL *s, unsigned char *session, int len, const unsigned char *limit); +SSL_SESSION *ssl_session_dup(SSL_SESSION *src, int ticket); int ssl_cipher_id_cmp(const SSL_CIPHER *a, const SSL_CIPHER *b); DECLARE_OBJ_BSEARCH_GLOBAL_CMP_FN(SSL_CIPHER, SSL_CIPHER, ssl_cipher_id); int ssl_cipher_ptr_id_cmp(const SSL_CIPHER *const *ap, @@ -1230,7 +1231,6 @@ int dtls1_write_app_data_bytes(SSL *s, int type, const void *buf, int len); int dtls1_write_bytes(SSL *s, int type, const void *buf, int len); int dtls1_send_change_cipher_spec(SSL *s, int a, int b); -int dtls1_send_finished(SSL *s, int a, int b, const char *sender, int slen); int dtls1_read_failed(SSL *s, int code); int dtls1_buffer_message(SSL *s, int ccs); int dtls1_retransmit_message(SSL *s, unsigned short seq, diff --git a/deps/openssl/openssl/ssl/ssl_sess.c b/deps/openssl/openssl/ssl/ssl_sess.c index 1e1002fc8540db..07e7379abfd104 100644 --- a/deps/openssl/openssl/ssl/ssl_sess.c +++ b/deps/openssl/openssl/ssl/ssl_sess.c @@ -227,6 +227,130 @@ SSL_SESSION *SSL_SESSION_new(void) return (ss); } +/* + * Create a new SSL_SESSION and duplicate the contents of |src| into it. If + * ticket == 0 then no ticket information is duplicated, otherwise it is. + */ +SSL_SESSION *ssl_session_dup(SSL_SESSION *src, int ticket) +{ + SSL_SESSION *dest; + + dest = OPENSSL_malloc(sizeof(*src)); + if (dest == NULL) { + goto err; + } + memcpy(dest, src, sizeof(*dest)); + + /* + * Set the various pointers to NULL so that we can call SSL_SESSION_free in + * the case of an error whilst halfway through constructing dest + */ +#ifndef OPENSSL_NO_PSK + dest->psk_identity_hint = NULL; + dest->psk_identity = NULL; +#endif + dest->ciphers = NULL; +#ifndef OPENSSL_NO_TLSEXT + dest->tlsext_hostname = NULL; +# ifndef OPENSSL_NO_EC + dest->tlsext_ecpointformatlist = NULL; + dest->tlsext_ellipticcurvelist = NULL; +# endif +#endif + dest->tlsext_tick = NULL; +#ifndef OPENSSL_NO_SRP + dest->srp_username = NULL; +#endif + memset(&dest->ex_data, 0, sizeof(dest->ex_data)); + + /* We deliberately don't copy the prev and next pointers */ + dest->prev = NULL; + dest->next = NULL; + + dest->references = 1; + + if (src->sess_cert != NULL) + CRYPTO_add(&src->sess_cert->references, 1, CRYPTO_LOCK_SSL_SESS_CERT); + + if (src->peer != NULL) + CRYPTO_add(&src->peer->references, 1, CRYPTO_LOCK_X509); + +#ifndef OPENSSL_NO_PSK + if (src->psk_identity_hint) { + dest->psk_identity_hint = BUF_strdup(src->psk_identity_hint); + if (dest->psk_identity_hint == NULL) { + goto err; + } + } + if (src->psk_identity) { + dest->psk_identity = BUF_strdup(src->psk_identity); + if (dest->psk_identity == NULL) { + goto err; + } + } +#endif + + if(src->ciphers != NULL) { + dest->ciphers = sk_SSL_CIPHER_dup(src->ciphers); + if (dest->ciphers == NULL) + goto err; + } + + if (!CRYPTO_dup_ex_data(CRYPTO_EX_INDEX_SSL_SESSION, + &dest->ex_data, &src->ex_data)) { + goto err; + } + +#ifndef OPENSSL_NO_TLSEXT + if (src->tlsext_hostname) { + dest->tlsext_hostname = BUF_strdup(src->tlsext_hostname); + if (dest->tlsext_hostname == NULL) { + goto err; + } + } +# ifndef OPENSSL_NO_EC + if (src->tlsext_ecpointformatlist) { + dest->tlsext_ecpointformatlist = + BUF_memdup(src->tlsext_ecpointformatlist, + src->tlsext_ecpointformatlist_length); + if (dest->tlsext_ecpointformatlist == NULL) + goto err; + } + if (src->tlsext_ellipticcurvelist) { + dest->tlsext_ellipticcurvelist = + BUF_memdup(src->tlsext_ellipticcurvelist, + src->tlsext_ellipticcurvelist_length); + if (dest->tlsext_ellipticcurvelist == NULL) + goto err; + } +# endif +#endif + + if (ticket != 0) { + dest->tlsext_tick = BUF_memdup(src->tlsext_tick, src->tlsext_ticklen); + if(dest->tlsext_tick == NULL) + goto err; + } else { + dest->tlsext_tick_lifetime_hint = 0; + dest->tlsext_ticklen = 0; + } + +#ifndef OPENSSL_NO_SRP + if (src->srp_username) { + dest->srp_username = BUF_strdup(src->srp_username); + if (dest->srp_username == NULL) { + goto err; + } + } +#endif + + return dest; +err: + SSLerr(SSL_F_SSL_SESSION_DUP, ERR_R_MALLOC_FAILURE); + SSL_SESSION_free(dest); + return NULL; +} + const unsigned char *SSL_SESSION_get_id(const SSL_SESSION *s, unsigned int *len) { @@ -449,9 +573,14 @@ int ssl_get_prev_session(SSL *s, unsigned char *session_id, int len, int r; #endif - if (len > SSL_MAX_SSL_SESSION_ID_LENGTH) + if (len < 0 || len > SSL_MAX_SSL_SESSION_ID_LENGTH) goto err; + if (session_id + len > limit) { + fatal = 1; + goto err; + } + if (len == 0) try_session_cache = 0; diff --git a/deps/openssl/openssl/ssl/ssl_stat.c b/deps/openssl/openssl/ssl/ssl_stat.c index d725d783423968..1b9069f978d2b9 100644 --- a/deps/openssl/openssl/ssl/ssl_stat.c +++ b/deps/openssl/openssl/ssl/ssl_stat.c @@ -117,6 +117,9 @@ const char *SSL_state_string_long(const SSL *s) case SSL_ST_OK | SSL_ST_ACCEPT: str = "ok/accept SSL initialization"; break; + case SSL_ST_ERR: + str = "error"; + break; #ifndef OPENSSL_NO_SSL2 case SSL2_ST_CLIENT_START_ENCRYPTION: str = "SSLv2 client start encryption"; @@ -496,6 +499,9 @@ const char *SSL_state_string(const SSL *s) case SSL_ST_OK: str = "SSLOK "; break; + case SSL_ST_ERR: + str = "SSLERR"; + break; #ifndef OPENSSL_NO_SSL2 case SSL2_ST_CLIENT_START_ENCRYPTION: str = "2CSENC"; diff --git a/deps/openssl/openssl/ssl/ssltest.c b/deps/openssl/openssl/ssl/ssltest.c index 9f5d5862479801..6737adf239c0b2 100644 --- a/deps/openssl/openssl/ssl/ssltest.c +++ b/deps/openssl/openssl/ssl/ssltest.c @@ -692,7 +692,9 @@ static void sv_usage(void) " -bytes - number of bytes to swap between client/server\n"); #ifndef OPENSSL_NO_DH fprintf(stderr, - " -dhe1024 - use 1024 bit key (safe prime) for DHE\n"); + " -dhe512 - use 512 bit key for DHE (to test failure)\n"); + fprintf(stderr, + " -dhe1024 - use 1024 bit key (safe prime) for DHE (default, no-op)\n"); fprintf(stderr, " -dhe1024dsa - use 1024 bit key (with 160-bit subprime) for DHE\n"); fprintf(stderr, " -no_dhe - disable DHE\n"); @@ -715,6 +717,10 @@ static void sv_usage(void) #endif #ifndef OPENSSL_NO_TLS1 fprintf(stderr, " -tls1 - use TLSv1\n"); +#endif +#ifndef OPENSSL_NO_DTLS + fprintf(stderr, " -dtls1 - use DTLSv1\n"); + fprintf(stderr, " -dtls12 - use DTLSv1.2\n"); #endif fprintf(stderr, " -CApath arg - PEM format directory of CA's\n"); fprintf(stderr, " -CAfile arg - PEM format file of CA's\n"); @@ -877,7 +883,7 @@ int main(int argc, char *argv[]) int badop = 0; int bio_pair = 0; int force = 0; - int tls1 = 0, ssl2 = 0, ssl3 = 0, ret = 1; + int dtls1 = 0, dtls12 = 0, tls1 = 0, ssl2 = 0, ssl3 = 0, ret = 1; int client_auth = 0; int server_auth = 0, i; struct app_verify_arg app_verify_arg = @@ -897,7 +903,7 @@ int main(int argc, char *argv[]) long bytes = 256L; #ifndef OPENSSL_NO_DH DH *dh; - int dhe1024 = 0, dhe1024dsa = 0; + int dhe512 = 0, dhe1024dsa = 0; #endif #ifndef OPENSSL_NO_ECDH EC_KEY *ecdh = NULL; @@ -977,19 +983,19 @@ int main(int argc, char *argv[]) debug = 1; else if (strcmp(*argv, "-reuse") == 0) reuse = 1; - else if (strcmp(*argv, "-dhe1024") == 0) { + else if (strcmp(*argv, "-dhe512") == 0) { #ifndef OPENSSL_NO_DH - dhe1024 = 1; + dhe512 = 1; #else fprintf(stderr, - "ignoring -dhe1024, since I'm compiled without DH\n"); + "ignoring -dhe512, since I'm compiled without DH\n"); #endif } else if (strcmp(*argv, "-dhe1024dsa") == 0) { #ifndef OPENSSL_NO_DH dhe1024dsa = 1; #else fprintf(stderr, - "ignoring -dhe1024, since I'm compiled without DH\n"); + "ignoring -dhe1024dsa, since I'm compiled without DH\n"); #endif } else if (strcmp(*argv, "-no_dhe") == 0) no_dhe = 1; @@ -1037,6 +1043,16 @@ int main(int argc, char *argv[]) no_protocol = 1; #endif ssl3 = 1; + } else if (strcmp(*argv, "-dtls1") == 0) { +#ifdef OPENSSL_NO_DTLS + no_protocol = 1; +#endif + dtls1 = 1; + } else if (strcmp(*argv, "-dtls12") == 0) { +#ifdef OPENSSL_NO_DTLS + no_protocol = 1; +#endif + dtls12 = 1; } else if (strncmp(*argv, "-num", 4) == 0) { if (--argc < 1) goto bad; @@ -1172,8 +1188,8 @@ int main(int argc, char *argv[]) goto end; } - if (ssl2 + ssl3 + tls1 > 1) { - fprintf(stderr, "At most one of -ssl2, -ssl3, or -tls1 should " + if (ssl2 + ssl3 + tls1 + dtls1 + dtls12 > 1) { + fprintf(stderr, "At most one of -ssl2, -ssl3, -tls1, -dtls1 or -dtls12 should " "be requested.\n"); EXIT(1); } @@ -1190,10 +1206,10 @@ int main(int argc, char *argv[]) goto end; } - if (!ssl2 && !ssl3 && !tls1 && number > 1 && !reuse && !force) { + if (!ssl2 && !ssl3 && !tls1 && !dtls1 && !dtls12 && number > 1 && !reuse && !force) { fprintf(stderr, "This case cannot work. Use -f to perform " "the test anyway (and\n-d to see what happens), " - "or add one of -ssl2, -ssl3, -tls1, -reuse\n" + "or add one of ssl2, -ssl3, -tls1, -dtls1, -dtls12, -reuse\n" "to avoid protocol mismatch.\n"); EXIT(1); } @@ -1271,6 +1287,13 @@ int main(int argc, char *argv[]) meth = SSLv3_method(); else #endif +#ifndef OPENSSL_NO_DTLS + if (dtls1) + meth = DTLSv1_method(); + else if (dtls12) + meth = DTLSv1_2_method(); + else +#endif #ifndef OPENSSL_NO_TLS1 if (tls1) meth = TLSv1_method(); @@ -1297,10 +1320,10 @@ int main(int argc, char *argv[]) */ SSL_CTX_set_options(s_ctx, SSL_OP_SINGLE_DH_USE); dh = get_dh1024dsa(); - } else if (dhe1024) - dh = get_dh1024(); - else + } else if (dhe512) dh = get_dh512(); + else + dh = get_dh1024(); SSL_CTX_set_tmp_dh(s_ctx, dh); DH_free(dh); } @@ -1318,12 +1341,9 @@ int main(int argc, char *argv[]) BIO_printf(bio_err, "unknown curve name (%s)\n", named_curve); goto end; } - } else -# ifdef OPENSSL_NO_EC2M + } else { nid = NID_X9_62_prime256v1; -# else - nid = NID_sect163r2; -# endif + } ecdh = EC_KEY_new_by_curve_name(nid); if (ecdh == NULL) { diff --git a/deps/openssl/openssl/ssl/t1_enc.c b/deps/openssl/openssl/ssl/t1_enc.c index 577885fe081e80..e2a8f86919789e 100644 --- a/deps/openssl/openssl/ssl/t1_enc.c +++ b/deps/openssl/openssl/ssl/t1_enc.c @@ -260,7 +260,7 @@ static int tls1_PRF(long digest_mask, if ((m << TLS1_PRF_DGST_SHIFT) & digest_mask) count++; } - if(!count) { + if (!count) { /* Should never happen */ SSLerr(SSL_F_TLS1_PRF, ERR_R_INTERNAL_ERROR); goto err; @@ -404,9 +404,9 @@ int tls1_change_cipher_state(SSL *s, int which) } #endif /* - * this is done by dtls1_reset_seq_numbers for DTLS1_VERSION + * this is done by dtls1_reset_seq_numbers for DTLS */ - if (s->version != DTLS1_VERSION) + if (!SSL_IS_DTLS(s)) memset(&(s->s3->read_sequence[0]), 0, 8); mac_secret = &(s->s3->read_mac_secret[0]); mac_secret_size = &(s->s3->read_mac_secret_size); @@ -442,9 +442,9 @@ int tls1_change_cipher_state(SSL *s, int which) } #endif /* - * this is done by dtls1_reset_seq_numbers for DTLS1_VERSION + * this is done by dtls1_reset_seq_numbers for DTLS */ - if (s->version != DTLS1_VERSION) + if (!SSL_IS_DTLS(s)) memset(&(s->s3->write_sequence[0]), 0, 8); mac_secret = &(s->s3->write_mac_secret[0]); mac_secret_size = &(s->s3->write_mac_secret_size); @@ -803,7 +803,7 @@ int tls1_enc(SSL *s, int send) bs = EVP_CIPHER_block_size(ds->cipher); if (EVP_CIPHER_flags(ds->cipher) & EVP_CIPH_FLAG_AEAD_CIPHER) { - unsigned char buf[13], *seq; + unsigned char buf[EVP_AEAD_TLS1_AAD_LEN], *seq; seq = send ? s->s3->write_sequence : s->s3->read_sequence; @@ -827,7 +827,10 @@ int tls1_enc(SSL *s, int send) buf[10] = (unsigned char)(s->version); buf[11] = rec->length >> 8; buf[12] = rec->length & 0xff; - pad = EVP_CIPHER_CTX_ctrl(ds, EVP_CTRL_AEAD_TLS1_AAD, 13, buf); + pad = EVP_CIPHER_CTX_ctrl(ds, EVP_CTRL_AEAD_TLS1_AAD, + EVP_AEAD_TLS1_AAD_LEN, buf); + if (pad <= 0) + return -1; if (send) { l += pad; rec->length += pad; diff --git a/deps/openssl/openssl/ssl/t1_lib.c b/deps/openssl/openssl/ssl/t1_lib.c index d85d26e596d935..d811d3fdb88f11 100644 --- a/deps/openssl/openssl/ssl/t1_lib.c +++ b/deps/openssl/openssl/ssl/t1_lib.c @@ -113,9 +113,11 @@ #include #include #include +#ifndef OPENSSL_NO_EC #ifdef OPENSSL_NO_EC2M # include #endif +#endif #include #include #include "ssl_locl.h" @@ -260,47 +262,68 @@ static const unsigned char ecformats_default[] = { TLSEXT_ECPOINTFORMAT_ansiX962_compressed_char2 }; -static const unsigned char eccurves_default[] = { -# ifndef OPENSSL_NO_EC2M - 0, 14, /* sect571r1 (14) */ - 0, 13, /* sect571k1 (13) */ -# endif +/* The client's default curves / the server's 'auto' curves. */ +static const unsigned char eccurves_auto[] = { + /* Prefer P-256 which has the fastest and most secure implementations. */ + 0, 23, /* secp256r1 (23) */ + /* Other >= 256-bit prime curves. */ 0, 25, /* secp521r1 (25) */ 0, 28, /* brainpool512r1 (28) */ + 0, 27, /* brainpoolP384r1 (27) */ + 0, 24, /* secp384r1 (24) */ + 0, 26, /* brainpoolP256r1 (26) */ + 0, 22, /* secp256k1 (22) */ # ifndef OPENSSL_NO_EC2M + /* >= 256-bit binary curves. */ + 0, 14, /* sect571r1 (14) */ + 0, 13, /* sect571k1 (13) */ 0, 11, /* sect409k1 (11) */ 0, 12, /* sect409r1 (12) */ + 0, 9, /* sect283k1 (9) */ + 0, 10, /* sect283r1 (10) */ # endif +}; + +static const unsigned char eccurves_all[] = { + /* Prefer P-256 which has the fastest and most secure implementations. */ + 0, 23, /* secp256r1 (23) */ + /* Other >= 256-bit prime curves. */ + 0, 25, /* secp521r1 (25) */ + 0, 28, /* brainpool512r1 (28) */ 0, 27, /* brainpoolP384r1 (27) */ 0, 24, /* secp384r1 (24) */ + 0, 26, /* brainpoolP256r1 (26) */ + 0, 22, /* secp256k1 (22) */ # ifndef OPENSSL_NO_EC2M + /* >= 256-bit binary curves. */ + 0, 14, /* sect571r1 (14) */ + 0, 13, /* sect571k1 (13) */ + 0, 11, /* sect409k1 (11) */ + 0, 12, /* sect409r1 (12) */ 0, 9, /* sect283k1 (9) */ 0, 10, /* sect283r1 (10) */ # endif - 0, 26, /* brainpoolP256r1 (26) */ - 0, 22, /* secp256k1 (22) */ - 0, 23, /* secp256r1 (23) */ + /* + * Remaining curves disabled by default but still permitted if set + * via an explicit callback or parameters. + */ + 0, 20, /* secp224k1 (20) */ + 0, 21, /* secp224r1 (21) */ + 0, 18, /* secp192k1 (18) */ + 0, 19, /* secp192r1 (19) */ + 0, 15, /* secp160k1 (15) */ + 0, 16, /* secp160r1 (16) */ + 0, 17, /* secp160r2 (17) */ # ifndef OPENSSL_NO_EC2M 0, 8, /* sect239k1 (8) */ 0, 6, /* sect233k1 (6) */ 0, 7, /* sect233r1 (7) */ -# endif - 0, 20, /* secp224k1 (20) */ - 0, 21, /* secp224r1 (21) */ -# ifndef OPENSSL_NO_EC2M 0, 4, /* sect193r1 (4) */ 0, 5, /* sect193r2 (5) */ -# endif - 0, 18, /* secp192k1 (18) */ - 0, 19, /* secp192r1 (19) */ -# ifndef OPENSSL_NO_EC2M 0, 1, /* sect163k1 (1) */ 0, 2, /* sect163r1 (2) */ 0, 3, /* sect163r2 (3) */ # endif - 0, 15, /* secp160k1 (15) */ - 0, 16, /* secp160r1 (16) */ - 0, 17, /* secp160r2 (17) */ }; static const unsigned char suiteb_curves[] = { @@ -474,8 +497,13 @@ static int tls1_get_curvelist(SSL *s, int sess, } else # endif { - *pcurves = eccurves_default; - pcurveslen = sizeof(eccurves_default); + if (!s->server || (s->cert && s->cert->ecdh_tmp_auto)) { + *pcurves = eccurves_auto; + pcurveslen = sizeof(eccurves_auto); + } else { + *pcurves = eccurves_all; + pcurveslen = sizeof(eccurves_all); + } } } } @@ -565,6 +593,20 @@ int tls1_shared_curve(SSL *s, int nmatch) (s, !(s->options & SSL_OP_CIPHER_SERVER_PREFERENCE), &pref, &num_pref)) return nmatch == -1 ? 0 : NID_undef; + + /* + * If the client didn't send the elliptic_curves extension all of them + * are allowed. + */ + if (num_supp == 0 && (s->options & SSL_OP_CIPHER_SERVER_PREFERENCE) != 0) { + supp = eccurves_all; + num_supp = sizeof(eccurves_all) / 2; + } else if (num_pref == 0 && + (s->options & SSL_OP_CIPHER_SERVER_PREFERENCE) == 0) { + pref = eccurves_all; + num_pref = sizeof(eccurves_all) / 2; + } + k = 0; for (i = 0; i < num_pref; i++, pref += 2) { const unsigned char *tsupp = supp; @@ -761,6 +803,16 @@ static int tls1_check_ec_key(SSL *s, for (j = 0; j <= 1; j++) { if (!tls1_get_curvelist(s, j, &pcurves, &num_curves)) return 0; + if (j == 1 && num_curves == 0) { + /* + * If we've not received any curves then skip this check. + * RFC 4492 does not require the supported elliptic curves extension + * so if it is not sent we can just choose any curve. + * It is invalid to send an empty list in the elliptic curves + * extension, so num_curves == 0 always means no extension. + */ + break; + } for (i = 0; i < num_curves; i++, pcurves += 2) { if (pcurves[0] == curve_id[0] && pcurves[1] == curve_id[1]) break; @@ -1964,19 +2016,23 @@ static int ssl_scan_clienthello_tlsext(SSL *s, unsigned char **p, s->srtp_profile = NULL; - if (data >= (d + n - 2)) - goto ri_check; + if (data >= (d + n - 2)) { + if (data != d + n) + goto err; + else + goto ri_check; + } n2s(data, len); if (data > (d + n - len)) - goto ri_check; + goto err; while (data <= (d + n - 4)) { n2s(data, type); n2s(data, size); if (data + size > (d + n)) - goto ri_check; + goto err; # if 0 fprintf(stderr, "Received extension type %d size %d\n", type, size); # endif @@ -2012,16 +2068,12 @@ static int ssl_scan_clienthello_tlsext(SSL *s, unsigned char **p, int servname_type; int dsize; - if (size < 2) { - *al = SSL_AD_DECODE_ERROR; - return 0; - } + if (size < 2) + goto err; n2s(data, dsize); size -= 2; - if (dsize > size) { - *al = SSL_AD_DECODE_ERROR; - return 0; - } + if (dsize > size) + goto err; sdata = data; while (dsize > 3) { @@ -2029,18 +2081,16 @@ static int ssl_scan_clienthello_tlsext(SSL *s, unsigned char **p, n2s(sdata, len); dsize -= 3; - if (len > dsize) { - *al = SSL_AD_DECODE_ERROR; - return 0; - } + if (len > dsize) + goto err; + if (s->servername_done == 0) switch (servname_type) { case TLSEXT_NAMETYPE_host_name: if (!s->hit) { - if (s->session->tlsext_hostname) { - *al = SSL_AD_DECODE_ERROR; - return 0; - } + if (s->session->tlsext_hostname) + goto err; + if (len > TLSEXT_MAXLEN_host_name) { *al = TLS1_AD_UNRECOGNIZED_NAME; return 0; @@ -2074,31 +2124,23 @@ static int ssl_scan_clienthello_tlsext(SSL *s, unsigned char **p, dsize -= len; } - if (dsize != 0) { - *al = SSL_AD_DECODE_ERROR; - return 0; - } + if (dsize != 0) + goto err; } # ifndef OPENSSL_NO_SRP else if (type == TLSEXT_TYPE_srp) { - if (size <= 0 || ((len = data[0])) != (size - 1)) { - *al = SSL_AD_DECODE_ERROR; - return 0; - } - if (s->srp_ctx.login != NULL) { - *al = SSL_AD_DECODE_ERROR; - return 0; - } + if (size == 0 || ((len = data[0])) != (size - 1)) + goto err; + if (s->srp_ctx.login != NULL) + goto err; if ((s->srp_ctx.login = OPENSSL_malloc(len + 1)) == NULL) return -1; memcpy(s->srp_ctx.login, &data[1], len); s->srp_ctx.login[len] = '\0'; - if (strlen(s->srp_ctx.login) != len) { - *al = SSL_AD_DECODE_ERROR; - return 0; - } + if (strlen(s->srp_ctx.login) != len) + goto err; } # endif @@ -2108,10 +2150,8 @@ static int ssl_scan_clienthello_tlsext(SSL *s, unsigned char **p, int ecpointformatlist_length = *(sdata++); if (ecpointformatlist_length != size - 1 || - ecpointformatlist_length < 1) { - *al = TLS1_AD_DECODE_ERROR; - return 0; - } + ecpointformatlist_length < 1) + goto err; if (!s->hit) { if (s->session->tlsext_ecpointformatlist) { OPENSSL_free(s->session->tlsext_ecpointformatlist); @@ -2145,15 +2185,13 @@ static int ssl_scan_clienthello_tlsext(SSL *s, unsigned char **p, if (ellipticcurvelist_length != size - 2 || ellipticcurvelist_length < 1 || /* Each NamedCurve is 2 bytes. */ - ellipticcurvelist_length & 1) { - *al = TLS1_AD_DECODE_ERROR; - return 0; - } + ellipticcurvelist_length & 1) + goto err; + if (!s->hit) { - if (s->session->tlsext_ellipticcurvelist) { - *al = TLS1_AD_DECODE_ERROR; - return 0; - } + if (s->session->tlsext_ellipticcurvelist) + goto err; + s->session->tlsext_ellipticcurvelist_length = 0; if ((s->session->tlsext_ellipticcurvelist = OPENSSL_malloc(ellipticcurvelist_length)) == NULL) { @@ -2221,26 +2259,18 @@ static int ssl_scan_clienthello_tlsext(SSL *s, unsigned char **p, renegotiate_seen = 1; } else if (type == TLSEXT_TYPE_signature_algorithms) { int dsize; - if (s->cert->peer_sigalgs || size < 2) { - *al = SSL_AD_DECODE_ERROR; - return 0; - } + if (s->cert->peer_sigalgs || size < 2) + goto err; n2s(data, dsize); size -= 2; - if (dsize != size || dsize & 1 || !dsize) { - *al = SSL_AD_DECODE_ERROR; - return 0; - } - if (!tls1_save_sigalgs(s, data, dsize)) { - *al = SSL_AD_DECODE_ERROR; - return 0; - } + if (dsize != size || dsize & 1 || !dsize) + goto err; + if (!tls1_save_sigalgs(s, data, dsize)) + goto err; } else if (type == TLSEXT_TYPE_status_request) { - if (size < 5) { - *al = SSL_AD_DECODE_ERROR; - return 0; - } + if (size < 5) + goto err; s->tlsext_status_type = *data++; size--; @@ -2250,35 +2280,26 @@ static int ssl_scan_clienthello_tlsext(SSL *s, unsigned char **p, /* Read in responder_id_list */ n2s(data, dsize); size -= 2; - if (dsize > size) { - *al = SSL_AD_DECODE_ERROR; - return 0; - } + if (dsize > size) + goto err; while (dsize > 0) { OCSP_RESPID *id; int idsize; - if (dsize < 4) { - *al = SSL_AD_DECODE_ERROR; - return 0; - } + if (dsize < 4) + goto err; n2s(data, idsize); dsize -= 2 + idsize; size -= 2 + idsize; - if (dsize < 0) { - *al = SSL_AD_DECODE_ERROR; - return 0; - } + if (dsize < 0) + goto err; sdata = data; data += idsize; id = d2i_OCSP_RESPID(NULL, &sdata, idsize); - if (!id) { - *al = SSL_AD_DECODE_ERROR; - return 0; - } + if (!id) + goto err; if (data != sdata) { OCSP_RESPID_free(id); - *al = SSL_AD_DECODE_ERROR; - return 0; + goto err; } if (!s->tlsext_ocsp_ids && !(s->tlsext_ocsp_ids = @@ -2295,16 +2316,12 @@ static int ssl_scan_clienthello_tlsext(SSL *s, unsigned char **p, } /* Read in request_extensions */ - if (size < 2) { - *al = SSL_AD_DECODE_ERROR; - return 0; - } + if (size < 2) + goto err; n2s(data, dsize); size -= 2; - if (dsize != size) { - *al = SSL_AD_DECODE_ERROR; - return 0; - } + if (dsize != size) + goto err; sdata = data; if (dsize > 0) { if (s->tlsext_ocsp_exts) { @@ -2314,10 +2331,8 @@ static int ssl_scan_clienthello_tlsext(SSL *s, unsigned char **p, s->tlsext_ocsp_exts = d2i_X509_EXTENSIONS(NULL, &sdata, dsize); - if (!s->tlsext_ocsp_exts || (data + dsize != sdata)) { - *al = SSL_AD_DECODE_ERROR; - return 0; - } + if (!s->tlsext_ocsp_exts || (data + dsize != sdata)) + goto err; } } /* @@ -2389,6 +2404,10 @@ static int ssl_scan_clienthello_tlsext(SSL *s, unsigned char **p, data += size; } + /* Spurious data on the end */ + if (data != d + n) + goto err; + *p = data; ri_check: @@ -2404,6 +2423,9 @@ static int ssl_scan_clienthello_tlsext(SSL *s, unsigned char **p, } return 1; +err: + *al = SSL_AD_DECODE_ERROR; + return 0; } /* @@ -3837,7 +3859,10 @@ int tls1_process_heartbeat(SSL *s) memcpy(bp, pl, payload); bp += payload; /* Random padding */ - RAND_pseudo_bytes(bp, padding); + if (RAND_pseudo_bytes(bp, padding) < 0) { + OPENSSL_free(buffer); + return -1; + } r = ssl3_write_bytes(s, TLS1_RT_HEARTBEAT, buffer, 3 + payload + padding); @@ -3872,7 +3897,7 @@ int tls1_process_heartbeat(SSL *s) int tls1_heartbeat(SSL *s) { unsigned char *buf, *p; - int ret; + int ret = -1; unsigned int payload = 18; /* Sequence number + random bytes */ unsigned int padding = 16; /* Use minimum padding */ @@ -3920,10 +3945,16 @@ int tls1_heartbeat(SSL *s) /* Sequence number */ s2n(s->tlsext_hb_seq, p); /* 16 random bytes */ - RAND_pseudo_bytes(p, 16); + if (RAND_pseudo_bytes(p, 16) < 0) { + SSLerr(SSL_F_TLS1_HEARTBEAT, ERR_R_INTERNAL_ERROR); + goto err; + } p += 16; /* Random padding */ - RAND_pseudo_bytes(p, padding); + if (RAND_pseudo_bytes(p, padding) < 0) { + SSLerr(SSL_F_TLS1_HEARTBEAT, ERR_R_INTERNAL_ERROR); + goto err; + } ret = ssl3_write_bytes(s, TLS1_RT_HEARTBEAT, buf, 3 + payload + padding); if (ret >= 0) { @@ -3935,6 +3966,7 @@ int tls1_heartbeat(SSL *s) s->tlsext_hb_pending = 1; } +err: OPENSSL_free(buf); return ret; diff --git a/deps/openssl/openssl/ssl/tls_srp.c b/deps/openssl/openssl/ssl/tls_srp.c index d36cfa0a5c2d20..bb719ba4cf9fd2 100644 --- a/deps/openssl/openssl/ssl/tls_srp.c +++ b/deps/openssl/openssl/ssl/tls_srp.c @@ -454,7 +454,8 @@ int SRP_Calc_A_param(SSL *s) { unsigned char rnd[SSL_MAX_MASTER_KEY_LENGTH]; - RAND_bytes(rnd, sizeof(rnd)); + if (RAND_bytes(rnd, sizeof(rnd)) <= 0) + return -1; s->srp_ctx.a = BN_bin2bn(rnd, sizeof(rnd), s->srp_ctx.a); OPENSSL_cleanse(rnd, sizeof(rnd)); diff --git a/deps/openssl/openssl/test/Makefile b/deps/openssl/openssl/test/Makefile index 338867952fd43c..a570fadf9843f3 100644 --- a/deps/openssl/openssl/test/Makefile +++ b/deps/openssl/openssl/test/Makefile @@ -351,12 +351,13 @@ test_constant_time: $(CONSTTIMETEST)$(EXE_EXT) lint: lint -DLINT $(INCLUDES) $(SRC)>fluff -depend: - @if [ -z "$(THIS)" ]; then \ - $(MAKE) -f $(TOP)/Makefile reflect THIS=$@; \ - else \ - $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(SRC); \ - fi +update: local_depend + @if [ -z "$(THIS)" ]; then $(MAKE) -f $(TOP)/Makefile reflect THIS=$@; fi + +depend: local_depend + @if [ -z "$(THIS)" ]; then $(MAKE) -f $(TOP)/Makefile reflect THIS=$@; fi +local_depend: + @[ -z "$(THIS)" ] || $(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(SRC) dclean: $(PERL) -pe 'if (/^# DO NOT DELETE THIS LINE/) {print; exit(0);}' $(MAKEFILE) >Makefile.new @@ -368,10 +369,10 @@ clean: rm -f .rnd tmp.bntest tmp.bctest *.o *.obj *.dll lib tags core .pure .nfs* *.old *.bak fluff $(EXE) *.ss *.srl log dummytest $(DLIBSSL): - (cd ..; $(MAKE) DIRS=ssl all) + (cd ..; $(MAKE) build_libssl) $(DLIBCRYPTO): - (cd ..; $(MAKE) DIRS=crypto all) + (cd ..; $(MAKE) build_libcrypto) BUILD_CMD=shlib_target=; if [ -n "$(SHARED_LIBS)" ]; then \ shlib_target="$(SHLIB_TARGET)"; \ diff --git a/deps/openssl/openssl/test/cms-test.pl b/deps/openssl/openssl/test/cms-test.pl index 57c541bbd9acca..baa3b5948a4428 100644 --- a/deps/openssl/openssl/test/cms-test.pl +++ b/deps/openssl/openssl/test/cms-test.pl @@ -113,7 +113,7 @@ { die "Error checking for EC support\n"; } - + system ("$ossl_path no-ec2m > $null_path"); if ($? == 0) { @@ -128,7 +128,7 @@ die "Error checking for EC2M support\n"; } -system ("$ossl_path no-ecdh > $null_path"); +system ("$ossl_path no-ec > $null_path"); if ($? == 0) { $no_ecdh = 1; @@ -141,7 +141,7 @@ { die "Error checking for ECDH support\n"; } - + my @smime_pkcs7_tests = ( [ @@ -452,6 +452,14 @@ "-decrypt -recip $smdir/smec1.pem -in test.cms -out smtst.txt" ], + [ +"enveloped content test streaming S/MIME format, ECDH, key identifier", + "-encrypt -keyid -in smcont.txt" + . " -stream -out test.cms" + . " -recip $smdir/smec1.pem", + "-decrypt -recip $smdir/smec1.pem -in test.cms -out smtst.txt" + ], + [ "enveloped content test streaming S/MIME format, ECDH, AES128, SHA256 KDF", "-encrypt -in smcont.txt" diff --git a/deps/openssl/openssl/test/ocsp-tests/D3_Cert_EE.pem b/deps/openssl/openssl/test/ocsp-tests/D3_Cert_EE.pem index e0a80d359bd0f1..f371ed1f17d995 100644 --- a/deps/openssl/openssl/test/ocsp-tests/D3_Cert_EE.pem +++ b/deps/openssl/openssl/test/ocsp-tests/D3_Cert_EE.pem @@ -21,11 +21,11 @@ BQUAA4ICAQA2+uCGX18kZD8gyfj44TlwV4TXJ5BrT0M9qogg2k5u057i+X2ePy3D iE2REyLkU+i5ekH5gvTl74uSJKtpSf/hMyJEByyPyIULhlXCl46z2Z60drYzO4ig apCdkm0JthVGvk6/hjdaxgBGhUvSTEP5nLNkDa+uYVHJI58wfX2oh9gqxf8VnMJ8 /A8Zi6mYCWUlFUobNd/ozyDZ6WVntrLib85sAFhds93nkoUYxgx1N9Xg/I31/jcL -6bqmpRAZcbPtvEom0RyqPLM+AOgySWiYbg1Nl8nKx25C2AuXk63NN4CVwkXpdFF3 -q5qk1izPruvJ68jNW0pG7nrMQsiY2BCesfGyEzY8vfrMjeR5MLNv5r+obeYFnC1j -uYp6JBt+thW+xPFzHYLjohKPwo/NbMOjIUM9gv/Pq3rVRPgWru4/8yYWhrmEK370 -rtlYBUSGRUdR8xed1Jvs+4qJ3s9t41mLSXvUfwyPsT7eoloUAfw3RhdwOzXoC2P6 -ftmniyu/b/HuYH1AWK+HFtFi9CHiMIqOJMhj/LnzL9udrQOpir7bVej/mlb3kSRo -2lZymKOvuMymMpJkvBvUU/QEbCxWZAkTyqL2qlcQhHv7W366DOFjxDqpthaTRD69 -T8i/2AnsBDjYFxa47DisIvR57rLmE+fILjSvd94N/IpGs3lSOS5JeA== +6bqmpRAZcbPtvEom0RyqPLM+AOgySWiYbg1Nl8nKx25C2AuXk63NN4CVwkXpdFF3 +q5qk1izPruvJ68jNW0pG7nrMQsiY2BCesfGyEzY8vfrMjeR5MLNv5r+obeYFnC1j +uYp6JBt+thW+xPFzHYLjohKPwo/NbMOjIUM9gv/Pq3rVRPgWru4/8yYWhrmEK370 +rtlYBUSGRUdR8xed1Jvs+4qJ3s9t41mLSXvUfwyPsT7eoloUAfw3RhdwOzXoC2P6 +ftmniyu/b/HuYH1AWK+HFtFi9CHiMIqOJMhj/LnzL9udrQOpir7bVej/mlb3kSRo +2lZymKOvuMymMpJkvBvUU/QEbCxWZAkTyqL2qlcQhHv7W366DOFjxDqpthaTRD69 +T8i/2AnsBDjYFxa47DisIvR57rLmE+fILjSvd94N/IpGs3lSOS5JeA== -----END CERTIFICATE----- diff --git a/deps/openssl/openssl/test/smime-certs/ca.cnf b/deps/openssl/openssl/test/smime-certs/ca.cnf index 9e1def2493deb5..5e8b108654b331 100644 --- a/deps/openssl/openssl/test/smime-certs/ca.cnf +++ b/deps/openssl/openssl/test/smime-certs/ca.cnf @@ -63,3 +63,4 @@ subjectKeyIdentifier=hash authorityKeyIdentifier=keyid:always basicConstraints = critical,CA:true keyUsage = critical, cRLSign, keyCertSign + diff --git a/deps/openssl/openssl/test/testssl b/deps/openssl/openssl/test/testssl index e3b342bfd4c060..ddebf085343b73 100644 --- a/deps/openssl/openssl/test/testssl +++ b/deps/openssl/openssl/test/testssl @@ -101,6 +101,30 @@ $ssltest -bio_pair -ssl3 -server_auth -client_auth $CA $extra || exit 1 echo test sslv2/sslv3 via BIO pair $ssltest $extra || exit 1 +echo test dtlsv1 +$ssltest -dtls1 $extra || exit 1 + +echo test dtlsv1 with server authentication +$ssltest -dtls1 -server_auth $CA $extra || exit 1 + +echo test dtlsv1 with client authentication +$ssltest -dtls1 -client_auth $CA $extra || exit 1 + +echo test dtlsv1 with both client and server authentication +$ssltest -dtls1 -server_auth -client_auth $CA $extra || exit 1 + +echo test dtlsv1.2 +$ssltest -dtls12 $extra || exit 1 + +echo test dtlsv1.2 with server authentication +$ssltest -dtls12 -server_auth $CA $extra || exit 1 + +echo test dtlsv1.2 with client authentication +$ssltest -dtls12 -client_auth $CA $extra || exit 1 + +echo test dtlsv1.2 with both client and server authentication +$ssltest -dtls12 -server_auth -client_auth $CA $extra || exit 1 + if [ $dsa_cert = NO ]; then echo 'test sslv2/sslv3 w/o (EC)DHE via BIO pair' $ssltest -bio_pair -no_dhe -no_ecdhe $extra || exit 1 @@ -121,10 +145,9 @@ $ssltest -bio_pair -server_auth -client_auth $CA $extra || exit 1 echo test sslv2/sslv3 with both client and server authentication via BIO pair and app verify $ssltest -bio_pair -server_auth -client_auth -app_verify $CA $extra || exit 1 -echo "Testing ciphersuites" -for protocol in TLSv1.2 SSLv3; do - echo "Testing ciphersuites for $protocol" - for cipher in `../util/shlib_wrap.sh ../apps/openssl ciphers "RSA+$protocol" | tr ':' ' '`; do +test_cipher() { + local cipher=$1 + local protocol=$2 echo "Testing $cipher" prot="" if [ $protocol = "SSLv3" ] ; then @@ -135,7 +158,38 @@ for protocol in TLSv1.2 SSLv3; do echo "Failed $cipher" exit 1 fi +} + +echo "Testing ciphersuites" +for protocol in TLSv1.2 SSLv3; do + echo "Testing ciphersuites for $protocol" + for cipher in `../util/shlib_wrap.sh ../apps/openssl ciphers "RSA+$protocol" | tr ':' ' '`; do + test_cipher $cipher $protocol done + if ../util/shlib_wrap.sh ../apps/openssl no-dh; then + echo "skipping RSA+DHE tests" + else + for cipher in `../util/shlib_wrap.sh ../apps/openssl ciphers "EDH+aRSA+$protocol:-EXP" | tr ':' ' '`; do + test_cipher $cipher $protocol + done + echo "testing connection with weak DH, expecting failure" + if [ $protocol = "SSLv3" ] ; then + $ssltest -cipher EDH -dhe512 -ssl3 + else + $ssltest -cipher EDH -dhe512 + fi + if [ $? -eq 0 ]; then + echo "FAIL: connection with weak DH succeeded" + exit 1 + fi + fi + if ../util/shlib_wrap.sh ../apps/openssl no-ec; then + echo "skipping RSA+ECDHE tests" + else + for cipher in `../util/shlib_wrap.sh ../apps/openssl ciphers "EECDH+aRSA+$protocol:-EXP" | tr ':' ' '`; do + test_cipher $cipher $protocol + done + fi done ############################################################################# diff --git a/deps/openssl/openssl/test/tocsp b/deps/openssl/openssl/test/tocsp index 5fc291ca6eeb72..48e81bf3025636 100644 --- a/deps/openssl/openssl/test/tocsp +++ b/deps/openssl/openssl/test/tocsp @@ -8,7 +8,7 @@ check_time="-attime 1355875200" test_ocsp () { $cmd base64 -d -in $ocspdir/$1 | \ - $cmd ocsp -respin - -partial_chain $check_time \ + $cmd ocsp -respin - -partial_chain $check_time -trusted_first \ -CAfile $ocspdir/$2 -verify_other $ocspdir/$2 -CApath /dev/null [ $? != $3 ] && exit 1 } diff --git a/deps/openssl/openssl/tools/Makefile b/deps/openssl/openssl/tools/Makefile index bb6fb71f3eb8a7..c1a2f6bccf54a4 100644 --- a/deps/openssl/openssl/tools/Makefile +++ b/deps/openssl/openssl/tools/Makefile @@ -44,6 +44,8 @@ tags: errors: +update: depend + depend: dclean: diff --git a/deps/openssl/openssl/tools/c_rehash b/deps/openssl/openssl/tools/c_rehash index 4a0f0e10eb466a..6a27c02245e917 100644 --- a/deps/openssl/openssl/tools/c_rehash +++ b/deps/openssl/openssl/tools/c_rehash @@ -15,13 +15,13 @@ my $symlink_exists=eval {symlink("",""); 1}; my $removelinks = 1; ## Parse flags. -while ( $ARGV[0] =~ '-.*' ) { +while ( $ARGV[0] =~ /^-/ ) { my $flag = shift @ARGV; last if ( $flag eq '--'); - if ( $flag =~ /-old/) { + if ( $flag eq '-old') { $x509hash = "-subject_hash_old"; $crlhash = "-hash_old"; - } elsif ( $flag =~ /-h/) { + } elsif ( $flag eq '-h') { help(); } elsif ( $flag eq '-n' ) { $removelinks = 0; diff --git a/deps/openssl/openssl/tools/c_rehash.in b/deps/openssl/openssl/tools/c_rehash.in index 887e9271254e49..b086ff9cf0d964 100644 --- a/deps/openssl/openssl/tools/c_rehash.in +++ b/deps/openssl/openssl/tools/c_rehash.in @@ -15,13 +15,13 @@ my $symlink_exists=eval {symlink("",""); 1}; my $removelinks = 1; ## Parse flags. -while ( $ARGV[0] =~ '-.*' ) { +while ( $ARGV[0] =~ /^-/ ) { my $flag = shift @ARGV; last if ( $flag eq '--'); - if ( $flag =~ /-old/) { + if ( $flag eq '-old') { $x509hash = "-subject_hash_old"; $crlhash = "-hash_old"; - } elsif ( $flag =~ /-h/) { + } elsif ( $flag eq '-h') { help(); } elsif ( $flag eq '-n' ) { $removelinks = 0; diff --git a/deps/openssl/openssl/util/copy-if-different.pl b/deps/openssl/openssl/util/copy-if-different.pl index 336b4b88f7ade1..ec99e084b56a90 100644 --- a/deps/openssl/openssl/util/copy-if-different.pl +++ b/deps/openssl/openssl/util/copy-if-different.pl @@ -75,3 +75,4 @@ close(OUT); print "Copying: $_ to $dfile\n"; } + diff --git a/deps/openssl/openssl/util/libeay.num b/deps/openssl/openssl/util/libeay.num index b977e4e4b22127..7f7487df504428 100755 --- a/deps/openssl/openssl/util/libeay.num +++ b/deps/openssl/openssl/util/libeay.num @@ -4413,3 +4413,4 @@ ECDSA_METHOD_get_app_data 4770 EXIST::FUNCTION:ECDSA X509_VERIFY_PARAM_add1_host 4771 EXIST::FUNCTION: EC_GROUP_get_mont_data 4772 EXIST::FUNCTION:EC i2d_re_X509_tbs 4773 EXIST::FUNCTION: +EVP_PKEY_asn1_set_item 4774 EXIST::FUNCTION: diff --git a/deps/openssl/openssl/util/mk1mf.pl b/deps/openssl/openssl/util/mk1mf.pl index 4ec01649bfa1e4..9b8abc0cf115fe 100755 --- a/deps/openssl/openssl/util/mk1mf.pl +++ b/deps/openssl/openssl/util/mk1mf.pl @@ -59,7 +59,7 @@ while() { my ($mf_opt, $mf_ref); while (($mf_opt, $mf_ref) = each %mf_import) { - if (/^$mf_opt\s*=\s*(.*)$/ && !defined($$mfref)) { + if (/^$mf_opt\s*=\s*(.*)$/ && !defined($$mfref)) { $$mf_ref = $1; } } @@ -340,7 +340,7 @@ $_=; for (;;) { - chop; + s/\s*$//; # was chop, didn't work in mixture of perls for Windows... ($key,$val)=/^([^=]+)=(.*)/; if ($key eq "RELATIVE_DIRECTORY") diff --git a/deps/openssl/openssl/util/mkdef.pl b/deps/openssl/openssl/util/mkdef.pl index 243108208e1780..c57c7f748eda47 100755 --- a/deps/openssl/openssl/util/mkdef.pl +++ b/deps/openssl/openssl/util/mkdef.pl @@ -122,7 +122,7 @@ # SRTP "SRTP", # SSL TRACE - "SSL_TRACE", + "SSL_TRACE", # Unit testing "UNIT_TEST"); diff --git a/deps/openssl/openssl/util/mkerr.pl b/deps/openssl/openssl/util/mkerr.pl index 7b6776ddaf6189..09ebebef9b52c4 100644 --- a/deps/openssl/openssl/util/mkerr.pl +++ b/deps/openssl/openssl/util/mkerr.pl @@ -535,14 +535,21 @@ # First, read any existing reason string definitions: my %err_reason_strings; if (open(IN,"<$cfile")) { + my $line = ""; while () { - if (/\b(${lib}_R_\w*)\b.*\"(.*)\"/) { - $err_reason_strings{$1} = $2; - } - if (/\b${lib}_F_(\w*)\b.*\"(.*)\"/) { - if (!exists $ftrans{$1} && ($1 ne $2)) { - print STDERR "WARNING: Mismatched function string $2\n"; - $ftrans{$1} = $2; + chomp; + $_ = $line . $_; + $line = ""; + if (/{ERR_(FUNC|REASON)\(/) { + if (/\b(${lib}_R_\w*)\b.*\"(.*)\"/) { + $err_reason_strings{$1} = $2; + } elsif (/\b${lib}_F_(\w*)\b.*\"(.*)\"/) { + if (!exists $ftrans{$1} && ($1 ne $2)) { + print STDERR "WARNING: Mismatched function string $2\n"; + $ftrans{$1} = $2; + } + } else { + $line = $_; } } } diff --git a/deps/openssl/openssl/util/pl/BC-32.pl b/deps/openssl/openssl/util/pl/BC-32.pl index ed28e65e661a64..f7161d7bfe2d5b 100644 --- a/deps/openssl/openssl/util/pl/BC-32.pl +++ b/deps/openssl/openssl/util/pl/BC-32.pl @@ -130,7 +130,7 @@ sub do_link_rule local($ret,$_); $file =~ s/\//$o/g if $o ne '/'; - $n=&bname($targer); + $n=&bname($target); $ret.="$target: $files $dep_libs\n"; $ret.="\t\$(LINK) \$(LFLAGS) $files \$(APP_EX_OBJ), $target,, $libs\n\n"; return($ret); diff --git a/deps/openssl/openssl/util/pl/VC-32.pl b/deps/openssl/openssl/util/pl/VC-32.pl index 852eb30d0aba06..da05e9dff810eb 100644 --- a/deps/openssl/openssl/util/pl/VC-32.pl +++ b/deps/openssl/openssl/util/pl/VC-32.pl @@ -357,7 +357,7 @@ sub do_link_rule my($target,$files,$dep_libs,$libs,$standalone)=@_; local($ret,$_); $file =~ s/\//$o/g if $o ne '/'; - $n=&bname($targer); + $n=&bname($target); $ret.="$target: $files $dep_libs\n"; if ($standalone == 1) {