diff --git a/build.rs b/build.rs index 3b85d3cff..d392cb323 100644 --- a/build.rs +++ b/build.rs @@ -855,6 +855,8 @@ fn prefix_all_symbols(pp: char, prefix_prefix: &str, prefix: &str) -> String { "CRYPTO_poly1305_update", "CRYPTO_poly1305_update_neon", "ChaCha20_ctr32", + "ChaCha20_ctr32_neon", + "ChaCha20_ctr32_nohw", "LIMBS_add_mod", "LIMBS_are_even", "LIMBS_are_zero", diff --git a/crypto/chacha/asm/chacha-armv8.pl b/crypto/chacha/asm/chacha-armv8.pl index fa6a80144..32b67dc25 100755 --- a/crypto/chacha/asm/chacha-armv8.pl +++ b/crypto/chacha/asm/chacha-armv8.pl @@ -122,9 +122,6 @@ sub ROUND { $code.=<<___; #include -.extern OPENSSL_armcap_P -.hidden OPENSSL_armcap_P - .section .rodata .align 5 @@ -136,24 +133,10 @@ sub ROUND { .text -.globl ChaCha20_ctr32 -.type ChaCha20_ctr32,%function +.globl ChaCha20_ctr32_nohw +.type ChaCha20_ctr32_nohw,%function .align 5 -ChaCha20_ctr32: - AARCH64_VALID_CALL_TARGET - cbz $len,.Labort -#if defined(OPENSSL_HWASAN) && __clang_major__ >= 10 - adrp @x[0],:pg_hi21_nc:OPENSSL_armcap_P -#else - adrp @x[0],:pg_hi21:OPENSSL_armcap_P -#endif - cmp $len,#192 - b.lo .Lshort - ldr w17,[@x[0],:lo12:OPENSSL_armcap_P] - tst w17,#ARMV7_NEON - b.ne ChaCha20_neon - -.Lshort: +ChaCha20_ctr32_nohw: AARCH64_SIGN_LINK_REGISTER stp x29,x30,[sp,#-96]! add x29,sp,#0 @@ -276,7 +259,6 @@ sub ROUND { ldp x27,x28,[x29,#80] ldp x29,x30,[sp],#96 AARCH64_VALIDATE_LINK_REGISTER -.Labort: ret .align 4 @@ -334,7 +316,7 @@ sub ROUND { ldp x29,x30,[sp],#96 AARCH64_VALIDATE_LINK_REGISTER ret -.size ChaCha20_ctr32,.-ChaCha20_ctr32 +.size ChaCha20_ctr32_nohw,.-ChaCha20_ctr32_nohw ___ {{{ @@ -375,9 +357,10 @@ sub NEONROUND { $code.=<<___; -.type ChaCha20_neon,%function +.globl ChaCha20_ctr32_neon +.type ChaCha20_ctr32_neon,%function .align 5 -ChaCha20_neon: +ChaCha20_ctr32_neon: AARCH64_SIGN_LINK_REGISTER stp x29,x30,[sp,#-96]! 
add x29,sp,#0 @@ -690,7 +673,7 @@ sub NEONROUND { ldp x29,x30,[sp],#96 AARCH64_VALIDATE_LINK_REGISTER ret -.size ChaCha20_neon,.-ChaCha20_neon +.size ChaCha20_ctr32_neon,.-ChaCha20_ctr32_neon ___ { my ($T0,$T1,$T2,$T3,$T4,$T5)=@K; diff --git a/src/aead/chacha.rs b/src/aead/chacha.rs index 51eac7382..e616ad657 100644 --- a/src/aead/chacha.rs +++ b/src/aead/chacha.rs @@ -86,20 +86,31 @@ impl Key { // "in place". See https://rt.openssl.org/Ticket/Display.html?id=4362. cfg_if! { if #[cfg(all(target_arch = "aarch64", target_endian = "little"))] { - chacha20_ctr32_ffi!( - unsafe { (cpu::Features, Overlapping<'_>) => ChaCha20_ctr32 }, - self, counter, in_out, cpu) + use cpu::{GetFeature as _, arm::Neon}; + const NEON_MIN_LEN: usize = 192 + 1; + if in_out.len() >= NEON_MIN_LEN { + if let Some(cpu) = cpu.get_feature() { + return chacha20_ctr32_ffi!( + unsafe { (NEON_MIN_LEN, Neon, Overlapping<'_>) => ChaCha20_ctr32_neon }, + self, counter, in_out, cpu); + } + } + if in_out.len() >= 1 { + chacha20_ctr32_ffi!( + unsafe { (1, cpu::Features, Overlapping<'_>) => ChaCha20_ctr32_nohw }, + self, counter, in_out, cpu) + } } else if #[cfg(all(target_arch = "arm", target_endian = "little"))] { chacha20_ctr32_ffi!( - unsafe { (cpu::Features, &mut [u8]) => ChaCha20_ctr32 }, + unsafe { (0, cpu::Features, &mut [u8]) => ChaCha20_ctr32 }, self, counter, in_out.copy_within(), cpu) } else if #[cfg(target_arch = "x86")] { chacha20_ctr32_ffi!( - unsafe { (cpu::Features, &mut [u8]) => ChaCha20_ctr32 }, + unsafe { (0, cpu::Features, &mut [u8]) => ChaCha20_ctr32 }, self, counter, in_out.copy_within(), cpu) } else if #[cfg(target_arch = "x86_64")] { chacha20_ctr32_ffi!( - unsafe { (cpu::Features, Overlapping<'_>) => ChaCha20_ctr32 }, + unsafe { (0, cpu::Features, Overlapping<'_>) => ChaCha20_ctr32 }, self, counter, in_out, cpu) } else { fallback::ChaCha20_ctr32(self, counter, in_out) diff --git a/src/aead/chacha/ffi.rs b/src/aead/chacha/ffi.rs index da19fed7d..54f5d36dc 100644 --- 
a/src/aead/chacha/ffi.rs +++ b/src/aead/chacha/ffi.rs @@ -14,12 +14,12 @@ use super::{super::overlapping::Overlapping, Counter, Key}; -// `unsafe { (C, InOut) => f }` means that the function `f` is safe to call -// iff CPU features `C` are available and the input type is `InOut`. If `f` -// supports overlapping input/output then `InOut` should be -// `Overlapping<'_, u8>`; otherwise it should be `&mut [u8]`. +// `unsafe { (N, C, InOut) => f }` means that the function `f` is safe to call +// iff the in/out length is at least `N`, the CPU features `C` are available, +// and the input type is `InOut`. If `f` supports overlapping input/output then +// `InOut` should be `Overlapping<'_, u8>`; otherwise it should be `&mut [u8]`. macro_rules! chacha20_ctr32_ffi { - ( unsafe { ($Cpu:ty, $InOut:ty) => $f:ident }, + ( unsafe { ($MIN_LEN:expr, $Cpu:ty, $InOut:ty) => $f:ident }, $key:expr, $counter:expr, $in_out:expr, $cpu:expr ) => {{ prefixed_extern! { fn $f( @@ -34,14 +34,21 @@ macro_rules! chacha20_ctr32_ffi { // to call if additionally we have a value of type `$Cpu` and an in/out // value of the indicated type, which we do. unsafe { - crate::aead::chacha::ffi::chacha20_ctr32_ffi::<$InOut, $Cpu>( + crate::aead::chacha::ffi::chacha20_ctr32_ffi::<$InOut, $Cpu, $MIN_LEN>( $key, $counter, $in_out, $cpu, $f, ) } }}; } -pub(super) unsafe fn chacha20_ctr32_ffi<'o, InOut: 'o + Into<Overlapping<'o, u8>>, Cpu>( +// Panics if `in_out.len() < MIN_LEN`. The caller should have guarded against +// that so that the assertion gets optimized away. +pub(super) unsafe fn chacha20_ctr32_ffi< + 'o, + InOut: 'o + Into<Overlapping<'o, u8>>, + Cpu, + const MIN_LEN: usize, +>( key: &Key, counter: Counter, in_out: InOut, @@ -50,6 +57,7 @@ pub(super) unsafe fn chacha20_ctr32_ffi<'o, InOut: 'o + Into<Overlapping<'o, u8> ) { let in_out: Overlapping<'_, u8> = in_out.into(); let (input, output, len) = in_out.into_input_output_len(); + assert!(len >= MIN_LEN); let key = key.words_less_safe(); let _: Cpu = cpu; unsafe { f(output, input, len, key, &counter) }