Skip to content

Commit d131fcc

Browse files
committed
crypto: implement Shavite512's full Compress() routine
1 parent 31f93ad commit d131fcc

File tree

5 files changed

+363
-75
lines changed

5 files changed

+363
-75
lines changed

src/crypto/x11/arm_crypto/shavite.cpp

Lines changed: 159 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,35 +3,182 @@
33
// file COPYING or http://www.opensource.org/licenses/mit-license.php.
44

55
#if defined(ENABLE_ARM_AES)
6+
#include <crypto/x11/sph_shavite.h>
67
#include <crypto/x11/util/util.hpp>
78

89
#include <cstdint>
10+
#include <cstring>
911

1012
#include <arm_neon.h>
1113

1214
namespace sapphire {
13-
namespace arm_crypto_shavite {
15+
namespace {
1416
void CompressElement(uint32_t& l0, uint32_t& l1, uint32_t& l2, uint32_t& l3,
15-
uint32_t r0, uint32_t r1, uint32_t r2, uint32_t r3, const uint32_t* rk)
17+
uint32_t r0, uint32_t r1, uint32_t r2, uint32_t r3, const uint8x16_t* rk_words)
1618
{
17-
// Pack block + XOR with round key 1
1819
uint8x16_t block = util::pack_le(r0, r1, r2, r3);
19-
block = util::Xor(block, vreinterpretq_u8_u32(vld1q_u32(&rk[0])));
20-
// AES round + XOR with round key 2
21-
block = util::Xor(util::aes_round_nk(block), vreinterpretq_u8_u32(vld1q_u32(&rk[4])));
22-
// AES round + XOR with round key 3
23-
block = util::Xor(util::aes_round_nk(block), vreinterpretq_u8_u32(vld1q_u32(&rk[8])));
24-
// AES Round + XOR with round key 4
25-
block = util::Xor(util::aes_round_nk(block), vreinterpretq_u8_u32(vld1q_u32(&rk[12])));
26-
// AES round
20+
block = util::Xor(block, rk_words[0]);
21+
block = util::Xor(util::aes_round_nk(block), rk_words[1]);
22+
block = util::Xor(util::aes_round_nk(block), rk_words[2]);
23+
block = util::Xor(util::aes_round_nk(block), rk_words[3]);
2724
block = util::aes_round_nk(block);
28-
// Unpack + XOR with l values
2925
uint32x4_t result = vreinterpretq_u32_u8(block);
3026
l0 ^= vgetq_lane_u32(result, 0);
3127
l1 ^= vgetq_lane_u32(result, 1);
3228
l2 ^= vgetq_lane_u32(result, 2);
3329
l3 ^= vgetq_lane_u32(result, 3);
3430
}
31+
} // anonymous namespace
32+
33+
namespace arm_crypto_shavite {
34+
void Compress(sph_shavite_big_context *sc, const void *msg)
35+
{
36+
uint32_t p0, p1, p2, p3, p4, p5, p6, p7;
37+
uint32_t p8, p9, pA, pB, pC, pD, pE, pF;
38+
39+
alignas(16) uint8x16_t rk_words[448/4];
40+
alignas(16) uint32_t rk[448];
41+
42+
#if SPH_LITTLE_ENDIAN
43+
memcpy(rk, msg, 128);
44+
#else
45+
for (size_t u{0}; u < 32; u += 4) {
46+
rk[u + 0] = sph_dec32le_aligned(
47+
(const unsigned char *)msg + (u << 2) + 0);
48+
rk[u + 1] = sph_dec32le_aligned(
49+
(const unsigned char *)msg + (u << 2) + 4);
50+
rk[u + 2] = sph_dec32le_aligned(
51+
(const unsigned char *)msg + (u << 2) + 8);
52+
rk[u + 3] = sph_dec32le_aligned(
53+
(const unsigned char *)msg + (u << 2) + 12);
54+
}
55+
#endif
56+
57+
size_t u{32};
58+
for (;;) {
59+
for (int s{0}; s < 4; s++) {
60+
uint32_t x0 = rk[u - 31];
61+
uint32_t x1 = rk[u - 30];
62+
uint32_t x2 = rk[u - 29];
63+
uint32_t x3 = rk[u - 32];
64+
65+
uint32x4_t block = vreinterpretq_u32_u8(util::aes_round_nk(util::pack_le(x0, x1, x2, x3)));
66+
rk[u + 0] = vgetq_lane_u32(block, 0) ^ rk[u - 4];
67+
rk[u + 1] = vgetq_lane_u32(block, 1) ^ rk[u - 3];
68+
rk[u + 2] = vgetq_lane_u32(block, 2) ^ rk[u - 2];
69+
rk[u + 3] = vgetq_lane_u32(block, 3) ^ rk[u - 1];
70+
71+
if (u == 32) {
72+
rk[32] ^= sc->count0;
73+
rk[33] ^= sc->count1;
74+
rk[34] ^= sc->count2;
75+
rk[35] ^= SPH_T32(~sc->count3);
76+
} else if (u == 440) {
77+
rk[440] ^= sc->count1;
78+
rk[441] ^= sc->count0;
79+
rk[442] ^= sc->count3;
80+
rk[443] ^= SPH_T32(~sc->count2);
81+
}
82+
u += 4;
83+
84+
x0 = rk[u - 31];
85+
x1 = rk[u - 30];
86+
x2 = rk[u - 29];
87+
x3 = rk[u - 32];
88+
89+
block = vreinterpretq_u32_u8(util::aes_round_nk(util::pack_le(x0, x1, x2, x3)));
90+
rk[u + 0] = vgetq_lane_u32(block, 0) ^ rk[u - 4];
91+
rk[u + 1] = vgetq_lane_u32(block, 1) ^ rk[u - 3];
92+
rk[u + 2] = vgetq_lane_u32(block, 2) ^ rk[u - 2];
93+
rk[u + 3] = vgetq_lane_u32(block, 3) ^ rk[u - 1];
94+
95+
if (u == 164) {
96+
rk[164] ^= sc->count3;
97+
rk[165] ^= sc->count2;
98+
rk[166] ^= sc->count1;
99+
rk[167] ^= SPH_T32(~sc->count0);
100+
} else if (u == 316) {
101+
rk[316] ^= sc->count2;
102+
rk[317] ^= sc->count3;
103+
rk[318] ^= sc->count0;
104+
rk[319] ^= SPH_T32(~sc->count1);
105+
}
106+
u += 4;
107+
}
108+
if (u == 448)
109+
break;
110+
for (int s = 0; s < 8; s++) {
111+
rk[u + 0] = rk[u - 32] ^ rk[u - 7];
112+
rk[u + 1] = rk[u - 31] ^ rk[u - 6];
113+
rk[u + 2] = rk[u - 30] ^ rk[u - 5];
114+
rk[u + 3] = rk[u - 29] ^ rk[u - 4];
115+
u += 4;
116+
if (u == 448)
117+
break;
118+
}
119+
}
120+
121+
for (int i{0}; i < (448/4); i++) {
122+
rk_words[i] = vreinterpretq_u8_u32(vld1q_u32(&rk[i*4]));
123+
}
124+
125+
p0 = sc->h[0x0];
126+
p1 = sc->h[0x1];
127+
p2 = sc->h[0x2];
128+
p3 = sc->h[0x3];
129+
p4 = sc->h[0x4];
130+
p5 = sc->h[0x5];
131+
p6 = sc->h[0x6];
132+
p7 = sc->h[0x7];
133+
p8 = sc->h[0x8];
134+
p9 = sc->h[0x9];
135+
pA = sc->h[0xA];
136+
pB = sc->h[0xB];
137+
pC = sc->h[0xC];
138+
pD = sc->h[0xD];
139+
pE = sc->h[0xE];
140+
pF = sc->h[0xF];
141+
142+
size_t u_words{0};
143+
for (size_t r{0}; r < 14; r++) {
144+
CompressElement(p0, p1, p2, p3, p4, p5, p6, p7, &rk_words[u_words]);
145+
u_words += 4;
146+
CompressElement(p8, p9, pA, pB, pC, pD, pE, pF, &rk_words[u_words]);
147+
u_words += 4;
148+
149+
#define WROT(a, b, c, d) do { \
150+
uint32_t t = d; \
151+
d = c; \
152+
c = b; \
153+
b = a; \
154+
a = t; \
155+
} while (0)
156+
157+
WROT(p0, p4, p8, pC);
158+
WROT(p1, p5, p9, pD);
159+
WROT(p2, p6, pA, pE);
160+
WROT(p3, p7, pB, pF);
161+
162+
#undef WROT
163+
}
164+
165+
sc->h[0x0] ^= p0;
166+
sc->h[0x1] ^= p1;
167+
sc->h[0x2] ^= p2;
168+
sc->h[0x3] ^= p3;
169+
sc->h[0x4] ^= p4;
170+
sc->h[0x5] ^= p5;
171+
sc->h[0x6] ^= p6;
172+
sc->h[0x7] ^= p7;
173+
sc->h[0x8] ^= p8;
174+
sc->h[0x9] ^= p9;
175+
sc->h[0xA] ^= pA;
176+
sc->h[0xB] ^= pB;
177+
sc->h[0xC] ^= pC;
178+
sc->h[0xD] ^= pD;
179+
sc->h[0xE] ^= pE;
180+
sc->h[0xF] ^= pF;
181+
}
35182
} // namespace arm_crypto_shavite
36183
} // namespace sapphire
37184

src/crypto/x11/dispatch.cpp

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,7 @@ namespace arm_crypto_echo {
5050
void FullStateRound(uint64_t W[16][2], uint32_t& k0, uint32_t& k1, uint32_t& k2, uint32_t& k3);
5151
} // namespace arm_crypto_echo
5252
namespace arm_crypto_shavite {
53-
void CompressElement(uint32_t& l0, uint32_t& l1, uint32_t& l2, uint32_t& l3,
54-
uint32_t r0, uint32_t r1, uint32_t r2, uint32_t r3, const uint32_t* rk);
53+
void Compress(sph_shavite_big_context *sc, const void *msg);
5554
} // namespace arm_crypto_shavite
5655
#endif // ENABLE_ARM_AES
5756

@@ -79,8 +78,7 @@ namespace x86_aesni_echo {
7978
void FullStateRound(uint64_t W[16][2], uint32_t& k0, uint32_t& k1, uint32_t& k2, uint32_t& k3);
8079
} // namespace x86_aesni_echo
8180
namespace x86_aesni_shavite {
82-
void CompressElement(uint32_t& l0, uint32_t& l1, uint32_t& l2, uint32_t& l3,
83-
uint32_t r0, uint32_t r1, uint32_t r2, uint32_t r3, const uint32_t* rk);
81+
void Compress(sph_shavite_big_context *sc, const void *msg);
8482
} // namespace x86_aesni_shavite
8583
#endif // ENABLE_SSE41 && ENABLE_X86_AESNI
8684
#endif // !DISABLE_OPTIMIZED_SHA256
@@ -97,8 +95,7 @@ void FullStateRound(uint64_t W[16][2], uint32_t& k0, uint32_t& k1, uint32_t& k2,
9795
void ShiftAndMix(uint64_t W[16][2]);
9896
} // namespace soft_echo
9997
namespace soft_shavite {
100-
void CompressElement(uint32_t& l0, uint32_t& l1, uint32_t& l2, uint32_t& l3,
101-
uint32_t r0, uint32_t r1, uint32_t r2, uint32_t r3, const uint32_t* rk);
98+
void Compress(sph_shavite_big_context *sc, const void *msg);
10299
} // namespace soft_shavite
103100
} // namespace sapphire
104101

@@ -121,15 +118,15 @@ extern sapphire::dispatch::AESRoundFn aes_round;
121118
extern sapphire::dispatch::AESRoundFnNk aes_round_nk;
122119
extern sapphire::dispatch::EchoShiftMix echo_shift_mix;
123120
extern sapphire::dispatch::EchoRoundFn echo_round;
124-
extern sapphire::dispatch::ShaviteCompressFn shavite_c512e;
121+
extern sapphire::dispatch::ShaviteCompressFn shavite_c512;
125122

126123
void SapphireAutoDetect()
127124
{
128125
aes_round = sapphire::soft_aes::Round;
129126
aes_round_nk = sapphire::soft_aes::RoundKeyless;
130127
echo_round = sapphire::soft_echo::FullStateRound;
131128
echo_shift_mix = sapphire::soft_echo::ShiftAndMix;
132-
shavite_c512e = sapphire::soft_shavite::CompressElement;
129+
shavite_c512 = sapphire::soft_shavite::Compress;
133130

134131
#if !defined(DISABLE_OPTIMIZED_SHA256)
135132
#if defined(HAVE_GETCPUID)
@@ -142,7 +139,7 @@ void SapphireAutoDetect()
142139
aes_round = sapphire::x86_aesni_aes::Round;
143140
aes_round_nk = sapphire::x86_aesni_aes::RoundKeyless;
144141
echo_round = sapphire::x86_aesni_echo::FullStateRound;
145-
shavite_c512e = sapphire::x86_aesni_shavite::CompressElement;
142+
shavite_c512 = sapphire::x86_aesni_shavite::Compress;
146143
}
147144
#endif // ENABLE_SSE41 && ENABLE_X86_AESNI
148145
#if defined(ENABLE_SSSE3)
@@ -198,7 +195,7 @@ void SapphireAutoDetect()
198195
aes_round = sapphire::arm_crypto_aes::Round;
199196
aes_round_nk = sapphire::arm_crypto_aes::RoundKeyless;
200197
echo_round = sapphire::arm_crypto_echo::FullStateRound;
201-
shavite_c512e = sapphire::arm_crypto_shavite::CompressElement;
198+
shavite_c512 = sapphire::arm_crypto_shavite::Compress;
202199
}
203200
#endif // ENABLE_ARM_AES
204201

src/crypto/x11/dispatch.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
#ifndef BITCOIN_CRYPTO_X11_DISPATCH_H
66
#define BITCOIN_CRYPTO_X11_DISPATCH_H
77

8+
#include <crypto/x11/sph_shavite.h>
9+
810
#include <cstdint>
911

1012
namespace sapphire {
@@ -18,8 +20,7 @@ typedef void (*AESRoundFnNk)(uint32_t, uint32_t, uint32_t, uint32_t,
1820
typedef void (*EchoRoundFn)(uint64_t[16][2], uint32_t&, uint32_t&, uint32_t&, uint32_t&);
1921
typedef void (*EchoShiftMix)(uint64_t[16][2]);
2022

21-
typedef void (*ShaviteCompressFn)(uint32_t&, uint32_t&, uint32_t&, uint32_t&,
22-
uint32_t, uint32_t, uint32_t, uint32_t, const uint32_t*);
23+
typedef void (*ShaviteCompressFn)(sph_shavite_big_context*, const void *);
2324
} // namespace dispatch
2425
} // namespace sapphire
2526

0 commit comments

Comments
 (0)