Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update Soup #922

Merged
merged 3 commits into from
Aug 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/lhttplib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ static int push_http_response (lua_State *L, soup::HttpRequestTask& task) {
#if SOUP_WASM
return 1; /* specialized HttpRequestTask for WASM doesn't have `getStatus` */
#else
lua_pushstring(L, soup::netStatusToString(task.getStatus()));
pluto_pushstring(L, task.getStatus());
return 2;
#endif
}
Expand Down
4 changes: 3 additions & 1 deletion src/lsocketlib.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,9 @@ static int l_connect (lua_State *L) {
}

static int l_send (lua_State *L) {
checksocket(L, 1)->sock->send(pluto_checkstring(L, 2));
size_t len;
const char *str = luaL_checklstring(L, 2, &len);
checksocket(L, 1)->sock->send(str, len);
return 0;
}

Expand Down
819 changes: 412 additions & 407 deletions src/vendor/Soup/Intrin/aes_helper.cpp

Large diffs are not rendered by default.

79 changes: 42 additions & 37 deletions src/vendor/Soup/Intrin/crc32_intrin.cpp
Original file line number Diff line number Diff line change
@@ -1,63 +1,68 @@
#include "../soup/base.hpp"

#include <cstddef>
#include <cstdint>

#if defined(__x86_64__) || defined(_M_X64)
#if SOUP_X86
#include <smmintrin.h> // _mm_extract_epi32
#include <wmmintrin.h> // _mm_clmulepi64_si128
#elif defined(__aarch64__) || defined(_M_ARM64)
#ifdef _WIN32
#elif SOUP_ARM
#if SOUP_WINDOWS
#include <intrin.h>
#else
#include <arm_acle.h>
#endif
#endif

namespace soup_intrin
NAMESPACE_SOUP
{
#if defined(__x86_64__) || defined(_M_X64)
uint32_t crc32_pclmul(const uint8_t* p, size_t size, uint32_t crc) noexcept
namespace intrin
{
// Original source: https://github.com/richgel999/fpng/blob/main/src/fpng.cpp
// Original licence: Dedicated to the public domain.
#if SOUP_X86
uint32_t crc32_pclmul(const uint8_t* p, size_t size, uint32_t crc) noexcept
{
// Original source: https://github.com/richgel999/fpng/blob/main/src/fpng.cpp
// Original licence: Dedicated to the public domain.

static const uint64_t
static const uint64_t
#ifdef _MSC_VER
__declspec(align(16))
__declspec(align(16))
#else
__attribute__((aligned(16)))
__attribute__((aligned(16)))
#endif
s_u[2] = { 0x1DB710641, 0x1F7011641 }, s_k5k0[2] = { 0x163CD6124, 0 }, s_k3k4[2] = { 0x1751997D0, 0xCCAA009E };
s_u[2] = { 0x1DB710641, 0x1F7011641 }, s_k5k0[2] = { 0x163CD6124, 0 }, s_k3k4[2] = { 0x1751997D0, 0xCCAA009E };

// Load first 16 bytes, apply initial CRC32
__m128i b = _mm_xor_si128(_mm_cvtsi32_si128(~crc), _mm_loadu_si128(reinterpret_cast<const __m128i*>(p)));
// Load first 16 bytes, apply initial CRC32
__m128i b = _mm_xor_si128(_mm_cvtsi32_si128(~crc), _mm_loadu_si128(reinterpret_cast<const __m128i*>(p)));

// We're skipping directly to Step 2 page 12 - iteratively folding by 1 (by 4 is overkill for our needs)
const __m128i k3k4 = _mm_load_si128(reinterpret_cast<const __m128i*>(s_k3k4));
// We're skipping directly to Step 2 page 12 - iteratively folding by 1 (by 4 is overkill for our needs)
const __m128i k3k4 = _mm_load_si128(reinterpret_cast<const __m128i*>(s_k3k4));

for (size -= 16, p += 16; size >= 16; size -= 16, p += 16)
b = _mm_xor_si128(_mm_xor_si128(_mm_clmulepi64_si128(b, k3k4, 17), _mm_loadu_si128(reinterpret_cast<const __m128i*>(p))), _mm_clmulepi64_si128(b, k3k4, 0));
for (size -= 16, p += 16; size >= 16; size -= 16, p += 16)
b = _mm_xor_si128(_mm_xor_si128(_mm_clmulepi64_si128(b, k3k4, 17), _mm_loadu_si128(reinterpret_cast<const __m128i*>(p))), _mm_clmulepi64_si128(b, k3k4, 0));

// Final stages: fold to 64-bits, 32-bit Barrett reduction
const __m128i z = _mm_set_epi32(0, ~0, 0, ~0), u = _mm_load_si128(reinterpret_cast<const __m128i*>(s_u));
b = _mm_xor_si128(_mm_srli_si128(b, 8), _mm_clmulepi64_si128(b, k3k4, 16));
b = _mm_xor_si128(_mm_clmulepi64_si128(_mm_and_si128(b, z), _mm_loadl_epi64(reinterpret_cast<const __m128i*>(s_k5k0)), 0), _mm_srli_si128(b, 4));
return ~_mm_extract_epi32(_mm_xor_si128(b, _mm_clmulepi64_si128(_mm_and_si128(_mm_clmulepi64_si128(_mm_and_si128(b, z), u, 16), z), u, 0)), 1);
}
#elif defined(__aarch64__) || defined(_M_ARM64)
uint32_t crc32_armv8(const uint8_t* p, size_t size, uint32_t crc) noexcept
{
crc = ~crc;
for (; size >= 8; size -= 8)
{
crc = __crc32d(crc, *reinterpret_cast<const uint64_t*>(p));
p += 8;
// Final stages: fold to 64-bits, 32-bit Barrett reduction
const __m128i z = _mm_set_epi32(0, ~0, 0, ~0), u = _mm_load_si128(reinterpret_cast<const __m128i*>(s_u));
b = _mm_xor_si128(_mm_srli_si128(b, 8), _mm_clmulepi64_si128(b, k3k4, 16));
b = _mm_xor_si128(_mm_clmulepi64_si128(_mm_and_si128(b, z), _mm_loadl_epi64(reinterpret_cast<const __m128i*>(s_k5k0)), 0), _mm_srli_si128(b, 4));
return ~_mm_extract_epi32(_mm_xor_si128(b, _mm_clmulepi64_si128(_mm_and_si128(_mm_clmulepi64_si128(_mm_and_si128(b, z), u, 16), z), u, 0)), 1);
}
while (size--)
#elif SOUP_ARM
uint32_t crc32_armv8(const uint8_t* p, size_t size, uint32_t crc) noexcept
{
crc = __crc32b(crc, *p++);
crc = ~crc;
for (; size >= 8; size -= 8)
{
crc = __crc32d(crc, *reinterpret_cast<const uint64_t*>(p));
p += 8;
}
while (size--)
{
crc = __crc32b(crc, *p++);
}
crc = ~crc;
return crc;
}
crc = ~crc;
return crc;
}
#endif
}
}
76 changes: 26 additions & 50 deletions src/vendor/Soup/Intrin/hardware_rng.cpp
Original file line number Diff line number Diff line change
@@ -1,62 +1,38 @@
#if defined(__x86_64__) || defined(_M_X64) || defined(i386) || defined(__i386__) || defined(__i386) || defined(_M_IX86)
#include "../soup/base.hpp"
#if SOUP_X86

#include <cstdint>

#include <immintrin.h>

namespace soup_intrin
NAMESPACE_SOUP
{
// RDSEED

uint16_t hardware_rng_generate16() noexcept
namespace intrin
{
uint16_t res;
while (_rdseed16_step(&res) == 0);
return res;
}

uint32_t hardware_rng_generate32() noexcept
{
uint32_t res;
while (_rdseed32_step(&res) == 0);
return res;
}

#if defined(__x86_64__) || defined(_M_X64)
uint64_t hardware_rng_generate64() noexcept
{
unsigned long long res;
while (_rdseed64_step(&res) == 0);
return res;
}
static_assert(sizeof(uint64_t) == sizeof(unsigned long long));
uint16_t hardware_rng_generate16() noexcept
{
uint16_t res;
while (_rdseed16_step(&res) == 0);
return res;
}

uint32_t hardware_rng_generate32() noexcept
{
uint32_t res;
while (_rdseed32_step(&res) == 0);
return res;
}

#if SOUP_BITS == 64
uint64_t hardware_rng_generate64() noexcept
{
unsigned long long res;
while (_rdseed64_step(&res) == 0);
return res;
}
static_assert(sizeof(uint64_t) == sizeof(unsigned long long));
#endif

// RDRAND

uint16_t fast_hardware_rng_generate16() noexcept
{
uint16_t res;
while (_rdrand16_step(&res) == 0);
return res;
}

uint32_t fast_hardware_rng_generate32() noexcept
{
uint32_t res;
while (_rdrand32_step(&res) == 0);
return res;
}

#if defined(__x86_64__) || defined(_M_X64)
uint64_t fast_hardware_rng_generate64() noexcept
{
unsigned long long res;
while (_rdrand64_step(&res) == 0);
return res;
}
static_assert(sizeof(uint64_t) == sizeof(unsigned long long));
#endif
}

#endif
Loading