diff --git a/binding.gyp b/binding.gyp index 55240c33..a7e31f6e 100644 --- a/binding.gyp +++ b/binding.gyp @@ -32,6 +32,13 @@ "src/x15.c", "src/x16r.c", "src/x16rv2.c", + "src/kawpow.cpp", + "src/kawpow/ethash.cpp", + "src/kawpow/keccak.c", + "src/kawpow/keccakf800.c", + "src/kawpow/keccakf1600.c", + "src/kawpow/managed.cpp", + "src/kawpow/primes.c", "src/crypto/argon2/argon2.c", "src/crypto/argon2/core.c", "src/crypto/argon2/encoding.c", diff --git a/src/kawpow.cpp b/src/kawpow.cpp new file mode 100644 index 00000000..7efcd9ae --- /dev/null +++ b/src/kawpow.cpp @@ -0,0 +1,571 @@ +// ethash: C/C++ implementation of Ethash, the Ethereum Proof of Work algorithm. +// Copyright 2018-2019 Pawel Bylica. +// Licensed under the Apache License, Version 2.0. + +#include "kawpow.hpp" + +#include "kawpow/bit_manipulation.h" +#include "kawpow/endianness.hpp" +#include "kawpow/ethash-internal.hpp" +#include "kawpow/kiss99.hpp" +#include "kawpow/keccak.hpp" + +#include + +namespace progpow +{ +namespace +{ +/// A variant of Keccak hash function for ProgPoW. +/// +/// This Keccak hash function uses 800-bit permutation (Keccak-f[800]) with 576 bitrate. +/// It take exactly 576 bits of input (split across 3 arguments) and adds no padding. +/// +/// @param header_hash The 256-bit header hash. +/// @param nonce The 64-bit nonce. +/// @param mix_hash Additional 256-bits of data. +/// @return The 256-bit output of the hash function. +void keccak_progpow_256(uint32_t* st) noexcept +{ + ethash_keccakf800(st); +} + +/// The same as keccak_progpow_256() but uses null mix +/// and returns top 64 bits of the output being a big-endian prefix of the 256-bit hash. + inline void keccak_progpow_64(uint32_t* st) noexcept +{ + keccak_progpow_256(st); +} + + +/// ProgPoW mix RNG state. +/// +/// Encapsulates the state of the random number generator used in computing ProgPoW mix. +/// This includes the state of the KISS99 RNG and the precomputed random permutation of the +/// sequence of mix item indexes. +class mix_rng_state +{ +public: + inline explicit mix_rng_state(uint32_t* seed) noexcept; + + uint32_t next_dst() noexcept { return dst_seq[(dst_counter++) % num_regs]; } + uint32_t next_src() noexcept { return src_seq[(src_counter++) % num_regs]; } + + kiss99 rng; + +private: + size_t dst_counter = 0; + std::array dst_seq; + size_t src_counter = 0; + std::array src_seq; +}; + +mix_rng_state::mix_rng_state(uint32_t* hash_seed) noexcept +{ + const auto seed_lo = static_cast(hash_seed[0]); + const auto seed_hi = static_cast(hash_seed[1]); + + const auto z = fnv1a(fnv_offset_basis, seed_lo); + const auto w = fnv1a(z, seed_hi); + const auto jsr = fnv1a(w, seed_lo); + const auto jcong = fnv1a(jsr, seed_hi); + + rng = kiss99{z, w, jsr, jcong}; + + // Create random permutations of mix destinations / sources. + // Uses Fisher-Yates shuffle. + for (uint32_t i = 0; i < num_regs; ++i) + { + dst_seq[i] = i; + src_seq[i] = i; + } + + for (uint32_t i = num_regs; i > 1; --i) + { + std::swap(dst_seq[i - 1], dst_seq[rng() % i]); + std::swap(src_seq[i - 1], src_seq[rng() % i]); + } +} + + +NO_SANITIZE("unsigned-integer-overflow") +inline uint32_t random_math(uint32_t a, uint32_t b, uint32_t selector) noexcept +{ + switch (selector % 11) + { + default: + case 0: + return a + b; + case 1: + return a * b; + case 2: + return mul_hi32(a, b); + case 3: + return std::min(a, b); + case 4: + return rotl32(a, b); + case 5: + return rotr32(a, b); + case 6: + return a & b; + case 7: + return a | b; + case 8: + return a ^ b; + case 9: + return clz32(a) + clz32(b); + case 10: + return popcount32(a) + popcount32(b); + } +} + +/// Merge data from `b` and `a`. +/// Assuming `a` has high entropy, only do ops that retain entropy even if `b` +/// has low entropy (i.e. do not do `a & b`). +NO_SANITIZE("unsigned-integer-overflow") +inline void random_merge(uint32_t& a, uint32_t b, uint32_t selector) noexcept +{ + const auto x = (selector >> 16) % 31 + 1; // Additional non-zero selector from higher bits. + switch (selector % 4) + { + case 0: + a = (a * 33) + b; + break; + case 1: + a = (a ^ b) * 33; + break; + case 2: + a = rotl32(a, x) ^ b; + break; + case 3: + a = rotr32(a, x) ^ b; + break; + } +} + +static const uint32_t ravencoin_kawpow[15] = { + 0x00000072, //R + 0x00000041, //A + 0x00000056, //V + 0x00000045, //E + 0x0000004E, //N + 0x00000043, //C + 0x0000004F, //O + 0x00000049, //I + 0x0000004E, //N + 0x0000004B, //K + 0x00000041, //A + 0x00000057, //W + 0x00000050, //P + 0x0000004F, //O + 0x00000057, //W +}; + +using lookup_fn = hash2048 (*)(const epoch_context&, uint32_t); + +using mix_array = std::array, num_lanes>; + +void round( + const epoch_context& context, uint32_t r, mix_array& mix, mix_rng_state state, lookup_fn lookup) +{ + const uint32_t num_items = static_cast(context.full_dataset_num_items / 2); + const uint32_t item_index = mix[r % num_lanes][0] % num_items; + const hash2048 item = lookup(context, item_index); + + constexpr size_t num_words_per_lane = sizeof(item) / (sizeof(uint32_t) * num_lanes); + constexpr int max_operations = + num_cache_accesses > num_math_operations ? num_cache_accesses : num_math_operations; + + // Process lanes. + for (int i = 0; i < max_operations; ++i) + { + if (i < num_cache_accesses) // Random access to cached memory. + { + const auto src = state.next_src(); + const auto dst = state.next_dst(); + const auto sel = state.rng(); + + for (size_t l = 0; l < num_lanes; ++l) + { + const size_t offset = mix[l][src] % l1_cache_num_items; + random_merge(mix[l][dst], le::uint32(context.l1_cache[offset]), sel); + } + } + if (i < num_math_operations) // Random math. + { + // Generate 2 unique source indexes. + const auto src_rnd = state.rng() % (num_regs * (num_regs - 1)); + const auto src1 = src_rnd % num_regs; // O <= src1 < num_regs + auto src2 = src_rnd / num_regs; // 0 <= src2 < num_regs - 1 + if (src2 >= src1) + ++src2; + + const auto sel1 = state.rng(); + const auto dst = state.next_dst(); + const auto sel2 = state.rng(); + + for (size_t l = 0; l < num_lanes; ++l) + { + const uint32_t data = random_math(mix[l][src1], mix[l][src2], sel1); + random_merge(mix[l][dst], data, sel2); + } + } + } + + // DAG access pattern. + uint32_t dsts[num_words_per_lane]; + uint32_t sels[num_words_per_lane]; + for (size_t i = 0; i < num_words_per_lane; ++i) + { + dsts[i] = i == 0 ? 0 : state.next_dst(); + sels[i] = state.rng(); + } + + // DAG access. + for (size_t l = 0; l < num_lanes; ++l) + { + const auto offset = ((l ^ r) % num_lanes) * num_words_per_lane; + for (size_t i = 0; i < num_words_per_lane; ++i) + { + const auto word = le::uint32(item.word32s[offset + i]); + random_merge(mix[l][dsts[i]], word, sels[i]); + } + } +} + +mix_array init_mix(uint32_t* hash_seed) +{ + const uint32_t z = fnv1a(fnv_offset_basis, static_cast(hash_seed[0])); + const uint32_t w = fnv1a(z, static_cast(hash_seed[1])); + + mix_array mix; + for (uint32_t l = 0; l < mix.size(); ++l) + { + const uint32_t jsr = fnv1a(w, l); + const uint32_t jcong = fnv1a(jsr, l); + kiss99 rng{z, w, jsr, jcong}; + + for (auto& row : mix[l]) + row = rng(); + } + return mix; +} + +hash256 hash_mix( + const epoch_context& context, int block_number, uint32_t * seed, lookup_fn lookup) noexcept +{ + auto mix = init_mix(seed); + auto number = uint64_t(block_number / period_length); + uint32_t new_state[2]; + new_state[0] = (uint32_t)number; + new_state[1] = (uint32_t)(number >> 32); + mix_rng_state state{new_state}; + + for (uint32_t i = 0; i < 64; ++i) + round(context, i, mix, state, lookup); + + // Reduce mix data to a single per-lane result. + uint32_t lane_hash[num_lanes]; + for (size_t l = 0; l < num_lanes; ++l) + { + lane_hash[l] = fnv_offset_basis; + for (uint32_t i = 0; i < num_regs; ++i) + lane_hash[l] = fnv1a(lane_hash[l], mix[l][i]); + } + + // Reduce all lanes to a single 256-bit result. + static constexpr size_t num_words = sizeof(hash256) / sizeof(uint32_t); + hash256 mix_hash; + for (uint32_t& w : mix_hash.word32s) + w = fnv_offset_basis; + for (size_t l = 0; l < num_lanes; ++l) + mix_hash.word32s[l % num_words] = fnv1a(mix_hash.word32s[l % num_words], lane_hash[l]); + return le::uint32s(mix_hash); +} +} // namespace + +result k_hash(const epoch_context& context, int block_number, const hash256& header_hash, + uint64_t nonce) noexcept +{ + + nonce = be::uint64(nonce); + + uint32_t hash_seed[2]; // KISS99 initiator + + uint32_t state2[8]; + + { + uint32_t state[25] = {0x0}; // Keccak's state + + // Absorb phase for initial round of keccak + // 1st fill with header data (8 words) + for (int i = 0; i < 8; i++) + state[i] = header_hash.word32s[i]; + + // 2nd fill with nonce (2 words) + state[8] = (uint32_t)nonce; + state[9] = (uint32_t)(nonce >> 32); + + // 3rd apply ravencoin input constraints + for (int i = 10; i < 25; i++) + state[i] = ravencoin_kawpow[i-10]; + + keccak_progpow_64(state); + + for (int i = 0; i < 8; i++) + state2[i] = state[i]; + } + + hash_seed[0] = state2[0]; + hash_seed[1] = state2[1]; + const hash256 mix_hash = hash_mix(context, block_number, hash_seed, calculate_dataset_item_2048); + + uint32_t state[25] = {0x0}; // Keccak's state + + // Absorb phase for last round of keccak (256 bits) + // 1st initial 8 words of state are kept as carry-over from initial keccak + for (int i = 0; i < 8; i++) + state[i] = state2[i]; + + // 2nd subsequent 8 words are carried from digest/mix + for (int i = 8; i < 16; i++) + state[i] = mix_hash.word32s[i-8]; + + // 3rd apply ravencoin input constraints + for (int i = 16; i < 25; i++) + state[i] = ravencoin_kawpow[i - 16]; + + // Run keccak loop + keccak_progpow_256(state); + + hash256 output; + for (int i = 0; i < 8; ++i) + output.word32s[i] = le::uint32(state[i]); + + return {output, mix_hash}; +} + +result k_hash_full(const epoch_context_full& context, int block_number, const hash256& header_hash, + uint64_t nonce) noexcept +{ + static const auto lazy_lookup = [](const epoch_context& ctx, uint32_t index) noexcept + { + auto* full_dataset_1024 = static_cast(ctx).full_dataset; + auto* full_dataset_2048 = reinterpret_cast(full_dataset_1024); + hash2048& item = full_dataset_2048[index]; + if (item.word64s[0] == 0) + { + // TODO: Copy elision here makes it thread-safe? + item = calculate_dataset_item_2048(ctx, index); + } + + return item; + }; + + uint32_t hash_seed[2]; // KISS99 initiator + + uint32_t state2[8]; + + { + uint32_t state[25] = {0x0}; // Keccak's state + + // Absorb phase for initial round of keccak + // 1st fill with header data (8 words) + for (int i = 0; i < 8; i++) + state[i] = header_hash.word32s[i]; + + // 2nd fill with nonce (2 words) + state[8] = (uint32_t)nonce; + state[9] = (uint32_t)(nonce >> 32); + + // 3rd apply ravencoin input constraints + for (int i = 10; i < 25; i++) + state[i] = ravencoin_kawpow[i-10]; + + keccak_progpow_64(state); + + for (int i = 0; i < 8; i++) + state2[i] = state[i]; + } + + hash_seed[0] = state2[0]; + hash_seed[1] = state2[1]; + + const hash256 mix_hash = hash_mix(context, block_number, hash_seed, lazy_lookup); + + uint32_t state[25] = {0x0}; // Keccak's state + + // Absorb phase for last round of keccak (256 bits) + // 1st initial 8 words of state are kept as carry-over from initial keccak + for (int i = 0; i < 8; i++) + state[i] = state2[i]; + + // 2nd subsequent 8 words are carried from digest/mix + for (int i = 8; i < 16; i++) + state[i] = mix_hash.word32s[i-8]; + + // 3rd apply ravencoin input constraints + for (int i = 16; i < 25; i++) + state[i] = ravencoin_kawpow[i - 16]; + + // Run keccak loop + keccak_progpow_256(state); + + hash256 output; + for (int i = 0; i < 8; ++i) + output.word32s[i] = le::uint32(state[i]); + + return {output, mix_hash}; +} + +bool k_verify(const epoch_context& context, int block_number, const hash256& header_hash, + const hash256& mix_hash, uint64_t nonce, const hash256& boundary) noexcept +{ + uint32_t hash_seed[2]; // KISS99 initiator + uint32_t state2[8]; + + { + // Absorb phase for initial round of keccak + // 1st fill with header data (8 words) + uint32_t state[25] = {0x0}; // Keccak's state + for (int i = 0; i < 8; i++) + state[i] = header_hash.word32s[i]; + + // 2nd fill with nonce (2 words) + state[8] = (uint32_t)nonce; + state[9] = (uint32_t)(nonce >> 32); + + // 3rd apply ravencoin input constraints + for (int i = 10; i < 25; i++) + state[i] = ravencoin_kawpow[i-10]; + + keccak_progpow_64(state); + + for (int i = 0; i < 8; i++) + state2[i] = state[i]; + } + + hash_seed[0] = state2[0]; + hash_seed[1] = state2[1]; + + uint32_t state[25] = {0x0}; // Keccak's state + + // Absorb phase for last round of keccak (256 bits) + // 1st initial 8 words of state are kept as carry-over from initial keccak + for (int i = 0; i < 8; i++) + state[i] = state2[i]; + + + // 2nd subsequent 8 words are carried from digest/mix + for (int i = 8; i < 16; i++) + state[i] = mix_hash.word32s[i-8]; + + // 3rd apply ravencoin input constraints + for (int i = 16; i < 25; i++) + state[i] = ravencoin_kawpow[i - 16]; + + // Run keccak loop + keccak_progpow_256(state); + + hash256 output; + for (int i = 0; i < 8; ++i) + output.word32s[i] = le::uint32(state[i]); + if (!is_less_or_equal(output, boundary)) + return false; + + const hash256 expected_mix_hash = + hash_mix(context, block_number, hash_seed, calculate_dataset_item_2048); + + return is_equal(expected_mix_hash, mix_hash); +} + +//bool light_verify(const char* str_header_hash, +// const char* str_mix_hash, const char* str_nonce, const char* str_boundary, char* str_final) noexcept +//{ +// +// hash256 header_hash = to_hash256(str_header_hash); +// hash256 mix_hash = to_hash256(str_mix_hash); +// hash256 boundary = to_hash256(str_boundary); +// +// uint64_t nonce = std::stoull(str_nonce, nullptr, 16); +// +// uint32_t state2[8]; +// +// { +// // Absorb phase for initial round of keccak +// // 1st fill with header data (8 words) +// uint32_t state[25]; // Keccak's state +// for (int i = 0; i < 8; i++) +// state[i] = header_hash.word32s[i]; +// // 2nd fill with nonce (2 words) +// state[8] = (uint32_t)nonce; +// state[9] = (uint32_t)(nonce >> 32); +// // 3rd all remaining elements to zero +// for (int i = 10; i < 25; i++) +// state[i] = 0; +// +// keccak_progpow_64(state); +// +// for (int i = 0; i < 8; i++) +// state2[i] = state[i]; +// } +// +// uint32_t state[25]; // Keccak's state +// for (int i = 0; i < 8; i++) +// state[i] = state2[i]; +// +// // Absorb phase for last round of keccak (256 bits) +// // 1st initial 8 words of state are kept as carry-over from initial keccak +// // 2nd subsequent 8 words are carried from digest/mix +// for (int i = 8; i < 16; i++) +// state[i] = mix_hash.word32s[i-8]; +// +// // 3rd all other elements to zero +// for (int i = 16; i < 25; i++) +// state[i] = 0; +// +// // Run keccak loop +// keccak_progpow_256(state); +// +// hash256 output; +// for (int i = 0; i < 8; ++i) +// output.word32s[i] = le::uint32(state[i]); +// if (!is_less_or_equal(output, boundary)) +// return false; +// +// if (!is_less_or_equal(output, boundary)) +// return false; +// +// memcpy(str_final,&to_hex(output)[0],64); +// return true; +//} + +search_result k_search_light(const epoch_context& context, int block_number, + const hash256& header_hash, const hash256& boundary, uint64_t start_nonce, + size_t iterations) noexcept +{ + const uint64_t end_nonce = start_nonce + iterations; + for (uint64_t nonce = start_nonce; nonce < end_nonce; ++nonce) + { + result r = k_hash(context, block_number, header_hash, nonce); + if (is_less_or_equal(r.final_hash, boundary)) + return {r, nonce}; + } + return {}; +} + +search_result k_search(const epoch_context_full& context, int block_number, + const hash256& header_hash, const hash256& boundary, uint64_t start_nonce, + size_t iterations) noexcept +{ + const uint64_t end_nonce = start_nonce + iterations; + for (uint64_t nonce = start_nonce; nonce < end_nonce; ++nonce) + { + result r = k_hash(context, block_number, header_hash, nonce); + if (is_less_or_equal(r.final_hash, boundary)) + return {r, nonce}; + } + return {}; +} + +} // namespace progpow diff --git a/src/kawpow.hpp b/src/kawpow.hpp new file mode 100644 index 00000000..4f094cea --- /dev/null +++ b/src/kawpow.hpp @@ -0,0 +1,50 @@ +// ethash: C/C++ implementation of Ethash, the Ethereum Proof of Work algorithm. +// Copyright 2018-2019 Pawel Bylica. +// Licensed under the Apache License, Version 2.0. + +/// @file +/// +/// ProgPoW API +/// +/// This file provides the public API for ProgPoW as the Ethash API extension. + +#include "kawpow/ethash.hpp" + +namespace progpow +{ +using namespace ethash; // Include ethash namespace. + + +/// The ProgPoW algorithm revision implemented as specified in the spec +/// https://github.com/ifdefelse/ProgPOW#change-history. +constexpr auto kawpow_revision = "0.9.4"; + +constexpr int period_length = 3; +constexpr uint32_t num_regs = 32; +constexpr size_t num_lanes = 16; +constexpr int num_cache_accesses = 11; +constexpr int num_math_operations = 18; +constexpr size_t l1_cache_size = 16 * 1024; +constexpr size_t l1_cache_num_items = l1_cache_size / sizeof(uint32_t); + +result k_hash(const epoch_context& context, int block_number, const hash256& header_hash, + uint64_t nonce) noexcept; + +result k_hash_full(const epoch_context_full& context, int block_number, const hash256& header_hash, + uint64_t nonce) noexcept; + +bool k_verify(const epoch_context& context, int block_number, const hash256& header_hash, + const hash256& mix_hash, uint64_t nonce, const hash256& boundary) noexcept; + +//bool light_verify(const char* str_header_hash, +// const char* str_mix_hash, const char* str_nonce, const char* str_boundary, char* str_final) noexcept; + +search_result k_search_light(const epoch_context& context, int block_number, + const hash256& header_hash, const hash256& boundary, uint64_t start_nonce, + size_t iterations) noexcept; + +search_result k_search(const epoch_context_full& context, int block_number, + const hash256& header_hash, const hash256& boundary, uint64_t start_nonce, + size_t iterations) noexcept; + +} // namespace progpow diff --git a/src/kawpow/attributes.h b/src/kawpow/attributes.h new file mode 100644 index 00000000..83be231f --- /dev/null +++ b/src/kawpow/attributes.h @@ -0,0 +1,33 @@ +/* ethash: C/C++ implementation of Ethash, the Ethereum Proof of Work algorithm. + * Copyright 2018-2019 Pawel Bylica. + * Licensed under the Apache License, Version 2.0. + */ + +#pragma once + +/** inline */ +#if _MSC_VER || __STDC_VERSION__ +#define INLINE inline +#else +#define INLINE +#endif + +/** [[always_inline]] */ +#if _MSC_VER +#define ALWAYS_INLINE __forceinline +#elif defined(__has_attribute) && __STDC_VERSION__ +#if __has_attribute(always_inline) +#define ALWAYS_INLINE __attribute__((always_inline)) +#endif +#endif +#if !defined(ALWAYS_INLINE) +#define ALWAYS_INLINE +#endif + +/** [[no_sanitize()]] */ +#if __clang__ +#define NO_SANITIZE(sanitizer) \ + __attribute__((no_sanitize(sanitizer))) +#else +#define NO_SANITIZE(sanitizer) +#endif diff --git a/src/kawpow/bit_manipulation.h b/src/kawpow/bit_manipulation.h new file mode 100644 index 00000000..4b70dd8e --- /dev/null +++ b/src/kawpow/bit_manipulation.h @@ -0,0 +1,81 @@ +/* ethash: C/C++ implementation of Ethash, the Ethereum Proof of Work algorithm. + * Copyright 2018-2019 Pawel Bylica. + * Licensed under the Apache License, Version 2.0. + */ + +#pragma once + +#include "builtins.h" +#include "attributes.h" + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +static inline uint32_t rotl32(uint32_t n, unsigned int c) +{ + const unsigned int mask = 31; + + c &= mask; + unsigned int neg_c = (unsigned int)(-(int)c); + return (n << c) | (n >> (neg_c & mask)); +} + +static inline uint32_t rotr32(uint32_t n, unsigned int c) +{ + const unsigned int mask = 31; + + c &= mask; + unsigned int neg_c = (unsigned int)(-(int)c); + return (n >> c) | (n << (neg_c & mask)); +} + +static inline uint32_t clz32(uint32_t x) +{ + return x ? (uint32_t)__builtin_clz(x) : 32; +} + +static inline uint32_t popcount32(uint32_t x) +{ + return (uint32_t)__builtin_popcount(x); +} + +static inline uint32_t mul_hi32(uint32_t x, uint32_t y) +{ + return (uint32_t)(((uint64_t)x * (uint64_t)y) >> 32); +} + + +/** FNV 32-bit prime. */ +static const uint32_t fnv_prime = 0x01000193; + +/** FNV 32-bit offset basis. */ +static const uint32_t fnv_offset_basis = 0x811c9dc5; + +/** + * The implementation of FNV-1 hash. + * + * See https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function#FNV-1_hash. + */ +NO_SANITIZE("unsigned-integer-overflow") +static inline uint32_t fnv1(uint32_t u, uint32_t v) noexcept +{ + return (u * fnv_prime) ^ v; +} + +/** + * The implementation of FNV-1a hash. + * + * See https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function#FNV-1a_hash. + */ +NO_SANITIZE("unsigned-integer-overflow") +static inline uint32_t fnv1a(uint32_t u, uint32_t v) noexcept +{ + return (u ^ v) * fnv_prime; +} + +#ifdef __cplusplus +} +#endif diff --git a/src/kawpow/builtins.h b/src/kawpow/builtins.h new file mode 100644 index 00000000..0c43188a --- /dev/null +++ b/src/kawpow/builtins.h @@ -0,0 +1,43 @@ +/* ethash: C/C++ implementation of Ethash, the Ethereum Proof of Work algorithm. + * Copyright 2018-2019 Pawel Bylica. + * Licensed under the Apache License, Version 2.0. + */ + +/** + * @file + * Implementation of GCC/clang builtins for MSVC compiler. + */ + +#pragma once + +#ifdef _MSC_VER +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Returns the number of leading 0-bits in `x`, starting at the most significant bit position. + * If `x` is 0, the result is undefined. + */ +static inline int __builtin_clz(unsigned int x) +{ + unsigned long most_significant_bit; + _BitScanReverse(&most_significant_bit, x); + return 31 - (int)most_significant_bit; +} + +/** + * Returns the number of 1-bits in `x`. + */ +static inline int __builtin_popcount(unsigned int x) +{ + return (int)__popcnt(x); +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/kawpow/endianness.hpp b/src/kawpow/endianness.hpp new file mode 100644 index 00000000..55c3b9c8 --- /dev/null +++ b/src/kawpow/endianness.hpp @@ -0,0 +1,99 @@ +// ethash: C/C++ implementation of Ethash, the Ethereum Proof of Work algorithm. +// Copyright 2018-2019 Pawel Bylica. +// Licensed under the Apache License, Version 2.0. + +/// @file +/// This file contains helper functions to handle big-endian architectures. +/// The Ethash algorithm is naturally defined for little-endian architectures +/// so for those the helpers are just no-op empty functions. +/// For big-endian architectures we need 32-bit and 64-bit byte swapping in +/// some places. + +#pragma once + +#include "ethash.hpp" + +#if _WIN32 + +#include + +#define bswap32 _byteswap_ulong +#define bswap64 _byteswap_uint64 + +// On Windows assume little endian. +#define __LITTLE_ENDIAN 1234 +#define __BIG_ENDIAN 4321 +#define __BYTE_ORDER __LITTLE_ENDIAN + +#elif __APPLE__ + +#include + +#define bswap32 __builtin_bswap32 +#define bswap64 __builtin_bswap64 + +#else + +#include + +#define bswap32 __builtin_bswap32 +#define bswap64 __builtin_bswap64 + +#endif + +namespace ethash +{ +#if __BYTE_ORDER == __LITTLE_ENDIAN + +struct le +{ + static uint32_t uint32(uint32_t x) noexcept { return x; } + static uint64_t uint64(uint64_t x) noexcept { return x; } + + static const hash1024& uint32s(const hash1024& h) noexcept { return h; } + static const hash512& uint32s(const hash512& h) noexcept { return h; } + static const hash256& uint32s(const hash256& h) noexcept { return h; } +}; + +struct be +{ + static uint64_t uint64(uint64_t x) noexcept { return bswap64(x); } +}; + + +#elif __BYTE_ORDER == __BIG_ENDIAN + +struct le +{ + static uint32_t uint32(uint32_t x) noexcept { return bswap32(x); } + static uint64_t uint64(uint64_t x) noexcept { return bswap64(x); } + + static hash1024 uint32s(hash1024 h) noexcept + { + for (auto& w : h.word32s) + w = uint32(w); + return h; + } + + static hash512 uint32s(hash512 h) noexcept + { + for (auto& w : h.word32s) + w = uint32(w); + return h; + } + + static hash256 uint32s(hash256 h) noexcept + { + for (auto& w : h.word32s) + w = uint32(w); + return h; + } +}; + +struct be +{ + static uint64_t uint64(uint64_t x) noexcept { return x; } +}; + +#endif +} // namespace ethash \ No newline at end of file diff --git a/src/kawpow/ethash-internal.hpp b/src/kawpow/ethash-internal.hpp new file mode 100644 index 00000000..1d4ccd97 --- /dev/null +++ b/src/kawpow/ethash-internal.hpp @@ -0,0 +1,68 @@ +// ethash: C/C++ implementation of Ethash, the Ethereum Proof of Work algorithm. +// Copyright 2018-2019 Pawel Bylica. +// Licensed under the Apache License, Version 2.0. + +/// @file +/// Contains declarations of internal ethash functions to allow them to be +/// unit-tested. + +#pragma once + +#include "ethash.hpp" + +#include "endianness.hpp" + +#include +#include + +extern "C" struct ethash_epoch_context_full : ethash_epoch_context +{ + ethash_hash1024* full_dataset; + + constexpr ethash_epoch_context_full(int epoch, int light_num_items, + const ethash_hash512* light, const uint32_t* l1, int dataset_num_items, + ethash_hash1024* dataset) noexcept + : ethash_epoch_context{epoch, light_num_items, light, l1, dataset_num_items}, + full_dataset{dataset} + {} +}; + +namespace ethash +{ +inline bool is_less_or_equal(const hash256& a, const hash256& b) noexcept +{ + for (size_t i = 0; i < (sizeof(a) / sizeof(a.word64s[0])); ++i) + { + if (be::uint64(a.word64s[i]) > be::uint64(b.word64s[i])) + return false; + if (be::uint64(a.word64s[i]) < be::uint64(b.word64s[i])) + return true; + } + return true; +} + +inline bool is_equal(const hash256& a, const hash256& b) noexcept +{ + return std::memcmp(a.bytes, b.bytes, sizeof(a)) == 0; +} + +void build_light_cache(hash512 cache[], int num_items, const hash256& seed) noexcept; + +hash512 calculate_dataset_item_512(const epoch_context& context, int64_t index) noexcept; +hash1024 calculate_dataset_item_1024(const epoch_context& context, uint32_t index) noexcept; +hash2048 calculate_dataset_item_2048(const epoch_context& context, uint32_t index) noexcept; + +namespace generic +{ +using hash_fn_512 = hash512 (*)(const uint8_t* data, size_t size); +using build_light_cache_fn = void (*)(hash512 cache[], int num_items, const hash256& seed); + +void build_light_cache( + hash_fn_512 hash_fn, hash512 cache[], int num_items, const hash256& seed) noexcept; + +epoch_context_full* create_epoch_context( + build_light_cache_fn build_fn, int epoch_number, bool full) noexcept; + +} // namespace generic + +} // namespace ethash diff --git a/src/kawpow/ethash.cpp b/src/kawpow/ethash.cpp new file mode 100644 index 00000000..c891fb68 --- /dev/null +++ b/src/kawpow/ethash.cpp @@ -0,0 +1,442 @@ +// ethash: C/C++ implementation of Ethash, the Ethereum Proof of Work algorithm. +// Copyright 2018-2019 Pawel Bylica. +// Licensed under the Apache License, Version 2.0. + +#include "ethash-internal.hpp" + +#include "attributes.h" +#include "bit_manipulation.h" +#include "endianness.hpp" +#include "primes.h" +#include "keccak.hpp" +#include "../kawpow.hpp" + +#include +#include +#include +#include + +namespace ethash +{ +// Internal constants: +constexpr static int light_cache_init_size = 1 << 24; +constexpr static int light_cache_growth = 1 << 17; +constexpr static int light_cache_rounds = 3; +constexpr static int full_dataset_init_size = 1 << 30; +constexpr static int full_dataset_growth = 1 << 23; +constexpr static int full_dataset_item_parents = 512; + +// Verify constants: +static_assert(sizeof(hash512) == ETHASH_LIGHT_CACHE_ITEM_SIZE, ""); +static_assert(sizeof(hash1024) == ETHASH_FULL_DATASET_ITEM_SIZE, ""); +static_assert(light_cache_item_size == ETHASH_LIGHT_CACHE_ITEM_SIZE, ""); +static_assert(full_dataset_item_size == ETHASH_FULL_DATASET_ITEM_SIZE, ""); + + +namespace +{ +using ::fnv1; + +inline hash512 fnv1(const hash512& u, const hash512& v) noexcept +{ + hash512 r; + for (size_t i = 0; i < sizeof(r) / sizeof(r.word32s[0]); ++i) + r.word32s[i] = fnv1(u.word32s[i], v.word32s[i]); + return r; +} + +inline hash512 bitwise_xor(const hash512& x, const hash512& y) noexcept +{ + hash512 z; + for (size_t i = 0; i < sizeof(z) / sizeof(z.word64s[0]); ++i) + z.word64s[i] = x.word64s[i] ^ y.word64s[i]; + return z; +} +} // namespace + +int find_epoch_number(const hash256& seed) noexcept +{ + static constexpr int num_tries = 30000; // Divisible by 16. + + // Thread-local cache of the last search. + static thread_local int cached_epoch_number = 0; + static thread_local hash256 cached_seed = {}; + + // Load from memory once (memory will be clobbered by keccak256()). + const uint32_t seed_part = seed.word32s[0]; + const int e = cached_epoch_number; + hash256 s = cached_seed; + + if (s.word32s[0] == seed_part) + return e; + + // Try the next seed, will match for sequential epoch access. + s = keccak256(s); + if (s.word32s[0] == seed_part) + { + cached_seed = s; + cached_epoch_number = e + 1; + return e + 1; + } + + // Search for matching seed starting from epoch 0. + s = {}; + for (int i = 0; i < num_tries; ++i) + { + if (s.word32s[0] == seed_part) + { + cached_seed = s; + cached_epoch_number = i; + return i; + } + + s = keccak256(s); + } + + return -1; +} + +namespace generic +{ +void build_light_cache( + hash_fn_512 hash_fn, hash512 cache[], int num_items, const hash256& seed) noexcept +{ + hash512 item = hash_fn(seed.bytes, sizeof(seed)); + cache[0] = item; + for (int i = 1; i < num_items; ++i) + { + item = hash_fn(item.bytes, sizeof(item)); + cache[i] = item; + } + + for (int q = 0; q < light_cache_rounds; ++q) + { + for (int i = 0; i < num_items; ++i) + { + const uint32_t index_limit = static_cast(num_items); + + // Fist index: 4 first bytes of the item as little-endian integer. + const uint32_t t = le::uint32(cache[i].word32s[0]); + const uint32_t v = t % index_limit; + + // Second index. + const uint32_t w = static_cast(num_items + (i - 1)) % index_limit; + + const hash512 x = bitwise_xor(cache[v], cache[w]); + cache[i] = hash_fn(x.bytes, sizeof(x)); + } + } +} + +epoch_context_full* create_epoch_context( + build_light_cache_fn build_fn, int epoch_number, bool full) noexcept +{ + static_assert(sizeof(epoch_context_full) < sizeof(hash512), "epoch_context too big"); + static constexpr size_t context_alloc_size = sizeof(hash512); + + const int light_cache_num_items = calculate_light_cache_num_items(epoch_number); + const int full_dataset_num_items = calculate_full_dataset_num_items(epoch_number); + const size_t light_cache_size = get_light_cache_size(light_cache_num_items); + const size_t full_dataset_size = + full ? static_cast(full_dataset_num_items) * sizeof(hash1024) : + progpow::l1_cache_size; + + const size_t alloc_size = context_alloc_size + light_cache_size + full_dataset_size; + + char* const alloc_data = static_cast(std::calloc(1, alloc_size)); + if (!alloc_data) + return nullptr; // Signal out-of-memory by returning null pointer. + + hash512* const light_cache = reinterpret_cast(alloc_data + context_alloc_size); + const hash256 epoch_seed = calculate_epoch_seed(epoch_number); + build_fn(light_cache, light_cache_num_items, epoch_seed); + + uint32_t* const l1_cache = + reinterpret_cast(alloc_data + context_alloc_size + light_cache_size); + + hash1024* full_dataset = full ? reinterpret_cast(l1_cache) : nullptr; + + epoch_context_full* const context = new (alloc_data) epoch_context_full{ + epoch_number, + light_cache_num_items, + light_cache, + l1_cache, + full_dataset_num_items, + full_dataset, + }; + + auto* full_dataset_2048 = reinterpret_cast(l1_cache); + for (uint32_t i = 0; i < progpow::l1_cache_size / sizeof(full_dataset_2048[0]); ++i) + full_dataset_2048[i] = calculate_dataset_item_2048(*context, i); + return context; +} +} // namespace generic + +void build_light_cache(hash512 cache[], int num_items, const hash256& seed) noexcept +{ + return generic::build_light_cache(keccak512, cache, num_items, seed); +} + +struct item_state +{ + const hash512* const cache; + const int64_t num_cache_items; + const uint32_t seed; + + hash512 mix; + + ALWAYS_INLINE item_state(const epoch_context& context, int64_t index) noexcept + : cache{context.light_cache}, + num_cache_items{context.light_cache_num_items}, + seed{static_cast(index)} + { + mix = cache[index % num_cache_items]; + mix.word32s[0] ^= le::uint32(seed); + mix = le::uint32s(keccak512(mix)); + } + + ALWAYS_INLINE void update(uint32_t round) noexcept + { + static constexpr size_t num_words = sizeof(mix) / sizeof(uint32_t); + const uint32_t t = fnv1(seed ^ round, mix.word32s[round % num_words]); + const int64_t parent_index = t % num_cache_items; + mix = fnv1(mix, le::uint32s(cache[parent_index])); + } + + ALWAYS_INLINE hash512 final() noexcept { return keccak512(le::uint32s(mix)); } +}; + +hash512 calculate_dataset_item_512(const epoch_context& context, int64_t index) noexcept +{ + item_state item0{context, index}; + for (uint32_t j = 0; j < full_dataset_item_parents; ++j) + item0.update(j); + return item0.final(); +} + +/// Calculates a full dataset item +/// +/// This consist of two 512-bit items produced by calculate_dataset_item_partial(). +/// Here the computation is done interleaved for better performance. +hash1024 calculate_dataset_item_1024(const epoch_context& context, uint32_t index) noexcept +{ + item_state item0{context, int64_t(index) * 2}; + item_state item1{context, int64_t(index) * 2 + 1}; + + for (uint32_t j = 0; j < full_dataset_item_parents; ++j) + { + item0.update(j); + item1.update(j); + } + + return hash1024{{item0.final(), item1.final()}}; +} + +hash2048 calculate_dataset_item_2048(const epoch_context& context, uint32_t index) noexcept +{ + item_state item0{context, int64_t(index) * 4}; + item_state item1{context, int64_t(index) * 4 + 1}; + item_state item2{context, int64_t(index) * 4 + 2}; + item_state item3{context, int64_t(index) * 4 + 3}; + + for (uint32_t j = 0; j < full_dataset_item_parents; ++j) + { + item0.update(j); + item1.update(j); + item2.update(j); + item3.update(j); + } + + return hash2048{{item0.final(), item1.final(), item2.final(), item3.final()}}; +} + +namespace +{ +using lookup_fn = hash1024 (*)(const epoch_context&, uint32_t); + +inline hash512 hash_seed(const hash256& header_hash, uint64_t nonce) noexcept +{ + nonce = le::uint64(nonce); + uint8_t init_data[sizeof(header_hash) + sizeof(nonce)]; + std::memcpy(&init_data[0], &header_hash, sizeof(header_hash)); + std::memcpy(&init_data[sizeof(header_hash)], &nonce, sizeof(nonce)); + + return keccak512(init_data, sizeof(init_data)); +} + +inline hash256 hash_final(const hash512& seed, const hash256& mix_hash) +{ + uint8_t final_data[sizeof(seed) + sizeof(mix_hash)]; + std::memcpy(&final_data[0], seed.bytes, sizeof(seed)); + std::memcpy(&final_data[sizeof(seed)], mix_hash.bytes, sizeof(mix_hash)); + return keccak256(final_data, sizeof(final_data)); +} + +inline hash256 hash_kernel( + const epoch_context& context, const hash512& seed, lookup_fn lookup) noexcept +{ + static constexpr size_t num_words = sizeof(hash1024) / sizeof(uint32_t); + const uint32_t index_limit = static_cast(context.full_dataset_num_items); + const uint32_t seed_init = le::uint32(seed.word32s[0]); + + hash1024 mix{{le::uint32s(seed), le::uint32s(seed)}}; + + for (uint32_t i = 0; i < num_dataset_accesses; ++i) + { + const uint32_t p = fnv1(i ^ seed_init, mix.word32s[i % num_words]) % index_limit; + const hash1024 newdata = le::uint32s(lookup(context, p)); + + for (size_t j = 0; j < num_words; ++j) + mix.word32s[j] = fnv1(mix.word32s[j], newdata.word32s[j]); + } + + hash256 mix_hash; + for (size_t i = 0; i < num_words; i += 4) + { + const uint32_t h1 = fnv1(mix.word32s[i], mix.word32s[i + 1]); + const uint32_t h2 = fnv1(h1, mix.word32s[i + 2]); + const uint32_t h3 = fnv1(h2, mix.word32s[i + 3]); + mix_hash.word32s[i / 4] = h3; + } + + return le::uint32s(mix_hash); +} +} // namespace + +result e_hash_full(const epoch_context_full& context, const hash256& header_hash, uint64_t nonce) noexcept +{ + static const auto lazy_lookup = [](const epoch_context& ctx, uint32_t index) noexcept + { + auto full_dataset = static_cast(ctx).full_dataset; + hash1024& item = full_dataset[index]; + if (item.word64s[0] == 0) + { + // TODO: Copy elision here makes it thread-safe? + item = calculate_dataset_item_1024(ctx, index); + } + + return item; + }; + + const hash512 seed = hash_seed(header_hash, nonce); + const hash256 mix_hash = hash_kernel(context, seed, lazy_lookup); + return {hash_final(seed, mix_hash), mix_hash}; +} + +search_result e_search_light(const epoch_context& context, const hash256& header_hash, + const hash256& boundary, uint64_t start_nonce, size_t iterations) noexcept +{ + const uint64_t end_nonce = start_nonce + iterations; + for (uint64_t nonce = start_nonce; nonce < end_nonce; ++nonce) + { + result r = e_hash(context, header_hash, nonce); + if (is_less_or_equal(r.final_hash, boundary)) + return {r, nonce}; + } + return {}; +} + +search_result e_search(const epoch_context_full& context, const hash256& header_hash, + const hash256& boundary, uint64_t start_nonce, size_t iterations) noexcept +{ + const uint64_t end_nonce = start_nonce + iterations; + for (uint64_t nonce = start_nonce; nonce < end_nonce; ++nonce) + { + result r = e_hash(context, header_hash, nonce); + if (is_less_or_equal(r.final_hash, boundary)) + return {r, nonce}; + } + return {}; +} +} // namespace ethash + +using namespace ethash; + +extern "C" { + +ethash_hash256 ethash_calculate_epoch_seed(int epoch_number) noexcept +{ + ethash_hash256 epoch_seed = {}; + for (int i = 0; i < epoch_number; ++i) + epoch_seed = ethash_keccak256_32(epoch_seed.bytes); + return epoch_seed; +} + +int ethash_calculate_light_cache_num_items(int epoch_number) noexcept +{ + static constexpr int item_size = sizeof(hash512); + static constexpr int num_items_init = light_cache_init_size / item_size; + static constexpr int num_items_growth = light_cache_growth / item_size; + static_assert( + light_cache_init_size % item_size == 0, "light_cache_init_size not multiple of item size"); + static_assert( + light_cache_growth % item_size == 0, "light_cache_growth not multiple of item size"); + + int num_items_upper_bound = num_items_init + epoch_number * num_items_growth; + int num_items = ethash_find_largest_prime(num_items_upper_bound); + return num_items; +} + +int ethash_calculate_full_dataset_num_items(int epoch_number) noexcept +{ + static constexpr int item_size = sizeof(hash1024); + static constexpr int num_items_init = full_dataset_init_size / item_size; + static constexpr int num_items_growth = full_dataset_growth / item_size; + static_assert(full_dataset_init_size % item_size == 0, + "full_dataset_init_size not multiple of item size"); + static_assert( + full_dataset_growth % item_size == 0, "full_dataset_growth not multiple of item size"); + + int num_items_upper_bound = num_items_init + epoch_number * num_items_growth; + int num_items = ethash_find_largest_prime(num_items_upper_bound); + return num_items; +} + +epoch_context* ethash_create_epoch_context(int epoch_number) noexcept +{ + return generic::create_epoch_context(build_light_cache, epoch_number, false); +} + +epoch_context_full* ethash_create_epoch_context_full(int epoch_number) noexcept +{ + return generic::create_epoch_context(build_light_cache, epoch_number, true); +} + +void ethash_destroy_epoch_context_full(epoch_context_full* context) noexcept +{ + ethash_destroy_epoch_context(context); +} + +void ethash_destroy_epoch_context(epoch_context* context) noexcept +{ + context->~epoch_context(); + std::free(context); +} + +ethash_result ethash_hash( + const epoch_context* context, const hash256* header_hash, uint64_t nonce) noexcept +{ + const hash512 seed = hash_seed(*header_hash, nonce); + const hash256 mix_hash = hash_kernel(*context, seed, calculate_dataset_item_1024); + return {hash_final(seed, mix_hash), mix_hash}; +} + +bool ethash_verify_final_hash(const hash256* header_hash, const hash256* mix_hash, uint64_t nonce, + const hash256* boundary) noexcept +{ + const hash512 seed = hash_seed(*header_hash, nonce); + return is_less_or_equal(hash_final(seed, *mix_hash), *boundary); +} + +bool ethash_verify(const epoch_context* context, const hash256* header_hash, + const hash256* mix_hash, uint64_t nonce, const hash256* boundary) noexcept +{ + const hash512 seed = hash_seed(*header_hash, nonce); + if (!is_less_or_equal(hash_final(seed, *mix_hash), *boundary)) + return false; + + const hash256 expected_mix_hash = hash_kernel(*context, seed, calculate_dataset_item_1024); + return is_equal(expected_mix_hash, *mix_hash); +} + +} // extern "C" diff --git a/src/kawpow/ethash.h b/src/kawpow/ethash.h new file mode 100644 index 00000000..519dcd2f --- /dev/null +++ b/src/kawpow/ethash.h @@ -0,0 +1,131 @@ +/* ethash: C/C++ implementation of Ethash, the Ethereum Proof of Work algorithm. + * Copyright 2018-2019 Pawel Bylica. + * Licensed under the Apache License, Version 2.0. + */ + +#pragma once + +#include "hash_types.h" + +#include +#include + +#ifdef __cplusplus +#define NOEXCEPT noexcept +#else +#define NOEXCEPT +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * The Ethash algorithm revision implemented as specified in the Ethash spec + * https://github.com/ethereum/wiki/wiki/Ethash. + */ +#define ETHASH_REVISION "23" + +#define ETHASH_EPOCH_LENGTH 7500 +#define ETHASH_LIGHT_CACHE_ITEM_SIZE 64 +#define ETHASH_FULL_DATASET_ITEM_SIZE 128 +#define ETHASH_NUM_DATASET_ACCESSES 64 + + +struct ethash_epoch_context +{ + const int epoch_number; + const int light_cache_num_items; + const union ethash_hash512* const light_cache; + const uint32_t* const l1_cache; + const int full_dataset_num_items; +}; + + +struct ethash_epoch_context_full; + + +struct ethash_result +{ + union ethash_hash256 final_hash; + union ethash_hash256 mix_hash; +}; + + +/** + * Calculates the number of items in the light cache for given epoch. + * + * This function will search for a prime number matching the criteria given + * by the Ethash so the execution time is not constant. It takes ~ 0.01 ms. + * + * @param epoch_number The epoch number. + * @return The number items in the light cache. + */ +int ethash_calculate_light_cache_num_items(int epoch_number) NOEXCEPT; + + +/** + * Calculates the number of items in the full dataset for given epoch. + * + * This function will search for a prime number matching the criteria given + * by the Ethash so the execution time is not constant. It takes ~ 0.05 ms. + * + * @param epoch_number The epoch number. + * @return The number items in the full dataset. + */ +int ethash_calculate_full_dataset_num_items(int epoch_number) NOEXCEPT; + +/** + * Calculates the epoch seed hash. + * @param epoch_number The epoch number. + * @return The epoch seed hash. + */ +union ethash_hash256 ethash_calculate_epoch_seed(int epoch_number) NOEXCEPT; + + +struct ethash_epoch_context* ethash_create_epoch_context(int epoch_number) NOEXCEPT; + +/** + * Creates the epoch context with the full dataset initialized. + * + * The memory for the full dataset is only allocated and marked as "not-generated". + * The items of the full dataset are generated on the fly when hit for the first time. + * + * The memory allocated in the context MUST be freed with ethash_destroy_epoch_context_full(). + * + * @param epoch_number The epoch number. + * @return Pointer to the context or null in case of memory allocation failure. + */ +struct ethash_epoch_context_full* ethash_create_epoch_context_full(int epoch_number) NOEXCEPT; + +void ethash_destroy_epoch_context(struct ethash_epoch_context* context) NOEXCEPT; + +void ethash_destroy_epoch_context_full(struct ethash_epoch_context_full* context) NOEXCEPT; + + +/** + * Get global shared epoch context. + */ +const struct ethash_epoch_context* ethash_get_global_epoch_context(int epoch_number) NOEXCEPT; + +/** + * Get global shared epoch context with full dataset initialized. + */ +const struct ethash_epoch_context_full* ethash_get_global_epoch_context_full( + int epoch_number) NOEXCEPT; + + +struct ethash_result ethash_hash(const struct ethash_epoch_context* context, + const union ethash_hash256* header_hash, uint64_t nonce) NOEXCEPT; + +bool ethash_verify(const struct ethash_epoch_context* context, + const union ethash_hash256* header_hash, const union ethash_hash256* mix_hash, uint64_t nonce, + const union ethash_hash256* boundary) NOEXCEPT; + +bool ethash_verify_final_hash(const union ethash_hash256* header_hash, + const union ethash_hash256* mix_hash, uint64_t nonce, + const union ethash_hash256* boundary) NOEXCEPT; + +#ifdef __cplusplus +} +#endif diff --git a/src/kawpow/ethash.hpp b/src/kawpow/ethash.hpp new file mode 100644 index 00000000..a5d03ac1 --- /dev/null +++ b/src/kawpow/ethash.hpp @@ -0,0 +1,172 @@ +// ethash: C/C++ implementation of Ethash, the Ethereum Proof of Work algorithm. +// Copyright 2018-2019 Pawel Bylica. +// Licensed under the Apache License, Version 2.0. + +/// @file +/// +/// API design decisions: +/// +/// 1. Signed integer type is used whenever the size of the type is not +/// restricted by the Ethash specification. +/// See http://www.aristeia.com/Papers/C++ReportColumns/sep95.pdf. +/// See https://stackoverflow.com/questions/10168079/why-is-size-t-unsigned/. +/// See https://github.com/Microsoft/GSL/issues/171. + +#pragma once + +#include "ethash.h" +#include "hash_types.hpp" + +#include +#include +#include + +namespace ethash +{ +constexpr auto ethash_revision = ETHASH_REVISION; + +static constexpr int epoch_length = ETHASH_EPOCH_LENGTH; +static constexpr int light_cache_item_size = ETHASH_LIGHT_CACHE_ITEM_SIZE; +static constexpr int full_dataset_item_size = ETHASH_FULL_DATASET_ITEM_SIZE; +static constexpr int num_dataset_accesses = ETHASH_NUM_DATASET_ACCESSES; + +using epoch_context = ethash_epoch_context; +using epoch_context_full = ethash_epoch_context_full; + +using result = ethash_result; + +/// Constructs a 256-bit hash from an array of bytes. +/// +/// @param bytes A pointer to array of at least 32 bytes. +/// @return The constructed hash. +inline hash256 hash256_from_bytes(const uint8_t bytes[32]) noexcept +{ + hash256 h; + std::memcpy(&h, bytes, sizeof(h)); + return h; +} + +struct search_result +{ + bool solution_found = false; + uint64_t nonce = 0; + hash256 final_hash = {}; + hash256 mix_hash = {}; + + search_result() noexcept = default; + + search_result(result res, uint64_t n) noexcept + : solution_found(true), nonce(n), final_hash(res.final_hash), mix_hash(res.mix_hash) + {} +}; + + +/// Alias for ethash_calculate_light_cache_num_items(). +static constexpr auto calculate_light_cache_num_items = ethash_calculate_light_cache_num_items; + +/// Alias for ethash_calculate_full_dataset_num_items(). +static constexpr auto calculate_full_dataset_num_items = ethash_calculate_full_dataset_num_items; + +/// Alias for ethash_calculate_epoch_seed(). +static constexpr auto calculate_epoch_seed = ethash_calculate_epoch_seed; + + +/// Calculates the epoch number out of the block number. +inline constexpr int get_epoch_number(int block_number) noexcept +{ + return block_number / epoch_length; +} + +/** + * Coverts the number of items of a light cache to size in bytes. + * + * @param num_items The number of items in the light cache. + * @return The size of the light cache in bytes. + */ +inline constexpr size_t get_light_cache_size(int num_items) noexcept +{ + return static_cast(num_items) * light_cache_item_size; +} + +/** + * Coverts the number of items of a full dataset to size in bytes. + * + * @param num_items The number of items in the full dataset. + * @return The size of the full dataset in bytes. + */ +inline constexpr uint64_t get_full_dataset_size(int num_items) noexcept +{ + return static_cast(num_items) * full_dataset_item_size; +} + +/// Owned unique pointer to an epoch context. +using epoch_context_ptr = std::unique_ptr; + +using epoch_context_full_ptr = + std::unique_ptr; + +/// Creates Ethash epoch context. +/// +/// This is a wrapper for ethash_create_epoch_number C function that returns +/// the context as a smart pointer which handles the destruction of the context. +inline epoch_context_ptr create_epoch_context(int epoch_number) noexcept +{ + return {ethash_create_epoch_context(epoch_number), ethash_destroy_epoch_context}; +} + +inline epoch_context_full_ptr create_epoch_context_full(int epoch_number) noexcept +{ + return {ethash_create_epoch_context_full(epoch_number), ethash_destroy_epoch_context_full}; +} + + +inline result e_hash( + const epoch_context& context, const hash256& header_hash, uint64_t nonce) noexcept +{ + return ethash_hash(&context, &header_hash, nonce); +} + +result e_hash_full(const epoch_context_full& context, const hash256& header_hash, uint64_t nonce) noexcept; + +inline bool verify_final_hash(const hash256& header_hash, const hash256& mix_hash, uint64_t nonce, + const hash256& boundary) noexcept +{ + return ethash_verify_final_hash(&header_hash, &mix_hash, nonce, &boundary); +} + +inline bool e_verify(const epoch_context& context, const hash256& header_hash, const hash256& mix_hash, + uint64_t nonce, const hash256& boundary) noexcept +{ + return ethash_verify(&context, &header_hash, &mix_hash, nonce, &boundary); +} + +search_result e_search_light(const epoch_context& context, const hash256& header_hash, + const hash256& boundary, uint64_t start_nonce, size_t iterations) noexcept; + +search_result e_search(const epoch_context_full& context, const hash256& header_hash, + const hash256& boundary, uint64_t start_nonce, size_t iterations) noexcept; + + +/// Tries to find the epoch number matching the given seed hash. +/// +/// Mining pool protocols (many variants of stratum and "getwork") send out +/// seed hash instead of epoch number to workers. This function tries to recover +/// the epoch number from this seed hash. +/// +/// @param seed Ethash seed hash. +/// @return The epoch number or -1 if not found. +int find_epoch_number(const hash256& seed) noexcept; + + +/// Get global shared epoch context. +inline const epoch_context& get_global_epoch_context(int epoch_number) noexcept +{ + return *ethash_get_global_epoch_context(epoch_number); +} + +/// Get global shared epoch context with full dataset initialized. +inline const epoch_context_full& get_global_epoch_context_full(int epoch_number) noexcept +{ + return *ethash_get_global_epoch_context_full(epoch_number); +} +} // namespace ethash diff --git a/src/kawpow/hash_types.h b/src/kawpow/hash_types.h new file mode 100644 index 00000000..bd934368 --- /dev/null +++ b/src/kawpow/hash_types.h @@ -0,0 +1,50 @@ +/* ethash: C/C++ implementation of Ethash, the Ethereum Proof of Work algorithm. + * Copyright 2018-2019 Pawel Bylica. + * Licensed under the Apache License, Version 2.0. + */ + +#pragma once + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +union ethash_hash256 +{ + uint64_t word64s[4]; + uint32_t word32s[8]; + uint8_t bytes[32]; + char str[32]; +}; + +union ethash_hash512 +{ + uint64_t word64s[8]; + uint32_t word32s[16]; + uint8_t bytes[64]; + char str[64]; +}; + +union ethash_hash1024 +{ + union ethash_hash512 hash512s[2]; + uint64_t word64s[16]; + uint32_t word32s[32]; + uint8_t bytes[128]; + char str[128]; +}; + +union ethash_hash2048 +{ + union ethash_hash512 hash512s[4]; + uint64_t word64s[32]; + uint32_t word32s[64]; + uint8_t bytes[256]; + char str[256]; +}; + +#ifdef __cplusplus +} +#endif diff --git a/src/kawpow/hash_types.hpp b/src/kawpow/hash_types.hpp new file mode 100644 index 00000000..a31cff49 --- /dev/null +++ b/src/kawpow/hash_types.hpp @@ -0,0 +1,15 @@ +// ethash: C/C++ implementation of Ethash, the Ethereum Proof of Work algorithm. +// Copyright 2018-2019 Pawel Bylica. +// Licensed under the Apache License, Version 2.0. + +#pragma once + +#include "hash_types.h" + +namespace ethash +{ +using hash256 = ethash_hash256; +using hash512 = ethash_hash512; +using hash1024 = ethash_hash1024; +using hash2048 = ethash_hash2048; +} // namespace ethash diff --git a/src/kawpow/keccak.c b/src/kawpow/keccak.c new file mode 100644 index 00000000..b0207997 --- /dev/null +++ b/src/kawpow/keccak.c @@ -0,0 +1,123 @@ +/* ethash: C/C++ implementation of Ethash, the Ethereum Proof of Work algorithm. + * Copyright 2018-2019 Pawel Bylica. + * Licensed under the Apache License, Version 2.0. + */ + +#include "keccak.h" + +#include "attributes.h" +#include + +#if _WIN32 +/* On Windows assume little endian. */ +#define __LITTLE_ENDIAN 1234 +#define __BIG_ENDIAN 4321 +#define __BYTE_ORDER __LITTLE_ENDIAN +#elif __APPLE__ +#include +#else +#include +#endif + +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define to_le64(X) X +#else +#define to_le64(X) __builtin_bswap64(X) +#endif + + +/** Loads 64-bit integer from given memory location as little-endian number. */ +static INLINE ALWAYS_INLINE uint64_t load_le(const uint8_t* data) +{ + /* memcpy is the best way of expressing the intention. Every compiler will + optimize is to single load instruction if the target architecture + supports unaligned memory access (GCC and clang even in O0). + This is great trick because we are violating C/C++ memory alignment + restrictions with no performance penalty. */ + uint64_t word; + memcpy(&word, data, sizeof(word)); + return to_le64(word); +} + +static INLINE ALWAYS_INLINE void keccak( + uint64_t* out, size_t bits, const uint8_t* data, size_t size) +{ + static const size_t word_size = sizeof(uint64_t); + const size_t hash_size = bits / 8; + const size_t block_size = (1600 - bits * 2) / 8; + + size_t i; + uint64_t* state_iter; + uint64_t last_word = 0; + uint8_t* last_word_iter = (uint8_t*)&last_word; + + uint64_t state[25] = {0}; + + while (size >= block_size) + { + for (i = 0; i < (block_size / word_size); ++i) + { + state[i] ^= load_le(data); + data += word_size; + } + + ethash_keccakf1600(state); + + size -= block_size; + } + + state_iter = state; + + while (size >= word_size) + { + *state_iter ^= load_le(data); + ++state_iter; + data += word_size; + size -= word_size; + } + + while (size > 0) + { + *last_word_iter = *data; + ++last_word_iter; + ++data; + --size; + } + *last_word_iter = 0x01; + *state_iter ^= to_le64(last_word); + + state[(block_size / word_size) - 1] ^= 0x8000000000000000; + + ethash_keccakf1600(state); + + for (i = 0; i < (hash_size / word_size); ++i) + out[i] = to_le64(state[i]); +} + +union ethash_hash256 ethash_keccak256(const uint8_t* data, size_t size) +{ + union ethash_hash256 hash; + keccak(hash.word64s, 256, data, size); + return hash; +} + +union ethash_hash256 ethash_keccak256_32(const uint8_t data[32]) +{ + union ethash_hash256 hash; + keccak(hash.word64s, 256, data, 32); + return hash; +} + +union ethash_hash512 ethash_keccak512(const uint8_t* data, size_t size) +{ + union ethash_hash512 hash; + keccak(hash.word64s, 512, data, size); + return hash; +} + +union ethash_hash512 ethash_keccak512_64(const uint8_t data[64]) +{ + union ethash_hash512 hash; + keccak(hash.word64s, 512, data, 64); + return hash; +} diff --git a/src/kawpow/keccak.h b/src/kawpow/keccak.h new file mode 100644 index 00000000..79ef7c5c --- /dev/null +++ b/src/kawpow/keccak.h @@ -0,0 +1,49 @@ +/* ethash: C/C++ implementation of Ethash, the Ethereum Proof of Work algorithm. + * Copyright 2018-2019 Pawel Bylica. + * Licensed under the Apache License, Version 2.0. + */ + +#pragma once + +#include "hash_types.h" + +#include + +#ifdef __cplusplus +#define NOEXCEPT noexcept +#else +#define NOEXCEPT +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * The Keccak-f[1600] function. + * + * The implementation of the Keccak-f function with 1600-bit width of the permutation (b). + * The size of the state is also 1600 bit what gives 25 64-bit words. + * + * @param state The state of 25 64-bit words on which the permutation is to be performed. + */ +void ethash_keccakf1600(uint64_t state[25]) NOEXCEPT; + +/** + * The Keccak-f[800] function. + * + * The implementation of the Keccak-f function with 800-bit width of the permutation (b). + * The size of the state is also 800 bit what gives 25 32-bit words. + * + * @param state The state of 25 32-bit words on which the permutation is to be performed. + */ +void ethash_keccakf800(uint32_t state[25]) NOEXCEPT; + +union ethash_hash256 ethash_keccak256(const uint8_t* data, size_t size) NOEXCEPT; +union ethash_hash256 ethash_keccak256_32(const uint8_t data[32]) NOEXCEPT; +union ethash_hash512 ethash_keccak512(const uint8_t* data, size_t size) NOEXCEPT; +union ethash_hash512 ethash_keccak512_64(const uint8_t data[64]) NOEXCEPT; + +#ifdef __cplusplus +} +#endif diff --git a/src/kawpow/keccak.hpp b/src/kawpow/keccak.hpp new file mode 100644 index 00000000..97406792 --- /dev/null +++ b/src/kawpow/keccak.hpp @@ -0,0 +1,35 @@ +// ethash: C/C++ implementation of Ethash, the Ethereum Proof of Work algorithm. +// Copyright 2018-2019 Pawel Bylica. +// Licensed under the Apache License, Version 2.0. + +#pragma once + +#include "keccak.h" +#include "hash_types.hpp" + +namespace ethash +{ +inline hash256 keccak256(const uint8_t* data, size_t size) noexcept +{ + return ethash_keccak256(data, size); +} + +inline hash256 keccak256(const hash256& input) noexcept +{ + return ethash_keccak256_32(input.bytes); +} + +inline hash512 keccak512(const uint8_t* data, size_t size) noexcept +{ + return ethash_keccak512(data, size); +} + +inline hash512 keccak512(const hash512& input) noexcept +{ + return ethash_keccak512_64(input.bytes); +} + +static constexpr auto keccak256_32 = ethash_keccak256_32; +static constexpr auto keccak512_64 = ethash_keccak512_64; + +} // namespace ethash diff --git a/src/kawpow/keccakf1600.c b/src/kawpow/keccakf1600.c new file mode 100644 index 00000000..e12b268d --- /dev/null +++ b/src/kawpow/keccakf1600.c @@ -0,0 +1,255 @@ +/* ethash: C/C++ implementation of Ethash, the Ethereum Proof of Work algorithm. + * Copyright 2018-2019 Pawel Bylica. + * Licensed under the Apache License, Version 2.0. + */ + +#include + +static uint64_t rol(uint64_t x, unsigned s) +{ + return (x << s) | (x >> (64 - s)); +} + +static const uint64_t round_constants[24] = { + 0x0000000000000001, + 0x0000000000008082, + 0x800000000000808a, + 0x8000000080008000, + 0x000000000000808b, + 0x0000000080000001, + 0x8000000080008081, + 0x8000000000008009, + 0x000000000000008a, + 0x0000000000000088, + 0x0000000080008009, + 0x000000008000000a, + 0x000000008000808b, + 0x800000000000008b, + 0x8000000000008089, + 0x8000000000008003, + 0x8000000000008002, + 0x8000000000000080, + 0x000000000000800a, + 0x800000008000000a, + 0x8000000080008081, + 0x8000000000008080, + 0x0000000080000001, + 0x8000000080008008, +}; + +void ethash_keccakf1600(uint64_t state[25]) +{ + /* The implementation based on the "simple" implementation by Ronny Van Keer. */ + + int round; + + uint64_t Aba, Abe, Abi, Abo, Abu; + uint64_t Aga, Age, Agi, Ago, Agu; + uint64_t Aka, Ake, Aki, Ako, Aku; + uint64_t Ama, Ame, Ami, Amo, Amu; + uint64_t Asa, Ase, Asi, Aso, Asu; + + uint64_t Eba, Ebe, Ebi, Ebo, Ebu; + uint64_t Ega, Ege, Egi, Ego, Egu; + uint64_t Eka, Eke, Eki, Eko, Eku; + uint64_t Ema, Eme, Emi, Emo, Emu; + uint64_t Esa, Ese, Esi, Eso, Esu; + + uint64_t Ba, Be, Bi, Bo, Bu; + + uint64_t Da, De, Di, Do, Du; + + Aba = state[0]; + Abe = state[1]; + Abi = state[2]; + Abo = state[3]; + Abu = state[4]; + Aga = state[5]; + Age = state[6]; + Agi = state[7]; + Ago = state[8]; + Agu = state[9]; + Aka = state[10]; + Ake = state[11]; + Aki = state[12]; + Ako = state[13]; + Aku = state[14]; + Ama = state[15]; + Ame = state[16]; + Ami = state[17]; + Amo = state[18]; + Amu = state[19]; + Asa = state[20]; + Ase = state[21]; + Asi = state[22]; + Aso = state[23]; + Asu = state[24]; + + for (round = 0; round < 24; round += 2) + { + /* Round (round + 0): Axx -> Exx */ + + Ba = Aba ^ Aga ^ Aka ^ Ama ^ Asa; + Be = Abe ^ Age ^ Ake ^ Ame ^ Ase; + Bi = Abi ^ Agi ^ Aki ^ Ami ^ Asi; + Bo = Abo ^ Ago ^ Ako ^ Amo ^ Aso; + Bu = Abu ^ Agu ^ Aku ^ Amu ^ Asu; + + Da = Bu ^ rol(Be, 1); + De = Ba ^ rol(Bi, 1); + Di = Be ^ rol(Bo, 1); + Do = Bi ^ rol(Bu, 1); + Du = Bo ^ rol(Ba, 1); + + Ba = Aba ^ Da; + Be = rol(Age ^ De, 44); + Bi = rol(Aki ^ Di, 43); + Bo = rol(Amo ^ Do, 21); + Bu = rol(Asu ^ Du, 14); + Eba = Ba ^ (~Be & Bi) ^ round_constants[round]; + Ebe = Be ^ (~Bi & Bo); + Ebi = Bi ^ (~Bo & Bu); + Ebo = Bo ^ (~Bu & Ba); + Ebu = Bu ^ (~Ba & Be); + + Ba = rol(Abo ^ Do, 28); + Be = rol(Agu ^ Du, 20); + Bi = rol(Aka ^ Da, 3); + Bo = rol(Ame ^ De, 45); + Bu = rol(Asi ^ Di, 61); + Ega = Ba ^ (~Be & Bi); + Ege = Be ^ (~Bi & Bo); + Egi = Bi ^ (~Bo & Bu); + Ego = Bo ^ (~Bu & Ba); + Egu = Bu ^ (~Ba & Be); + + Ba = rol(Abe ^ De, 1); + Be = rol(Agi ^ Di, 6); + Bi = rol(Ako ^ Do, 25); + Bo = rol(Amu ^ Du, 8); + Bu = rol(Asa ^ Da, 18); + Eka = Ba ^ (~Be & Bi); + Eke = Be ^ (~Bi & Bo); + Eki = Bi ^ (~Bo & Bu); + Eko = Bo ^ (~Bu & Ba); + Eku = Bu ^ (~Ba & Be); + + Ba = rol(Abu ^ Du, 27); + Be = rol(Aga ^ Da, 36); + Bi = rol(Ake ^ De, 10); + Bo = rol(Ami ^ Di, 15); + Bu = rol(Aso ^ Do, 56); + Ema = Ba ^ (~Be & Bi); + Eme = Be ^ (~Bi & Bo); + Emi = Bi ^ (~Bo & Bu); + Emo = Bo ^ (~Bu & Ba); + Emu = Bu ^ (~Ba & Be); + + Ba = rol(Abi ^ Di, 62); + Be = rol(Ago ^ Do, 55); + Bi = rol(Aku ^ Du, 39); + Bo = rol(Ama ^ Da, 41); + Bu = rol(Ase ^ De, 2); + Esa = Ba ^ (~Be & Bi); + Ese = Be ^ (~Bi & Bo); + Esi = Bi ^ (~Bo & Bu); + Eso = Bo ^ (~Bu & Ba); + Esu = Bu ^ (~Ba & Be); + + + /* Round (round + 1): Exx -> Axx */ + + Ba = Eba ^ Ega ^ Eka ^ Ema ^ Esa; + Be = Ebe ^ Ege ^ Eke ^ Eme ^ Ese; + Bi = Ebi ^ Egi ^ Eki ^ Emi ^ Esi; + Bo = Ebo ^ Ego ^ Eko ^ Emo ^ Eso; + Bu = Ebu ^ Egu ^ Eku ^ Emu ^ Esu; + + Da = Bu ^ rol(Be, 1); + De = Ba ^ rol(Bi, 1); + Di = Be ^ rol(Bo, 1); + Do = Bi ^ rol(Bu, 1); + Du = Bo ^ rol(Ba, 1); + + Ba = Eba ^ Da; + Be = rol(Ege ^ De, 44); + Bi = rol(Eki ^ Di, 43); + Bo = rol(Emo ^ Do, 21); + Bu = rol(Esu ^ Du, 14); + Aba = Ba ^ (~Be & Bi) ^ round_constants[round + 1]; + Abe = Be ^ (~Bi & Bo); + Abi = Bi ^ (~Bo & Bu); + Abo = Bo ^ (~Bu & Ba); + Abu = Bu ^ (~Ba & Be); + + Ba = rol(Ebo ^ Do, 28); + Be = rol(Egu ^ Du, 20); + Bi = rol(Eka ^ Da, 3); + Bo = rol(Eme ^ De, 45); + Bu = rol(Esi ^ Di, 61); + Aga = Ba ^ (~Be & Bi); + Age = Be ^ (~Bi & Bo); + Agi = Bi ^ (~Bo & Bu); + Ago = Bo ^ (~Bu & Ba); + Agu = Bu ^ (~Ba & Be); + + Ba = rol(Ebe ^ De, 1); + Be = rol(Egi ^ Di, 6); + Bi = rol(Eko ^ Do, 25); + Bo = rol(Emu ^ Du, 8); + Bu = rol(Esa ^ Da, 18); + Aka = Ba ^ (~Be & Bi); + Ake = Be ^ (~Bi & Bo); + Aki = Bi ^ (~Bo & Bu); + Ako = Bo ^ (~Bu & Ba); + Aku = Bu ^ (~Ba & Be); + + Ba = rol(Ebu ^ Du, 27); + Be = rol(Ega ^ Da, 36); + Bi = rol(Eke ^ De, 10); + Bo = rol(Emi ^ Di, 15); + Bu = rol(Eso ^ Do, 56); + Ama = Ba ^ (~Be & Bi); + Ame = Be ^ (~Bi & Bo); + Ami = Bi ^ (~Bo & Bu); + Amo = Bo ^ (~Bu & Ba); + Amu = Bu ^ (~Ba & Be); + + Ba = rol(Ebi ^ Di, 62); + Be = rol(Ego ^ Do, 55); + Bi = rol(Eku ^ Du, 39); + Bo = rol(Ema ^ Da, 41); + Bu = rol(Ese ^ De, 2); + Asa = Ba ^ (~Be & Bi); + Ase = Be ^ (~Bi & Bo); + Asi = Bi ^ (~Bo & Bu); + Aso = Bo ^ (~Bu & Ba); + Asu = Bu ^ (~Ba & Be); + } + + state[0] = Aba; + state[1] = Abe; + state[2] = Abi; + state[3] = Abo; + state[4] = Abu; + state[5] = Aga; + state[6] = Age; + state[7] = Agi; + state[8] = Ago; + state[9] = Agu; + state[10] = Aka; + state[11] = Ake; + state[12] = Aki; + state[13] = Ako; + state[14] = Aku; + state[15] = Ama; + state[16] = Ame; + state[17] = Ami; + state[18] = Amo; + state[19] = Amu; + state[20] = Asa; + state[21] = Ase; + state[22] = Asi; + state[23] = Aso; + state[24] = Asu; +} diff --git a/src/kawpow/keccakf800.c b/src/kawpow/keccakf800.c new file mode 100644 index 00000000..5b9a1802 --- /dev/null +++ b/src/kawpow/keccakf800.c @@ -0,0 +1,253 @@ +/* ethash: C/C++ implementation of Ethash, the Ethereum Proof of Work algorithm. + * Copyright 2018-2019 Pawel Bylica. + * Licensed under the Apache License, Version 2.0. + */ + +#include + +static uint32_t rol(uint32_t x, unsigned s) +{ + return (x << s) | (x >> (32 - s)); +} + +static const uint32_t round_constants[22] = { + 0x00000001, + 0x00008082, + 0x0000808A, + 0x80008000, + 0x0000808B, + 0x80000001, + 0x80008081, + 0x00008009, + 0x0000008A, + 0x00000088, + 0x80008009, + 0x8000000A, + 0x8000808B, + 0x0000008B, + 0x00008089, + 0x00008003, + 0x00008002, + 0x00000080, + 0x0000800A, + 0x8000000A, + 0x80008081, + 0x00008080, +}; + +void ethash_keccakf800(uint32_t state[25]) +{ + /* The implementation directly translated from ethash_keccakf1600. */ + + int round; + + uint32_t Aba, Abe, Abi, Abo, Abu; + uint32_t Aga, Age, Agi, Ago, Agu; + uint32_t Aka, Ake, Aki, Ako, Aku; + uint32_t Ama, Ame, Ami, Amo, Amu; + uint32_t Asa, Ase, Asi, Aso, Asu; + + uint32_t Eba, Ebe, Ebi, Ebo, Ebu; + uint32_t Ega, Ege, Egi, Ego, Egu; + uint32_t Eka, Eke, Eki, Eko, Eku; + uint32_t Ema, Eme, Emi, Emo, Emu; + uint32_t Esa, Ese, Esi, Eso, Esu; + + uint32_t Ba, Be, Bi, Bo, Bu; + + uint32_t Da, De, Di, Do, Du; + + Aba = state[0]; + Abe = state[1]; + Abi = state[2]; + Abo = state[3]; + Abu = state[4]; + Aga = state[5]; + Age = state[6]; + Agi = state[7]; + Ago = state[8]; + Agu = state[9]; + Aka = state[10]; + Ake = state[11]; + Aki = state[12]; + Ako = state[13]; + Aku = state[14]; + Ama = state[15]; + Ame = state[16]; + Ami = state[17]; + Amo = state[18]; + Amu = state[19]; + Asa = state[20]; + Ase = state[21]; + Asi = state[22]; + Aso = state[23]; + Asu = state[24]; + + for (round = 0; round < 22; round += 2) + { + /* Round (round + 0): Axx -> Exx */ + + Ba = Aba ^ Aga ^ Aka ^ Ama ^ Asa; + Be = Abe ^ Age ^ Ake ^ Ame ^ Ase; + Bi = Abi ^ Agi ^ Aki ^ Ami ^ Asi; + Bo = Abo ^ Ago ^ Ako ^ Amo ^ Aso; + Bu = Abu ^ Agu ^ Aku ^ Amu ^ Asu; + + Da = Bu ^ rol(Be, 1); + De = Ba ^ rol(Bi, 1); + Di = Be ^ rol(Bo, 1); + Do = Bi ^ rol(Bu, 1); + Du = Bo ^ rol(Ba, 1); + + Ba = Aba ^ Da; + Be = rol(Age ^ De, 12); + Bi = rol(Aki ^ Di, 11); + Bo = rol(Amo ^ Do, 21); + Bu = rol(Asu ^ Du, 14); + Eba = Ba ^ (~Be & Bi) ^ round_constants[round]; + Ebe = Be ^ (~Bi & Bo); + Ebi = Bi ^ (~Bo & Bu); + Ebo = Bo ^ (~Bu & Ba); + Ebu = Bu ^ (~Ba & Be); + + Ba = rol(Abo ^ Do, 28); + Be = rol(Agu ^ Du, 20); + Bi = rol(Aka ^ Da, 3); + Bo = rol(Ame ^ De, 13); + Bu = rol(Asi ^ Di, 29); + Ega = Ba ^ (~Be & Bi); + Ege = Be ^ (~Bi & Bo); + Egi = Bi ^ (~Bo & Bu); + Ego = Bo ^ (~Bu & Ba); + Egu = Bu ^ (~Ba & Be); + + Ba = rol(Abe ^ De, 1); + Be = rol(Agi ^ Di, 6); + Bi = rol(Ako ^ Do, 25); + Bo = rol(Amu ^ Du, 8); + Bu = rol(Asa ^ Da, 18); + Eka = Ba ^ (~Be & Bi); + Eke = Be ^ (~Bi & Bo); + Eki = Bi ^ (~Bo & Bu); + Eko = Bo ^ (~Bu & Ba); + Eku = Bu ^ (~Ba & Be); + + Ba = rol(Abu ^ Du, 27); + Be = rol(Aga ^ Da, 4); + Bi = rol(Ake ^ De, 10); + Bo = rol(Ami ^ Di, 15); + Bu = rol(Aso ^ Do, 24); + Ema = Ba ^ (~Be & Bi); + Eme = Be ^ (~Bi & Bo); + Emi = Bi ^ (~Bo & Bu); + Emo = Bo ^ (~Bu & Ba); + Emu = Bu ^ (~Ba & Be); + + Ba = rol(Abi ^ Di, 30); + Be = rol(Ago ^ Do, 23); + Bi = rol(Aku ^ Du, 7); + Bo = rol(Ama ^ Da, 9); + Bu = rol(Ase ^ De, 2); + Esa = Ba ^ (~Be & Bi); + Ese = Be ^ (~Bi & Bo); + Esi = Bi ^ (~Bo & Bu); + Eso = Bo ^ (~Bu & Ba); + Esu = Bu ^ (~Ba & Be); + + + /* Round (round + 1): Exx -> Axx */ + + Ba = Eba ^ Ega ^ Eka ^ Ema ^ Esa; + Be = Ebe ^ Ege ^ Eke ^ Eme ^ Ese; + Bi = Ebi ^ Egi ^ Eki ^ Emi ^ Esi; + Bo = Ebo ^ Ego ^ Eko ^ Emo ^ Eso; + Bu = Ebu ^ Egu ^ Eku ^ Emu ^ Esu; + + Da = Bu ^ rol(Be, 1); + De = Ba ^ rol(Bi, 1); + Di = Be ^ rol(Bo, 1); + Do = Bi ^ rol(Bu, 1); + Du = Bo ^ rol(Ba, 1); + + Ba = Eba ^ Da; + Be = rol(Ege ^ De, 12); + Bi = rol(Eki ^ Di, 11); + Bo = rol(Emo ^ Do, 21); + Bu = rol(Esu ^ Du, 14); + Aba = Ba ^ (~Be & Bi) ^ round_constants[round + 1]; + Abe = Be ^ (~Bi & Bo); + Abi = Bi ^ (~Bo & Bu); + Abo = Bo ^ (~Bu & Ba); + Abu = Bu ^ (~Ba & Be); + + Ba = rol(Ebo ^ Do, 28); + Be = rol(Egu ^ Du, 20); + Bi = rol(Eka ^ Da, 3); + Bo = rol(Eme ^ De, 13); + Bu = rol(Esi ^ Di, 29); + Aga = Ba ^ (~Be & Bi); + Age = Be ^ (~Bi & Bo); + Agi = Bi ^ (~Bo & Bu); + Ago = Bo ^ (~Bu & Ba); + Agu = Bu ^ (~Ba & Be); + + Ba = rol(Ebe ^ De, 1); + Be = rol(Egi ^ Di, 6); + Bi = rol(Eko ^ Do, 25); + Bo = rol(Emu ^ Du, 8); + Bu = rol(Esa ^ Da, 18); + Aka = Ba ^ (~Be & Bi); + Ake = Be ^ (~Bi & Bo); + Aki = Bi ^ (~Bo & Bu); + Ako = Bo ^ (~Bu & Ba); + Aku = Bu ^ (~Ba & Be); + + Ba = rol(Ebu ^ Du, 27); + Be = rol(Ega ^ Da, 4); + Bi = rol(Eke ^ De, 10); + Bo = rol(Emi ^ Di, 15); + Bu = rol(Eso ^ Do, 24); + Ama = Ba ^ (~Be & Bi); + Ame = Be ^ (~Bi & Bo); + Ami = Bi ^ (~Bo & Bu); + Amo = Bo ^ (~Bu & Ba); + Amu = Bu ^ (~Ba & Be); + + Ba = rol(Ebi ^ Di, 30); + Be = rol(Ego ^ Do, 23); + Bi = rol(Eku ^ Du, 7); + Bo = rol(Ema ^ Da, 9); + Bu = rol(Ese ^ De, 2); + Asa = Ba ^ (~Be & Bi); + Ase = Be ^ (~Bi & Bo); + Asi = Bi ^ (~Bo & Bu); + Aso = Bo ^ (~Bu & Ba); + Asu = Bu ^ (~Ba & Be); + } + + state[0] = Aba; + state[1] = Abe; + state[2] = Abi; + state[3] = Abo; + state[4] = Abu; + state[5] = Aga; + state[6] = Age; + state[7] = Agi; + state[8] = Ago; + state[9] = Agu; + state[10] = Aka; + state[11] = Ake; + state[12] = Aki; + state[13] = Ako; + state[14] = Aku; + state[15] = Ama; + state[16] = Ame; + state[17] = Ami; + state[18] = Amo; + state[19] = Amu; + state[20] = Asa; + state[21] = Ase; + state[22] = Asi; + state[23] = Aso; + state[24] = Asu; +} diff --git a/src/kawpow/kiss99.hpp b/src/kawpow/kiss99.hpp new file mode 100644 index 00000000..1d0e3aea --- /dev/null +++ b/src/kawpow/kiss99.hpp @@ -0,0 +1,64 @@ +/* ethash: C/C++ implementation of Ethash, the Ethereum Proof of Work algorithm. + * Copyright 2018-2019 Pawel Bylica. + * Licensed under the Apache License, Version 2.0. + */ + +#pragma once + +#include "attributes.h" +#include + +/** + * KISS PRNG by the spec from 1999. + * + * The implementation of KISS pseudo-random number generator + * by the specification published on 21 Jan 1999 in + * http://www.cse.yorku.ca/~oz/marsaglia-rng.html. + * The KISS is not versioned so here we are using `kiss99` prefix to indicate + * the version from 1999. + * + * The specification uses `unsigned long` type with the intention for 32-bit + * values. Because in GCC/clang for 64-bit architectures `unsigned long` is + * 64-bit size type, here the explicit `uint32_t` type is used. + * + * @defgroup kiss99 KISS99 + * @{ + */ + +/** + * The KISS generator. + */ +class kiss99 +{ + uint32_t z = 362436069; + uint32_t w = 521288629; + uint32_t jsr = 123456789; + uint32_t jcong = 380116160; + +public: + /** Creates KISS generator state with default values provided by the specification. */ + kiss99() noexcept = default; + + /** Creates KISS generator state with provided init values.*/ + kiss99(uint32_t _z, uint32_t _w, uint32_t _jsr, uint32_t _jcong) noexcept + : z{_z}, w{_w}, jsr{_jsr}, jcong{_jcong} + {} + + /** Generates next number from the KISS generator. */ + NO_SANITIZE("unsigned-integer-overflow") + uint32_t operator()() noexcept + { + z = 36969 * (z & 0xffff) + (z >> 16); + w = 18000 * (w & 0xffff) + (w >> 16); + + jcong = 69069 * jcong + 1234567; + + jsr ^= (jsr << 17); + jsr ^= (jsr >> 13); + jsr ^= (jsr << 5); + + return (((z << 16) + w) ^ jcong) + jsr; + } +}; + +/** @} */ diff --git a/src/kawpow/managed.cpp b/src/kawpow/managed.cpp new file mode 100644 index 00000000..47dd49b8 --- /dev/null +++ b/src/kawpow/managed.cpp @@ -0,0 +1,99 @@ +// ethash: C/C++ implementation of Ethash, the Ethereum Proof of Work algorithm. +// Copyright 2018-2019 Pawel Bylica. +// Licensed under the Apache License, Version 2.0. + +#include "ethash-internal.hpp" + +#include +#include + +#if !defined(__has_cpp_attribute) +#define __has_cpp_attribute(x) 0 +#endif + +#if __has_cpp_attribute(gnu::noinline) +#define ATTRIBUTE_NOINLINE [[gnu::noinline]] +#elif _MSC_VER +#define ATTRIBUTE_NOINLINE __declspec(noinline) +#else +#define ATTRIBUTE_NOINLINE +#endif + +using namespace ethash; + +namespace +{ +std::mutex shared_context_mutex; +std::shared_ptr shared_context; +thread_local std::shared_ptr thread_local_context; + +std::mutex shared_context_full_mutex; +std::shared_ptr shared_context_full; +thread_local std::shared_ptr thread_local_context_full; + +/// Update thread local epoch context. +/// +/// This function is on the slow path. It's separated to allow inlining the fast +/// path. +/// +/// @todo: Redesign to guarantee deallocation before new allocation. +ATTRIBUTE_NOINLINE +void update_local_context(int epoch_number) +{ + // Release the shared pointer of the obsoleted context. + thread_local_context.reset(); + + // Local context invalid, check the shared context. + std::lock_guard lock{shared_context_mutex}; + + if (!shared_context || shared_context->epoch_number != epoch_number) + { + // Release the shared pointer of the obsoleted context. + shared_context.reset(); + + // Build new context. + shared_context = create_epoch_context(epoch_number); + } + + thread_local_context = shared_context; +} + +ATTRIBUTE_NOINLINE +void update_local_context_full(int epoch_number) +{ + // Release the shared pointer of the obsoleted context. + thread_local_context_full.reset(); + + // Local context invalid, check the shared context. + std::lock_guard lock{shared_context_full_mutex}; + + if (!shared_context_full || shared_context_full->epoch_number != epoch_number) + { + // Release the shared pointer of the obsoleted context. + shared_context_full.reset(); + + // Build new context. + shared_context_full = create_epoch_context_full(epoch_number); + } + + thread_local_context_full = shared_context_full; +} +} // namespace + +const ethash_epoch_context* ethash_get_global_epoch_context(int epoch_number) noexcept +{ + // Check if local context matches epoch number. + if (!thread_local_context || thread_local_context->epoch_number != epoch_number) + update_local_context(epoch_number); + + return thread_local_context.get(); +} + +const ethash_epoch_context_full* ethash_get_global_epoch_context_full(int epoch_number) noexcept +{ + // Check if local context matches epoch number. + if (!thread_local_context_full || thread_local_context_full->epoch_number != epoch_number) + update_local_context_full(epoch_number); + + return thread_local_context_full.get(); +} diff --git a/src/kawpow/primes.c b/src/kawpow/primes.c new file mode 100644 index 00000000..e27a535e --- /dev/null +++ b/src/kawpow/primes.c @@ -0,0 +1,43 @@ +/* ethash: C/C++ implementation of Ethash, the Ethereum Proof of Work algorithm. + * Copyright 2018-2019 Pawel Bylica. + * Licensed under the Apache License, Version 2.0. + */ + +#include "primes.h" + +/** Checks if the number is prime. Requires the number to be > 2 and odd. */ +static int is_odd_prime(int number) +{ + int d; + + /* Check factors up to sqrt(number). + To avoid computing sqrt, compare d*d <= number with 64-bit precision. */ + for (d = 3; (int64_t)d * (int64_t)d <= (int64_t)number; d += 2) + { + if (number % d == 0) + return 0; + } + + return 1; +} + +int ethash_find_largest_prime(int upper_bound) +{ + int n = upper_bound; + + if (n < 2) + return 0; + + if (n == 2) + return 2; + + /* If even number, skip it. */ + if (n % 2 == 0) + --n; + + /* Test descending odd numbers. */ + while (!is_odd_prime(n)) + n -= 2; + + return n; +} diff --git a/src/kawpow/primes.h b/src/kawpow/primes.h new file mode 100644 index 00000000..e37c532f --- /dev/null +++ b/src/kawpow/primes.h @@ -0,0 +1,25 @@ +/* ethash: C/C++ implementation of Ethash, the Ethereum Proof of Work algorithm. + * Copyright 2018-2019 Pawel Bylica. + * Licensed under the Apache License, Version 2.0. + */ + +#pragma once + +#include "ethash.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Finds the largest prime number not greater than the provided upper bound. + * + * @param upper_bound The upper bound. SHOULD be greater than 1. + * @return The largest prime number `p` such `p <= upper_bound`. + * In case `upper_bound <= 1`, returns 0. + */ +int ethash_find_largest_prime(int upper_bound) NOEXCEPT; + +#ifdef __cplusplus +} +#endif diff --git a/src/multihashing.cc b/src/multihashing.cc index 8959b4d3..801a6ed0 100644 --- a/src/multihashing.cc +++ b/src/multihashing.cc @@ -39,6 +39,7 @@ extern "C" { #include "crypto/yescrypt/yescrypt.h" } +#include "kawpow.hpp" #include "boolberry.h" using namespace node; @@ -383,6 +384,50 @@ DECLARE_FUNC(boolberry) { SET_BUFFER_RETURN(output, 32); } +DECLARE_FUNC(kawpow) { + if (info.Length() < 3) + RETURN_EXCEPT("You must provide 3 arguments."); + Local obj1 = Nan::To(info[0]).ToLocalChecked(); + Local obj2 = Nan::To(info[1]).ToLocalChecked(); + uint32_t height = 1; + if(!Buffer::HasInstance(obj1)) + RETURN_EXCEPT("Argument 1 (header hash) should be a buffer object."); + uint32_t obj1_len = Buffer::Length(obj1); + if (obj1_len != 32) + RETURN_EXCEPT("The header hash should be 32 bytes."); + if(!Buffer::HasInstance(obj2)) + RETURN_EXCEPT("Argument 2 (nonce) should be a buffer object."); + uint32_t obj2_len = Buffer::Length(obj2); + if (obj2_len != 8) + RETURN_EXCEPT("The nonce should be 8 bytes."); + if(info[2]->IsUint32()) + height = Nan::To(info[2]).ToChecked(); + else + RETURN_EXCEPT("Argument 3 (height) should be an unsigned integer."); + + uint64_t nonce = 0; + + char *nonce_data = Buffer::Data(obj2); + + std::memcpy((uint8_t *)&nonce, nonce_data, 8); + + uint8_t *header_hash = (uint8_t *)Buffer::Data(obj1); + ethash::hash256 hash = {}; + for (int i = 0; i < 32; i++) + { + hash.bytes[i] = header_hash[i]; + } + + char output[64]; + + auto context = ethash::create_epoch_context(ethash::get_epoch_number(height)); + const auto result = progpow::k_hash(*context, height, hash, nonce); + + std::memcpy(output, result.final_hash.bytes, 32); + std::memcpy(&output[32], result.mix_hash.bytes, 32); + + SET_BUFFER_RETURN(output, 64); +} NAN_MODULE_INIT(init) { NAN_EXPORT(target, argon2d); @@ -391,6 +436,7 @@ NAN_MODULE_INIT(init) { NAN_EXPORT(target, bcrypt); NAN_EXPORT(target, blake); NAN_EXPORT(target, boolberry); + NAN_EXPORT(target, kawpow); NAN_EXPORT(target, c11); NAN_EXPORT(target, cryptonight); NAN_EXPORT(target, cryptonightfast);