diff --git a/include/bitcoin/system/hash/sha/algorithm.hpp b/include/bitcoin/system/hash/sha/algorithm.hpp index 480d470de4..e5e635a4a6 100644 --- a/include/bitcoin/system/hash/sha/algorithm.hpp +++ b/include/bitcoin/system/hash/sha/algorithm.hpp @@ -234,8 +234,8 @@ class algorithm INLINE static constexpr void input(buffer_t& buffer, const block_t& block) NOEXCEPT; INLINE static constexpr void input_left(auto& buffer, const half_t& half) NOEXCEPT; INLINE static constexpr void input_right(auto& buffer, const half_t& half) NOEXCEPT; - INLINE static constexpr void reinput_left(auto& buffer, const auto& left) NOEXCEPT; - INLINE static constexpr void reinput_right(auto& buffer, const auto& right) NOEXCEPT; + INLINE static constexpr void inject_left(auto& buffer, const auto& left) NOEXCEPT; + INLINE static constexpr void inject_right(auto& buffer, const auto& right) NOEXCEPT; INLINE static constexpr digest_t output(const state_t& state) NOEXCEPT; /// Padding. diff --git a/include/bitcoin/system/impl/hash/sha/algorithm_double.ipp b/include/bitcoin/system/impl/hash/sha/algorithm_double.ipp index ea13a311ae..0ccd6c2361 100644 --- a/include/bitcoin/system/impl/hash/sha/algorithm_double.ipp +++ b/include/bitcoin/system/impl/hash/sha/algorithm_double.ipp @@ -84,7 +84,7 @@ double_hash(const block_t& block) NOEXCEPT { static_assert(is_same_type); - const auto hash2 = [](const block_t& block) NOEXCEPT + const auto hasher = [](const block_t& block) NOEXCEPT { auto state = H::get; buffer_t buffer{}; @@ -95,7 +95,7 @@ double_hash(const block_t& block) NOEXCEPT compress(state, buffer); // Second hash - reinput_left(buffer, state); + inject_left(buffer, state); pad_half(buffer); schedule(buffer); state = H::get; @@ -106,7 +106,7 @@ double_hash(const block_t& block) NOEXCEPT if (std::is_constant_evaluated()) { - return hash2(block); + return hasher(block); } else if constexpr (native && SHA::strength == 256) { @@ -114,7 +114,7 @@ double_hash(const block_t& block) NOEXCEPT } else { - 
return hash2(block); + return hasher(block); } } @@ -124,7 +124,7 @@ double_hash(const half_t& half) NOEXCEPT { static_assert(is_same_type); - const auto hash2 = [](const half_t& half) NOEXCEPT + const auto hasher = [](const half_t& half) NOEXCEPT { auto state = H::get; buffer_t buffer{}; @@ -134,7 +134,7 @@ double_hash(const half_t& half) NOEXCEPT compress(state, buffer); // Second hash - reinput_left(buffer, state); + inject_left(buffer, state); pad_half(buffer); schedule(buffer); state = H::get; @@ -145,7 +145,7 @@ double_hash(const half_t& half) NOEXCEPT if (std::is_constant_evaluated()) { - return hash2(half); + return hasher(half); } else if constexpr (native && SHA::strength == 256) { @@ -153,7 +153,7 @@ double_hash(const half_t& half) NOEXCEPT } else { - return hash2(half); + return hasher(half); } } @@ -163,7 +163,7 @@ double_hash(const half_t& left, const half_t& right) NOEXCEPT { static_assert(is_same_type); - const auto hash2 = [](const half_t& left, const half_t& right) NOEXCEPT + const auto hasher = [](const half_t& left, const half_t& right) NOEXCEPT { auto state = H::get; buffer_t buffer{}; @@ -175,7 +175,7 @@ double_hash(const half_t& left, const half_t& right) NOEXCEPT compress(state, buffer); // Second hash - reinput_left(buffer, state); + inject_left(buffer, state); pad_half(buffer); schedule(buffer); state = H::get; @@ -186,7 +186,7 @@ double_hash(const half_t& left, const half_t& right) NOEXCEPT if (std::is_constant_evaluated()) { - return hash2(left, right); + return hasher(left, right); } else if constexpr (native && SHA::strength == 256) { @@ -194,7 +194,7 @@ double_hash(const half_t& left, const half_t& right) NOEXCEPT } else { - return hash2(left, right); + return hasher(left, right); } } diff --git a/include/bitcoin/system/impl/hash/sha/algorithm_functions.ipp b/include/bitcoin/system/impl/hash/sha/algorithm_functions.ipp index 4cfb1e2df1..98b6d6d13a 100644 --- a/include/bitcoin/system/impl/hash/sha/algorithm_functions.ipp +++ 
b/include/bitcoin/system/impl/hash/sha/algorithm_functions.ipp @@ -16,8 +16,8 @@ * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ -#ifndef LIBBITCOIN_SYSTEM_HASH_SHA_ALGORITHM_FUCNTIONS_IPP -#define LIBBITCOIN_SYSTEM_HASH_SHA_ALGORITHM_FUCNTIONS_IPP +#ifndef LIBBITCOIN_SYSTEM_HASH_SHA_ALGORITHM_FUNCTIONS_IPP +#define LIBBITCOIN_SYSTEM_HASH_SHA_ALGORITHM_FUNCTIONS_IPP // 4.1 Functions // ============================================================================ diff --git a/include/bitcoin/system/impl/hash/sha/algorithm_konstant.ipp b/include/bitcoin/system/impl/hash/sha/algorithm_konstant.ipp index 26cf225d14..99d47edd0a 100644 --- a/include/bitcoin/system/impl/hash/sha/algorithm_konstant.ipp +++ b/include/bitcoin/system/impl/hash/sha/algorithm_konstant.ipp @@ -252,15 +252,16 @@ TEMPLATE INLINE constexpr void CLASS:: konstant(buffer_t& buffer) NOEXCEPT { - if (std::is_constant_evaluated()) - { - konstant_(buffer); - } - else if constexpr (vector && !with_clang) - { - vector_konstant(buffer); - } - else + // This optimization is neutral in 4/8/16 lane sha256 perf. 
+ ////if (std::is_constant_evaluated()) + ////{ + //// konstant_(buffer); + ////} + ////else if constexpr (vector && !with_clang) + ////{ + //// vector_konstant(buffer); + ////} + ////else { konstant_(buffer); } diff --git a/include/bitcoin/system/impl/hash/sha/algorithm_merkle.ipp b/include/bitcoin/system/impl/hash/sha/algorithm_merkle.ipp index 05d8ae2a56..aa3d8e4f98 100644 --- a/include/bitcoin/system/impl/hash/sha/algorithm_merkle.ipp +++ b/include/bitcoin/system/impl/hash/sha/algorithm_merkle.ipp @@ -372,7 +372,7 @@ merkle_hash_vector(idigests_t& digests, iblocks_t& blocks) NOEXCEPT compress_(xstate, xbuffer); // Second hash - reinput_left(xbuffer, xstate); + inject_left(xbuffer, xstate); pad_half(xbuffer); schedule_(xbuffer); xstate = initial; diff --git a/include/bitcoin/system/impl/hash/sha/algorithm_native.ipp b/include/bitcoin/system/impl/hash/sha/algorithm_native.ipp index b63156804e..f605fa9a80 100644 --- a/include/bitcoin/system/impl/hash/sha/algorithm_native.ipp +++ b/include/bitcoin/system/impl/hash/sha/algorithm_native.ipp @@ -21,8 +21,10 @@ // Native (SHA-NI or NEON) // ============================================================================ -// The iterative method is used for sha native as it is an order of magnitude -// more efficient and cannot benefit from vectorization. +// The rotating variables method is used for sha native. The native +// instructions rely on register locality to achieve performance benefits. +// Implementation of native sha using buffer expansion is horribly slow. +// This split creates bifurcations (additional complexities) in this template. namespace libbitcoin { namespace system { @@ -98,9 +100,6 @@ round_4(xint128_t& state0, xint128_t& state1, xint128_t message) NOEXCEPT // Platform agnostic. // ---------------------------------------------------------------------------- -// Individual state vars are used vs. array to ensure register persistence. 
-// This creates bifurcations in this template because of the lack of a buffer -// and the differing optimal locations for applying endianness conversions. TEMPLATE template @@ -223,6 +222,9 @@ native_transform(state_t& state, const auto& block) NOEXCEPT // accumulation and performs big-endian conversion from state_t to digest_t. // As padding blocks are generated and therefore do not require endianness // conversion, those calls are not applied when transforming the pad block. +// This lack of conversion also applies to double hashing. In both cases +// the "inject" functions are used in place of the "input" functions. +// There is no benefit to caching padding because it is not prescheduled. // ---------------------------------------------------------------------------- TEMPLATE @@ -251,8 +253,6 @@ template typename CLASS::digest_t CLASS:: native_finalize(state_t& state) NOEXCEPT { - // We could use Blocks to cache padding but given the padding blocks are - // unscheduled when performing native transformations there's no benefit. return native_finalize(state, Blocks); } @@ -273,7 +273,7 @@ native_finalize_second(const state_t& state) NOEXCEPT // Hash a state value and finalize it. auto state2 = H::get; words_t block{}; - reinput_left(block, state); // swapped + inject_left(block, state); // swapped pad_half(block); // swapped return native_finalize(state2, block); // no block swap (swaps state) } @@ -288,7 +288,7 @@ native_finalize_double(state_t& state, size_t blocks) NOEXCEPT // This is native_finalize_second() but reuses the initial block. 
auto state2 = H::get; - reinput_left(block, state); // swapped + inject_left(block, state); // swapped pad_half(block); // swapped return native_finalize(state2, block); // no block swap (swaps state) } @@ -316,8 +316,8 @@ native_hash(const half_t& left, const half_t& right) NOEXCEPT { auto state = H::get; words_t block{}; - reinput_left(block, array_cast(left)); // unswapped - reinput_right(block, array_cast(right)); // unswapped + inject_left(block, array_cast(left)); // unswapped + inject_right(block, array_cast(right)); // unswapped native_transform(state, block); // swap return native_finalize(state); // no block swap (swaps state) } @@ -335,7 +335,7 @@ native_double_hash(const block_t& block) NOEXCEPT // Second hash words_t block2{}; - reinput_left(block2, state); // swapped + inject_left(block2, state); // swapped pad_half(block2); // swapped state = H::get; // [reuse state var] return native_finalize(state, block2); // no block swap (swaps state) @@ -352,7 +352,7 @@ native_double_hash(const half_t& half) NOEXCEPT native_transform(state, block); // no block swap // Second hash - reinput_left(block, state); // swapped + inject_left(block, state); // swapped pad_half(block); // swapped state = H::get; // [reuse state var] return native_finalize(state, block); // no block swap (swaps state) @@ -364,13 +364,13 @@ native_double_hash(const half_t& left, const half_t& right) NOEXCEPT { auto state = H::get; words_t block{}; - reinput_left(block, array_cast(left)); // unswapped - reinput_right(block, array_cast(right)); // unswapped - native_transform(state, block); // swap - native_transform(state, pad_block()); // swapped + inject_left(block, array_cast(left)); // unswapped + inject_right(block, array_cast(right)); // unswapped + native_transform(state, block); // swap + native_transform(state, pad_block()); // swapped // Second hash - reinput_left(block, state); // swapped + inject_left(block, state); // swapped pad_half(block); // swapped state = H::get; // 
[reuse state var] return native_finalize(state, block); // no block swap (swaps state) diff --git a/include/bitcoin/system/impl/hash/sha/algorithm_parsing.ipp b/include/bitcoin/system/impl/hash/sha/algorithm_parsing.ipp index 9ba22b5f38..c612254af2 100644 --- a/include/bitcoin/system/impl/hash/sha/algorithm_parsing.ipp +++ b/include/bitcoin/system/impl/hash/sha/algorithm_parsing.ipp @@ -58,6 +58,7 @@ input(buffer_t& buffer, const block_t& block) NOEXCEPT } else if constexpr (bc::is_little_endian) { + // This optimization is neutral in 4/8/16 lane sha256 perf. ////if constexpr (have_lanes && !with_clang) ////{ //// using xword_t = to_extended; @@ -131,6 +132,7 @@ input_left(auto& buffer, const half_t& half) NOEXCEPT } else if constexpr (bc::is_little_endian) { + // This optimization is neutral in 4/8 lane sha256 perf. ////if constexpr (have_lanes && !with_clang) ////{ //// using xword_t = to_extended; @@ -185,6 +187,7 @@ input_right(auto& buffer, const half_t& half) NOEXCEPT } else if constexpr (bc::is_little_endian) { + // This optimization is neutral in 4/8 lane sha256 perf. ////if constexpr (have_lanes && !with_clang) ////{ //// using xword_t = to_extended; @@ -248,6 +251,7 @@ output(const state_t& state) NOEXCEPT { if constexpr (SHA::strength != 160) { + // This optimization is neutral in 4/8 lane sha256 perf. 
////if constexpr (have_lanes && !with_clang) ////{ //// using xword_t = to_extended; @@ -306,7 +310,7 @@ output(const state_t& state) NOEXCEPT TEMPLATE INLINE constexpr void CLASS:: -reinput_left(auto& buffer, const auto& left) NOEXCEPT +inject_left(auto& buffer, const auto& left) NOEXCEPT { using words = decltype(buffer); static_assert(array_count >= SHA::state_words); @@ -331,7 +335,7 @@ reinput_left(auto& buffer, const auto& left) NOEXCEPT TEMPLATE INLINE constexpr void CLASS:: -reinput_right(auto& buffer, const auto& right) NOEXCEPT +inject_right(auto& buffer, const auto& right) NOEXCEPT { using words = decltype(buffer); static_assert(array_count >= SHA::state_words); diff --git a/include/bitcoin/system/impl/hash/sha/algorithm_single.ipp b/include/bitcoin/system/impl/hash/sha/algorithm_single.ipp index 9e27cf11cb..af11afcddb 100644 --- a/include/bitcoin/system/impl/hash/sha/algorithm_single.ipp +++ b/include/bitcoin/system/impl/hash/sha/algorithm_single.ipp @@ -62,7 +62,7 @@ TEMPLATE constexpr typename CLASS::digest_t CLASS:: hash(const half_t& half) NOEXCEPT { - const auto hash1 = [](const half_t& half) NOEXCEPT + const auto hasher = [](const half_t& half) NOEXCEPT { auto state = H::get; buffer_t buffer{}; @@ -75,7 +75,7 @@ hash(const half_t& half) NOEXCEPT if (std::is_constant_evaluated()) { - return hash1(half); + return hasher(half); } else if constexpr (native && SHA::strength == 256) { @@ -83,7 +83,7 @@ hash(const half_t& half) NOEXCEPT } else { - return hash1(half); + return hasher(half); } } @@ -91,7 +91,7 @@ TEMPLATE constexpr typename CLASS::digest_t CLASS:: hash(const half_t& left, const half_t& right) NOEXCEPT { - const auto hash1 = [](const half_t& left, const half_t& right) NOEXCEPT + const auto hasher = [](const half_t& left, const half_t& right) NOEXCEPT { auto state = H::get; buffer_t buffer{}; @@ -106,7 +106,7 @@ hash(const half_t& left, const half_t& right) NOEXCEPT if (std::is_constant_evaluated()) { - return hash1(left, right); + return 
hasher(left, right); } else if constexpr (native && SHA::strength == 256) { @@ -114,7 +114,7 @@ hash(const half_t& left, const half_t& right) NOEXCEPT } else { - return hash1(left, right); + return hasher(left, right); } } diff --git a/include/bitcoin/system/impl/hash/sha/algorithm_stream.ipp b/include/bitcoin/system/impl/hash/sha/algorithm_stream.ipp index a018fae98d..9e56b423fd 100644 --- a/include/bitcoin/system/impl/hash/sha/algorithm_stream.ipp +++ b/include/bitcoin/system/impl/hash/sha/algorithm_stream.ipp @@ -116,7 +116,7 @@ finalize_second(const state_t& state) NOEXCEPT { auto state2 = H::get; buffer_t buffer{}; - reinput_left(buffer, state); + inject_left(buffer, state); pad_half(buffer); schedule(buffer); compress(state2, buffer); @@ -150,7 +150,7 @@ finalize_double(state_t& state, size_t blocks) NOEXCEPT // This is finalize_second() but reuses the initial buffer. auto state2 = H::get; - reinput_left(buffer, state); + inject_left(buffer, state); pad_half(buffer); schedule(buffer); compress(state2, buffer);