Refactor long-string hash computation and move the `len <= PiecewiseChunkSize()` check out of line, so that the inlined hash code contains only one function call.

This change also moves the low-level hash into hash.cc. That allows the compiler to inline the low-level hash into its caller instead of emitting two function calls. I also find the code easier to follow this way.

```
name                           old CYCLES/op        new CYCLES/op        delta
BM_latency_AbslHash_Int32        16.0 ± 3%            16.1 ± 3%    ~         (p=0.285 n=151+150)
BM_latency_AbslHash_Int64        16.4 ± 5%            16.5 ± 4%    ~         (p=0.073 n=152+144)
BM_latency_AbslHash_String3      21.1 ± 0%            21.1 ± 0%  -0.07%      (p=0.000 n=132+140)
BM_latency_AbslHash_String5      21.1 ±11%            21.1 ±10%    ~         (p=0.862 n=152+154)
BM_latency_AbslHash_String9      21.0 ±13%            20.7 ±14%  -1.27%      (p=0.006 n=151+147)
BM_latency_AbslHash_String17     19.9 ±11%            20.5 ±21%  +3.27%      (p=0.033 n=131+144)
BM_latency_AbslHash_String33     21.1 ± 4%            21.2 ± 5%    ~         (p=0.122 n=140+146)
BM_latency_AbslHash_String65     26.9 ± 7%            27.2 ± 7%  +1.06%      (p=0.000 n=140+145)
BM_latency_AbslHash_String257    44.4 ±10%            44.5 ±11%    ~         (p=0.754 n=149+143)
```

PiperOrigin-RevId: 770672450
Change-Id: Ia6195c8a149cae89c8ca9013d4bf48052c09ba55
This commit is contained in:
Vitaly Goldshteyn
2025-06-12 08:47:30 -07:00
committed by Copybara-Service
parent f8288c18a1
commit 2ea5334068
8 changed files with 321 additions and 429 deletions

View File

@@ -159,8 +159,6 @@ set(ABSL_INTERNAL_DLL_FILES
"hash/internal/hash.h"
"hash/internal/hash.cc"
"hash/internal/spy_hash_state.h"
"hash/internal/low_level_hash.h"
"hash/internal/low_level_hash.cc"
"hash/internal/weakly_mixed_integer.h"
"log/absl_check.h"
"log/absl_log.h"

View File

@@ -43,11 +43,11 @@ cc_library(
linkopts = ABSL_DEFAULT_LINKOPTS,
deps = [
":city",
":low_level_hash",
":weakly_mixed_integer",
"//absl/base:config",
"//absl/base:core_headers",
"//absl/base:endian",
"//absl/base:prefetch",
"//absl/container:fixed_array",
"//absl/functional:function_ref",
"//absl/meta:type_traits",
@@ -187,22 +187,6 @@ cc_test(
],
)
# Library target for internal/low_level_hash.{h,cc}. Visibility is private, so
# only targets in this package can depend on it.
cc_library(
name = "low_level_hash",
srcs = ["internal/low_level_hash.cc"],
hdrs = ["internal/low_level_hash.h"],
copts = ABSL_DEFAULT_COPTS,
linkopts = ABSL_DEFAULT_LINKOPTS,
visibility = ["//visibility:private"],
deps = [
"//absl/base:config",
"//absl/base:core_headers",
"//absl/base:endian",
"//absl/base:prefetch",
"//absl/numeric:int128",
],
)
cc_library(
name = "weakly_mixed_integer",
hdrs = ["internal/weakly_mixed_integer.h"],
@@ -225,7 +209,7 @@ cc_test(
linkopts = ABSL_DEFAULT_LINKOPTS,
visibility = ["//visibility:private"],
deps = [
":low_level_hash",
":hash",
"//absl/strings",
"@googletest//:gtest",
"@googletest//:gtest_main",

View File

@@ -38,7 +38,6 @@ absl_cc_library(
absl::optional
absl::variant
absl::utility
absl::low_level_hash
absl::weakly_mixed_integer
PUBLIC
)
@@ -153,24 +152,6 @@ absl_cc_test(
GTest::gmock_main
)
# Internal-only target, do not depend on directly.
# Builds internal/low_level_hash.h and internal/low_level_hash.cc.
absl_cc_library(
NAME
low_level_hash
HDRS
"internal/low_level_hash.h"
SRCS
"internal/low_level_hash.cc"
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
absl::config
absl::core_headers
absl::endian
absl::int128
absl::prefetch
)
# Internal-only target, do not depend on directly.
absl_cc_library(
NAME
@@ -191,7 +172,7 @@ absl_cc_test(
COPTS
${ABSL_TEST_COPTS}
DEPS
absl::low_level_hash
absl::hash
absl::strings
GTest::gmock_main
)

View File

@@ -14,27 +14,102 @@
#include "absl/hash/internal/hash.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <type_traits>
#include "absl/base/attributes.h"
#include "absl/base/config.h"
#include "absl/base/internal/unaligned_access.h"
#include "absl/base/optimization.h"
#include "absl/base/prefetch.h"
#include "absl/hash/internal/city.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace hash_internal {
uint64_t MixingHashState::CombineLargeContiguousImpl32(
const unsigned char* first, size_t len, uint64_t state) {
namespace {
// Mixes the 32 bytes starting at `ptr` with `current_state` and returns the
// combined value. The bytes are read as four unaligned 64-bit words; the first
// pair and the second pair are mixed separately and then XORed together.
uint64_t Mix32Bytes(const uint8_t* ptr, uint64_t current_state) {
  const uint64_t word0 = absl::base_internal::UnalignedLoad64(ptr);
  const uint64_t word1 = absl::base_internal::UnalignedLoad64(ptr + 8);
  const uint64_t word2 = absl::base_internal::UnalignedLoad64(ptr + 16);
  const uint64_t word3 = absl::base_internal::UnalignedLoad64(ptr + 24);

  const uint64_t first_half =
      Mix(word0 ^ kStaticRandomData[1], word1 ^ current_state);
  const uint64_t second_half =
      Mix(word2 ^ kStaticRandomData[2], word3 ^ current_state);
  return first_half ^ second_half;
}
// Hashes the `len` bytes starting at `data` into a 64-bit value, starting from
// `seed`. Requires len > 32 (asserted below).
// NOTE(review): [[maybe_unused]] is presumably here because the only call
// visible in this file sits behind ABSL_HAVE_INTRINSIC_INT128 -- confirm.
[[maybe_unused]] uint64_t LowLevelHashLenGt32(const void* data, size_t len,
uint64_t seed) {
assert(len > 32);
const uint8_t* ptr = static_cast<const uint8_t*>(data);
// The original length is folded into the initial state.
uint64_t current_state = seed ^ kStaticRandomData[0] ^ len;
// Taken before `ptr` and `len` are advanced, so the final mix always covers
// the last 32 bytes of the input.
const uint8_t* last_32_ptr = ptr + len - 32;
if (len > 64) {
// If we have more than 64 bytes, we're going to handle chunks of 64
// bytes at a time. We're going to build up four separate hash states
// which we will then hash together. Each state depends only on its own
// previous value, so the four Mix() calls in one iteration do not depend
// on one another.
uint64_t duplicated_state0 = current_state;
uint64_t duplicated_state1 = current_state;
uint64_t duplicated_state2 = current_state;
do {
// Always prefetch the next cacheline.
PrefetchToLocalCache(ptr + ABSL_CACHELINE_SIZE);
uint64_t a = absl::base_internal::UnalignedLoad64(ptr);
uint64_t b = absl::base_internal::UnalignedLoad64(ptr + 8);
uint64_t c = absl::base_internal::UnalignedLoad64(ptr + 16);
uint64_t d = absl::base_internal::UnalignedLoad64(ptr + 24);
uint64_t e = absl::base_internal::UnalignedLoad64(ptr + 32);
uint64_t f = absl::base_internal::UnalignedLoad64(ptr + 40);
uint64_t g = absl::base_internal::UnalignedLoad64(ptr + 48);
uint64_t h = absl::base_internal::UnalignedLoad64(ptr + 56);
current_state = Mix(a ^ kStaticRandomData[1], b ^ current_state);
duplicated_state0 = Mix(c ^ kStaticRandomData[2], d ^ duplicated_state0);
duplicated_state1 = Mix(e ^ kStaticRandomData[3], f ^ duplicated_state1);
duplicated_state2 = Mix(g ^ kStaticRandomData[4], h ^ duplicated_state2);
ptr += 64;
len -= 64;
} while (len > 64);
// Fold the four states back into a single one.
current_state = (current_state ^ duplicated_state0) ^
(duplicated_state1 + duplicated_state2);
}
// We now have a data `ptr` with at most 64 bytes and the current state
// of the hashing state machine stored in current_state.
if (len > 32) {
current_state = Mix32Bytes(ptr, current_state);
}
// We now have a data `ptr` with at most 32 bytes and the current state
// of the hashing state machine stored in current_state. But we can
// safely read from `ptr + len - 32`, which is the address saved in
// `last_32_ptr`; that read may overlap bytes that were already mixed above.
return Mix32Bytes(last_32_ptr, current_state);
}
// Hashes one block of `len` bytes at `data` into `state` for the 32-bit
// size_t code path: the length-mixed state is XORed with CityHash32 of the
// bytes and the result is mixed with kMul.
ABSL_ATTRIBUTE_ALWAYS_INLINE inline uint64_t HashBlockOn32Bit(
    const unsigned char* data, size_t len, uint64_t state) {
  // TODO(b/417141985): expose and use CityHash32WithSeed.
  const uint64_t state_with_length = PrecombineLengthMix(state, len);
  const uint64_t block_hash =
      hash_internal::CityHash32(reinterpret_cast<const char*>(data), len);
  return Mix(state_with_length ^ block_hash, kMul);
}
ABSL_ATTRIBUTE_NOINLINE uint64_t
SplitAndCombineOn32Bit(const unsigned char* first, size_t len, uint64_t state) {
while (len >= PiecewiseChunkSize()) {
// TODO(b/417141985): avoid code duplication with CombineContiguousImpl.
state =
Mix(PrecombineLengthMix(state, PiecewiseChunkSize()) ^
hash_internal::CityHash32(reinterpret_cast<const char*>(first),
PiecewiseChunkSize()),
kMul);
state = HashBlockOn32Bit(first, PiecewiseChunkSize(), state);
len -= PiecewiseChunkSize();
first += PiecewiseChunkSize();
}
@@ -48,10 +123,20 @@ uint64_t MixingHashState::CombineLargeContiguousImpl32(
std::integral_constant<int, 4>{});
}
uint64_t MixingHashState::CombineLargeContiguousImpl64(
const unsigned char* first, size_t len, uint64_t state) {
// Hashes one block of `len` bytes at `data` into `state` for the 64-bit
// size_t code path. Uses LowLevelHashLenGt32 when ABSL_HAVE_INTRINSIC_INT128
// is defined and seeded CityHash64 otherwise.
ABSL_ATTRIBUTE_ALWAYS_INLINE inline uint64_t HashBlockOn64Bit(
    const unsigned char* data, size_t len, uint64_t state) {
#ifndef ABSL_HAVE_INTRINSIC_INT128
  const char* const bytes = reinterpret_cast<const char*>(data);
  return hash_internal::CityHash64WithSeed(bytes, len, state);
#else
  return LowLevelHashLenGt32(data, len, state);
#endif
}
ABSL_ATTRIBUTE_NOINLINE uint64_t
SplitAndCombineOn64Bit(const unsigned char* first, size_t len, uint64_t state) {
while (len >= PiecewiseChunkSize()) {
state = Hash64(first, PiecewiseChunkSize(), state);
state = HashBlockOn64Bit(first, PiecewiseChunkSize(), state);
len -= PiecewiseChunkSize();
first += PiecewiseChunkSize();
}
@@ -65,6 +150,30 @@ uint64_t MixingHashState::CombineLargeContiguousImpl64(
std::integral_constant<int, 8>{});
}
} // namespace
// Out-of-line hashing of inputs longer than 8 bytes on targets where size_t is
// 4 bytes wide. Inputs of at most PiecewiseChunkSize() bytes are hashed as one
// block; longer inputs go through SplitAndCombineOn32Bit.
uint64_t CombineLargeContiguousImplOn32BitLengthGt8(const unsigned char* first,
                                                    size_t len,
                                                    uint64_t state) {
  assert(len > 8);
  assert(sizeof(size_t) == 4);  // NOLINT(misc-static-assert)
  const bool fits_in_one_chunk = len <= PiecewiseChunkSize();
  if (ABSL_PREDICT_TRUE(fits_in_one_chunk)) {
    return HashBlockOn32Bit(first, len, state);
  }
  return SplitAndCombineOn32Bit(first, len, state);
}
// Out-of-line hashing of inputs longer than 32 bytes on targets where size_t
// is 8 bytes wide. Inputs of at most PiecewiseChunkSize() bytes are hashed as
// one block; longer inputs go through SplitAndCombineOn64Bit.
uint64_t CombineLargeContiguousImplOn64BitLengthGt32(const unsigned char* first,
                                                     size_t len,
                                                     uint64_t state) {
  assert(len > 32);
  assert(sizeof(size_t) == 8);  // NOLINT(misc-static-assert)
  return ABSL_PREDICT_TRUE(len <= PiecewiseChunkSize())
             ? HashBlockOn64Bit(first, len, state)
             : SplitAndCombineOn64Bit(first, len, state);
}
// Out-of-class definition of MixingHashState::kSeed. The pointer is
// initialized with its own address.
// NOTE(review): presumably this makes the seed value depend on where the
// binary is loaded -- confirm against the comment on the declaration.
ABSL_CONST_INIT const void* const MixingHashState::kSeed = &kSeed;
} // namespace hash_internal

View File

@@ -79,7 +79,6 @@
#include "absl/base/port.h"
#include "absl/container/fixed_array.h"
#include "absl/hash/internal/city.h"
#include "absl/hash/internal/low_level_hash.h"
#include "absl/hash/internal/weakly_mixed_integer.h"
#include "absl/meta/type_traits.h"
#include "absl/numeric/bits.h"
@@ -940,6 +939,186 @@ inline uint64_t PrecombineLengthMix(uint64_t state, size_t len) {
return state + (uint64_t{len} << 24);
}
// 64-bit constant used as (or XORed into) the second operand of Mix() by the
// short-input paths and by the 32-bit block hash.
inline constexpr uint64_t kMul = uint64_t{0xdcb22ca68cb134ed};
// Random data taken from the hexadecimal digits of Pi's fractional component.
// https://en.wikipedia.org/wiki/Nothing-up-my-sleeve_number
// Five 64-bit words; the hashing routines XOR individual entries ([0]..[4])
// into the data before mixing.
ABSL_CACHELINE_ALIGNED inline constexpr uint64_t kStaticRandomData[] = {
0x243f'6a88'85a3'08d3, 0x1319'8a2e'0370'7344, 0xa409'3822'299f'31d0,
0x082e'fa98'ec4e'6c89, 0x4528'21e6'38d0'1377,
};
// Mixes two 64-bit values into one by multiplying them and XOR-folding the
// product. When size_t is narrower than 8 bytes the 64-bit product is folded
// with its own upper 32 bits; otherwise the high and low 64-bit halves of the
// 128-bit product are XORed together.
ABSL_ATTRIBUTE_ALWAYS_INLINE inline uint64_t Mix(uint64_t lhs, uint64_t rhs) {
// For 32 bit platforms we are trying to use all 64 lower bits.
if constexpr (sizeof(size_t) < 8) {
uint64_t m = lhs * rhs;
return m ^ (m >> 32);
}
// absl::uint128 is not an alias or a thin wrapper around the intrinsic.
// We use the intrinsic when available to improve performance.
// TODO(b/399425325): Try to remove MulType since compiler seem to generate
// the same code with just absl::uint128.
// See https://gcc.godbolt.org/z/s3hGarraG for details.
#ifdef ABSL_HAVE_INTRINSIC_INT128
using MulType = __uint128_t;
#else // ABSL_HAVE_INTRINSIC_INT128
using MulType = absl::uint128;
#endif // ABSL_HAVE_INTRINSIC_INT128
// Though the 128-bit product on AArch64 needs two instructions, it is
// still a good balance between speed and hash quality.
MulType m = lhs;
m *= rhs;
return Uint128High64(m) ^ Uint128Low64(m);
}
// Reads 8 bytes from p.
// The load goes through absl::base_internal::UnalignedLoad64. The GCC-only
// pragmas are pushed and popped around that single statement, so the
// -Warray-bounds suppression does not extend beyond it.
inline uint64_t Read8(const unsigned char* p) {
// Suppress erroneous array bounds errors on GCC.
#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Warray-bounds"
#endif
return absl::base_internal::UnalignedLoad64(p);
#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic pop
#endif
}
// Reads 9 to 16 bytes from p as two 8-byte words.
// `.first` holds the first 8 bytes and `.second` holds the last 8 bytes, so
// for len < 16 the two words share some bytes.
inline std::pair<uint64_t, uint64_t> Read9To16(const unsigned char* p,
                                               size_t len) {
  const uint64_t leading = Read8(p);
  const uint64_t trailing = Read8(p + len - 8);
  return {leading, trailing};
}
// Reads 4 to 8 bytes from p.
// Bytes are permuted and some input bytes may be duplicated in output.
inline uint64_t Read4To8(const unsigned char* p, size_t len) {
  // Two 4-byte loads are combined: the first 4 bytes of the input go into the
  // upper half of the result and the last 4 bytes into the lower half. When
  // `len < 8` the two loads overlap, so bytes are duplicated. E.g., on little
  // endian platforms:
  // `ABCD` will be read as `ABCDABCD`.
  // `ABCDE` will be read as `BCDEABCD`.
  // `ABCDEF` will be read as `CDEFABCD`.
  // `ABCDEFG` will be read as `DEFGABCD`.
  // `ABCDEFGH` will be read as `EFGHABCD`.
  // Endianness is deliberately ignored; on big-endian platforms the bytes are
  // simply permuted differently. The low-memory word is the one that gets
  // shifted because it can be pipelined earlier, whereas the high-memory load
  // first has to compute `p + len - 4`.
  const uint64_t low_memory = absl::base_internal::UnalignedLoad32(p);
  const uint64_t high_memory =
      absl::base_internal::UnalignedLoad32(p + len - 4);
  return (low_memory << 32) | high_memory;
}
// Reads 1 to 3 bytes from p. Some input bytes may be duplicated in output.
inline uint32_t Read1To3(const unsigned char* p, size_t len) {
  // Branch-free: three bytes are always read, repeating bytes when the input
  // is shorter than three.
  // E.g.,
  // `A` is read as `AAA`.
  // `AB` is read as `ABB`.
  // `ABC` is read as `ABC`.
  // `p[0]` is the byte that gets the largest shift so that it can be pipelined
  // better; the other two bytes need extra work to compute their indices.
  const uint32_t first_byte = p[0];
  const uint32_t middle_byte = p[len / 2];
  const uint32_t last_byte = p[len - 1];
  return (first_byte << 16) | (middle_byte << 8) | last_byte;
}
// Slow dispatch path for calls to CombineContiguousImpl with a size argument
// larger than the inlined size. Has the same effect as calling
// CombineContiguousImpl() repeatedly with the chunk stride size.
//
// CombineContiguousImpl forwards here when len > 8 (4-byte size_t overload)
// or len > 32 (8-byte size_t overload); `state` is passed without the length
// mixed in.
uint64_t CombineLargeContiguousImplOn32BitLengthGt8(const unsigned char* first,
size_t len, uint64_t state);
uint64_t CombineLargeContiguousImplOn64BitLengthGt32(const unsigned char* first,
size_t len,
uint64_t state);
// Mixes an input of at most 8 bytes into `state`. The bytes are packed into a
// single word (a fixed marker value for the empty input), XORed into the state
// and mixed with kMul.
ABSL_ATTRIBUTE_ALWAYS_INLINE inline uint64_t CombineSmallContiguousImpl(
    uint64_t state, const unsigned char* first, size_t len) {
  ABSL_ASSUME(len <= 8);
  // Empty string must modify the state.
  uint64_t word = 0x57;
  if (len >= 4) {
    word = Read4To8(first, len);
  } else if (len != 0) {
    word = Read1To3(first, len);
  }
  return Mix(state ^ word, kMul);
}
// Mixes an input of 9 to 16 bytes into `state` with a single Mix() call.
ABSL_ATTRIBUTE_ALWAYS_INLINE inline uint64_t CombineContiguousImpl9to16(
    uint64_t state, const unsigned char* first, size_t len) {
  ABSL_ASSUME(len >= 9);
  ABSL_ASSUME(len <= 16);
  // Note: any time one half of the mix function becomes zero it will fail to
  // incorporate any bits from the other half. However, there is exactly 1 in
  // 2^64 values for each side that achieve this, and only when the size is
  // exactly 16 -- for smaller sizes there is an overlapping byte that makes
  // this impossible unless the seed is *also* incredibly unlucky.
  const auto [head, tail] = Read9To16(first, len);
  return Mix(state ^ head, kMul ^ tail);
}
// Mixes an input of 17 to 32 bytes into `state`.
ABSL_ATTRIBUTE_ALWAYS_INLINE inline uint64_t CombineContiguousImpl17to32(
    uint64_t state, const unsigned char* first, size_t len) {
  ABSL_ASSUME(len >= 17);
  ABSL_ASSUME(len <= 32);
  // The first 16 and the last 16 bytes are mixed independently of each other
  // (the ranges overlap when len < 32), which lets the two Mix() calls run in
  // parallel and minimizes latency.
  const unsigned char* const last_16 = first + (len - 16);
  const uint64_t head_mix =
      Mix(Read8(first) ^ kStaticRandomData[1], Read8(first + 8) ^ state);
  const uint64_t tail_mix =
      Mix(Read8(last_16) ^ kStaticRandomData[3], Read8(last_16 + 8) ^ state);
  return head_mix ^ tail_mix;
}
// Base case of combine_contiguous: this is where the bytes actually get mixed
// into the state. The overload is selected by the value of `sizeof(size_t)`;
// this one handles 4-byte size_t.
inline uint64_t CombineContiguousImpl(
    uint64_t state, const unsigned char* first, size_t len,
    std::integral_constant<int, 4> /* sizeof_size_t */) {
  // Inputs of up to 8 bytes use the custom low latency hash inline; anything
  // longer goes to the out-of-line CityHash based path.
  return len <= 8 ? CombineSmallContiguousImpl(PrecombineLengthMix(state, len),
                                               first, len)
                  : CombineLargeContiguousImplOn32BitLengthGt8(first, len,
                                                               state);
}
// Overload of CombineContiguousImpl for 8-byte size_t. Inputs of up to 32
// bytes are handled inline by length-specialized helpers; longer inputs are
// forwarded to the out-of-line implementation.
inline uint64_t CombineContiguousImpl(
    uint64_t state, const unsigned char* first, size_t len,
    std::integral_constant<int, 8> /* sizeof_size_t */) {
  // For large values we use LowLevelHash or CityHash depending on the platform,
  // for small ones we use custom low latency hash.
  if (len <= 8) {
    return CombineSmallContiguousImpl(PrecombineLengthMix(state, len), first,
                                      len);
  } else if (len <= 16) {
    return CombineContiguousImpl9to16(PrecombineLengthMix(state, len), first,
                                      len);
  } else if (len <= 32) {
    return CombineContiguousImpl17to32(PrecombineLengthMix(state, len), first,
                                       len);
  }
  // The length is deliberately NOT mixed into the state on this path: calling
  // CombineContiguousImpl twice with PiecewiseChunkSize() must be equivalent
  // to calling CombineLargeContiguousImpl once with 2 * PiecewiseChunkSize().
  return CombineLargeContiguousImplOn64BitLengthGt32(first, len, state);
}
#if defined(ABSL_INTERNAL_LEGACY_HASH_NAMESPACE) && \
ABSL_META_INTERNAL_STD_HASH_SFINAE_FRIENDLY_
#define ABSL_HASH_INTERNAL_SUPPORT_LEGACY_HASH_ 1
@@ -1044,22 +1223,11 @@ struct is_hashable
: std::integral_constant<bool, HashSelect::template Apply<T>::value> {};
class ABSL_DLL MixingHashState : public HashStateBase<MixingHashState> {
// absl::uint128 is not an alias or a thin wrapper around the intrinsic.
// We use the intrinsic when available to improve performance.
#ifdef ABSL_HAVE_INTRINSIC_INT128
using uint128 = __uint128_t;
#else // ABSL_HAVE_INTRINSIC_INT128
using uint128 = absl::uint128;
#endif // ABSL_HAVE_INTRINSIC_INT128
template <typename T>
using IntegralFastPath =
conjunction<std::is_integral<T>, is_uniquely_represented<T>,
FitsIn64Bits<T>>;
static constexpr uint64_t kMul =
uint64_t{0xdcb22ca68cb134ed};
public:
// Move only
MixingHashState(MixingHashState&&) = default;
@@ -1153,151 +1321,6 @@ class ABSL_DLL MixingHashState : public HashStateBase<MixingHashState> {
return MixingHashState::combine(std::move(state), unordered_state);
}
// Implementation of the base case for combine_contiguous where we actually
// mix the bytes into the state.
// Dispatch to different implementations of the combine_contiguous depending
// on the value of `sizeof(size_t)`.
// The trailing std::integral_constant parameter is an unnamed tag that only
// selects the overload (4 or 8).
static uint64_t CombineContiguousImpl(uint64_t state,
const unsigned char* first, size_t len,
std::integral_constant<int, 4>
/* sizeof_size_t */);
static uint64_t CombineContiguousImpl(uint64_t state,
const unsigned char* first, size_t len,
std::integral_constant<int, 8>
/* sizeof_size_t */);
// Mixes an input of at most 8 bytes into `state`. The bytes are packed into a
// single word (a fixed marker value for the empty input), XORed into the state
// and mixed with kMul.
ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t CombineSmallContiguousImpl(
    uint64_t state, const unsigned char* first, size_t len) {
  ABSL_ASSUME(len <= 8);
  // Empty string must modify the state.
  uint64_t word = 0x57;
  if (len >= 4) {
    word = Read4To8(first, len);
  } else if (len != 0) {
    word = Read1To3(first, len);
  }
  return Mix(state ^ word, kMul);
}
// Mixes an input of 9 to 16 bytes into `state` with a single Mix() call.
ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t CombineContiguousImpl9to16(
    uint64_t state, const unsigned char* first, size_t len) {
  ABSL_ASSUME(len >= 9);
  ABSL_ASSUME(len <= 16);
  // Note: any time one half of the mix function becomes zero it will fail to
  // incorporate any bits from the other half. However, there is exactly 1 in
  // 2^64 values for each side that achieve this, and only when the size is
  // exactly 16 -- for smaller sizes there is an overlapping byte that makes
  // this impossible unless the seed is *also* incredibly unlucky.
  const auto [head, tail] = Read9To16(first, len);
  return Mix(state ^ head, kMul ^ tail);
}
// Mixes an input of 17 to 32 bytes into `state`.
ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t CombineContiguousImpl17to32(
    uint64_t state, const unsigned char* first, size_t len) {
  ABSL_ASSUME(len >= 17);
  ABSL_ASSUME(len <= 32);
  // The first 16 and the last 16 bytes are mixed independently of each other
  // (the ranges overlap when len < 32), which lets the two Mix() calls run in
  // parallel and minimizes latency.
  const unsigned char* const last_16 = first + (len - 16);
  const uint64_t head_mix =
      Mix(Read8(first) ^ kStaticRandomData[1], Read8(first + 8) ^ state);
  const uint64_t tail_mix =
      Mix(Read8(last_16) ^ kStaticRandomData[3], Read8(last_16 + 8) ^ state);
  return head_mix ^ tail_mix;
}
// Slow dispatch path for calls to CombineContiguousImpl with a size argument
// larger than PiecewiseChunkSize(). Has the same effect as calling
// CombineContiguousImpl() repeatedly with the chunk stride size.
// The 32/64 suffix matches the CombineContiguousImpl overload (4-byte or
// 8-byte size_t) that forwards to it.
static uint64_t CombineLargeContiguousImpl32(const unsigned char* first,
size_t len, uint64_t state);
static uint64_t CombineLargeContiguousImpl64(const unsigned char* first,
size_t len, uint64_t state);
// Reads 9 to 16 bytes from p as two 8-byte words.
// `.first` holds the first 8 bytes and `.second` holds the last 8 bytes, so
// for len < 16 the two words share some bytes.
static std::pair<uint64_t, uint64_t> Read9To16(const unsigned char* p,
                                               size_t len) {
  const uint64_t leading = Read8(p);
  const uint64_t trailing = Read8(p + len - 8);
  return {leading, trailing};
}
// Reads 8 bytes from p.
// The load goes through absl::base_internal::UnalignedLoad64. The GCC-only
// pragmas are pushed and popped around that single statement, so the
// -Warray-bounds suppression does not extend beyond it.
static uint64_t Read8(const unsigned char* p) {
// Suppress erroneous array bounds errors on GCC.
#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Warray-bounds"
#endif
return absl::base_internal::UnalignedLoad64(p);
#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic pop
#endif
}
// Reads 4 to 8 bytes from p.
// Bytes are permuted and some input bytes may be duplicated in output.
static uint64_t Read4To8(const unsigned char* p, size_t len) {
  // Two 4-byte loads are combined: the first 4 bytes of the input go into the
  // upper half of the result and the last 4 bytes into the lower half. When
  // `len < 8` the two loads overlap, so bytes are duplicated. E.g., on little
  // endian platforms:
  // `ABCD` will be read as `ABCDABCD`.
  // `ABCDE` will be read as `BCDEABCD`.
  // `ABCDEF` will be read as `CDEFABCD`.
  // `ABCDEFG` will be read as `DEFGABCD`.
  // `ABCDEFGH` will be read as `EFGHABCD`.
  // Endianness is deliberately ignored; on big-endian platforms the bytes are
  // simply permuted differently. The low-memory word is the one that gets
  // shifted because it can be pipelined earlier, whereas the high-memory load
  // first has to compute `p + len - 4`.
  const uint64_t low_memory = absl::base_internal::UnalignedLoad32(p);
  const uint64_t high_memory =
      absl::base_internal::UnalignedLoad32(p + len - 4);
  return (low_memory << 32) | high_memory;
}
// Reads 1 to 3 bytes from p. Some input bytes may be duplicated in output.
static uint32_t Read1To3(const unsigned char* p, size_t len) {
  // Branch-free: three bytes are always read, repeating bytes when the input
  // is shorter than three.
  // E.g.,
  // `A` is read as `AAA`.
  // `AB` is read as `ABB`.
  // `ABC` is read as `ABC`.
  // `p[0]` is the byte that gets the largest shift so that it can be pipelined
  // better; the other two bytes need extra work to compute their indices.
  const uint32_t first_byte = p[0];
  const uint32_t middle_byte = p[len / 2];
  const uint32_t last_byte = p[len - 1];
  return (first_byte << 16) | (middle_byte << 8) | last_byte;
}
// Mixes two 64-bit values into one by multiplying them and XOR-folding the
// product. When size_t is narrower than 8 bytes the 64-bit product is folded
// with its own upper 32 bits; otherwise the high and low 64-bit halves of the
// 128-bit product are XORed together.
ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t Mix(uint64_t lhs, uint64_t rhs) {
// For 32 bit platforms we are trying to use all 64 lower bits.
if constexpr (sizeof(size_t) < 8) {
uint64_t m = lhs * rhs;
return m ^ (m >> 32);
}
// Though the 128-bit product on AArch64 needs two instructions, it is
// still a good balance between speed and hash quality.
uint128 m = lhs;
m *= rhs;
return Uint128High64(m) ^ Uint128Low64(m);
}
// Hashes `len` bytes at `data` into `state`. Uses LowLevelHashLenGt32 when
// ABSL_HAVE_INTRINSIC_INT128 is defined and seeded CityHash64 otherwise.
ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t Hash64(const unsigned char* data,
                                                    size_t len,
                                                    uint64_t state) {
#ifndef ABSL_HAVE_INTRINSIC_INT128
  const char* const bytes = reinterpret_cast<const char*>(data);
  return hash_internal::CityHash64WithSeed(bytes, len, state);
#else
  return LowLevelHashLenGt32(data, len, state);
#endif
}
// A non-deterministic seed.
//
// The current purpose of this seed is to generate non-deterministic results
@@ -1327,52 +1350,6 @@ class ABSL_DLL MixingHashState : public HashStateBase<MixingHashState> {
uint64_t state_;
};
// 4-byte size_t overload: small inputs use the custom low latency hash,
// inputs up to PiecewiseChunkSize() use CityHash32 inline, and anything longer
// takes the out-of-line piecewise path.
inline uint64_t MixingHashState::CombineContiguousImpl(
    uint64_t state, const unsigned char* first, size_t len,
    std::integral_constant<int, 4> /* sizeof_size_t */) {
  if (len > 8) {
    if (ABSL_PREDICT_TRUE(len <= PiecewiseChunkSize())) {
      // TODO(b/417141985): expose and use CityHash32WithSeed.
      const uint64_t city_hash = hash_internal::CityHash32(
          reinterpret_cast<const char*>(first), len);
      return Mix(PrecombineLengthMix(state, len) ^ city_hash, kMul);
    }
    return CombineLargeContiguousImpl32(first, len, state);
  }
  return CombineSmallContiguousImpl(PrecombineLengthMix(state, len), first,
                                    len);
}
// 8-byte size_t overload: inputs of up to 32 bytes use length-specialized low
// latency helpers, inputs up to PiecewiseChunkSize() use Hash64 (LowLevelHash
// or CityHash depending on the platform), and anything longer takes the
// out-of-line piecewise path.
inline uint64_t MixingHashState::CombineContiguousImpl(
    uint64_t state, const unsigned char* first, size_t len,
    std::integral_constant<int, 8> /* sizeof_size_t */) {
  if (len <= 8) {
    return CombineSmallContiguousImpl(PrecombineLengthMix(state, len), first,
                                      len);
  } else if (len <= 16) {
    return CombineContiguousImpl9to16(PrecombineLengthMix(state, len), first,
                                      len);
  } else if (len <= 32) {
    return CombineContiguousImpl17to32(PrecombineLengthMix(state, len), first,
                                       len);
  } else if (ABSL_PREDICT_TRUE(len <= PiecewiseChunkSize())) {
    // Length is mixed into the state inside of Hash64.
    return Hash64(first, len, state);
  }
  // The length is deliberately NOT mixed into the state on this path: calling
  // CombineContiguousImpl twice with PiecewiseChunkSize() must be equivalent
  // to calling CombineLargeContiguousImpl once with 2 * PiecewiseChunkSize().
  return CombineLargeContiguousImpl64(first, len, state);
}
struct AggregateBarrier {};
// Add a private base class to make sure this type is not an aggregate.

View File

@@ -1,106 +0,0 @@
// Copyright 2020 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/hash/internal/low_level_hash.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include "absl/base/config.h"
#include "absl/base/internal/unaligned_access.h"
#include "absl/base/optimization.h"
#include "absl/base/prefetch.h"
#include "absl/numeric/int128.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace hash_internal {
namespace {
uint64_t Mix(uint64_t v0, uint64_t v1) {
absl::uint128 p = v0;
p *= v1;
return absl::Uint128Low64(p) ^ absl::Uint128High64(p);
}
// Mixes the 32 bytes starting at `ptr` with `current_state` and returns the
// combined value. The bytes are read as four unaligned 64-bit words; the first
// pair and the second pair are mixed separately and then XORed together.
uint64_t Mix32Bytes(const uint8_t* ptr, uint64_t current_state) {
  const uint64_t word0 = absl::base_internal::UnalignedLoad64(ptr);
  const uint64_t word1 = absl::base_internal::UnalignedLoad64(ptr + 8);
  const uint64_t word2 = absl::base_internal::UnalignedLoad64(ptr + 16);
  const uint64_t word3 = absl::base_internal::UnalignedLoad64(ptr + 24);

  const uint64_t first_half =
      Mix(word0 ^ kStaticRandomData[1], word1 ^ current_state);
  const uint64_t second_half =
      Mix(word2 ^ kStaticRandomData[2], word3 ^ current_state);
  return first_half ^ second_half;
}
} // namespace
// Hashes the `len` bytes starting at `data` into a 64-bit value, starting from
// `seed`. Requires len > 32 (asserted below).
uint64_t LowLevelHashLenGt32(const void* data, size_t len, uint64_t seed) {
assert(len > 32);
const uint8_t* ptr = static_cast<const uint8_t*>(data);
// The original length is folded into the initial state.
uint64_t current_state = seed ^ kStaticRandomData[0] ^ len;
// Taken before `ptr` and `len` are advanced, so the final mix always covers
// the last 32 bytes of the input.
const uint8_t* last_32_ptr = ptr + len - 32;
if (len > 64) {
// If we have more than 64 bytes, we're going to handle chunks of 64
// bytes at a time. We're going to build up four separate hash states
// which we will then hash together. Each state depends only on its own
// previous value, so the four Mix() calls in one iteration do not depend
// on one another.
uint64_t duplicated_state0 = current_state;
uint64_t duplicated_state1 = current_state;
uint64_t duplicated_state2 = current_state;
do {
// Always prefetch the next cacheline.
PrefetchToLocalCache(ptr + ABSL_CACHELINE_SIZE);
uint64_t a = absl::base_internal::UnalignedLoad64(ptr);
uint64_t b = absl::base_internal::UnalignedLoad64(ptr + 8);
uint64_t c = absl::base_internal::UnalignedLoad64(ptr + 16);
uint64_t d = absl::base_internal::UnalignedLoad64(ptr + 24);
uint64_t e = absl::base_internal::UnalignedLoad64(ptr + 32);
uint64_t f = absl::base_internal::UnalignedLoad64(ptr + 40);
uint64_t g = absl::base_internal::UnalignedLoad64(ptr + 48);
uint64_t h = absl::base_internal::UnalignedLoad64(ptr + 56);
current_state = Mix(a ^ kStaticRandomData[1], b ^ current_state);
duplicated_state0 = Mix(c ^ kStaticRandomData[2], d ^ duplicated_state0);
duplicated_state1 = Mix(e ^ kStaticRandomData[3], f ^ duplicated_state1);
duplicated_state2 = Mix(g ^ kStaticRandomData[4], h ^ duplicated_state2);
ptr += 64;
len -= 64;
} while (len > 64);
// Fold the four states back into a single one.
current_state = (current_state ^ duplicated_state0) ^
(duplicated_state1 + duplicated_state2);
}
// We now have a data `ptr` with at most 64 bytes and the current state
// of the hashing state machine stored in current_state.
if (len > 32) {
current_state = Mix32Bytes(ptr, current_state);
}
// We now have a data `ptr` with at most 32 bytes and the current state
// of the hashing state machine stored in current_state. But we can
// safely read from `ptr + len - 32`, which is the address saved in
// `last_32_ptr`; that read may overlap bytes that were already mixed above.
return Mix32Bytes(last_32_ptr, current_state);
}
} // namespace hash_internal
ABSL_NAMESPACE_END
} // namespace absl

View File

@@ -1,57 +0,0 @@
// Copyright 2020 The Abseil Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file provides the Google-internal implementation of LowLevelHash.
//
// LowLevelHash is a fast hash function for hash tables, the fastest we've
// currently (late 2020) found that passes the SMHasher tests. The algorithm
// relies on intrinsic 128-bit multiplication for speed. This is not meant to be
// secure - just fast.
//
// It is closely based on a version of wyhash, but does not maintain or
// guarantee future compatibility with it.
#ifndef ABSL_HASH_INTERNAL_LOW_LEVEL_HASH_H_
#define ABSL_HASH_INTERNAL_LOW_LEVEL_HASH_H_
#include <stdint.h>
#include <stdlib.h>
#include "absl/base/config.h"
#include "absl/base/optimization.h"
namespace absl {
ABSL_NAMESPACE_BEGIN
namespace hash_internal {
// Random data taken from the hexadecimal digits of Pi's fractional component.
// https://en.wikipedia.org/wiki/Nothing-up-my-sleeve_number
// Five 64-bit words; the hashing routines XOR individual entries ([0]..[4])
// into the data before mixing.
ABSL_CACHELINE_ALIGNED static constexpr uint64_t kStaticRandomData[] = {
0x243f'6a88'85a3'08d3, 0x1319'8a2e'0370'7344, 0xa409'3822'299f'31d0,
0x082e'fa98'ec4e'6c89, 0x4528'21e6'38d0'1377,
};
// Hash function for a byte array. A 64-bit seed and a set of five 64-bit
// integers (the kStaticRandomData table) are hashed into the result. The
// length must be greater than 32.
//
// `data` points to the bytes to hash, `len` is the number of bytes, and
// `seed` is the initial 64-bit state. Returns the 64-bit hash.
//
// To allow all hashable types (including string_view and Span) to depend on
// this algorithm, we keep the API low-level, with as few dependencies as
// possible.
uint64_t LowLevelHashLenGt32(const void* data, size_t len, uint64_t seed);
} // namespace hash_internal
ABSL_NAMESPACE_END
} // namespace absl
#endif // ABSL_HASH_INTERNAL_LOW_LEVEL_HASH_H_

View File

@@ -12,25 +12,26 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "absl/hash/internal/low_level_hash.h"
#include <cstddef>
#include <cstdint>
#include <string>
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/hash/hash.h"
#include "absl/strings/escaping.h"
#include "absl/strings/string_view.h"
#define UPDATE_GOLDEN 0
namespace {
TEST(LowLevelHashTest, VerifyGolden) {
constexpr size_t kNumGoldenOutputs = 94;
constexpr size_t kNumGoldenOutputs = 95;
static struct {
absl::string_view base64_data;
uint64_t seed;
} cases[] = {
} cases[kNumGoldenOutputs] = {
{"VprUGNH+5NnNRaORxgH/ySrZFQFDL+4VAodhfBNinmn8cg==",
uint64_t{0x531858a40bfa7ea1}},
{"gc1xZaY+q0nPcUvOOnWnT3bqfmT/geth/f7Dm2e/DemMfk4=",
@@ -357,9 +358,12 @@ TEST(LowLevelHashTest, VerifyGolden) {
uint64_t{0xc9ae5c8759b4877a}},
};
#if defined(ABSL_IS_BIG_ENDIAN)
#if defined(ABSL_IS_BIG_ENDIAN) || !defined(ABSL_HAVE_INTRINSIC_INT128) || \
UINTPTR_MAX != UINT64_MAX
constexpr uint64_t kGolden[kNumGoldenOutputs] = {};
GTEST_SKIP() << "We only maintain golden data for little endian systems.";
GTEST_SKIP()
<< "We only maintain golden data for little endian 64 bit systems with "
"128 bit intristics.";
#else
constexpr uint64_t kGolden[kNumGoldenOutputs] = {
0x669da02f8d009e0f, 0xceb19bf2255445cd, 0x0e746992d6d43a7c,
@@ -393,18 +397,22 @@ TEST(LowLevelHashTest, VerifyGolden) {
0xb8116dd26cf6feec, 0x7a77a6e4ed0cf081, 0xb71eec2d5a184316,
0x6fa932f77b4da817, 0x795f79b33909b2c4, 0x1b8755ef6b5eb34e,
0x2255b72d7d6b2d79, 0xf2bdafafa90bd50a, 0x442a578f02cb1fc8,
0xc25aefe55ecf83db,
0xc25aefe55ecf83db, 0x3114c056f9c5a676,
};
#endif
auto hash_fn = [](absl::string_view s, uint64_t state) {
return absl::hash_internal::CombineLargeContiguousImplOn64BitLengthGt32(
reinterpret_cast<const unsigned char*>(s.data()), s.size(), state);
};
#if UPDATE_GOLDEN
(void)kGolden; // Silence warning.
for (size_t i = 0; i < kNumGoldenOutputs; ++i) {
std::string str;
ASSERT_TRUE(absl::Base64Unescape(cases[i].base64_data, &str));
ASSERT_GT(str.size(), 32);
uint64_t h = absl::hash_internal::LowLevelHashLenGt32(
str.data(), str.size(), cases[i].seed);
uint64_t h = hash_fn(str, cases[i].seed);
printf("0x%016" PRIx64 ", ", h);
if (i % 3 == 2) {
printf("\n");
@@ -419,9 +427,7 @@ TEST(LowLevelHashTest, VerifyGolden) {
std::string str;
ASSERT_TRUE(absl::Base64Unescape(cases[i].base64_data, &str));
ASSERT_GT(str.size(), 32);
EXPECT_EQ(absl::hash_internal::LowLevelHashLenGt32(str.data(), str.size(),
cases[i].seed),
kGolden[i]);
EXPECT_EQ(hash_fn(str, cases[i].seed), kGolden[i]);
}
#endif
}