Add special handling for hashing integral types so that we can optimize Read1To3 and Read4To8 for the strings case.

We introduce combine_raw as an implementation detail of HashState. It lets us keep the same IntegralFastPath implementation while having separately optimized Read*To* functions for the string hashing use case.

PiperOrigin-RevId: 715122407
Change-Id: Iabe35d2d3ecbca5d134a782378b7ecb2e3b9aac6
This commit is contained in:
Evan Brown
2025-01-13 15:09:26 -08:00
committed by Copybara-Service
parent 67d3d20683
commit d498bf66ef
3 changed files with 89 additions and 31 deletions

View File

@@ -79,6 +79,7 @@
#define ABSL_HASH_HASH_H_
#include <cstddef>
#include <cstdint>
#include <tuple>
#include <type_traits>
#include <utility>
@@ -361,6 +362,7 @@ class HashState : public hash_internal::HashStateBase<HashState> {
HashState() = default;
friend class HashState::HashStateBase;
friend struct hash_internal::CombineRaw;
template <typename T>
static void CombineContiguousImpl(void* p, const unsigned char* first,
@@ -369,10 +371,22 @@ class HashState : public hash_internal::HashStateBase<HashState> {
state = T::combine_contiguous(std::move(state), first, size);
}
static HashState combine_raw(HashState hash_state, uint64_t value) {
  // Forward `value` through the type-erased `combine_raw_` function pointer,
  // which updates the wrapped state object that `state_` points to. The
  // handle itself is returned unchanged.
  void* const wrapped_state = hash_state.state_;
  (*hash_state.combine_raw_)(wrapped_state, value);
  return hash_state;
}
// Type-erasure thunk for raw combines: `p` is cast back to a `T`, `value` is
// combined into it via hash_internal::CombineRaw, and the result is stored
// back into the same object.
template <typename T>
static void CombineRawImpl(void* p, uint64_t value) {
  T* const typed_state = static_cast<T*>(p);
  *typed_state =
      hash_internal::CombineRaw()(std::move(*typed_state), value);
}
// Points this HashState at a concrete hash state of type `T` and installs the
// thunks instantiated for `T`, so later calls can be dispatched without
// knowing `T`.
template <typename T>
void Init(T* state) {
  // Dispatch entries: one thunk per type-erased operation.
  run_combine_unordered_ = &RunCombineUnorderedImpl<T>;
  combine_raw_ = &CombineRawImpl<T>;
  combine_contiguous_ = &CombineContiguousImpl<T>;
  // The wrapped object itself, stored as an untyped pointer.
  state_ = state;
}
@@ -411,6 +425,7 @@ class HashState : public hash_internal::HashStateBase<HashState> {
// Makes this HashState refer to the same wrapped state as `state` by copying
// its state pointer and every dispatch function pointer.
void Init(HashState* state) {
  const HashState& source = *state;
  state_ = source.state_;
  combine_contiguous_ = source.combine_contiguous_;
  combine_raw_ = source.combine_raw_;
  run_combine_unordered_ = source.run_combine_unordered_;
}
@@ -421,6 +436,7 @@ class HashState : public hash_internal::HashStateBase<HashState> {
void* state_;
void (*combine_contiguous_)(void*, const unsigned char*, size_t);
void (*combine_raw_)(void*, uint64_t);
HashState (*run_combine_unordered_)(
HashState state,
absl::FunctionRef<void(HashState, absl::FunctionRef<void(HashState&)>)>);

View File

@@ -352,11 +352,39 @@ template <>
struct is_uniquely_represented<unsigned __int128> : std::true_type {};
#endif // ABSL_HAVE_INTRINSIC_INT128
// Trait that is `std::true_type` when `sizeof(T)` is at most 8 and
// `std::false_type` otherwise.
template <typename T>
struct FitsIn64Bits
    : std::conditional<(sizeof(T) <= 8), std::true_type,
                       std::false_type>::type {};
// Function object that forwards to the static `H::combine_raw` of a hash
// state type.
struct CombineRaw {
  // Consumes `state`, combines the 64-bit `value` into it through
  // `H::combine_raw`, and returns the resulting state.
  template <typename H>
  auto operator()(H state, uint64_t value) const -> H {
    return H::combine_raw(std::move(state), value);
  }
};
// hash_bytes()
//
// Convenience function that combines `hash_state` with the byte representation
// of `value`.
template <typename H, typename T>
template <typename H, typename T,
absl::enable_if_t<FitsIn64Bits<T>::value, int> = 0>
H hash_bytes(H hash_state, const T& value) {
const unsigned char* start = reinterpret_cast<const unsigned char*>(&value);
uint64_t v;
if (sizeof(T) == 1) {
v = *start;
} else if (sizeof(T) == 2) {
v = absl::base_internal::UnalignedLoad16(start);
} else if (sizeof(T) == 4) {
v = absl::base_internal::UnalignedLoad32(start);
} else {
assert(sizeof(T) == 8);
v = absl::base_internal::UnalignedLoad64(start);
}
return CombineRaw()(std::move(hash_state), v);
}
template <typename H, typename T,
absl::enable_if_t<!FitsIn64Bits<T>::value, int> = 0>
H hash_bytes(H hash_state, const T& value) {
const unsigned char* start = reinterpret_cast<const unsigned char*>(&value);
return H::combine_contiguous(std::move(hash_state), start, sizeof(value));
@@ -940,6 +968,7 @@ struct HashSelect {
static State combine_contiguous(State hash_state, const unsigned char*,
size_t);
using State::HashStateBase::combine_contiguous;
static State combine_raw(State state, uint64_t value);
};
struct UniquelyRepresentedProbe {
@@ -1033,9 +1062,6 @@ class ABSL_DLL MixingHashState : public HashStateBase<MixingHashState> {
sizeof(size_t) == 4 ? uint64_t{0xcc9e2d51}
: uint64_t{0xdcb22ca68cb134ed};
template <typename T>
struct FitsIn64Bits : std::integral_constant<bool, sizeof(T) <= 8> {};
template <typename T>
using IntegralFastPath =
conjunction<std::is_integral<T>, is_uniquely_represented<T>,
@@ -1107,6 +1133,7 @@ class ABSL_DLL MixingHashState : public HashStateBase<MixingHashState> {
// Allow the HashState type-erasure implementation to invoke
// RunCombinedUnordered() directly.
friend class absl::HashState;
friend struct CombineRaw;
// Workaround for MSVC bug.
// We make the type copyable to fix the calling convention, even though we
@@ -1116,6 +1143,14 @@ class ABSL_DLL MixingHashState : public HashStateBase<MixingHashState> {
explicit MixingHashState(uint64_t state) : state_(state) {}
// Combines a raw 64-bit value (e.g. from integrals/floats/pointers/etc.) into
// the state by XOR-ing it with `state_` and passing the result through
// WeakMix. Keeping this separate from the byte-reading path lets raw types
// stay consistent with IntegralFastPath while Read1To3 and Read4To8 are
// optimized differently for the string case.
static MixingHashState combine_raw(MixingHashState hash_state,
                                   uint64_t value) {
  const auto folded = hash_state.state_ ^ value;
  return MixingHashState(WeakMix(folded));
}
// Implementation of the base case for combine_contiguous where we actually
// mix the bytes into the state.
// Dispatch to different implementations of the combine_contiguous depending
@@ -1218,37 +1253,36 @@ class ABSL_DLL MixingHashState : public HashStateBase<MixingHashState> {
// behavior for the HashConsistentAcrossIntTypes test case. Ditto for
// Read1To3.
static uint64_t Read4To8(const unsigned char* p, size_t len) {
  // Packs the first four and the last four bytes of `p[0..len)` into one
  // 64-bit word. `len` must be at least 4: the second load starts at
  // `p + len - 4`, which would otherwise point before `p`.
  //
  // If `len < 8`, we duplicate bytes in the middle.
  // E.g.:
  //   `ABCD` will be read as `ABCDABCD`.
  //   `ABCDE` will be read as `ABCDBCDE`.
  //   `ABCDEF` will be read as `ABCDCDEF`.
  //   `ABCDEFG` will be read as `ABCDDEFG`.
  // We also do not care about endianness. On big-endian platforms, bytes will
  // be shuffled (it's fine). We always shift low memory by 32, because that
  // can be pipelined earlier. Reading high memory requires computing
  // `p + len - 4`.
  const uint64_t most_significant =
      static_cast<uint64_t>(absl::base_internal::UnalignedLoad32(p)) << 32;
  const uint64_t least_significant =
      absl::base_internal::UnalignedLoad32(p + len - 4);
  return most_significant | least_significant;
}
// Reads 1 to 3 bytes from p into the low 24 bits of a uint32_t. When
// `len < 3` bytes are duplicated rather than zero padded. `len` must be at
// least 1 because `p[len - 1]` is read.
static uint32_t Read1To3(const unsigned char* p, size_t len) {
  // The trick used by this implementation is to avoid branches.
  // We always read three bytes by duplicating.
  // E.g.,
  //   `A` is read as `AAA`.
  //   `AB` is read as `ABB`.
  //   `ABC` is read as `ABC`.
  // We always shift `p[0]` so that it can be pipelined better.
  // Other bytes require extra computation to find indices.
  const uint32_t first_and_last =
      (static_cast<uint32_t>(p[0]) << 16) | p[len - 1];
  const uint32_t middle = static_cast<uint32_t>(p[len / 2]) << 8;
  return first_and_last | middle;
}
ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t Mix(uint64_t lhs, uint64_t rhs) {

View File

@@ -16,6 +16,7 @@
#define ABSL_HASH_INTERNAL_SPY_HASH_STATE_H_
#include <algorithm>
#include <cstdint>
#include <ostream>
#include <string>
#include <vector>
@@ -196,6 +197,7 @@ class SpyHashStateImpl : public HashStateBase<SpyHashStateImpl<T>> {
private:
template <typename U>
friend class SpyHashStateImpl;
friend struct CombineRaw;
struct UnorderedCombinerCallback {
std::vector<std::string> element_hash_representations;
@@ -213,6 +215,12 @@ class SpyHashStateImpl : public HashStateBase<SpyHashStateImpl<T>> {
}
};
// Combines raw data from e.g. integrals/floats/pointers/etc. by feeding the
// eight bytes of `value`, as they are laid out in memory, to
// combine_contiguous.
static SpyHashStateImpl combine_raw(SpyHashStateImpl state, uint64_t value) {
  const void* const value_address = &value;
  const auto* const raw_bytes =
      static_cast<const unsigned char*>(value_address);
  return SpyHashStateImpl::combine_contiguous(std::move(state), raw_bytes, 8);
}
// This is true if SpyHashStateImpl<T> has been passed to a call of
// AbslHashValue with the wrong type. This detects that the user called
// AbslHashValue directly (because the hash state type does not match).