mirror of
https://github.com/abseil/abseil-cpp.git
synced 2026-06-04 20:14:23 +08:00
Add special handling for hashing integral types so that we can optimize Read1To3 and Read4To8 for the strings case.
We introduce combine_raw as an implementation detail of HashState. It lets us keep the same IntegralFastPath implementation while having separately optimized Read*To* functions for the string-hashing use case. PiperOrigin-RevId: 715122407 Change-Id: Iabe35d2d3ecbca5d134a782378b7ecb2e3b9aac6
This commit is contained in:
committed by
Copybara-Service
parent
67d3d20683
commit
d498bf66ef
@@ -79,6 +79,7 @@
|
||||
#define ABSL_HASH_HASH_H_
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <tuple>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
@@ -361,6 +362,7 @@ class HashState : public hash_internal::HashStateBase<HashState> {
|
||||
HashState() = default;
|
||||
|
||||
friend class HashState::HashStateBase;
|
||||
friend struct hash_internal::CombineRaw;
|
||||
|
||||
template <typename T>
|
||||
static void CombineContiguousImpl(void* p, const unsigned char* first,
|
||||
@@ -369,10 +371,22 @@ class HashState : public hash_internal::HashStateBase<HashState> {
|
||||
state = T::combine_contiguous(std::move(state), first, size);
|
||||
}
|
||||
|
||||
// Type-erased `combine_raw`: forwards `value`, together with the wrapped
// `state_` pointer, to the `combine_raw_` function pointer and then returns
// the HashState that was passed in.
static HashState combine_raw(HashState hash_state, uint64_t value) {
  hash_state.combine_raw_(hash_state.state_, value);
  return hash_state;
}
|
||||
|
||||
template <typename T>
|
||||
static void CombineRawImpl(void* p, uint64_t value) {
|
||||
T& state = *static_cast<T*>(p);
|
||||
state = hash_internal::CombineRaw()(std::move(state), value);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void Init(T* state) {
|
||||
state_ = state;
|
||||
combine_contiguous_ = &CombineContiguousImpl<T>;
|
||||
combine_raw_ = &CombineRawImpl<T>;
|
||||
run_combine_unordered_ = &RunCombineUnorderedImpl<T>;
|
||||
}
|
||||
|
||||
@@ -411,6 +425,7 @@ class HashState : public hash_internal::HashStateBase<HashState> {
|
||||
// Re-wraps an existing HashState: copies its erased state pointer and all of
// its trampolines, so both objects refer to the same underlying state.
void Init(HashState* state) {
  state_ = state->state_;
  combine_contiguous_ = state->combine_contiguous_;
  combine_raw_ = state->combine_raw_;
  run_combine_unordered_ = state->run_combine_unordered_;
}
|
||||
|
||||
@@ -421,6 +436,7 @@ class HashState : public hash_internal::HashStateBase<HashState> {
|
||||
|
||||
void* state_;
|
||||
void (*combine_contiguous_)(void*, const unsigned char*, size_t);
|
||||
void (*combine_raw_)(void*, uint64_t);
|
||||
HashState (*run_combine_unordered_)(
|
||||
HashState state,
|
||||
absl::FunctionRef<void(HashState, absl::FunctionRef<void(HashState&)>)>);
|
||||
|
||||
@@ -352,11 +352,39 @@ template <>
|
||||
struct is_uniquely_represented<unsigned __int128> : std::true_type {};
|
||||
#endif // ABSL_HAVE_INTRINSIC_INT128
|
||||
|
||||
// Trait that is `std::true_type` when an object of type `T` occupies at most
// eight bytes, and `std::false_type` otherwise.
template <typename T>
struct FitsIn64Bits : std::integral_constant<bool, sizeof(T) <= 8> {};
|
||||
|
||||
// Function object that forwards to the static `H::combine_raw(state, value)`
// of whatever hash state type `H` it is invoked with. Hash state classes in
// this file befriend it so that `combine_raw` itself can stay non-public.
struct CombineRaw {
  template <typename H>
  H operator()(H state, uint64_t value) const {
    return H::combine_raw(std::move(state), value);
  }
};
|
||||
|
||||
// hash_bytes()
|
||||
//
|
||||
// Convenience function that combines `hash_state` with the byte representation
|
||||
// of `value`.
|
||||
template <typename H, typename T>
|
||||
template <typename H, typename T,
|
||||
absl::enable_if_t<FitsIn64Bits<T>::value, int> = 0>
|
||||
H hash_bytes(H hash_state, const T& value) {
|
||||
const unsigned char* start = reinterpret_cast<const unsigned char*>(&value);
|
||||
uint64_t v;
|
||||
if (sizeof(T) == 1) {
|
||||
v = *start;
|
||||
} else if (sizeof(T) == 2) {
|
||||
v = absl::base_internal::UnalignedLoad16(start);
|
||||
} else if (sizeof(T) == 4) {
|
||||
v = absl::base_internal::UnalignedLoad32(start);
|
||||
} else {
|
||||
assert(sizeof(T) == 8);
|
||||
v = absl::base_internal::UnalignedLoad64(start);
|
||||
}
|
||||
return CombineRaw()(std::move(hash_state), v);
|
||||
}
|
||||
template <typename H, typename T,
|
||||
absl::enable_if_t<!FitsIn64Bits<T>::value, int> = 0>
|
||||
H hash_bytes(H hash_state, const T& value) {
|
||||
const unsigned char* start = reinterpret_cast<const unsigned char*>(&value);
|
||||
return H::combine_contiguous(std::move(hash_state), start, sizeof(value));
|
||||
@@ -940,6 +968,7 @@ struct HashSelect {
|
||||
static State combine_contiguous(State hash_state, const unsigned char*,
|
||||
size_t);
|
||||
using State::HashStateBase::combine_contiguous;
|
||||
static State combine_raw(State state, uint64_t value);
|
||||
};
|
||||
|
||||
struct UniquelyRepresentedProbe {
|
||||
@@ -1033,9 +1062,6 @@ class ABSL_DLL MixingHashState : public HashStateBase<MixingHashState> {
|
||||
sizeof(size_t) == 4 ? uint64_t{0xcc9e2d51}
|
||||
: uint64_t{0xdcb22ca68cb134ed};
|
||||
|
||||
// Trait that is `std::true_type` when an object of type `T` occupies at most
// eight bytes, and `std::false_type` otherwise.
template <typename T>
struct FitsIn64Bits : std::integral_constant<bool, sizeof(T) <= 8> {};
|
||||
|
||||
template <typename T>
|
||||
using IntegralFastPath =
|
||||
conjunction<std::is_integral<T>, is_uniquely_represented<T>,
|
||||
@@ -1107,6 +1133,7 @@ class ABSL_DLL MixingHashState : public HashStateBase<MixingHashState> {
|
||||
// Allow the HashState type-erasure implementation to invoke
|
||||
// RunCombinedUnordered() directly.
|
||||
friend class absl::HashState;
|
||||
friend struct CombineRaw;
|
||||
|
||||
// Workaround for MSVC bug.
|
||||
// We make the type copyable to fix the calling convention, even though we
|
||||
@@ -1116,6 +1143,14 @@ class ABSL_DLL MixingHashState : public HashStateBase<MixingHashState> {
|
||||
|
||||
// Constructs a hash state whose `state_` member is initialized from `state`.
// `explicit` prevents implicit conversion from a plain integer.
explicit MixingHashState(uint64_t state) : state_(state) {}
|
||||
|
||||
// Combines a raw value from e.g. integrals/floats/pointers/etc. This allows
|
||||
// us to be consistent with IntegralFastPath when combining raw types, but
|
||||
// optimize Read1To3 and Read4To8 differently for the string case.
|
||||
static MixingHashState combine_raw(MixingHashState hash_state,
|
||||
uint64_t value) {
|
||||
return MixingHashState(WeakMix(hash_state.state_ ^ value));
|
||||
}
|
||||
|
||||
// Implementation of the base case for combine_contiguous where we actually
|
||||
// mix the bytes into the state.
|
||||
// Dispatch to different implementations of the combine_contiguous depending
|
||||
@@ -1218,37 +1253,36 @@ class ABSL_DLL MixingHashState : public HashStateBase<MixingHashState> {
|
||||
// behavior for the HashConsistentAcrossIntTypes test case. Ditto for
|
||||
// Read1To3.
|
||||
static uint64_t Read4To8(const unsigned char* p, size_t len) {
|
||||
uint32_t low_mem = absl::base_internal::UnalignedLoad32(p);
|
||||
uint32_t high_mem = absl::base_internal::UnalignedLoad32(p + len - 4);
|
||||
#ifdef ABSL_IS_LITTLE_ENDIAN
|
||||
uint32_t most_significant = high_mem;
|
||||
uint32_t least_significant = low_mem;
|
||||
#else
|
||||
uint32_t most_significant = low_mem;
|
||||
uint32_t least_significant = high_mem;
|
||||
#endif
|
||||
return (static_cast<uint64_t>(most_significant) << (len - 4) * 8) |
|
||||
least_significant;
|
||||
// If `len < 8`, we duplicate bytes in the middle.
|
||||
// E.g.:
|
||||
// `ABCD` will be read as `ABCDABCD`.
|
||||
// `ABCDE` will be read as `ABCDBCDE`.
|
||||
// `ABCDEF` will be read as `ABCDCDEF`.
|
||||
// `ABCDEFG` will be read as `ABCDDEFG`.
|
||||
// We also do not care about endianness. On big-endian platforms, bytes will
|
||||
// be shuffled (it's fine). We always shift low memory by 32, because that
|
||||
// can be pipelined earlier. Reading high memory requires computing
|
||||
// `p + len - 4`.
|
||||
uint64_t most_significant =
|
||||
static_cast<uint64_t>(absl::base_internal::UnalignedLoad32(p)) << 32;
|
||||
uint64_t least_significant =
|
||||
absl::base_internal::UnalignedLoad32(p + len - 4);
|
||||
return most_significant | least_significant;
|
||||
}
|
||||
|
||||
// Reads 1 to 3 bytes from p. The result occupies the low 24 bits of the
// returned uint32_t (the top byte is always zero). Input bytes may be
// duplicated in the output.
static uint32_t Read1To3(const unsigned char* p, size_t len) {
  // The trick used by this implementation is to avoid branches.
  // We always read three bytes by duplicating.
  // E.g.,
  // `A` is read as `AAA`.
  // `AB` is read as `ABB`.
  // `ABC` is read as `ABC`.
  // We always shift `p[0]` so that it can be pipelined better.
  // Other bytes require extra computation to find indices.
  const uint32_t first_and_last =
      (static_cast<uint32_t>(p[0]) << 16) | p[len - 1];
  const uint32_t middle = static_cast<uint32_t>(p[len / 2]) << 8;
  return first_and_last | middle;
}
|
||||
|
||||
ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t Mix(uint64_t lhs, uint64_t rhs) {
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
#define ABSL_HASH_INTERNAL_SPY_HASH_STATE_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <ostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
@@ -196,6 +197,7 @@ class SpyHashStateImpl : public HashStateBase<SpyHashStateImpl<T>> {
|
||||
private:
|
||||
template <typename U>
|
||||
friend class SpyHashStateImpl;
|
||||
friend struct CombineRaw;
|
||||
|
||||
struct UnorderedCombinerCallback {
|
||||
std::vector<std::string> element_hash_representations;
|
||||
@@ -213,6 +215,12 @@ class SpyHashStateImpl : public HashStateBase<SpyHashStateImpl<T>> {
|
||||
}
|
||||
};
|
||||
|
||||
// Combines raw data from e.g. integrals/floats/pointers/etc.
|
||||
static SpyHashStateImpl combine_raw(SpyHashStateImpl state, uint64_t value) {
|
||||
const unsigned char* data = reinterpret_cast<const unsigned char*>(&value);
|
||||
return SpyHashStateImpl::combine_contiguous(std::move(state), data, 8);
|
||||
}
|
||||
|
||||
// This is true if SpyHashStateImpl<T> has been passed to a call of
|
||||
// AbslHashValue with the wrong type. This detects that the user called
|
||||
// AbslHashValue directly (because the hash state type does not match).
|
||||
|
||||
Reference in New Issue
Block a user