Minimize the number of InitializeSlots instantiations with respect to SizeOfSlot.

I am still keeping `SooSlotMemcpySize`, which may be 1, 4, 8 or 16. Microbenchmarks showed that it is useful to have a memcpy whose size is known at compile time. In the long run, when/if more types become trivially transferable and the maximal SOO size potentially increases, this optimization can become (even) more useful. So I would stick with it for the time being.

`SooSlotMemcpySize` is always zero for non-SOO types and for types that are not memcpyable.
As a result, all slot sizes are merged for non-SOO or non-transferable types.
For memcpyable SOO types we have 4 options (1, 4, 8 or 16).

PiperOrigin-RevId: 723233539
Change-Id: If38c69527a2f873c07ce869bf3ca609fc7371a07
This commit is contained in:
Vitaly Goldshteyn
2025-02-04 14:45:19 -08:00
committed by Copybara-Service
parent 9ebb40865f
commit 69b58f9227
2 changed files with 146 additions and 16 deletions

View File

@@ -1304,6 +1304,9 @@ struct HeapPtrs {
MaybeInitializedPtr slot_array;
};
// Returns the largest slot size, in bytes, that can be kept in the SOO slot.
// This is exactly sizeof(HeapPtrs).
constexpr size_t MaxSooSlotSize() {
  return sizeof(HeapPtrs);
}
// Manages the backing array pointers or the SOO slot. When raw_hash_set::is_soo
// is true, the SOO slot is stored in `soo_data`. Otherwise, we use `heap`.
union HeapOrSoo {
@@ -1330,7 +1333,7 @@ union HeapOrSoo {
}
HeapPtrs heap;
unsigned char soo_data[sizeof(HeapPtrs)];
unsigned char soo_data[MaxSooSlotSize()];
};
// CommonFields hold the fields in raw_hash_set that do not depend
@@ -2076,6 +2079,36 @@ ABSL_ATTRIBUTE_ALWAYS_INLINE inline void InitializeSmallControlBytesAfterSoo(
// new_ctrl after 3rd store = E0EEEEESE0EEEEEEEEEEEEE
}
// Chooses the number of bytes to memcpy when the SOO slot is transferred to
// SooSlotIndex().
//
// The result is 0 unless both `transfer_uses_memcpy` and `soo_enabled` are
// true. Otherwise the result depends only on `slot_size`:
//   slot_size == 1  -> 1
//   slot_size <= 3  -> 4
//   slot_size <= 8  -> 8
//   anything larger -> 16
//
// At the destination we are allowed to copy up to twice `slot_size` bytes,
// because there is at least one more slot after SooSlotIndex().
// The result must not exceed MaxSooSlotSize().
// Several slot sizes deliberately share a result in order to minimize the
// number of function instantiations.
constexpr size_t OptimalMemcpySizeForSooSlotTransfer(bool transfer_uses_memcpy,
                                                     bool soo_enabled,
                                                     size_t slot_size) {
  static_assert(MaxSooSlotSize() <= 16, "unexpectedly large SOO slot size");
  // Sizes 4 and 8 are merged into one case because we expect them to be the
  // hottest cases. Copying 8 bytes is as fast on common architectures.
  return !(transfer_uses_memcpy && soo_enabled) ? 0
         : slot_size == 1                       ? 1
         : slot_size <= 3                       ? 4
         : slot_size <= 8                       ? 8
                                                : 16;
}
// Helper class to perform resize of the hash set.
//
// It contains special optimizations for small group resizes.
@@ -2144,27 +2177,31 @@ class HashSetResizeHelper {
// infoz.RecordRehash is called if old_capacity == 0.
//
// Returns IsGrowingIntoSingleGroupApplicable result to avoid recomputation.
template <typename Alloc, size_t SizeOfSlot, bool TransferUsesMemcpy,
bool SooEnabled, size_t AlignOfSlot>
template <typename Alloc,
// The size we are allowed to copy to transfer SOO slot to
// SooSlotIndex(). See OptimalMemcpySizeForSooSlotTransfer().
size_t SooSlotMemcpySize, bool TransferUsesMemcpy, bool SooEnabled,
size_t AlignOfSlot>
ABSL_ATTRIBUTE_NOINLINE bool InitializeSlots(CommonFields& c, Alloc alloc,
size_t soo_slot_hash,
size_t key_size,
size_t value_size,
const PolicyFunctions& policy) {
ABSL_SWISSTABLE_ASSERT(c.capacity());
const size_t slot_size = policy.slot_size;
HashtablezInfoHandle infoz = c.infoz();
const bool should_sample =
ShouldSampleHashtablezInfoOnResize<Alloc, SooEnabled>(force_infoz_,
old_capacity_, c);
if (ABSL_PREDICT_FALSE(should_sample)) {
infoz = ForcedTrySample(SizeOfSlot, key_size, value_size,
infoz = ForcedTrySample(slot_size, key_size, value_size,
SooEnabled ? SooCapacity() : 0);
}
const bool has_infoz = infoz.IsSampled();
RawHashSetLayout layout(c.capacity(), AlignOfSlot, has_infoz);
char* mem = static_cast<char*>(Allocate<BackingArrayAlignment(AlignOfSlot)>(
&alloc, layout.alloc_size(SizeOfSlot)));
&alloc, layout.alloc_size(slot_size)));
const GenerationType old_generation = c.generation();
c.set_generation_ptr(
reinterpret_cast<GenerationType*>(mem + layout.generation_offset()));
@@ -2180,18 +2217,31 @@ class HashSetResizeHelper {
if (!had_soo_slot_) {
c.set_control(new_ctrl);
c.set_slots(new_slots);
ResetCtrl(c, SizeOfSlot);
ResetCtrl(c, slot_size);
} else if (ABSL_PREDICT_TRUE(layout.capacity() <=
MaxSmallAfterSooCapacity())) {
if (TransferUsesMemcpy) {
InsertOldSooSlotAndInitializeControlBytesSmall(
c, soo_slot_hash, new_ctrl, new_slots, SizeOfSlot,
[](void* target_slot, void* source_slot) {
std::memcpy(target_slot, source_slot, SizeOfSlot);
c, soo_slot_hash, new_ctrl, new_slots, slot_size,
[&](void* target_slot, void* source_slot) {
// Target slot is placed at index 1, but capacity is at
// minimum 3. So we are allowed to copy at least twice as much
// memory.
static_assert(SooSlotIndex() == 1, "");
static_assert(SooSlotMemcpySize <= MaxSooSlotSize(), "");
ABSL_SWISSTABLE_ASSERT(SooSlotMemcpySize != 0 &&
SooSlotMemcpySize <= 2 * slot_size);
ABSL_SWISSTABLE_ASSERT(SooSlotMemcpySize >= slot_size);
void* next_slot = SlotAddress(target_slot, 1, slot_size);
SanitizerUnpoisonMemoryRegion(next_slot,
SooSlotMemcpySize - slot_size);
std::memcpy(target_slot, source_slot, SooSlotMemcpySize);
SanitizerPoisonMemoryRegion(next_slot,
SooSlotMemcpySize - slot_size);
});
} else {
InsertOldSooSlotAndInitializeControlBytesSmall(
c, soo_slot_hash, new_ctrl, new_slots, SizeOfSlot,
c, soo_slot_hash, new_ctrl, new_slots, slot_size,
[&](void* target_slot, void* source_slot) {
policy.transfer(&c, target_slot, source_slot);
});
@@ -2209,14 +2259,14 @@ class HashSetResizeHelper {
// SooEnabled implies that old_capacity_ != 0.
if ((SooEnabled || old_capacity_ != 0) && grow_single_group) {
if (TransferUsesMemcpy) {
GrowSizeIntoSingleGroupTransferable(c, SizeOfSlot);
DeallocateOld<AlignOfSlot>(alloc, SizeOfSlot);
GrowSizeIntoSingleGroupTransferable(c, slot_size);
DeallocateOld<AlignOfSlot>(alloc, slot_size);
} else {
GrowIntoSingleGroupShuffleControlBytes(c.control(),
layout.capacity());
}
} else {
ResetCtrl(c, SizeOfSlot);
ResetCtrl(c, slot_size);
}
}
@@ -3843,7 +3893,10 @@ class raw_hash_set {
// depending on the values of `transfer_uses_memcpy` and capacities.
// Refer to the comment in `InitializeSlots` for more details.
const bool grow_single_group =
resize_helper.InitializeSlots<CharAlloc, sizeof(slot_type),
resize_helper.InitializeSlots<CharAlloc,
OptimalMemcpySizeForSooSlotTransfer(
PolicyTraits::transfer_uses_memcpy(),
SooEnabled(), sizeof(slot_type)),
PolicyTraits::transfer_uses_memcpy(),
SooEnabled(), alignof(slot_type)>(
common, CharAlloc(set->alloc_ref()), soo_slot_hash,

View File

@@ -20,6 +20,7 @@
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <deque>
#include <functional>
#include <iostream>
@@ -576,6 +577,48 @@ template <int N, bool kSoo>
using SizedValuePolicy =
ValuePolicy<SizedValue<N>, /*kTransferable=*/true, kSoo>;
// Value is aligned as type T and contains N copies of it.
//
// The int64_t payload given to the constructor is split into N limbs of
// sizeof(T) bytes each, least significant limb first. operator*() reassembles
// the limbs into an int64_t. Payload bits that do not fit into
// N * sizeof(T) bytes are dropped. When T is at least as wide as int64_t the
// whole payload lives in the first limb and the remaining limbs are zero.
//
// All limb arithmetic is done on uint64_t: shifting a negative int64_t (or
// shifting a set bit past the sign bit) is undefined or implementation-defined
// before C++20, whereas unsigned shifts are always well defined.
template <typename T, int N>
class AlignedValue {
 public:
  // Intentionally implicit (hence the NOLINT).
  AlignedValue(int64_t v) {  // NOLINT
    uint64_t bits = static_cast<uint64_t>(v);
    for (int i = 0; i < N; ++i) {
      vals_[i] = static_cast<T>(bits);
      // kLimbBits is 0 when a single limb already holds the whole payload, so
      // the shift count below is always smaller than 64.
      bits = kLimbIsWholePayload ? 0 : (bits >> kLimbBits);
    }
  }
  AlignedValue() : AlignedValue(0) {}
  AlignedValue(const AlignedValue&) = default;
  AlignedValue& operator=(const AlignedValue&) = default;
  // Returns the payload reassembled from the limbs.
  int64_t operator*() const {
    if (kLimbIsWholePayload) {
      return static_cast<int64_t>(vals_[0]);
    }
    uint64_t bits = 0;
    for (int i = N - 1; i >= 0; --i) {
      bits <<= kLimbBits;
      // Go through int64_t first so that a signed T is sign-extended.
      bits += static_cast<uint64_t>(static_cast<int64_t>(vals_[i]));
    }
    return static_cast<int64_t>(bits);
  }
  explicit operator int() const { return static_cast<int>(**this); }
  explicit operator int64_t() const { return **this; }
  // Hashes the reassembled payload, consistent with operator==.
  template <typename H>
  friend H AbslHashValue(H h, AlignedValue sv) {
    return H::combine(std::move(h), *sv);
  }
  // Two values are equal when their reassembled payloads are equal.
  bool operator==(const AlignedValue& rhs) const { return **this == *rhs; }
 private:
  // True when one limb is wide enough to hold the entire int64_t payload.
  static constexpr bool kLimbIsWholePayload = sizeof(T) >= sizeof(int64_t);
  // Width of one limb in bits, or 0 when no shifting is needed at all.
  static constexpr unsigned kLimbBits =
      kLimbIsWholePayload ? 0u : static_cast<unsigned>(8 * sizeof(T));
  T vals_[N];
};
class StringPolicy {
template <class F, class K, class V,
class = typename std::enable_if<
@@ -942,8 +985,42 @@ TYPED_TEST(SooTest, InsertWithinCapacity) {
// Empty typed-test fixture, parameterized on the table type under test.
template <class TableType>
class SmallTableResizeTest : public testing::Test {};
using SmallTableTypes =
::testing::Types<IntTable, TransferableIntTable, SooIntTable>;
// Table types used to instantiate the SmallTableResizeTest typed test suite.
// Every ValueTable entry is listed twice: once with kTransferable=true and
// once with kTransferable=false, always with kSoo=true.
using SmallTableTypes = ::testing::Types<
IntTable, TransferableIntTable, SooIntTable,
// int8
ValueTable<int8_t, /*kTransferable=*/true, /*kSoo=*/true>,
ValueTable<int8_t, /*kTransferable=*/false, /*kSoo=*/true>,
// int16
ValueTable<int16_t, /*kTransferable=*/true, /*kSoo=*/true>,
ValueTable<int16_t, /*kTransferable=*/false, /*kSoo=*/true>,
// int128
ValueTable<SizedValue<16>, /*kTransferable=*/true, /*kSoo=*/true>,
ValueTable<SizedValue<16>, /*kTransferable=*/false, /*kSoo=*/true>,
// int192
ValueTable<SizedValue<24>, /*kTransferable=*/true, /*kSoo=*/true>,
ValueTable<SizedValue<24>, /*kTransferable=*/false, /*kSoo=*/true>,
// Special tables.
MinimumAlignmentUint8Table,
CustomAllocIntTable,
BadTable,
// alignment 1, size 2.
ValueTable<AlignedValue<uint8_t, 2>, /*kTransferable=*/true, /*kSoo=*/true>,
ValueTable<AlignedValue<uint8_t, 2>, /*kTransferable=*/false,
/*kSoo=*/true>,
// alignment 1, size 7.
ValueTable<AlignedValue<uint8_t, 7>, /*kTransferable=*/true, /*kSoo=*/true>,
ValueTable<AlignedValue<uint8_t, 7>, /*kTransferable=*/false,
/*kSoo=*/true>,
// alignment 2, size 6.
ValueTable<AlignedValue<uint16_t, 3>, /*kTransferable=*/true,
/*kSoo=*/true>,
ValueTable<AlignedValue<uint16_t, 3>, /*kTransferable=*/false,
/*kSoo=*/true>,
// alignment 2, size 10.
ValueTable<AlignedValue<uint16_t, 5>, /*kTransferable=*/true,
/*kSoo=*/true>,
ValueTable<AlignedValue<uint16_t, 5>, /*kTransferable=*/false,
/*kSoo=*/true>>;
TYPED_TEST_SUITE(SmallTableResizeTest, SmallTableTypes);
TYPED_TEST(SmallTableResizeTest, InsertIntoSmallTable) {