Optimize InitControlBytesAfterSoo to have less writes and make them with compile time known size.

X86:
```
name                                                                           old CYCLES/op        new CYCLES/op        delta
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:1           14.8 ± 0%            14.8 ± 0%  -0.05%  (p=0.001 n=49+50)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:2           49.2 ± 0%            44.7 ± 0%  -9.19%  (p=0.000 n=54+50)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:4           59.3 ± 0%            57.2 ± 0%  -3.61%  (p=0.000 n=54+53)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:8           56.1 ± 1%            55.1 ± 1%  -1.86%  (p=0.000 n=54+54)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:16          61.8 ± 1%            61.2 ± 1%  -0.98%  (p=0.000 n=56+55)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:32          60.5 ± 1%            60.2 ± 1%  -0.41%  (p=0.000 n=42+55)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:64          57.7 ± 2%            57.5 ± 2%  -0.32%  (p=0.031 n=55+56)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:128         55.2 ± 1%            55.2 ± 1%    ~     (p=0.493 n=52+51)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:256         52.9 ± 1%            53.0 ± 1%    ~     (p=0.877 n=45+49)
```

ARM (altra,arch=aarch64)
```
name                                                                           old CYCLES/op        new CYCLES/op        delta
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:1           16.9 ± 3%            16.8 ± 3%    ~     (p=0.107 n=57+57)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:2           55.9 ± 1%            51.6 ±11%  -7.62%  (p=0.000 n=46+57)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:4           61.6 ± 0%            59.9 ± 7%  -2.77%  (p=0.000 n=33+57)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:8           61.9 ± 0%            61.0 ± 4%  -1.47%  (p=0.000 n=33+57)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:16          58.0 ± 2%            57.7 ± 3%    ~     (p=0.073 n=40+57)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:32          54.4 ± 3%            54.2 ± 3%    ~     (p=0.138 n=57+57)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:64          51.3 ± 2%            51.1 ± 2%    ~     (p=0.110 n=57+57)
BM_SWISSMAP_InsertManyToEmpty_Hot<::absl::flat_hash_set, 4>/set_size:128         48.8 ± 2%            48.7 ± 2%    ~     (p=0.163 n=57+57)
```

PiperOrigin-RevId: 720984174
Change-Id: I3297a89e678421e4e784af6b5f66b862fd1aae05
This commit is contained in:
Vitaly Goldshteyn
2025-01-29 08:17:51 -08:00
committed by Copybara-Service
parent da2b12d7c9
commit 5f9f2cf80a
2 changed files with 37 additions and 6 deletions

View File

@@ -481,13 +481,44 @@ void HashSetResizeHelper::GrowIntoSingleGroupShuffleControlBytes(
void HashSetResizeHelper::InitControlBytesAfterSoo(ctrl_t* new_ctrl, ctrl_t h2,
size_t new_capacity) {
assert(is_single_group(new_capacity));
std::memset(new_ctrl, static_cast<int8_t>(ctrl_t::kEmpty),
NumControlBytes(new_capacity));
assert(HashSetResizeHelper::SooSlotIndex() == 1);
static_assert(HashSetResizeHelper::SooSlotIndex() == 1, "");
// This allows us to avoid branching on had_soo_slot_.
assert(had_soo_slot_ || h2 == ctrl_t::kEmpty);
new_ctrl[1] = new_ctrl[new_capacity + 2] = h2;
new_ctrl[new_capacity] = ctrl_t::kSentinel;
if (Group::kWidth == 16) {
// Initialize the second 8 bytes in the original and mirrored control bytes.
// The ranges can overlap.
absl::little_endian::Store64(new_ctrl + 8, kMsbs8Bytes);
absl::little_endian::Store64(new_ctrl + new_capacity + 8, kMsbs8Bytes);
}
static constexpr uint64_t kAllEmptyExceptSoo =
kMsbs8Bytes ^ (static_cast<uint64_t>(static_cast<uint8_t>(ctrl_t::kEmpty))
<< (8 * HashSetResizeHelper::SooSlotIndex()));
// Initialize the first 8 bytes in the original control bytes.
// The first 8 bytes are all empty except the SOO slot.
// The range may overlap with the mirrored control bytes. These bytes will be
// overwritten later.
uint64_t first_ctrl_bytes =
kAllEmptyExceptSoo ^ (static_cast<uint64_t>(static_cast<uint8_t>(h2))
<< (8 * HashSetResizeHelper::SooSlotIndex()));
absl::little_endian::Store64(new_ctrl, first_ctrl_bytes);
// Initialize Sentinel byte and the first 7 bytes in the mirrored control
// bytes.
// We are adding kSentinel as the first byte of the mirrored control bytes.
uint64_t mirrored_ctrl_bytes =
(first_ctrl_bytes << 8) ^
static_cast<uint64_t>(static_cast<uint8_t>(ctrl_t::kSentinel));
absl::little_endian::Store64(new_ctrl + new_capacity, mirrored_ctrl_bytes);
// Example for capacity 3:
// new_ctrl after 2 stores = ????????EEEEEEEEEEE
// new_ctrl after 3rd store = E0EEEEEEEEEEEEEEEEE
// new_ctrl after 4th store = E0ESE0EEEEEEEEEEEEE
// Example for capacity 15:
// new_ctrl after 2 stores = ????????EEEEEEEE???????EEEEEEEE
// new_ctrl after 3rd store = E0EEEEEEEEEEEEEE???????EEEEEEEE
// new_ctrl after 4th store = E0EEEEEEEEEEEEESE0EEEEEEEEEEEEE
}
void HashSetResizeHelper::GrowIntoSingleGroupShuffleTransferableSlots(

View File

@@ -2033,7 +2033,7 @@ class HashSetResizeHelper {
// index 1 so that when resizing from capacity 1 to 3, we can still have
// random iteration order between the first two inserted elements.
// I.e. it allows inserting the second element at either index 0 or 2.
static size_t SooSlotIndex() { return 1; }
static constexpr size_t SooSlotIndex() { return 1; }
// Allocates a backing array for the hashtable.
// Reads `capacity` and updates all other fields based on the result of