mirror of
https://github.com/abseil/abseil-cpp.git
synced 2026-06-04 20:14:23 +08:00
Avoid using the non-portable type __m128i_u.
According to https://stackoverflow.com/a/68939636 it is safe to use __m128i instead. https://learn.microsoft.com/en-us/cpp/intrinsics/x86-intrinsics-list?view=msvc-170 also uses this type instead. __m128i_u is just __m128i with a looser alignment requirement, but simply calling _mm_loadu_si128() instead of _mm_load_si128() is enough to tell the compiler when a pointer is unaligned. Fixes #1552 PiperOrigin-RevId: 576931936 Change-Id: I7c3530001149b360c12a1786c7e1832754d0e35c
This commit is contained in:
committed by
Copybara-Service
parent
d59eabb121
commit
c8087ae8bd
@@ -58,10 +58,10 @@ namespace crc_internal {
|
||||
|
||||
#if defined(ABSL_CRC_INTERNAL_HAVE_ARM_SIMD)
|
||||
using V128 = uint64x2_t;
|
||||
using V128u = uint64x2_t;
|
||||
#else
|
||||
// Note: Do not use __m128i_u, it is not portable.
|
||||
// Use V128_LoadU() to perform an unaligned load from __m128i*.
|
||||
using V128 = __m128i;
|
||||
using V128u = __m128i_u;
|
||||
#endif
|
||||
|
||||
// Starting with the initial value in |crc|, accumulates a CRC32 value for
|
||||
@@ -78,7 +78,7 @@ uint32_t CRC32_u64(uint32_t crc, uint64_t v);
|
||||
V128 V128_Load(const V128* src);
|
||||
|
||||
// Load 128 bits of integer data. |src| does not need to be aligned.
|
||||
V128 V128_LoadU(const V128u* src);
|
||||
V128 V128_LoadU(const V128* src);
|
||||
|
||||
// Store 128 bits of integer data. |dst| must be 16-byte aligned.
|
||||
void V128_Store(V128* dst, V128 data);
|
||||
@@ -146,7 +146,7 @@ inline uint32_t CRC32_u64(uint32_t crc, uint64_t v) {
|
||||
|
||||
// Loads 128 bits of integer data from |src| via _mm_load_si128(), the
// aligned-load intrinsic; |src| must be 16-byte aligned.
inline V128 V128_Load(const V128* src) { return _mm_load_si128(src); }
|
||||
|
||||
inline V128 V128_LoadU(const V128u* src) { return _mm_loadu_si128(src); }
|
||||
// Loads 128 bits of integer data from |src| via _mm_loadu_si128(), the
// unaligned-load intrinsic, so |src| does not need to be aligned.
inline V128 V128_LoadU(const V128* src) { return _mm_loadu_si128(src); }
|
||||
|
||||
// Stores the 128 bits held in |data| to |dst| via _mm_store_si128(), the
// aligned-store intrinsic; |dst| must be 16-byte aligned.
inline void V128_Store(V128* dst, V128 data) { _mm_store_si128(dst, data); }
|
||||
|
||||
@@ -215,7 +215,7 @@ inline V128 V128_Load(const V128* src) {
|
||||
return vld1q_u64(reinterpret_cast<const uint64_t*>(src));
|
||||
}
|
||||
|
||||
inline V128 V128_LoadU(const V128u* src) {
|
||||
inline V128 V128_LoadU(const V128* src) {
|
||||
return vld1q_u64(reinterpret_cast<const uint64_t*>(src));
|
||||
}
|
||||
|
||||
|
||||
@@ -98,7 +98,7 @@ inline void LargeTailCopy(crc32c_t* crcs, char** dst, const char** src,
|
||||
for (size_t i = 0; i < vec_regions; i++) {
|
||||
size_t region = i;
|
||||
|
||||
auto* vsrc = reinterpret_cast<const V128u*>(*src + region_size * region);
|
||||
auto* vsrc = reinterpret_cast<const V128*>(*src + region_size * region);
|
||||
auto* vdst = reinterpret_cast<V128*>(*dst + region_size * region);
|
||||
|
||||
// Load the blocks, unaligned
|
||||
@@ -262,7 +262,7 @@ crc32c_t AcceleratedCrcMemcpyEngine<vec_regions, int_regions>::Compute(
|
||||
size_t region = (j + i) % kRegions;
|
||||
|
||||
auto* vsrc =
|
||||
reinterpret_cast<const V128u*>(src_bytes + region_size * region);
|
||||
reinterpret_cast<const V128*>(src_bytes + region_size * region);
|
||||
auto* vdst = reinterpret_cast<V128*>(dst_bytes + region_size * region);
|
||||
|
||||
// Load and CRC data.
|
||||
|
||||
Reference in New Issue
Block a user