Bug 1941001 - Part 2: Support uint32_t in SIMD memchr. r=spidermonkey-reviewers,jandem
Used in the next part. Differential Revision: https://phabricator.services.mozilla.com/D233885
This commit is contained in:
@@ -478,6 +478,14 @@ const char16_t* SIMD::memchr16(const char16_t* ptr, char16_t value,
|
||||
return memchr16SSE2(ptr, value, length);
|
||||
}
|
||||
|
||||
const uint32_t* SIMD::memchr32(const uint32_t* ptr, uint32_t value,
|
||||
size_t length) {
|
||||
if (SupportsAVX2()) {
|
||||
return memchr32AVX2(ptr, value, length);
|
||||
}
|
||||
return FindInBufferNaive<uint32_t>(ptr, value, length);
|
||||
}
|
||||
|
||||
const uint64_t* SIMD::memchr64(const uint64_t* ptr, uint64_t value,
|
||||
size_t length) {
|
||||
if (SupportsAVX2()) {
|
||||
@@ -524,6 +532,11 @@ const char16_t* SIMD::memchr16SSE2(const char16_t* ptr, char16_t value,
|
||||
return memchr16(ptr, value, length);
|
||||
}
|
||||
|
||||
const uint32_t* SIMD::memchr32(const uint32_t* ptr, uint32_t value,
|
||||
size_t length) {
|
||||
return FindInBufferNaive<uint32_t>(ptr, value, length);
|
||||
}
|
||||
|
||||
const uint64_t* SIMD::memchr64(const uint64_t* ptr, uint64_t value,
|
||||
size_t length) {
|
||||
return FindInBufferNaive<uint64_t>(ptr, value, length);
|
||||
|
||||
@@ -54,6 +54,15 @@ class SIMD {
|
||||
static MFBT_API const char16_t* memchr16AVX2(const char16_t* ptr,
|
||||
char16_t value, size_t length);
|
||||
|
||||
// Search through `ptr[0..length]` for the first occurrence of `value` and
|
||||
// return the pointer to it, or nullptr if it cannot be found.
|
||||
static MFBT_API const uint32_t* memchr32(const uint32_t* ptr, uint32_t value,
|
||||
size_t length);
|
||||
|
||||
// This function just restricts our execution to the AVX2 path
|
||||
static MFBT_API const uint32_t* memchr32AVX2(const uint32_t* ptr,
|
||||
uint32_t value, size_t length);
|
||||
|
||||
// Search through `ptr[0..length]` for the first occurrence of `value` and
|
||||
// return the pointer to it, or nullptr if it cannot be found.
|
||||
static MFBT_API const uint64_t* memchr64(const uint64_t* ptr, uint64_t value,
|
||||
|
||||
@@ -47,13 +47,16 @@ __m128i CmpEq128(__m128i a, __m128i b) {
|
||||
template <typename TValue>
|
||||
__m256i CmpEq256(__m256i a, __m256i b) {
|
||||
static_assert(sizeof(TValue) == 1 || sizeof(TValue) == 2 ||
|
||||
sizeof(TValue) == 8);
|
||||
sizeof(TValue) == 4 || sizeof(TValue) == 8);
|
||||
if (sizeof(TValue) == 1) {
|
||||
return _mm256_cmpeq_epi8(a, b);
|
||||
}
|
||||
if (sizeof(TValue) == 2) {
|
||||
return _mm256_cmpeq_epi16(a, b);
|
||||
}
|
||||
if (sizeof(TValue) == 4) {
|
||||
return _mm256_cmpeq_epi32(a, b);
|
||||
}
|
||||
|
||||
return _mm256_cmpeq_epi64(a, b);
|
||||
}
|
||||
@@ -156,7 +159,7 @@ const TValue* Check4x32Bytes(__m256i needle, uintptr_t a, uintptr_t b,
|
||||
template <typename TValue>
|
||||
const TValue* FindInBufferAVX2(const TValue* ptr, TValue value, size_t length) {
|
||||
static_assert(sizeof(TValue) == 1 || sizeof(TValue) == 2 ||
|
||||
sizeof(TValue) == 8);
|
||||
sizeof(TValue) == 4 || sizeof(TValue) == 8);
|
||||
static_assert(std::is_unsigned<TValue>::value);
|
||||
|
||||
// Load our needle into a 32-byte register
|
||||
@@ -165,6 +168,8 @@ const TValue* FindInBufferAVX2(const TValue* ptr, TValue value, size_t length) {
|
||||
needle = _mm256_set1_epi8(value);
|
||||
} else if (sizeof(TValue) == 2) {
|
||||
needle = _mm256_set1_epi16(value);
|
||||
} else if (sizeof(TValue) == 4) {
|
||||
needle = _mm256_set1_epi32(value);
|
||||
} else {
|
||||
needle = _mm256_set1_epi64x(value);
|
||||
}
|
||||
@@ -173,7 +178,7 @@ const TValue* FindInBufferAVX2(const TValue* ptr, TValue value, size_t length) {
|
||||
uintptr_t cur = reinterpret_cast<uintptr_t>(ptr);
|
||||
uintptr_t end = cur + numBytes;
|
||||
|
||||
if (numBytes < 8 || (sizeof(TValue) == 8 && numBytes < 32)) {
|
||||
if (numBytes < 8 || (sizeof(TValue) >= 4 && numBytes < 32)) {
|
||||
while (cur < end) {
|
||||
if (GetAs<TValue>(cur) == value) {
|
||||
return reinterpret_cast<const TValue*>(cur);
|
||||
@@ -183,7 +188,7 @@ const TValue* FindInBufferAVX2(const TValue* ptr, TValue value, size_t length) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if constexpr (sizeof(TValue) != 8) {
|
||||
if constexpr (sizeof(TValue) < 4) {
|
||||
if (numBytes < 32) {
|
||||
__m128i needle_narrow;
|
||||
if (sizeof(TValue) == 1) {
|
||||
@@ -264,6 +269,11 @@ const char16_t* SIMD::memchr16AVX2(const char16_t* ptr, char16_t value,
|
||||
return FindInBufferAVX2<char16_t>(ptr, value, length);
|
||||
}
|
||||
|
||||
const uint32_t* SIMD::memchr32AVX2(const uint32_t* ptr, uint32_t value,
|
||||
size_t length) {
|
||||
return FindInBufferAVX2<uint32_t>(ptr, value, length);
|
||||
}
|
||||
|
||||
const uint64_t* SIMD::memchr64AVX2(const uint64_t* ptr, uint64_t value,
|
||||
size_t length) {
|
||||
return FindInBufferAVX2<uint64_t>(ptr, value, length);
|
||||
@@ -284,6 +294,11 @@ const char16_t* SIMD::memchr16AVX2(const char16_t* ptr, char16_t value,
|
||||
MOZ_RELEASE_ASSERT(false, "AVX2 not supported in this binary.");
|
||||
}
|
||||
|
||||
// Stand-in definition of memchr32AVX2 that performs no search at all: every
// call trips an unconditional release assertion, and none of the arguments are
// inspected.
// NOTE(review): judging by the assertion message, this is the variant built
// when the binary has no AVX2 support; the guarding condition is not visible
// here.
const uint32_t* SIMD::memchr32AVX2(const uint32_t* ptr, uint32_t value,
                                   size_t length) {
  MOZ_RELEASE_ASSERT(false, "AVX2 not supported in this binary.");
}
|
||||
|
||||
const uint64_t* SIMD::memchr64AVX2(const uint64_t* ptr, uint64_t value,
|
||||
size_t length) {
|
||||
MOZ_RELEASE_ASSERT(false, "AVX2 not supported in this binary.");
|
||||
|
||||
Reference in New Issue
Block a user