Bug 1941001 - Part 2: Support uint32_t in SIMD memchr. r=spidermonkey-reviewers,jandem

The new `SIMD::memchr32` entry point is used in the next part of this patch series.

Differential Revision: https://phabricator.services.mozilla.com/D233885
2025-01-15 10:50:02 +00:00
parent ae6530b698
commit e078d00aad
3 changed files with 41 additions and 4 deletions
--- a/mozglue/misc/SIMD.cpp
+++ b/mozglue/misc/SIMD.cpp
@@ -478,6 +478,14 @@ const char16_t* SIMD::memchr16(const char16_t* ptr, char16_t value,
  return memchr16SSE2(ptr, value, length);
 }

+const uint32_t* SIMD::memchr32(const uint32_t* ptr, uint32_t value,
+                               size_t length) {
+  if (SupportsAVX2()) {
+    return memchr32AVX2(ptr, value, length);
+  }
+  return FindInBufferNaive<uint32_t>(ptr, value, length);
+}
+
 const uint64_t* SIMD::memchr64(const uint64_t* ptr, uint64_t value,
                               size_t length) {
  if (SupportsAVX2()) {
@@ -524,6 +532,11 @@ const char16_t* SIMD::memchr16SSE2(const char16_t* ptr, char16_t value,
  return memchr16(ptr, value, length);
 }

+const uint32_t* SIMD::memchr32(const uint32_t* ptr, uint32_t value,
+                               size_t length) {
+  return FindInBufferNaive<uint32_t>(ptr, value, length);
+}
+
 const uint64_t* SIMD::memchr64(const uint64_t* ptr, uint64_t value,
                               size_t length) {
  return FindInBufferNaive<uint64_t>(ptr, value, length);
--- a/mozglue/misc/SIMD.h
+++ b/mozglue/misc/SIMD.h
@@ -54,6 +54,15 @@ class SIMD {
  static MFBT_API const char16_t* memchr16AVX2(const char16_t* ptr,
                                               char16_t value, size_t length);

+  // Search through `ptr[0..length]` for the first occurrence of `value` and
+  // return the pointer to it, or nullptr if it cannot be found.
+  static MFBT_API const uint32_t* memchr32(const uint32_t* ptr, uint32_t value,
+                                           size_t length);
+
+  // This function just restricts our execution to the AVX2 path
+  static MFBT_API const uint32_t* memchr32AVX2(const uint32_t* ptr,
+                                               uint32_t value, size_t length);
+
  // Search through `ptr[0..length]` for the first occurrence of `value` and
  // return the pointer to it, or nullptr if it cannot be found.
  static MFBT_API const uint64_t* memchr64(const uint64_t* ptr, uint64_t value,
--- a/mozglue/misc/SIMD_avx2.cpp
+++ b/mozglue/misc/SIMD_avx2.cpp
@@ -47,13 +47,16 @@ __m128i CmpEq128(__m128i a, __m128i b) {
 template <typename TValue>
 __m256i CmpEq256(__m256i a, __m256i b) {
  static_assert(sizeof(TValue) == 1 || sizeof(TValue) == 2 ||
-                sizeof(TValue) == 8);
+                sizeof(TValue) == 4 || sizeof(TValue) == 8);
  if (sizeof(TValue) == 1) {
    return _mm256_cmpeq_epi8(a, b);
  }
  if (sizeof(TValue) == 2) {
    return _mm256_cmpeq_epi16(a, b);
  }
+  if (sizeof(TValue) == 4) {
+    return _mm256_cmpeq_epi32(a, b);
+  }

  return _mm256_cmpeq_epi64(a, b);
 }
@@ -156,7 +159,7 @@ const TValue* Check4x32Bytes(__m256i needle, uintptr_t a, uintptr_t b,
 template <typename TValue>
 const TValue* FindInBufferAVX2(const TValue* ptr, TValue value, size_t length) {
  static_assert(sizeof(TValue) == 1 || sizeof(TValue) == 2 ||
-                sizeof(TValue) == 8);
+                sizeof(TValue) == 4 || sizeof(TValue) == 8);
  static_assert(std::is_unsigned<TValue>::value);

  // Load our needle into a 32-byte register
@@ -165,6 +168,8 @@ const TValue* FindInBufferAVX2(const TValue* ptr, TValue value, size_t length) {
    needle = _mm256_set1_epi8(value);
  } else if (sizeof(TValue) == 2) {
    needle = _mm256_set1_epi16(value);
+  } else if (sizeof(TValue) == 4) {
+    needle = _mm256_set1_epi32(value);
  } else {
    needle = _mm256_set1_epi64x(value);
  }
@@ -173,7 +178,7 @@ const TValue* FindInBufferAVX2(const TValue* ptr, TValue value, size_t length) {
  uintptr_t cur = reinterpret_cast<uintptr_t>(ptr);
  uintptr_t end = cur + numBytes;

-  if (numBytes < 8 || (sizeof(TValue) == 8 && numBytes < 32)) {
+  if (numBytes < 8 || (sizeof(TValue) >= 4 && numBytes < 32)) {
    while (cur < end) {
      if (GetAs<TValue>(cur) == value) {
        return reinterpret_cast<const TValue*>(cur);
@@ -183,7 +188,7 @@ const TValue* FindInBufferAVX2(const TValue* ptr, TValue value, size_t length) {
    return nullptr;
  }

-  if constexpr (sizeof(TValue) != 8) {
+  if constexpr (sizeof(TValue) < 4) {
    if (numBytes < 32) {
      __m128i needle_narrow;
      if (sizeof(TValue) == 1) {
@@ -264,6 +269,11 @@ const char16_t* SIMD::memchr16AVX2(const char16_t* ptr, char16_t value,
  return FindInBufferAVX2<char16_t>(ptr, value, length);
 }

+const uint32_t* SIMD::memchr32AVX2(const uint32_t* ptr, uint32_t value,
+                                   size_t length) {
+  return FindInBufferAVX2<uint32_t>(ptr, value, length);
+}
+
 const uint64_t* SIMD::memchr64AVX2(const uint64_t* ptr, uint64_t value,
                                   size_t length) {
  return FindInBufferAVX2<uint64_t>(ptr, value, length);
@@ -284,6 +294,11 @@ const char16_t* SIMD::memchr16AVX2(const char16_t* ptr, char16_t value,
  MOZ_RELEASE_ASSERT(false, "AVX2 not supported in this binary.");
 }

+const uint32_t* SIMD::memchr32AVX2(const uint32_t* ptr, uint32_t value,
+                                   size_t length) {
+  MOZ_RELEASE_ASSERT(false, "AVX2 not supported in this binary.");
+}
+
 const uint64_t* SIMD::memchr64AVX2(const uint64_t* ptr, uint64_t value,
                                   size_t length) {
  MOZ_RELEASE_ASSERT(false, "AVX2 not supported in this binary.");