Bug 1301742 - Improve UTF-16 to UTF-8 conversion speed. r=froydnj

This adds an SSE2-optimized path for UTF-16 strings that can be trivially converted to UTF-8 (they contain no characters > 0x7F). It speeds up the case where many of the leading characters are < 0x80, and should perform the same as before in the worst-case scenario where the first character is > 0x7F.
2016-09-15 15:46:29 -07:00
parent 3ffd3ed130
commit bed8f9f769
5 changed files with 314 additions and 8 deletions
--- a/xpcom/string/moz.build
+++ b/xpcom/string/moz.build
@@ -51,9 +51,11 @@ UNIFIED_SOURCES += [
 ]
 # Are we targeting x86 or x86-64?  If so, compile the SSE2 functions for
-# nsUTF8Utils.cpp.
+# nsUTF8Utils.cpp and nsReadableUtils.cpp.
 if CONFIG['INTEL_ARCHITECTURE']:
    # Each SSE2 file is added to SOURCES on its own line and then has
    # SSE2_FLAGS appended to that file's flags only.
    SOURCES += ['nsUTF8UtilsSSE2.cpp']
    SOURCES['nsUTF8UtilsSSE2.cpp'].flags += CONFIG['SSE2_FLAGS']
    # SSE2 implementation of FirstNonASCII used by nsReadableUtils.cpp.
    SOURCES += ['nsReadableUtilsSSE2.cpp']
    SOURCES['nsReadableUtilsSSE2.cpp'].flags += CONFIG['SSE2_FLAGS']
 FINAL_LIBRARY = 'xul'
--- a/xpcom/string/nsReadableUtils.cpp
+++ b/xpcom/string/nsReadableUtils.cpp
@@ -5,14 +5,79 @@
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 #include "nsReadableUtils.h"
 #include "nsReadableUtilsImpl.h"
 #include <algorithm>
 #include "mozilla/CheckedInt.h"
 #include "nscore.h"
 #include "nsMemory.h"
 #include "nsString.h"
 #include "nsTArray.h"
 #include "nsUTF8Utils.h"
 using mozilla::IsASCII;
 /**
 * Fallback implementation for locating non-ASCII code units in the UTF-16
 * range [aBegin, aEnd) without SIMD instructions.
 *
 * Returns -1 when every code unit in the range is ASCII. Otherwise returns an
 * index that is less than or equal to the index of the first non-ASCII code
 * unit: a hit in the word-at-a-time loop reports the start of that word
 * rather than the exact position. Every code unit before the returned index
 * is ASCII.
 */
 static inline int32_t
 FirstNonASCIIUnvectorized(const char16_t* aBegin, const char16_t* aEnd)
 {
  typedef mozilla::NonASCIIParameters<sizeof(size_t)> p;
  const size_t kMask = p::mask();
  const uintptr_t kAlignMask = p::alignMask();
  const size_t kNumUnicharsPerWord = p::numUnicharsPerWord();
  const char16_t* idx = aBegin;
  // Align ourselves to a word boundary.
  for (; idx != aEnd && ((uintptr_t(idx) & kAlignMask) != 0); idx++) {
    if (!IsASCII(*idx)) {
      return idx - aBegin;
    }
  }
  // Check one word at a time. |wordWalkEnd| is |aEnd| rounded down to a word
  // boundary. If the range ended before a word boundary was reached, |idx|
  // (== aEnd) is already past |wordWalkEnd|, so the loop condition must be
  // |<| rather than |!=| to avoid reading past the end of the buffer.
  const char16_t* wordWalkEnd = mozilla::aligned(aEnd, kAlignMask);
  for (; idx < wordWalkEnd; idx += kNumUnicharsPerWord) {
    const size_t word = *reinterpret_cast<const size_t*>(idx);
    if (word & kMask) {
      // Some code unit in this word is non-ASCII; report the word's start.
      return idx - aBegin;
    }
  }
  // Take care of the remainder one character at a time.
  for (; idx != aEnd; idx++) {
    if (!IsASCII(*idx)) {
      return idx - aBegin;
    }
  }
  return -1;
 }
 /*
 * Locates the first non-ASCII character in the UTF-16 range [aBegin, aEnd).
 *
 * Returns -1 if every character in the range is ASCII. Otherwise, returns a
 * value less than or equal to the index of the first non-ASCII character. For
 * example, if the first non-ASCII character is at position 25, the result may
 * be 25, 24, or 16. Every character before the returned index is guaranteed
 * to be ASCII.
 */
 static inline int32_t
 FirstNonASCII(const char16_t* aBegin, const char16_t* aEnd)
 {
 #ifdef MOZILLA_MAY_SUPPORT_SSE2
  // Prefer the vectorized scan, but only when the CPU reports SSE2 at run time.
  const bool useVectorized = mozilla::supports_sse2();
  if (!useVectorized) {
    return FirstNonASCIIUnvectorized(aBegin, aEnd);
  }
  return mozilla::SSE2::FirstNonASCII(aBegin, aEnd);
 #else
  return FirstNonASCIIUnvectorized(aBegin, aEnd);
 #endif
 }
 void
 LossyCopyUTF16toASCII(const nsAString& aSource, nsACString& aDest)
 {
@@ -180,16 +245,46 @@ bool
 AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest,
                  const mozilla::fallible_t& aFallible)
 {
  // At 16 characters analysis showed better performance of both the all ASCII
  // and non-ASCII cases, so we limit calling |FirstNonASCII| to strings of
  // that length.
  const nsAString::size_type kFastPathMinLength = 16;
  int32_t firstNonASCII = 0;
  if (aSource.Length() >= kFastPathMinLength) {
    firstNonASCII = FirstNonASCII(aSource.BeginReading(), aSource.EndReading());
  }
  if (firstNonASCII == -1) {
    // This is all ASCII, we can use the more efficient lossy append.
    mozilla::CheckedInt<nsACString::size_type> new_length(aSource.Length());
    new_length += aDest.Length();
    if (!new_length.isValid() ||
        !aDest.SetCapacity(new_length.value(), aFallible)) {
      return false;
    }
    LossyAppendUTF16toASCII(aSource, aDest);
    return true;
  }
  nsAString::const_iterator source_start, source_end;
  CalculateUTF8Size calculator;
-  copy_string(aSource.BeginReading(source_start),
+  aSource.BeginReading(source_start);
-              aSource.EndReading(source_end), calculator);
+  aSource.EndReading(source_end);
-  size_t count = calculator.Size();
+  // Skip the characters that we know are single byte.
  source_start.advance(firstNonASCII);
  copy_string(source_start,
              source_end, calculator);
  // Include the ASCII characters that were skipped in the count.
  size_t count = calculator.Size() + firstNonASCII;
  if (count) {
    auto old_dest_length = aDest.Length();
    // Grow the buffer if we need to.
    mozilla::CheckedInt<nsACString::size_type> new_length(count);
    new_length += old_dest_length;
@@ -201,11 +296,30 @@ AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest,
    // All ready? Time to convert
-    ConvertUTF16toUTF8 converter(aDest.BeginWriting() + old_dest_length);
+    nsAString::const_iterator ascii_end;
-    copy_string(aSource.BeginReading(source_start),
+    aSource.BeginReading(ascii_end);
    if (firstNonASCII >= static_cast<int32_t>(kFastPathMinLength)) {
      // Use the more efficient lossy converter for the ASCII portion.
      LossyConvertEncoding16to8 lossy_converter(
          aDest.BeginWriting() + old_dest_length);
      nsAString::const_iterator ascii_start;
      aSource.BeginReading(ascii_start);
      ascii_end.advance(firstNonASCII);
      copy_string(ascii_start, ascii_end, lossy_converter);
    } else {
      // Not using the lossy shortcut, we need to include the leading ASCII
      // chars.
      firstNonASCII = 0;
    }
    ConvertUTF16toUTF8 converter(
        aDest.BeginWriting() + old_dest_length + firstNonASCII);
    copy_string(ascii_end,
                aSource.EndReading(source_end), converter);
-    NS_ASSERTION(converter.Size() == count,
+    NS_ASSERTION(converter.Size() == count - firstNonASCII,
                 "Unexpected disparity between CalculateUTF8Size and "
                 "ConvertUTF16toUTF8");
  }
--- a/xpcom/string/nsReadableUtilsImpl.h
+++ b/xpcom/string/nsReadableUtilsImpl.h
@@ -0,0 +1,54 @@
 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 #ifndef nsReadableUtilsImpl_h
 #define nsReadableUtilsImpl_h
 #include <stddef.h>
 #include <stdint.h>
 namespace mozilla {
 /**
 * Returns true if |aChar| is in the ASCII range, i.e. none of the bits in
 * 0xFF80 are set (the value is <= 0x7F).
 */
 inline bool IsASCII(char16_t aChar) {
  return (aChar & 0xFF80) == 0;
 }
 /**
 * Provides a pointer before or equal to |aPtr| that is suitably aligned.
 *
 * |aMask| holds the low address bits that must be clear in the result (for
 * example 0x7 for 8-byte alignment); the result is |aPtr| with those bits
 * cleared.
 */
 inline const char16_t* aligned(const char16_t* aPtr, const uintptr_t aMask)
 {
  return reinterpret_cast<const char16_t*>(
      reinterpret_cast<uintptr_t>(aPtr) & ~aMask);
 }
 /**
 * Structures for word-sized vectorization of ASCII checking for UTF-16
 * strings. The template argument is the word size in bytes.
 *
 *  - mask(): a word with 0xff80 repeated for every 16-bit lane; a non-zero
 *    AND with a loaded word means at least one lane is non-ASCII.
 *  - alignMask(): the low address bits that must be zero for a word-aligned
 *    pointer.
 *  - numUnicharsPerWord(): how many char16_t values fit in one word.
 */
 template<size_t size> struct NonASCIIParameters;
 template<> struct NonASCIIParameters<4> {
  static inline size_t mask() { return 0xff80ff80; }
  static inline uintptr_t alignMask() { return 0x3; }
  static inline size_t numUnicharsPerWord() { return 2; }
 };
 template<> struct NonASCIIParameters<8> {
  static inline size_t mask() {
    const uint64_t maskAsUint64 = UINT64_C(0xff80ff80ff80ff80);
    // We have to explicitly cast this 64-bit value to a size_t, or else
    // compilers for 32-bit platforms will warn about it being too large to fit
    // in the size_t return type. (Fortunately, this code isn't actually
    // invoked on 32-bit platforms -- they'll use the <4> specialization above.
    // So it is, in fact, OK that this value is too large for a 32-bit size_t.)
    return static_cast<size_t>(maskAsUint64);
  }
  static inline uintptr_t alignMask() { return 0x7; }
  static inline size_t numUnicharsPerWord() { return 4; }
 };
 namespace SSE2 {
 /**
 * SSE2 implementation of the non-ASCII scan over [aBegin, aEnd); defined in
 * nsReadableUtilsSSE2.cpp.
 */
 int32_t FirstNonASCII(const char16_t* aBegin, const char16_t* aEnd);
 } // namespace SSE2
 } // namespace mozilla
 #endif // nsReadableUtilsImpl_h
--- a/xpcom/string/nsReadableUtilsSSE2.cpp
+++ b/xpcom/string/nsReadableUtilsSSE2.cpp
@@ -0,0 +1,70 @@
 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
 /* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 #include <emmintrin.h>
 #include "nsReadableUtilsImpl.h"
 namespace mozilla {
 namespace SSE2 {
 /**
 * Returns true if every byte of |x| is zero.
 */
 static inline bool
 is_zero (__m128i x)
 {
  // Compare each byte against zero, then gather the 16 per-byte results into
  // a bitmask; all 16 bits set means all 16 bytes were zero.
  return
    _mm_movemask_epi8(_mm_cmpeq_epi8(x, _mm_setzero_si128())) == 0xffff;
 }
 /**
 * SSE2 scan for non-ASCII code units in the UTF-16 range [aBegin, aEnd).
 *
 * Returns -1 when every code unit is ASCII. Otherwise returns an index less
 * than or equal to the index of the first non-ASCII code unit: a hit in the
 * vector or word loop reports the start of that vector/word rather than the
 * exact position. Every code unit before the returned index is ASCII.
 */
 int32_t
 FirstNonASCII(const char16_t* aBegin, const char16_t* aEnd)
 {
  const size_t kNumUnicharsPerVector = sizeof(__m128i) / sizeof(char16_t);
  typedef NonASCIIParameters<sizeof(size_t)> p;
  const size_t kMask = p::mask();
  const uintptr_t kXmmAlignMask = 0xf;
  const uint16_t kShortMask = 0xff80;
  const size_t kNumUnicharsPerWord = p::numUnicharsPerWord();
  const char16_t* idx = aBegin;
  // Align ourselves to a 16-byte boundary as required by _mm_load_si128
  for (; idx != aEnd && ((uintptr_t(idx) & kXmmAlignMask) != 0); idx++) {
    if (!IsASCII(*idx)) {
      return idx - aBegin;
    }
  }
  // Check one XMM register (16 bytes) at a time. |vectWalkEnd| is |aEnd|
  // rounded down to a 16-byte boundary. If the range ended before a 16-byte
  // boundary was reached, |idx| (== aEnd) is already past |vectWalkEnd|, so
  // the loop condition must be |<| rather than |!=| to avoid reading past the
  // end of the buffer.
  const char16_t* vectWalkEnd = aligned(aEnd, kXmmAlignMask);
  const __m128i vectmask = _mm_set1_epi16(static_cast<int16_t>(kShortMask));
  for (; idx < vectWalkEnd; idx += kNumUnicharsPerVector) {
    const __m128i vect = _mm_load_si128(reinterpret_cast<const __m128i*>(idx));
    if (!is_zero(_mm_and_si128(vect, vectmask))) {
      return idx - aBegin;
    }
  }
  // Check one word at a time. Uses |<| for the same reason as above.
  const char16_t* wordWalkEnd = aligned(aEnd, p::alignMask());
  for (; idx < wordWalkEnd; idx += kNumUnicharsPerWord) {
    const size_t word = *reinterpret_cast<const size_t*>(idx);
    if (word & kMask) {
      return idx - aBegin;
    }
  }
  // Take care of the remainder one character at a time.
  for (; idx != aEnd; idx++) {
    if (!IsASCII(*idx)) {
      return idx - aBegin;
    }
  }
  return -1;
 }
 } // namespace SSE2
 } // namespace mozilla
--- a/xpcom/tests/gtest/TestUTF.cpp
+++ b/xpcom/tests/gtest/TestUTF.cpp
@@ -122,4 +122,70 @@ TEST(UTF, Hash16)
 #endif
 }
 /**
 * This tests the handling of a non-ascii character at various locations in a
 * UTF-16 string that is being converted to UTF-8.
 */
 void NonASCII16_helper(const size_t aStrSize)
 {
  const size_t kTestSize = aStrSize;
  const size_t kMaxASCII = 0x80;
  const char16_t kUTF16Char = 0xC9;
  const char kUTF8Surrogates[] = { char(0xC3), char(0x89) };
  // Generate a string containing only ASCII characters.
  nsString asciiString;
  asciiString.SetLength(kTestSize);
  nsCString asciiCString;
  asciiCString.SetLength(kTestSize);
  auto str_buff = asciiString.BeginWriting();
  auto cstr_buff = asciiCString.BeginWriting();
  for (size_t i = 0; i < kTestSize; i++) {
    str_buff[i] = i % kMaxASCII;
    cstr_buff[i] = i % kMaxASCII;
  }
  // Now go through and test conversion when exactly one character will
  // result in a multibyte sequence.
  for (size_t i = 0; i < kTestSize; i++) {
    // Setup the UTF-16 string.
    nsString unicodeString(asciiString);
    auto buff = unicodeString.BeginWriting();
    buff[i] = kUTF16Char;
    // Do the conversion, make sure the length increased by 1.
    nsCString dest;
    AppendUTF16toUTF8(unicodeString, dest);
    EXPECT_EQ(dest.Length(), unicodeString.Length() + 1);
    // Build up the expected UTF-8 string.
    nsCString expected;
    // First add the leading ASCII chars.
    expected.Append(asciiCString.BeginReading(), i);
    // Now append the UTF-8 surrogate pair we expect the UTF-16 unicode char to
    // be converted to.
    for (auto& c : kUTF8Surrogates) {
      expected.Append(c);
    }
    // And finish with the trailing ASCII chars.
    expected.Append(asciiCString.BeginReading() + i + 1, kTestSize - i - 1);
    EXPECT_STREQ(dest.BeginReading(), expected.BeginReading());
  }
 }
 TEST(UTF, NonASCII16)
 {
  // Run the helper over a range of string lengths so that any length-based
  // special casing in the conversion is exercised.
  const size_t kStringSizes[] = { 1, 8, 16, 32, 512 };
  for (const size_t stringSize : kStringSizes) {
    NonASCII16_helper(stringSize);
  }
 }
 } // namespace TestUTF