Bug 1301742 - Improve UTF-16 to UTF-8 conversion speed. r=froydnj
This adds an SSE2 optimized path for UTF-16 strings that can be trivially converted to UTF-8 (they contain no characters > 0X7F). It has a speedup for the case where many of the leading characters are < 0X80 and should have the same performance in the worst-case scenario where the first character is > 0X7F.
This commit is contained in:
@@ -51,9 +51,11 @@ UNIFIED_SOURCES += [
|
|||||||
]
|
]
|
||||||
|
|
||||||
# Are we targeting x86 or x86-64? If so, compile the SSE2 functions for
|
# Are we targeting x86 or x86-64? If so, compile the SSE2 functions for
|
||||||
# nsUTF8Utils.cpp.
|
# nsUTF8Utils.cpp and nsReadableUtils.cpp.
|
||||||
if CONFIG['INTEL_ARCHITECTURE']:
|
if CONFIG['INTEL_ARCHITECTURE']:
|
||||||
SOURCES += ['nsUTF8UtilsSSE2.cpp']
|
SOURCES += ['nsUTF8UtilsSSE2.cpp']
|
||||||
SOURCES['nsUTF8UtilsSSE2.cpp'].flags += CONFIG['SSE2_FLAGS']
|
SOURCES['nsUTF8UtilsSSE2.cpp'].flags += CONFIG['SSE2_FLAGS']
|
||||||
|
SOURCES += ['nsReadableUtilsSSE2.cpp']
|
||||||
|
SOURCES['nsReadableUtilsSSE2.cpp'].flags += CONFIG['SSE2_FLAGS']
|
||||||
|
|
||||||
FINAL_LIBRARY = 'xul'
|
FINAL_LIBRARY = 'xul'
|
||||||
|
|||||||
@@ -5,14 +5,79 @@
|
|||||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||||
|
|
||||||
#include "nsReadableUtils.h"
|
#include "nsReadableUtils.h"
|
||||||
|
#include "nsReadableUtilsImpl.h"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
#include "mozilla/CheckedInt.h"
|
#include "mozilla/CheckedInt.h"
|
||||||
|
|
||||||
|
#include "nscore.h"
|
||||||
#include "nsMemory.h"
|
#include "nsMemory.h"
|
||||||
#include "nsString.h"
|
#include "nsString.h"
|
||||||
#include "nsTArray.h"
|
#include "nsTArray.h"
|
||||||
#include "nsUTF8Utils.h"
|
#include "nsUTF8Utils.h"
|
||||||
|
|
||||||
|
using mozilla::IsASCII;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fallback implementation for finding the first non-ASCII character in a
|
||||||
|
* UTF-16 string.
|
||||||
|
*/
|
||||||
|
static inline int32_t
|
||||||
|
FirstNonASCIIUnvectorized(const char16_t* aBegin, const char16_t* aEnd)
|
||||||
|
{
|
||||||
|
typedef mozilla::NonASCIIParameters<sizeof(size_t)> p;
|
||||||
|
const size_t kMask = p::mask();
|
||||||
|
const uintptr_t kAlignMask = p::alignMask();
|
||||||
|
const size_t kNumUnicharsPerWord = p::numUnicharsPerWord();
|
||||||
|
|
||||||
|
const char16_t* idx = aBegin;
|
||||||
|
|
||||||
|
// Align ourselves to a word boundary.
|
||||||
|
for (; idx != aEnd && ((uintptr_t(idx) & kAlignMask) != 0); idx++) {
|
||||||
|
if (!IsASCII(*idx)) {
|
||||||
|
return idx - aBegin;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check one word at a time.
|
||||||
|
const char16_t* wordWalkEnd = mozilla::aligned(aEnd, kAlignMask);
|
||||||
|
for (; idx != wordWalkEnd; idx += kNumUnicharsPerWord) {
|
||||||
|
const size_t word = *reinterpret_cast<const size_t*>(idx);
|
||||||
|
if (word & kMask) {
|
||||||
|
return idx - aBegin;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Take care of the remainder one character at a time.
|
||||||
|
for (; idx != aEnd; idx++) {
|
||||||
|
if (!IsASCII(*idx)) {
|
||||||
|
return idx - aBegin;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This function returns -1 if all characters in str are ASCII characters.
|
||||||
|
* Otherwise, it returns a value less than or equal to the index of the first
|
||||||
|
 * non-ASCII character in str. For example, if the first non-ASCII character is at
|
||||||
|
* position 25, it may return 25, 24, or 16. But it guarantees
|
||||||
|
* there are only ASCII characters before returned value.
|
||||||
|
*/
|
||||||
|
static inline int32_t
|
||||||
|
FirstNonASCII(const char16_t* aBegin, const char16_t* aEnd)
|
||||||
|
{
|
||||||
|
#ifdef MOZILLA_MAY_SUPPORT_SSE2
|
||||||
|
if (mozilla::supports_sse2()) {
|
||||||
|
return mozilla::SSE2::FirstNonASCII(aBegin, aEnd);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return FirstNonASCIIUnvectorized(aBegin, aEnd);
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
LossyCopyUTF16toASCII(const nsAString& aSource, nsACString& aDest)
|
LossyCopyUTF16toASCII(const nsAString& aSource, nsACString& aDest)
|
||||||
{
|
{
|
||||||
@@ -180,16 +245,46 @@ bool
|
|||||||
AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest,
|
AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest,
|
||||||
const mozilla::fallible_t& aFallible)
|
const mozilla::fallible_t& aFallible)
|
||||||
{
|
{
|
||||||
|
// At 16 characters analysis showed better performance of both the all ASCII
|
||||||
|
// and non-ASCII cases, so we limit calling |FirstNonASCII| to strings of
|
||||||
|
// at least that length.
|
||||||
|
const nsAString::size_type kFastPathMinLength = 16;
|
||||||
|
|
||||||
|
int32_t firstNonASCII = 0;
|
||||||
|
if (aSource.Length() >= kFastPathMinLength) {
|
||||||
|
firstNonASCII = FirstNonASCII(aSource.BeginReading(), aSource.EndReading());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (firstNonASCII == -1) {
|
||||||
|
// This is all ASCII, we can use the more efficient lossy append.
|
||||||
|
mozilla::CheckedInt<nsACString::size_type> new_length(aSource.Length());
|
||||||
|
new_length += aDest.Length();
|
||||||
|
|
||||||
|
if (!new_length.isValid() ||
|
||||||
|
!aDest.SetCapacity(new_length.value(), aFallible)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
LossyAppendUTF16toASCII(aSource, aDest);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
nsAString::const_iterator source_start, source_end;
|
nsAString::const_iterator source_start, source_end;
|
||||||
CalculateUTF8Size calculator;
|
CalculateUTF8Size calculator;
|
||||||
copy_string(aSource.BeginReading(source_start),
|
aSource.BeginReading(source_start);
|
||||||
aSource.EndReading(source_end), calculator);
|
aSource.EndReading(source_end);
|
||||||
|
|
||||||
size_t count = calculator.Size();
|
// Skip the characters that we know are single byte.
|
||||||
|
source_start.advance(firstNonASCII);
|
||||||
|
|
||||||
|
copy_string(source_start,
|
||||||
|
source_end, calculator);
|
||||||
|
|
||||||
|
// Include the ASCII characters that were skipped in the count.
|
||||||
|
size_t count = calculator.Size() + firstNonASCII;
|
||||||
|
|
||||||
if (count) {
|
if (count) {
|
||||||
auto old_dest_length = aDest.Length();
|
auto old_dest_length = aDest.Length();
|
||||||
|
|
||||||
// Grow the buffer if we need to.
|
// Grow the buffer if we need to.
|
||||||
mozilla::CheckedInt<nsACString::size_type> new_length(count);
|
mozilla::CheckedInt<nsACString::size_type> new_length(count);
|
||||||
new_length += old_dest_length;
|
new_length += old_dest_length;
|
||||||
@@ -201,11 +296,30 @@ AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest,
|
|||||||
|
|
||||||
// All ready? Time to convert
|
// All ready? Time to convert
|
||||||
|
|
||||||
ConvertUTF16toUTF8 converter(aDest.BeginWriting() + old_dest_length);
|
nsAString::const_iterator ascii_end;
|
||||||
copy_string(aSource.BeginReading(source_start),
|
aSource.BeginReading(ascii_end);
|
||||||
|
|
||||||
|
if (firstNonASCII >= static_cast<int32_t>(kFastPathMinLength)) {
|
||||||
|
// Use the more efficient lossy converter for the ASCII portion.
|
||||||
|
LossyConvertEncoding16to8 lossy_converter(
|
||||||
|
aDest.BeginWriting() + old_dest_length);
|
||||||
|
nsAString::const_iterator ascii_start;
|
||||||
|
aSource.BeginReading(ascii_start);
|
||||||
|
ascii_end.advance(firstNonASCII);
|
||||||
|
|
||||||
|
copy_string(ascii_start, ascii_end, lossy_converter);
|
||||||
|
} else {
|
||||||
|
// Not using the lossy shortcut, we need to include the leading ASCII
|
||||||
|
// chars.
|
||||||
|
firstNonASCII = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
ConvertUTF16toUTF8 converter(
|
||||||
|
aDest.BeginWriting() + old_dest_length + firstNonASCII);
|
||||||
|
copy_string(ascii_end,
|
||||||
aSource.EndReading(source_end), converter);
|
aSource.EndReading(source_end), converter);
|
||||||
|
|
||||||
NS_ASSERTION(converter.Size() == count,
|
NS_ASSERTION(converter.Size() == count - firstNonASCII,
|
||||||
"Unexpected disparity between CalculateUTF8Size and "
|
"Unexpected disparity between CalculateUTF8Size and "
|
||||||
"ConvertUTF16toUTF8");
|
"ConvertUTF16toUTF8");
|
||||||
}
|
}
|
||||||
|
|||||||
54
xpcom/string/nsReadableUtilsImpl.h
Normal file
54
xpcom/string/nsReadableUtilsImpl.h
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||||
|
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
|
||||||
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
namespace mozilla {
|
||||||
|
|
||||||
|
// Returns true when |aChar| is in the ASCII range, i.e. none of its bits
// above the low seven are set.
inline bool IsASCII(char16_t aChar)
{
  return aChar < 0x80;
}
|
||||||
|
|
||||||
|
/**
 * Rounds |aPtr| down to an alignment boundary.
 *
 * The address bits selected by |aMask| are cleared, so the result is always
 * less than or equal to |aPtr|. The masks used with this helper are the
 * alignment minus one (0x3, 0x7 and 0xf).
 */
inline const char16_t* aligned(const char16_t* aPtr, const uintptr_t aMask)
{
  const uintptr_t address = reinterpret_cast<uintptr_t>(aPtr);
  const uintptr_t roundedDown = address & ~aMask;
  return reinterpret_cast<const char16_t*>(roundedDown);
}
|
||||||
|
|
||||||
|
/**
 * Per-word-size constants for checking a UTF-16 string for non-ASCII code
 * units one machine word at a time. The template argument is the word size
 * in bytes; only the 4 and 8 byte cases are specialized.
 *
 *  - mask():               the 0xff80 pattern repeated once per code unit
 *                          of a word.
 *  - alignMask():          the low address bits that are clear in a
 *                          word-aligned pointer.
 *  - numUnicharsPerWord(): how many char16_t code units fit in one word.
 */
template<size_t size> struct NonASCIIParameters;

// 32-bit words: two code units per word.
template<> struct NonASCIIParameters<4> {
  static inline size_t mask()
  {
    return 0xff80ff80;
  }

  static inline uintptr_t alignMask()
  {
    return 0x3;
  }

  static inline size_t numUnicharsPerWord()
  {
    return 2;
  }
};

// 64-bit words: four code units per word.
template<> struct NonASCIIParameters<8> {
  static inline size_t mask()
  {
    // The explicit conversion to size_t keeps compilers for 32-bit platforms
    // from warning that the 64-bit constant does not fit in the return type.
    // Those platforms never actually use this specialization (they pick the
    // <4> one above), so the truncation there is harmless.
    return static_cast<size_t>(UINT64_C(0xff80ff80ff80ff80));
  }

  static inline uintptr_t alignMask()
  {
    return 0x7;
  }

  static inline size_t numUnicharsPerWord()
  {
    return 4;
  }
};
|
||||||
|
|
||||||
|
namespace SSE2 {
|
||||||
|
|
||||||
|
// SSE2 scan of the UTF-16 range [aBegin, aEnd) for non-ASCII code units;
// the definition lives in nsReadableUtilsSSE2.cpp. Returns -1 when none is
// found, otherwise an index that is less than or equal to the index of the
// first non-ASCII code unit.
int32_t FirstNonASCII(const char16_t* aBegin, const char16_t* aEnd);
|
||||||
|
|
||||||
|
} // namespace SSE2
|
||||||
|
} // namespace mozilla
|
||||||
70
xpcom/string/nsReadableUtilsSSE2.cpp
Normal file
70
xpcom/string/nsReadableUtilsSSE2.cpp
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||||
|
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
|
||||||
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||||
|
|
||||||
|
#include <emmintrin.h>
|
||||||
|
|
||||||
|
#include "nsReadableUtilsImpl.h"
|
||||||
|
|
||||||
|
namespace mozilla {
|
||||||
|
namespace SSE2 {
|
||||||
|
|
||||||
|
// Returns true when all 128 bits of |x| are zero.
static inline bool
is_zero (__m128i x)
{
  // Bytes of |x| that equal zero become 0xff in |zeroBytes|.
  const __m128i zeroBytes = _mm_cmpeq_epi8(x, _mm_setzero_si128());

  // Collect the top bit of each of the 16 bytes; all sixteen bits are set
  // only when every byte of |x| compared equal to zero.
  const int byteFlags = _mm_movemask_epi8(zeroBytes);
  return byteFlags == 0xffff;
}
|
||||||
|
|
||||||
|
int32_t
|
||||||
|
FirstNonASCII(const char16_t* aBegin, const char16_t* aEnd)
|
||||||
|
{
|
||||||
|
const size_t kNumUnicharsPerVector = sizeof(__m128i) / sizeof(char16_t);
|
||||||
|
typedef NonASCIIParameters<sizeof(size_t)> p;
|
||||||
|
const size_t kMask = p::mask();
|
||||||
|
const uintptr_t kXmmAlignMask = 0xf;
|
||||||
|
const uint16_t kShortMask = 0xff80;
|
||||||
|
const size_t kNumUnicharsPerWord = p::numUnicharsPerWord();
|
||||||
|
|
||||||
|
const char16_t* idx = aBegin;
|
||||||
|
|
||||||
|
// Align ourselves to a 16-byte boundary as required by _mm_load_si128
|
||||||
|
for (; idx != aEnd && ((uintptr_t(idx) & kXmmAlignMask) != 0); idx++) {
|
||||||
|
if (!IsASCII(*idx)) {
|
||||||
|
return idx - aBegin;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check one XMM register (16 bytes) at a time.
|
||||||
|
const char16_t* vectWalkEnd = aligned(aEnd, kXmmAlignMask);
|
||||||
|
__m128i vectmask = _mm_set1_epi16(static_cast<int16_t>(kShortMask));
|
||||||
|
for (; idx != vectWalkEnd; idx += kNumUnicharsPerVector) {
|
||||||
|
const __m128i vect = *reinterpret_cast<const __m128i*>(idx);
|
||||||
|
if (!is_zero(_mm_and_si128(vect, vectmask))) {
|
||||||
|
return idx - aBegin;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check one word at a time.
|
||||||
|
const char16_t* wordWalkEnd = aligned(aEnd, p::alignMask());
|
||||||
|
for(; idx != wordWalkEnd; idx += kNumUnicharsPerWord) {
|
||||||
|
const size_t word = *reinterpret_cast<const size_t*>(idx);
|
||||||
|
if (word & kMask) {
|
||||||
|
return idx - aBegin;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Take care of the remainder one character at a time.
|
||||||
|
for (; idx != aEnd; idx++) {
|
||||||
|
if (!IsASCII(*idx)) {
|
||||||
|
return idx - aBegin;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace SSE2
|
||||||
|
} // namespace mozilla
|
||||||
@@ -122,4 +122,70 @@ TEST(UTF, Hash16)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This tests the handling of a non-ascii character at various locations in a
|
||||||
|
* UTF-16 string that is being converted to UTF-8.
|
||||||
|
*/
|
||||||
|
void NonASCII16_helper(const size_t aStrSize)
|
||||||
|
{
|
||||||
|
const size_t kTestSize = aStrSize;
|
||||||
|
const size_t kMaxASCII = 0x80;
|
||||||
|
const char16_t kUTF16Char = 0xC9;
|
||||||
|
const char kUTF8Surrogates[] = { char(0xC3), char(0x89) };
|
||||||
|
|
||||||
|
// Generate a string containing only ASCII characters.
|
||||||
|
nsString asciiString;
|
||||||
|
asciiString.SetLength(kTestSize);
|
||||||
|
nsCString asciiCString;
|
||||||
|
asciiCString.SetLength(kTestSize);
|
||||||
|
|
||||||
|
auto str_buff = asciiString.BeginWriting();
|
||||||
|
auto cstr_buff = asciiCString.BeginWriting();
|
||||||
|
for (size_t i = 0; i < kTestSize; i++) {
|
||||||
|
str_buff[i] = i % kMaxASCII;
|
||||||
|
cstr_buff[i] = i % kMaxASCII;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now go through and test conversion when exactly one character will
|
||||||
|
// result in a multibyte sequence.
|
||||||
|
for (size_t i = 0; i < kTestSize; i++) {
|
||||||
|
// Setup the UTF-16 string.
|
||||||
|
nsString unicodeString(asciiString);
|
||||||
|
auto buff = unicodeString.BeginWriting();
|
||||||
|
buff[i] = kUTF16Char;
|
||||||
|
|
||||||
|
// Do the conversion, make sure the length increased by 1.
|
||||||
|
nsCString dest;
|
||||||
|
AppendUTF16toUTF8(unicodeString, dest);
|
||||||
|
EXPECT_EQ(dest.Length(), unicodeString.Length() + 1);
|
||||||
|
|
||||||
|
// Build up the expected UTF-8 string.
|
||||||
|
nsCString expected;
|
||||||
|
|
||||||
|
// First add the leading ASCII chars.
|
||||||
|
expected.Append(asciiCString.BeginReading(), i);
|
||||||
|
|
||||||
|
// Now append the UTF-8 surrogate pair we expect the UTF-16 unicode char to
|
||||||
|
// be converted to.
|
||||||
|
for (auto& c : kUTF8Surrogates) {
|
||||||
|
expected.Append(c);
|
||||||
|
}
|
||||||
|
|
||||||
|
// And finish with the trailing ASCII chars.
|
||||||
|
expected.Append(asciiCString.BeginReading() + i + 1, kTestSize - i - 1);
|
||||||
|
|
||||||
|
EXPECT_STREQ(dest.BeginReading(), expected.BeginReading());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST(UTF, NonASCII16)
{
  // Run the helper over a spread of string sizes so that any special casing
  // based on length gets exercised.
  const size_t kStringSizes[] = { 1, 8, 16, 32, 512 };
  for (const size_t stringSize : kStringSizes) {
    NonASCII16_helper(stringSize);
  }
}
|
||||||
|
|
||||||
} // namespace TestUTF
|
} // namespace TestUTF
|
||||||
|
|||||||
Reference in New Issue
Block a user