Bug 1301742 - Improve UTF-16 to UTF-8 conversion speed. r=froydnj
This adds an SSE2 optimized path for UTF-16 strings that can be trivially converted to UTF-8 (they contain no characters > 0X7F). It has a speedup for the case where many of the leading characters are < 0X80 and should have the same performance in the worst-case scenario where the first character is > 0X7F.
This commit is contained in:
@@ -5,14 +5,79 @@
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include "nsReadableUtils.h"
|
||||
#include "nsReadableUtilsImpl.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "mozilla/CheckedInt.h"
|
||||
|
||||
#include "nscore.h"
|
||||
#include "nsMemory.h"
|
||||
#include "nsString.h"
|
||||
#include "nsTArray.h"
|
||||
#include "nsUTF8Utils.h"
|
||||
|
||||
using mozilla::IsASCII;
|
||||
|
||||
/**
|
||||
* Fallback implementation for finding the first non-ASCII character in a
|
||||
* UTF-16 string.
|
||||
*/
|
||||
static inline int32_t
|
||||
FirstNonASCIIUnvectorized(const char16_t* aBegin, const char16_t* aEnd)
|
||||
{
|
||||
typedef mozilla::NonASCIIParameters<sizeof(size_t)> p;
|
||||
const size_t kMask = p::mask();
|
||||
const uintptr_t kAlignMask = p::alignMask();
|
||||
const size_t kNumUnicharsPerWord = p::numUnicharsPerWord();
|
||||
|
||||
const char16_t* idx = aBegin;
|
||||
|
||||
// Align ourselves to a word boundary.
|
||||
for (; idx != aEnd && ((uintptr_t(idx) & kAlignMask) != 0); idx++) {
|
||||
if (!IsASCII(*idx)) {
|
||||
return idx - aBegin;
|
||||
}
|
||||
}
|
||||
|
||||
// Check one word at a time.
|
||||
const char16_t* wordWalkEnd = mozilla::aligned(aEnd, kAlignMask);
|
||||
for (; idx != wordWalkEnd; idx += kNumUnicharsPerWord) {
|
||||
const size_t word = *reinterpret_cast<const size_t*>(idx);
|
||||
if (word & kMask) {
|
||||
return idx - aBegin;
|
||||
}
|
||||
}
|
||||
|
||||
// Take care of the remainder one character at a time.
|
||||
for (; idx != aEnd; idx++) {
|
||||
if (!IsASCII(*idx)) {
|
||||
return idx - aBegin;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function returns -1 if all characters in str are ASCII characters.
|
||||
* Otherwise, it returns a value less than or equal to the index of the first
|
||||
* ASCII character in str. For example, if first non-ASCII character is at
|
||||
* position 25, it may return 25, 24, or 16. But it guarantees
|
||||
* there are only ASCII characters before returned value.
|
||||
*/
|
||||
static inline int32_t
|
||||
FirstNonASCII(const char16_t* aBegin, const char16_t* aEnd)
|
||||
{
|
||||
#ifdef MOZILLA_MAY_SUPPORT_SSE2
|
||||
if (mozilla::supports_sse2()) {
|
||||
return mozilla::SSE2::FirstNonASCII(aBegin, aEnd);
|
||||
}
|
||||
#endif
|
||||
|
||||
return FirstNonASCIIUnvectorized(aBegin, aEnd);
|
||||
}
|
||||
|
||||
void
|
||||
LossyCopyUTF16toASCII(const nsAString& aSource, nsACString& aDest)
|
||||
{
|
||||
@@ -180,16 +245,46 @@ bool
|
||||
AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest,
|
||||
const mozilla::fallible_t& aFallible)
|
||||
{
|
||||
// At 16 characters analysis showed better performance of both the all ASCII
|
||||
// and non-ASCII cases, so we limit calling |FirstNonASCII| to strings of
|
||||
// that length.
|
||||
const nsAString::size_type kFastPathMinLength = 16;
|
||||
|
||||
int32_t firstNonASCII = 0;
|
||||
if (aSource.Length() >= kFastPathMinLength) {
|
||||
firstNonASCII = FirstNonASCII(aSource.BeginReading(), aSource.EndReading());
|
||||
}
|
||||
|
||||
if (firstNonASCII == -1) {
|
||||
// This is all ASCII, we can use the more efficient lossy append.
|
||||
mozilla::CheckedInt<nsACString::size_type> new_length(aSource.Length());
|
||||
new_length += aDest.Length();
|
||||
|
||||
if (!new_length.isValid() ||
|
||||
!aDest.SetCapacity(new_length.value(), aFallible)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
LossyAppendUTF16toASCII(aSource, aDest);
|
||||
return true;
|
||||
}
|
||||
|
||||
nsAString::const_iterator source_start, source_end;
|
||||
CalculateUTF8Size calculator;
|
||||
copy_string(aSource.BeginReading(source_start),
|
||||
aSource.EndReading(source_end), calculator);
|
||||
aSource.BeginReading(source_start);
|
||||
aSource.EndReading(source_end);
|
||||
|
||||
size_t count = calculator.Size();
|
||||
// Skip the characters that we know are single byte.
|
||||
source_start.advance(firstNonASCII);
|
||||
|
||||
copy_string(source_start,
|
||||
source_end, calculator);
|
||||
|
||||
// Include the ASCII characters that were skipped in the count.
|
||||
size_t count = calculator.Size() + firstNonASCII;
|
||||
|
||||
if (count) {
|
||||
auto old_dest_length = aDest.Length();
|
||||
|
||||
// Grow the buffer if we need to.
|
||||
mozilla::CheckedInt<nsACString::size_type> new_length(count);
|
||||
new_length += old_dest_length;
|
||||
@@ -201,11 +296,30 @@ AppendUTF16toUTF8(const nsAString& aSource, nsACString& aDest,
|
||||
|
||||
// All ready? Time to convert
|
||||
|
||||
ConvertUTF16toUTF8 converter(aDest.BeginWriting() + old_dest_length);
|
||||
copy_string(aSource.BeginReading(source_start),
|
||||
nsAString::const_iterator ascii_end;
|
||||
aSource.BeginReading(ascii_end);
|
||||
|
||||
if (firstNonASCII >= static_cast<int32_t>(kFastPathMinLength)) {
|
||||
// Use the more efficient lossy converter for the ASCII portion.
|
||||
LossyConvertEncoding16to8 lossy_converter(
|
||||
aDest.BeginWriting() + old_dest_length);
|
||||
nsAString::const_iterator ascii_start;
|
||||
aSource.BeginReading(ascii_start);
|
||||
ascii_end.advance(firstNonASCII);
|
||||
|
||||
copy_string(ascii_start, ascii_end, lossy_converter);
|
||||
} else {
|
||||
// Not using the lossy shortcut, we need to include the leading ASCII
|
||||
// chars.
|
||||
firstNonASCII = 0;
|
||||
}
|
||||
|
||||
ConvertUTF16toUTF8 converter(
|
||||
aDest.BeginWriting() + old_dest_length + firstNonASCII);
|
||||
copy_string(ascii_end,
|
||||
aSource.EndReading(source_end), converter);
|
||||
|
||||
NS_ASSERTION(converter.Size() == count,
|
||||
NS_ASSERTION(converter.Size() == count - firstNonASCII,
|
||||
"Unexpected disparity between CalculateUTF8Size and "
|
||||
"ConvertUTF16toUTF8");
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user