Bug 1301742 - Improve UTF-16 to UTF-8 conversion speed. r=froydnj
This adds an SSE2 optimized path for UTF-16 strings that can be trivially converted to UTF-8 (they contain no characters > 0X7F). It has a speedup for the case where many of the leading characters are < 0X80 and should have the same performance in the worst-case scenario where the first character is > 0X7F.
This commit is contained in:
70
xpcom/string/nsReadableUtilsSSE2.cpp
Normal file
70
xpcom/string/nsReadableUtilsSSE2.cpp
Normal file
@@ -0,0 +1,70 @@
|
||||
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
#include <emmintrin.h>
|
||||
|
||||
#include "nsReadableUtilsImpl.h"
|
||||
|
||||
namespace mozilla {
|
||||
namespace SSE2 {
|
||||
|
||||
static inline bool
|
||||
is_zero (__m128i x)
|
||||
{
|
||||
return
|
||||
_mm_movemask_epi8(_mm_cmpeq_epi8(x, _mm_setzero_si128())) == 0xffff;
|
||||
}
|
||||
|
||||
int32_t
|
||||
FirstNonASCII(const char16_t* aBegin, const char16_t* aEnd)
|
||||
{
|
||||
const size_t kNumUnicharsPerVector = sizeof(__m128i) / sizeof(char16_t);
|
||||
typedef NonASCIIParameters<sizeof(size_t)> p;
|
||||
const size_t kMask = p::mask();
|
||||
const uintptr_t kXmmAlignMask = 0xf;
|
||||
const uint16_t kShortMask = 0xff80;
|
||||
const size_t kNumUnicharsPerWord = p::numUnicharsPerWord();
|
||||
|
||||
const char16_t* idx = aBegin;
|
||||
|
||||
// Align ourselves to a 16-byte boundary as required by _mm_load_si128
|
||||
for (; idx != aEnd && ((uintptr_t(idx) & kXmmAlignMask) != 0); idx++) {
|
||||
if (!IsASCII(*idx)) {
|
||||
return idx - aBegin;
|
||||
}
|
||||
}
|
||||
|
||||
// Check one XMM register (16 bytes) at a time.
|
||||
const char16_t* vectWalkEnd = aligned(aEnd, kXmmAlignMask);
|
||||
__m128i vectmask = _mm_set1_epi16(static_cast<int16_t>(kShortMask));
|
||||
for (; idx != vectWalkEnd; idx += kNumUnicharsPerVector) {
|
||||
const __m128i vect = *reinterpret_cast<const __m128i*>(idx);
|
||||
if (!is_zero(_mm_and_si128(vect, vectmask))) {
|
||||
return idx - aBegin;
|
||||
}
|
||||
}
|
||||
|
||||
// Check one word at a time.
|
||||
const char16_t* wordWalkEnd = aligned(aEnd, p::alignMask());
|
||||
for(; idx != wordWalkEnd; idx += kNumUnicharsPerWord) {
|
||||
const size_t word = *reinterpret_cast<const size_t*>(idx);
|
||||
if (word & kMask) {
|
||||
return idx - aBegin;
|
||||
}
|
||||
}
|
||||
|
||||
// Take care of the remainder one character at a time.
|
||||
for (; idx != aEnd; idx++) {
|
||||
if (!IsASCII(*idx)) {
|
||||
return idx - aBegin;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
} // namespace SSE2
|
||||
} // namespace mozilla
|
||||
Reference in New Issue
Block a user