Bug 1261841 part 2 - Use encoding_rs instead of uconv. r=emk,mystor.

MozReview-Commit-ID: 15Y5GTX98bv
2017-04-27 13:27:03 +03:00
parent 12e1264b06
commit 7af0452b6b
178 changed files with 3560 additions and 4174 deletions
--- a/dom/script/ScriptLoadHandler.cpp
+++ b/dom/script/ScriptLoadHandler.cpp
@@ -10,7 +10,6 @@

 #include "nsContentUtils.h"

-#include "mozilla/dom/EncodingUtils.h"
 #include "mozilla/Telemetry.h"

 namespace mozilla {
@@ -100,31 +99,34 @@ ScriptLoadHandler::DecodeRawData(const uint8_t* aData,
                                 uint32_t aDataLength,
                                 bool aEndOfStream)
 {
-  int32_t srcLen = aDataLength;
-  const char* src = reinterpret_cast<const char*>(aData);
-  int32_t dstLen;
-  nsresult rv =
-    mDecoder->GetMaxLength(src, srcLen, &dstLen);
-
-  NS_ENSURE_SUCCESS(rv, rv);
+  CheckedInt<size_t> needed = mDecoder->MaxUTF16BufferLength(aDataLength);
+  if (!needed.isValid()) {
+    return NS_ERROR_OUT_OF_MEMORY;
+  }

  uint32_t haveRead = mRequest->mScriptText.length();

  CheckedInt<uint32_t> capacity = haveRead;
-  capacity += dstLen;
+  capacity += needed.value();

  if (!capacity.isValid() || !mRequest->mScriptText.reserve(capacity.value())) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

-  rv = mDecoder->Convert(src,
-                         &srcLen,
-                         mRequest->mScriptText.begin() + haveRead,
-                         &dstLen);
+  uint32_t result;
+  size_t read;
+  size_t written;
+  bool hadErrors;
+  Tie(result, read, written, hadErrors) = mDecoder->DecodeToUTF16(
+    MakeSpan(aData, aDataLength),
+    MakeSpan(mRequest->mScriptText.begin() + haveRead, needed.value()),
+    aEndOfStream);
+  MOZ_ASSERT(result == kInputEmpty);
+  MOZ_ASSERT(read == aDataLength);
+  MOZ_ASSERT(written <= needed.value());
+  Unused << hadErrors;

-  NS_ENSURE_SUCCESS(rv, rv);
-
-  haveRead += dstLen;
+  haveRead += written;
  MOZ_ASSERT(haveRead <= capacity.value(), "mDecoder produced more data than expected");
  MOZ_ALWAYS_TRUE(mRequest->mScriptText.resizeUninitialized(haveRead));

@@ -164,7 +166,7 @@ ScriptLoadHandler::EnsureDecoder(nsIIncrementalStreamLoader* aLoader,
  // JavaScript modules are always UTF-8.
  if (mRequest->IsModuleRequest()) {
    oCharset = "UTF-8";
-    mDecoder = EncodingUtils::DecoderForEncoding(oCharset);
+    mDecoder = UTF_8_ENCODING->NewDecoderWithBOMRemoval();
    return true;
  }

@@ -176,8 +178,12 @@ ScriptLoadHandler::EnsureDecoder(nsIIncrementalStreamLoader* aLoader,
  }

  // Do BOM detection.
-  if (nsContentUtils::CheckForBOM(aData, aDataLength, oCharset)) {
-    mDecoder = EncodingUtils::DecoderForEncoding(oCharset);
+  const Encoding* encoding;
+  size_t bomLength;
+  Tie(encoding, bomLength) = Encoding::ForBOM(MakeSpan(aData, aDataLength));
+  if (encoding) {
+    mDecoder = encoding->NewDecoderWithBOMRemoval();
+    encoding->Name(oCharset);
    return true;
  }

@@ -189,11 +195,14 @@ ScriptLoadHandler::EnsureDecoder(nsIIncrementalStreamLoader* aLoader,

  nsCOMPtr<nsIChannel> channel = do_QueryInterface(req);

-  if (channel &&
-      NS_SUCCEEDED(channel->GetContentCharset(oCharset)) &&
-      EncodingUtils::FindEncodingForLabel(oCharset, oCharset)) {
-    mDecoder = EncodingUtils::DecoderForEncoding(oCharset);
-    return true;
+  if (channel) {
+    nsAutoCString label;
+    if (NS_SUCCEEDED(channel->GetContentCharset(label)) &&
+        (encoding = Encoding::ForLabel(label))) {
+      mDecoder = encoding->NewDecoderWithoutBOMHandling();
+      encoding->Name(oCharset);
+      return true;
+    }
  }

  // Check the hint charset from the script element or preload
@@ -211,25 +220,26 @@ ScriptLoadHandler::EnsureDecoder(nsIIncrementalStreamLoader* aLoader,
    hintCharset = mScriptLoader->mPreloads[i].mCharset;
  }

-  if (EncodingUtils::FindEncodingForLabel(hintCharset, oCharset)) {
-    mDecoder = EncodingUtils::DecoderForEncoding(oCharset);
+  if ((encoding = Encoding::ForLabel(hintCharset))) {
+    mDecoder = encoding->NewDecoderWithoutBOMHandling();
+    encoding->Name(oCharset);
    return true;
  }

  // Get the charset from the charset of the document.
  if (mScriptLoader->mDocument) {
-    oCharset = mScriptLoader->mDocument->GetDocumentCharacterSet();
-    mDecoder = EncodingUtils::DecoderForEncoding(oCharset);
+    encoding =
+      Encoding::ForName(mScriptLoader->mDocument->GetDocumentCharacterSet());
+    mDecoder = encoding->NewDecoderWithoutBOMHandling();
+    encoding->Name(oCharset);
    return true;
  }

  // Curiously, there are various callers that don't pass aDocument. The
  // fallback in the old code was ISO-8859-1, which behaved like
-  // windows-1252. Saying windows-1252 for clarity and for compliance
-  // with the Encoding Standard.
+  // windows-1252.
  oCharset = "windows-1252";
-  mDecoder = EncodingUtils::DecoderForEncoding(oCharset);
-
+  mDecoder = WINDOWS_1252_ENCODING->NewDecoderWithoutBOMHandling();
  return true;
 }