Bug 1373984 - Turn nsIDocument::mCharacterSet into mozilla::NotNull<const mozilla::Encoding*>. r=hsivonen

MozReview-Commit-ID: GF0YXDwfA14
2017-06-18 20:37:50 +09:00
parent cba92e4ddd
commit a725dc3e13
81 changed files with 584 additions and 450 deletions
--- a/parser/html/nsHtml5StreamParser.cpp
+++ b/parser/html/nsHtml5StreamParser.cpp
@@ -152,6 +152,7 @@ nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor,
  : mSniffingLength(0)
  , mBomState(eBomState::BOM_SNIFFING_NOT_STARTED)
  , mCharsetSource(kCharsetUninitialized)
+  , mEncoding(WINDOWS_1252_ENCODING)
  , mReparseForbidden(false)
  , mLastBuffer(nullptr) // Will be filled when starting
  , mExecutor(aExecutor)
@@ -253,23 +254,21 @@ nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf)
  NS_ASSERTION(IsParserThread(), "Wrong thread!");
  if (aConf == eBestAnswer || aConf == eSureAnswer) {
    mFeedChardet = false; // just in case
-    const Encoding* encoding = Encoding::ForLabelNoReplacement(
+    auto encoding = Encoding::ForLabelNoReplacement(
        nsDependentCString(aCharset));
    if (!encoding) {
      return NS_OK;
    }
-    nsAutoCString charset;
-    encoding->Name(charset);
    if (HasDecoder()) {
-      if (mCharset.Equals(charset)) {
+      if (mEncoding == encoding) {
        NS_ASSERTION(mCharsetSource < kCharsetFromAutoDetection,
            "Why are we running chardet at all?");
        mCharsetSource = kCharsetFromAutoDetection;
-        mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
+        mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
      } else {
        // We've already committed to a decoder. Request a reload from the
        // docshell.
-        mTreeBuilder->NeedsCharsetSwitchTo(charset,
+        mTreeBuilder->NeedsCharsetSwitchTo(WrapNotNull(encoding),
                                           kCharsetFromAutoDetection,
                                           0);
        FlushTreeOpsAndDisarmTimer();
@@ -278,9 +277,9 @@ nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf)
    } else {
      // Got a confident answer from the sniffing buffer. That code will
      // take care of setting up the decoder.
-      mCharset.Assign(charset);
+      mEncoding = WrapNotNull(encoding);
      mCharsetSource = kCharsetFromAutoDetection;
-      mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
+      mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
    }
  }
  return NS_OK;
@@ -321,7 +320,7 @@ nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(const
 {
  NS_ASSERTION(IsParserThread(), "Wrong thread!");
  nsresult rv = NS_OK;
-  mUnicodeDecoder = Encoding::ForName(mCharset)->NewDecoderWithBOMRemoval();
+  mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
  if (mSniffingBuffer) {
    uint32_t writeCount;
    rv = WriteStreamBytes(mSniffingBuffer.get(), mSniffingLength, &writeCount);
@@ -336,14 +335,14 @@ nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(const
 }

 nsresult
-nsHtml5StreamParser::SetupDecodingFromBom(const char* aDecoderCharsetName)
+nsHtml5StreamParser::SetupDecodingFromBom(NotNull<const Encoding*> aEncoding)
 {
  NS_ASSERTION(IsParserThread(), "Wrong thread!");
-  mCharset.Assign(aDecoderCharsetName);
-  mUnicodeDecoder = Encoding::ForName(mCharset)->NewDecoderWithBOMRemoval();
+  mEncoding = aEncoding;
+  mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
  mCharsetSource = kCharsetFromByteOrderMark;
  mFeedChardet = false;
-  mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
+  mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
  mSniffingBuffer = nullptr;
  mMetaScanner = nullptr;
  mBomState = BOM_SNIFFING_OVER;
@@ -398,12 +397,12 @@ nsHtml5StreamParser::SniffBOMlessUTF16BasicLatin(const uint8_t* aFromSegment,
  }

  if (byteNonZero[0]) {
-    mCharset.AssignLiteral("UTF-16LE");
+    mEncoding = UTF_16LE_ENCODING;
  } else {
-    mCharset.AssignLiteral("UTF-16BE");
+    mEncoding = UTF_16BE_ENCODING;
  }
  mCharsetSource = kCharsetFromIrreversibleAutoDetection;
-  mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
+  mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
  mFeedChardet = false;
  mTreeBuilder->MaybeComplainAboutCharset("EncBomlessUtf16",
                                          true,
@@ -418,8 +417,9 @@ nsHtml5StreamParser::SetEncodingFromExpat(const char16_t* aEncoding)
    nsDependentString utf16(aEncoding);
    nsAutoCString utf8;
    CopyUTF16toUTF8(utf16, utf8);
-    if (PreferredForInternalEncodingDecl(utf8)) {
-      mCharset.Assign(utf8);
+    auto encoding = PreferredForInternalEncodingDecl(utf8);
+    if (encoding) {
+      mEncoding = WrapNotNull(encoding);
      mCharsetSource = kCharsetFromMetaTag; // closest for XML
      return;
    }
@@ -428,7 +428,7 @@ nsHtml5StreamParser::SetEncodingFromExpat(const char16_t* aEncoding)
    // right away and let the encoding be set to UTF-8 which we'd default to
    // anyway.
  }
-  mCharset.AssignLiteral("UTF-8"); // XML defaults to UTF-8 without a BOM
+  mEncoding = UTF_8_ENCODING; // XML defaults to UTF-8 without a BOM
  mCharsetSource = kCharsetFromMetaTag; // means confident
 }

@@ -560,7 +560,7 @@ nsHtml5StreamParser::FinalizeSniffing(const uint8_t* aFromSegment, // can be nul
      // confidently to UTF-8 in this case.
      // It is also possible that the document has an XML declaration that is
      // longer than 1024 bytes, but that case is not worth worrying about.
-      mCharset.AssignLiteral("UTF-8");
+      mEncoding = UTF_8_ENCODING;
      mCharsetSource = kCharsetFromMetaTag; // means confident
    }

@@ -612,18 +612,18 @@ nsHtml5StreamParser::FinalizeSniffing(const uint8_t* aFromSegment, // can be nul
  }
  if (mCharsetSource == kCharsetUninitialized) {
    // Hopefully this case is never needed, but dealing with it anyway
-    mCharset.AssignLiteral("windows-1252");
+    mEncoding = WINDOWS_1252_ENCODING;
    mCharsetSource = kCharsetFromFallback;
-    mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
+    mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
  } else if (mMode == LOAD_AS_DATA &&
             mCharsetSource == kCharsetFromFallback) {
    NS_ASSERTION(mReparseForbidden, "Reparse should be forbidden for XHR");
    NS_ASSERTION(!mFeedChardet, "Should not feed chardet for XHR");
-    NS_ASSERTION(mCharset.EqualsLiteral("UTF-8"),
+    NS_ASSERTION(mEncoding == UTF_8_ENCODING,
                 "XHR should default to UTF-8");
    // Now mark charset source as non-weak to signal that we have a decision
    mCharsetSource = kCharsetFromDocTypeDefault;
-    mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
+    mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
  }
  return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount);
 }
@@ -637,9 +637,9 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
  nsresult rv = NS_OK;
  uint32_t writeCount;

-  // mCharset and mCharsetSource potentially have come from channel or higher
+  // mEncoding and mCharsetSource potentially have come from channel or higher
  // by now. If we find a BOM, SetupDecodingFromBom() will overwrite them.
-  // If we don't find a BOM, the previously set values of mCharset and
+  // If we don't find a BOM, the previously set values of mEncoding and
  // mCharsetSource are not modified by the BOM sniffing here.
  for (uint32_t i = 0; i < aCount && mBomState != BOM_SNIFFING_OVER; i++) {
    switch (mBomState) {
@@ -662,7 +662,7 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
        break;
      case SEEN_UTF_16_LE_FIRST_BYTE:
        if (aFromSegment[i] == 0xFE) {
-          rv = SetupDecodingFromBom("UTF-16LE"); // upper case is the raw form
+          rv = SetupDecodingFromBom(UTF_16LE_ENCODING); // BOM complete: decode as UTF-16LE
          NS_ENSURE_SUCCESS(rv, rv);
          uint32_t count = aCount - (i + 1);
          rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount);
@@ -674,7 +674,7 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
        break;
      case SEEN_UTF_16_BE_FIRST_BYTE:
        if (aFromSegment[i] == 0xFF) {
-          rv = SetupDecodingFromBom("UTF-16BE"); // upper case is the raw form
+          rv = SetupDecodingFromBom(UTF_16BE_ENCODING); // BOM complete: decode as UTF-16BE
          NS_ENSURE_SUCCESS(rv, rv);
          uint32_t count = aCount - (i + 1);
          rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount);
@@ -693,7 +693,7 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
        break;
      case SEEN_UTF_8_SECOND_BYTE:
        if (aFromSegment[i] == 0xBF) {
-          rv = SetupDecodingFromBom("UTF-8"); // upper case is the raw form
+          rv = SetupDecodingFromBom(UTF_8_ENCODING); // BOM complete: decode as UTF-8
          NS_ENSURE_SUCCESS(rv, rv);
          uint32_t count = aCount - (i + 1);
          rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount);
@@ -718,13 +718,13 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,

  if (mBomState == BOM_SNIFFING_OVER &&
    mCharsetSource == kCharsetFromChannel) {
-    // There was no BOM and the charset came from channel. mCharset
+    // There was no BOM and the charset came from channel. mEncoding
    // still contains the charset from the channel as set by an
    // earlier call to SetDocumentCharset(), since we didn't find a BOM and
-    // overwrite mCharset. (Note that if the user has overridden the charset,
+    // overwrite mEncoding. (Note that if the user has overridden the charset,
    // we don't come here but check <meta> for XSS-dangerous charsets first.)
    mFeedChardet = false;
-    mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
+    mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
    return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment,
      aCount, aWriteCount);
  }
@@ -743,15 +743,14 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
      nsHtml5ByteReadable readable(aFromSegment, aFromSegment +
          countToSniffingLimit);
      nsAutoCString charset;
-      mMetaScanner->sniff(&readable, charset);
+      auto encoding = mMetaScanner->sniff(&readable);
      // Due to the way nsHtml5Portability reports OOM, ask the tree buider
      nsresult rv;
      if (NS_FAILED((rv = mTreeBuilder->IsBroken()))) {
        MarkAsBroken(rv);
        return rv;
      }
-      if (!charset.IsEmpty()) {
-        const Encoding* encoding = Encoding::ForName(charset);
+      if (encoding) {
        // meta scan successful; honor overrides unless meta is XSS-dangerous
        if ((mCharsetSource == kCharsetFromParentForced ||
             mCharsetSource == kCharsetFromUserForced) &&
@@ -761,10 +760,10 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
          return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
            aFromSegment, aCount, aWriteCount);
        }
-        mCharset.Assign(charset);
+        mEncoding = WrapNotNull(encoding);
        mCharsetSource = kCharsetFromMetaPrescan;
        mFeedChardet = false;
-        mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
+        mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
        return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
          aFromSegment, aCount, aWriteCount);
      }
@@ -782,16 +781,14 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
  // not the last buffer
  if (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA) {
    nsHtml5ByteReadable readable(aFromSegment, aFromSegment + aCount);
-    nsAutoCString charset;
-    mMetaScanner->sniff(&readable, charset);
+    auto encoding = mMetaScanner->sniff(&readable);
    // Due to the way nsHtml5Portability reports OOM, ask the tree buider
    nsresult rv;
    if (NS_FAILED((rv = mTreeBuilder->IsBroken()))) {
      MarkAsBroken(rv);
      return rv;
    }
-    if (!charset.IsEmpty()) {
-      const Encoding* encoding = Encoding::ForName(charset);
+    if (encoding) {
      // meta scan successful; honor overrides unless meta is XSS-dangerous
      if ((mCharsetSource == kCharsetFromParentForced ||
           mCharsetSource == kCharsetFromUserForced) &&
@@ -801,10 +798,10 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
        return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment,
            aCount, aWriteCount);
      }
-      mCharset.Assign(charset);
+      mEncoding = WrapNotNull(encoding);
      mCharsetSource = kCharsetFromMetaPrescan;
      mFeedChardet = false;
-      mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
+      mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
      return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment,
        aCount, aWriteCount);
    }
@@ -1007,7 +1004,7 @@ nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest, nsISupports* aContext)
  mFeedChardet = false;

  // Instantiate the converter here to avoid BOM sniffing.
-  mUnicodeDecoder = Encoding::ForName(mCharset)->NewDecoderWithBOMRemoval();
+  mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
  return NS_OK;
 }

@@ -1231,8 +1228,8 @@ nsHtml5StreamParser::CopySegmentsToParser(nsIInputStream *aInStream,
  return NS_OK;
 }

-bool
-nsHtml5StreamParser::PreferredForInternalEncodingDecl(nsACString& aEncoding)
+const Encoding*
+nsHtml5StreamParser::PreferredForInternalEncodingDecl(const nsACString& aEncoding)
 {
  const Encoding* newEncoding = Encoding::ForLabel(aEncoding);
  if (!newEncoding) {
@@ -1240,7 +1237,7 @@ nsHtml5StreamParser::PreferredForInternalEncodingDecl(nsACString& aEncoding)
    mTreeBuilder->MaybeComplainAboutCharset("EncMetaUnsupported",
                                            true,
                                            mTokenizer->getLineNumber());
-    return false;
+    return nullptr;
  }

  if (newEncoding == UTF_16BE_ENCODING ||
@@ -1259,7 +1256,7 @@ nsHtml5StreamParser::PreferredForInternalEncodingDecl(nsACString& aEncoding)
    newEncoding = WINDOWS_1252_ENCODING;
  }

-  if (newEncoding == Encoding::ForName(mCharset)) {
+  if (newEncoding == mEncoding) {
    if (mCharsetSource < kCharsetFromMetaPrescan) {
      if (mInitialEncodingWasFromParentFrame) {
        mTreeBuilder->MaybeComplainAboutCharset("EncLateMetaFrame",
@@ -1273,11 +1270,10 @@ nsHtml5StreamParser::PreferredForInternalEncodingDecl(nsACString& aEncoding)
    }
    mCharsetSource = kCharsetFromMetaTag; // become confident
    mFeedChardet = false; // don't feed chardet when confident
-    return false;
+    return nullptr;
  }

-  newEncoding->Name(aEncoding);
-  return true;
+  return newEncoding;
 }

 bool
@@ -1296,7 +1292,8 @@ nsHtml5StreamParser::internalEncodingDeclaration(nsHtml5String aEncoding)
  nsAutoCString newEncoding;
  CopyUTF16toUTF8(newEncoding16, newEncoding);

-  if (!PreferredForInternalEncodingDecl(newEncoding)) {
+  auto encoding = PreferredForInternalEncodingDecl(newEncoding);
+  if (!encoding) {
    return false;
  }

@@ -1314,7 +1311,7 @@ nsHtml5StreamParser::internalEncodingDeclaration(nsHtml5String aEncoding)
  // Avoid having the chardet ask for another restart after this restart
  // request.
  mFeedChardet = false;
-  mTreeBuilder->NeedsCharsetSwitchTo(newEncoding,
+  mTreeBuilder->NeedsCharsetSwitchTo(WrapNotNull(encoding),
                                     kCharsetFromMetaTag,
                                     mTokenizer->getLineNumber());
  FlushTreeOpsAndDisarmTimer();