Backed out 6 changesets (bug 1543077) for causing bc failures at docshell/test/browser/browser_bug1543077.js

Backed out changeset f593045cc48f (bug 1543077)
Backed out changeset 25449ba8aceb (bug 1543077)
Backed out changeset ccc438262e29 (bug 1543077)
Backed out changeset 4573c25b1ce0 (bug 1543077)
Backed out changeset 1cbaafb9373a (bug 1543077)
Backed out changeset 1a0e7ced8e47 (bug 1543077)
2019-05-27 12:00:21 +03:00
parent 3f1e6bf9b9
commit c1c0700d91
89 changed files with 3091 additions and 1841 deletions
--- a/parser/html/nsHtml5StreamParser.cpp
+++ b/parser/html/nsHtml5StreamParser.cpp
@@ -31,6 +31,7 @@
 #include "nsIThreadRetargetableRequest.h"
 #include "nsPrintfCString.h"
 #include "nsNetUtil.h"
+#include "nsUdetXPCOMWrapper.h"
 #include "nsXULAppAPI.h"
 #include "mozilla/SchedulerGroup.h"
 #include "nsJSEnvironment.h"
@@ -155,7 +156,7 @@ nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor,
      mBomState(eBomState::BOM_SNIFFING_NOT_STARTED),
      mCharsetSource(kCharsetUninitialized),
      mEncoding(WINDOWS_1252_ENCODING),
-      mFeedChardet(true),
+      mFeedChardetIfEncoding(nullptr),
      mReparseForbidden(false),
      mLastBuffer(nullptr),  // Will be filled when starting
      mExecutor(aExecutor),
@@ -180,8 +181,6 @@ nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor,
      mEventTarget(nsHtml5Module::GetStreamParserThread()->SerialEventTarget()),
      mExecutorFlusher(new nsHtml5ExecutorFlusher(aExecutor)),
      mLoadFlusher(new nsHtml5LoadFlusher(aExecutor)),
-      mJapaneseDetector(mozilla::JapaneseDetector::Create(
-          StaticPrefs::intl_charset_detector_iso2022jp_allowed())),
      mInitialEncodingWasFromParentFrame(false),
      mHasHadErrors(false),
      mDecodingLocalFileAsUTF8(false),
@@ -211,11 +210,16 @@ nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor,
  nsAutoCString detectorName;
  Preferences::GetLocalizedCString("intl.charset.detector", detectorName);
  if (!detectorName.IsEmpty()) {
-    // We recognize one of the two magic strings for Russian and Ukranian.
+    // We recognize one of the three magic strings for the following languages.
    if (detectorName.EqualsLiteral("ruprob")) {
      mChardet = new nsRUProbDetector();
+      mFeedChardetIfEncoding = WINDOWS_1251_ENCODING;
    } else if (detectorName.EqualsLiteral("ukprob")) {
      mChardet = new nsUKProbDetector();
+      mFeedChardetIfEncoding = WINDOWS_1251_ENCODING;
+    } else if (detectorName.EqualsLiteral("ja_parallel_state_machine")) {
+      mChardet = new nsJAPSMDetector();
+      mFeedChardetIfEncoding = SHIFT_JIS_ENCODING;
    }
    if (mChardet) {
      (void)mChardet->Init(this);
@@ -259,7 +263,7 @@ NS_IMETHODIMP
 nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf) {
  NS_ASSERTION(IsParserThread(), "Wrong thread!");
  if (aConf == eBestAnswer || aConf == eSureAnswer) {
-    mFeedChardet = false;  // just in case
+    mFeedChardetIfEncoding = nullptr;  // just in case
    auto encoding =
        Encoding::ForLabelNoReplacement(nsDependentCString(aCharset));
    if (!encoding) {
@@ -267,8 +271,8 @@ nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf) {
    }
    if (HasDecoder()) {
      if (mEncoding == encoding) {
-        MOZ_ASSERT(mCharsetSource < kCharsetFromAutoDetection,
-                   "Why are we running chardet at all?");
+        NS_ASSERTION(mCharsetSource < kCharsetFromAutoDetection,
+                     "Why are we running chardet at all?");
        mCharsetSource = kCharsetFromAutoDetection;
        mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
      } else {
@@ -290,62 +294,6 @@ nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf) {
  return NS_OK;
 }

-void nsHtml5StreamParser::FeedJapaneseDetector(Span<const uint8_t> aBuffer,
-                                               bool aLast) {
-  const Encoding* detected = mJapaneseDetector->Feed(aBuffer, aLast);
-  if (!detected) {
-    return;
-  }
-  mFeedChardet = false;
-  if (mDecodingLocalFileAsUTF8 && detected != ISO_2022_JP_ENCODING) {
-    return;
-  }
-  int32_t source = kCharsetFromAutoDetection;
-  if (mCharsetSource == kCharsetFromParentForced ||
-      mCharsetSource == kCharsetFromUserForced) {
-    source = kCharsetFromUserForcedAutoDetection;
-  }
-  if (detected == mEncoding) {
-    MOZ_ASSERT(mCharsetSource < source, "Why are we running chardet at all?");
-    mCharsetSource = source;
-    mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
-  } else if (HasDecoder()) {
-    // We've already committed to a decoder. Request a reload from the
-    // docshell.
-    mTreeBuilder->NeedsCharsetSwitchTo(WrapNotNull(detected), source, 0);
-    FlushTreeOpsAndDisarmTimer();
-    Interrupt();
-  } else {
-    // Got a confident answer from the sniffing buffer. That code will
-    // take care of setting up the decoder.
-    mEncoding = WrapNotNull(detected);
-    mCharsetSource = source;
-    mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
-  }
-}
-
-void nsHtml5StreamParser::FeedDetector(Span<const uint8_t> aBuffer,
-                                       bool aLast) {
-  if (mEncoding->IsJapaneseLegacy()) {
-    FeedJapaneseDetector(aBuffer, aLast);
-  } else if (mEncoding == WINDOWS_1251_ENCODING && mChardet &&
-             !mDecodingLocalFileAsUTF8) {
-    if (!aBuffer.IsEmpty()) {
-      bool dontFeed = false;
-      mozilla::Unused << mChardet->DoIt((const char*)aBuffer.Elements(),
-                                        aBuffer.Length(), &dontFeed);
-      if (dontFeed) {
-        mFeedChardet = false;
-      }
-    }
-    if (aLast) {
-      mozilla::Unused << mChardet->Done();
-    }
-  } else {
-    mFeedChardet = false;
-  }
-}
-
 void nsHtml5StreamParser::SetViewSourceTitle(nsIURI* aURL) {
  if (recordreplay::IsRecordingOrReplaying()) {
    nsAutoCString spec;
@@ -387,11 +335,6 @@ nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
    MOZ_ASSERT(mEncoding != UTF_8_ENCODING);
    mUnicodeDecoder = UTF_8_ENCODING->NewDecoderWithBOMRemoval();
  } else {
-    if (mCharsetSource >= kCharsetFromAutoDetection &&
-        !(mCharsetSource == kCharsetFromUserForced ||
-          mCharsetSource == kCharsetFromParentForced)) {
-      mFeedChardet = false;
-    }
    mDecodingLocalFileAsUTF8 = false;
    mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
  }
@@ -411,7 +354,7 @@ nsresult nsHtml5StreamParser::SetupDecodingFromBom(
  mDecodingLocalFileAsUTF8 = false;
  mUnicodeDecoder = mEncoding->NewDecoderWithoutBOMHandling();
  mCharsetSource = kCharsetFromByteOrderMark;
-  mFeedChardet = false;
+  mFeedChardetIfEncoding = nullptr;
  mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
  mSniffingBuffer = nullptr;
  mMetaScanner = nullptr;
@@ -469,7 +412,7 @@ void nsHtml5StreamParser::SniffBOMlessUTF16BasicLatin(
  }
  mCharsetSource = kCharsetFromIrreversibleAutoDetection;
  mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
-  mFeedChardet = false;
+  mFeedChardetIfEncoding = nullptr;
  mTreeBuilder->MaybeComplainAboutCharset("EncBomlessUtf16", true, 0);
 }

@@ -536,38 +479,12 @@ static void HandleProcessingInstruction(void* aUserData,
  XML_StopParser(ud->mExpat, false);
 }

-void nsHtml5StreamParser::FinalizeSniffingWithDetector(
-    Span<const uint8_t> aFromSegment, uint32_t aCountToSniffingLimit,
-    bool aEof) {
-  if (mSniffingBuffer) {
-    FeedDetector(MakeSpan(mSniffingBuffer.get(), mSniffingLength), false);
-  }
-  if (mFeedChardet && !aFromSegment.IsEmpty()) {
-    // Avoid buffer boundary-dependent behavior when
-    // reparsing is forbidden. If reparse is forbidden,
-    // act as if we only saw the first 1024 bytes.
-    // When reparsing isn't forbidden, buffer boundaries
-    // can have an effect on whether the page is loaded
-    // once or twice. :-(
-    FeedDetector(mReparseForbidden ? aFromSegment.To(aCountToSniffingLimit)
-                                   : aFromSegment,
-                 false);
-  }
-  if (mFeedChardet && aEof &&
-      (!mReparseForbidden || aCountToSniffingLimit == aFromSegment.Length())) {
-    // Don't signal EOF if reparse is forbidden and we didn't pass all input
-    // to the detector above.
-    mFeedChardet = false;
-    FeedDetector(Span<const uint8_t>(), true);
-  }
-}
-
 nsresult nsHtml5StreamParser::FinalizeSniffing(Span<const uint8_t> aFromSegment,
                                               uint32_t aCountToSniffingLimit,
                                               bool aEof) {
-  MOZ_ASSERT(IsParserThread(), "Wrong thread!");
-  MOZ_ASSERT(mCharsetSource < kCharsetFromUserForcedAutoDetection,
-             "Should not finalize sniffing with strong decision already made.");
+  NS_ASSERTION(IsParserThread(), "Wrong thread!");
+  NS_ASSERTION(mCharsetSource < kCharsetFromParentForced,
+               "Should not finalize sniffing when using forced charset.");
  if (mMode == VIEW_SOURCE_XML) {
    static const XML_Memory_Handling_Suite memsuite = {
        (void* (*)(size_t))moz_xmalloc, (void* (*)(void*, size_t))moz_xrealloc,
@@ -630,15 +547,50 @@ nsresult nsHtml5StreamParser::FinalizeSniffing(Span<const uint8_t> aFromSegment,
  }

  // meta scan failed.
-  if (mCharsetSource < kCharsetFromMetaPrescan) {
-    // Check for BOMless UTF-16 with Basic
-    // Latin content for compat with IE. See bug 631751.
-    SniffBOMlessUTF16BasicLatin(aFromSegment.To(aCountToSniffingLimit));
+  if (mCharsetSource >= kCharsetFromHintPrevDoc) {
+    mFeedChardetIfEncoding = nullptr;
+    return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
  }
+  // Check for BOMless UTF-16 with Basic
+  // Latin content for compat with IE. See bug 631751.
+  SniffBOMlessUTF16BasicLatin(aFromSegment.To(aCountToSniffingLimit));
  // the charset may have been set now
  // maybe try chardet now;
-  if (mFeedChardet) {
-    FinalizeSniffingWithDetector(aFromSegment, aCountToSniffingLimit, aEof);
+  if ((mFeedChardetIfEncoding == mEncoding) && !mDecodingLocalFileAsUTF8) {
+    bool dontFeed;
+    nsresult rv;
+    if (mSniffingBuffer) {
+      rv = mChardet->DoIt((const char*)mSniffingBuffer.get(), mSniffingLength,
+                          &dontFeed);
+      if (dontFeed) {
+        mFeedChardetIfEncoding = nullptr;
+      }
+      NS_ENSURE_SUCCESS(rv, rv);
+    }
+    if ((mFeedChardetIfEncoding == mEncoding) && !aFromSegment.IsEmpty()) {
+      rv = mChardet->DoIt(
+          (const char*)aFromSegment.Elements(),
+          // Avoid buffer boundary-dependent behavior when
+          // reparsing is forbidden. If reparse is forbidden,
+          // act as if we only saw the first 1024 bytes.
+          // When reparsing isn't forbidden, buffer boundaries
+          // can have an effect on whether the page is loaded
+          // once or twice. :-(
+          mReparseForbidden ? aCountToSniffingLimit : aFromSegment.Length(),
+          &dontFeed);
+      if (dontFeed) {
+        mFeedChardetIfEncoding = nullptr;
+      }
+      NS_ENSURE_SUCCESS(rv, rv);
+    }
+    if ((mFeedChardetIfEncoding == mEncoding) && (aEof || mReparseForbidden)) {
+      // mReparseForbidden is checked so that we get to use the sniffing
+      // buffer with the best guess so far if we aren't allowed to guess
+      // better later.
+      mFeedChardetIfEncoding = nullptr;
+      rv = mChardet->Done();
+      NS_ENSURE_SUCCESS(rv, rv);
+    }
    // fall thru; callback may have changed charset
  }
  if (mCharsetSource == kCharsetUninitialized) {
@@ -648,7 +600,7 @@ nsresult nsHtml5StreamParser::FinalizeSniffing(Span<const uint8_t> aFromSegment,
    mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
  } else if (mMode == LOAD_AS_DATA && mCharsetSource == kCharsetFromFallback) {
    NS_ASSERTION(mReparseForbidden, "Reparse should be forbidden for XHR");
-    NS_ASSERTION(!mFeedChardet, "Should not feed chardet for XHR");
+    NS_ASSERTION(!mFeedChardetIfEncoding, "Should not feed chardet for XHR");
    NS_ASSERTION(mEncoding == UTF_8_ENCODING, "XHR should default to UTF-8");
    // Now mark charset source as non-weak to signal that we have a decision
    mCharsetSource = kCharsetFromDocTypeDefault;
@@ -735,6 +687,7 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
    // earlier call to SetDocumentCharset(), since we didn't find a BOM and
    // overwrite mEncoding. (Note that if the user has overridden the charset,
    // we don't come here but check <meta> for XSS-dangerous charsets first.)
+    mFeedChardetIfEncoding = nullptr;
    mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
    return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
  }
@@ -766,16 +719,12 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
            (encoding->IsAsciiCompatible() ||
             encoding == ISO_2022_JP_ENCODING)) {
          // Honor override
-          if (mEncoding->IsJapaneseLegacy()) {
-            mFeedChardet = true;
-            FinalizeSniffingWithDetector(aFromSegment, countToSniffingLimit,
-                                         false);
-          }
          return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
              aFromSegment);
        }
        mEncoding = WrapNotNull(encoding);
        mCharsetSource = kCharsetFromMetaPrescan;
+        mFeedChardetIfEncoding = nullptr;
        mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
        return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
            aFromSegment);
@@ -784,10 +733,6 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
    if (mCharsetSource == kCharsetFromParentForced ||
        mCharsetSource == kCharsetFromUserForced) {
      // meta not found, honor override
-      if (mEncoding->IsJapaneseLegacy()) {
-        mFeedChardet = true;
-        FinalizeSniffingWithDetector(aFromSegment, countToSniffingLimit, false);
-      }
      return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
    }
    return FinalizeSniffing(aFromSegment, countToSniffingLimit, false);
@@ -816,6 +761,7 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
      }
      mEncoding = WrapNotNull(encoding);
      mCharsetSource = kCharsetFromMetaPrescan;
+      mFeedChardetIfEncoding = nullptr;
      mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
      return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
    }
@@ -895,12 +841,6 @@ void nsHtml5StreamParser::ReDecodeLocalFile() {
  mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
  mHasHadErrors = false;

-  // We need the detector to start with fresh state.
-  // Turn off ISO-2022-JP detection, because if this doc was
-  // ISO-2022-JP, it would have already been detected.
-  mJapaneseDetector = mozilla::JapaneseDetector::Create(false);
-  mFeedChardet = true;
-
  // Throw away previous decoded data
  mLastBuffer = mFirstBuffer;
  mLastBuffer->next = nullptr;
@@ -916,7 +856,7 @@ void nsHtml5StreamParser::ReDecodeLocalFile() {
 void nsHtml5StreamParser::CommitLocalFileToUTF8() {
  MOZ_ASSERT(mDecodingLocalFileAsUTF8);
  mDecodingLocalFileAsUTF8 = false;
-  mFeedChardet = false;
+  mFeedChardetIfEncoding = nullptr;
  mEncoding = UTF_8_ENCODING;
  mCharsetSource = kCharsetFromFileURLGuess;
  mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
@@ -1060,7 +1000,7 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) {
      // This is the old Gecko behavior but the HTML5 spec disagrees.
      // Don't reparse on POST.
      mReparseForbidden = true;
-      mFeedChardet = false;  // can't restart anyway
+      mFeedChardetIfEncoding = nullptr;  // can't restart anyway
    }
  }

@@ -1091,10 +1031,8 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) {
    mInitialEncodingWasFromParentFrame = true;
  }

-  if (mCharsetSource >= kCharsetFromAutoDetection &&
-      !(mCharsetSource == kCharsetFromParentForced ||
-        mCharsetSource == kCharsetFromUserForced)) {
-    mFeedChardet = false;
+  if (mCharsetSource >= kCharsetFromAutoDetection) {
+    mFeedChardetIfEncoding = nullptr;
  }

  if (mCharsetSource < kCharsetFromUtf8OnlyMime) {
@@ -1103,11 +1041,11 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) {
    return NS_OK;
  }

-  // We are loading JSON/WebVTT/etc. into a browsing context.
-  // There's no need to remove the BOM manually here, because
-  // the UTF-8 decoder removes it.
+  // We are reloading a document.open()ed doc or loading JSON/WebVTT/etc. into
+  // a browsing context. In the latter case, there's no need to remove the
+  // BOM manually here, because the UTF-8 decoder removes it.
  mReparseForbidden = true;
-  mFeedChardet = false;
+  mFeedChardetIfEncoding = nullptr;

  // Instantiate the converter here to avoid BOM sniffing.
  mDecodingLocalFileAsUTF8 = false;
@@ -1147,9 +1085,8 @@ void nsHtml5StreamParser::DoStopRequest() {
      return;
    }
  }
-  if (mFeedChardet) {
-    mFeedChardet = false;
-    FeedDetector(Span<uint8_t>(), true);
+  if ((mFeedChardetIfEncoding == mEncoding) && !mDecodingLocalFileAsUTF8) {
+    mChardet->Done();
  }

  MOZ_ASSERT(mUnicodeDecoder,
@@ -1309,8 +1246,13 @@ void nsHtml5StreamParser::DoDataAvailable(Span<const uint8_t> aBuffer) {

  nsresult rv;
  if (HasDecoder()) {
-    if (mFeedChardet) {
-      FeedDetector(aBuffer, false);
+    if ((mFeedChardetIfEncoding == mEncoding) && !mDecodingLocalFileAsUTF8) {
+      bool dontFeed;
+      mChardet->DoIt((const char*)aBuffer.Elements(), aBuffer.Length(),
+                     &dontFeed);
+      if (dontFeed) {
+        mFeedChardetIfEncoding = nullptr;
+      }
    }
    rv = WriteStreamBytes(aBuffer);
  } else {
@@ -1469,7 +1411,7 @@ const Encoding* nsHtml5StreamParser::PreferredForInternalEncodingDecl(
      }
    }
    mCharsetSource = kCharsetFromMetaTag;  // become confident
-    mFeedChardet = false;                  // don't feed chardet when confident
+    mFeedChardetIfEncoding = nullptr;      // don't feed chardet when confident
    return nullptr;
  }

@@ -1508,7 +1450,7 @@ bool nsHtml5StreamParser::internalEncodingDeclaration(nsHtml5String aEncoding) {

  // Avoid having the chardet ask for another restart after this restart
  // request.
-  mFeedChardet = false;
+  mFeedChardetIfEncoding = nullptr;
  mTreeBuilder->NeedsCharsetSwitchTo(WrapNotNull(encoding), kCharsetFromMetaTag,
                                     mTokenizer->getLineNumber());
  FlushTreeOpsAndDisarmTimer();