Backed out changeset 0810ad586986 (bug 1551276) for wpt failures in ar-ISO-8859-6-late.tentative.html on a CLOSED TREE

This commit is contained in:
Oana Pop Rus
2019-12-12 16:38:54 +02:00
parent fa72909260
commit 53a8a406db
157 changed files with 2126 additions and 7028 deletions

View File

@@ -9,6 +9,7 @@
#include "mozilla/DebugOnly.h"
#include "mozilla/Encoding.h"
#include "nsContentUtils.h"
#include "nsCyrillicDetector.h"
#include "nsHtml5Tokenizer.h"
#include "nsIHttpChannel.h"
#include "nsHtml5Parser.h"
@@ -69,7 +70,7 @@ NS_IMPL_CYCLE_COLLECTING_ADDREF(nsHtml5StreamParser)
NS_IMPL_CYCLE_COLLECTING_RELEASE(nsHtml5StreamParser)
NS_INTERFACE_TABLE_HEAD(nsHtml5StreamParser)
NS_INTERFACE_TABLE(nsHtml5StreamParser, nsISupports)
NS_INTERFACE_TABLE(nsHtml5StreamParser, nsICharsetDetectionObserver)
NS_INTERFACE_TABLE_TO_MAP_SEGUE_CYCLE_COLLECTION(nsHtml5StreamParser)
NS_INTERFACE_MAP_END
@@ -83,6 +84,7 @@ NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN(nsHtml5StreamParser)
tmp->mExecutorFlusher = nullptr;
tmp->mLoadFlusher = nullptr;
tmp->mExecutor = nullptr;
NS_IMPL_CYCLE_COLLECTION_UNLINK(mChardet)
NS_IMPL_CYCLE_COLLECTION_UNLINK_END
NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsHtml5StreamParser)
@@ -99,6 +101,11 @@ NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN(nsHtml5StreamParser)
NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, "mLoadFlusher->mExecutor");
cb.NoteXPCOMChild(static_cast<nsIContentSink*>(tmp->mExecutor));
}
// hack: count self if held by mChardet
if (tmp->mChardet) {
NS_CYCLE_COLLECTION_NOTE_EDGE_NAME(cb, "mChardet->mObserver");
cb.NoteXPCOMChild(static_cast<nsICharsetDetectionObserver*>(tmp));
}
NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
class nsHtml5ExecutorFlusher : public Runnable {
@@ -151,7 +158,6 @@ nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor,
mCharsetSource(kCharsetUninitialized),
mEncoding(WINDOWS_1252_ENCODING),
mFeedChardet(true),
mGuessEncoding(true),
mReparseForbidden(false),
mLastBuffer(nullptr), // Will be filled when starting
mExecutor(aExecutor),
@@ -178,10 +184,9 @@ nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor,
mLoadFlusher(new nsHtml5LoadFlusher(aExecutor)),
mJapaneseDetector(mozilla::JapaneseDetector::Create(
StaticPrefs::intl_charset_detector_iso2022jp_allowed())),
mUseJapaneseDetector(false),
mInitialEncodingWasFromParentFrame(false),
mHasHadErrors(false),
mDecodingLocalFileWithoutTokenizing(false),
mDecodingLocalFileAsUTF8(false),
mFlushTimer(NS_NewTimer(mEventTarget)),
mFlushTimerMutex("nsHtml5StreamParser mFlushTimerMutex"),
mFlushTimerArmed(false),
@@ -202,6 +207,24 @@ nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor,
mTreeBuilder->EnableViewSource(highlighter); // doesn't own
}
// Chardet instantiation adapted from File.
// Chardet is initialized here even if it turns out to be useless
// to make the chardet refcount its observer (nsHtml5StreamParser)
// on the main thread.
nsAutoCString detectorName;
Preferences::GetLocalizedCString("intl.charset.detector", detectorName);
if (!detectorName.IsEmpty()) {
// We recognize one of the two magic strings for Russian and Ukrainian.
if (detectorName.EqualsLiteral("ruprob")) {
mChardet = new nsRUProbDetector();
} else if (detectorName.EqualsLiteral("ukprob")) {
mChardet = new nsUKProbDetector();
}
if (mChardet) {
(void)mChardet->Init(this);
}
}
// There's a zeroing operator new for everything else
}
@@ -232,49 +255,52 @@ nsresult nsHtml5StreamParser::GetChannel(nsIChannel** aChannel) {
: NS_ERROR_NOT_AVAILABLE;
}
void nsHtml5StreamParser::GuessEncoding(bool aEof, bool aInitial) {
if (mUseJapaneseDetector) {
return;
}
if (!aInitial) {
mGuessEncoding = false;
}
auto encoding = mDetector->Guess(mTLD, mDecodingLocalFileWithoutTokenizing);
if (HasDecoder() && !mDecodingLocalFileWithoutTokenizing) {
if (mEncoding == encoding) {
auto source = aInitial ? kCharsetFromInitialAutoDetection
: kCharsetFromFinalAutoDetection;
MOZ_ASSERT(mCharsetSource < source, "Why are we running chardet at all?");
mCharsetSource = source;
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
} else {
MOZ_ASSERT(mCharsetSource < kCharsetFromFinalAutoDetection);
// We've already committed to a decoder. Request a reload from the
// docshell.
mTreeBuilder->NeedsCharsetSwitchTo(encoding,
kCharsetFromFinalAutoDetection, 0);
FlushTreeOpsAndDisarmTimer();
Interrupt();
NS_IMETHODIMP
nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf) {
NS_ASSERTION(IsParserThread(), "Wrong thread!");
if (aConf == eBestAnswer || aConf == eSureAnswer) {
mFeedChardet = false; // just in case
auto encoding =
Encoding::ForLabelNoReplacement(nsDependentCString(aCharset));
if (!encoding) {
return NS_OK;
}
if (HasDecoder()) {
if (mEncoding == encoding) {
MOZ_ASSERT(mCharsetSource < kCharsetFromAutoDetection,
"Why are we running chardet at all?");
mCharsetSource = kCharsetFromAutoDetection;
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
} else {
// We've already committed to a decoder. Request a reload from the
// docshell.
mTreeBuilder->NeedsCharsetSwitchTo(WrapNotNull(encoding),
kCharsetFromAutoDetection, 0);
FlushTreeOpsAndDisarmTimer();
Interrupt();
}
} else {
// Got a confident answer from the sniffing buffer. That code will
// take care of setting up the decoder.
mEncoding = WrapNotNull(encoding);
mCharsetSource = kCharsetFromAutoDetection;
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
}
} else {
// Got a confident answer from the sniffing buffer. That code will
// take care of setting up the decoder.
mEncoding = encoding;
mCharsetSource = aInitial ? kCharsetFromInitialAutoDetection
: kCharsetFromFinalAutoDetection;
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
}
return NS_OK;
}
void nsHtml5StreamParser::FeedJapaneseDetector(Span<const uint8_t> aBuffer,
bool aLast) {
MOZ_ASSERT(!mDecodingLocalFileWithoutTokenizing);
const Encoding* detected = mJapaneseDetector->Feed(aBuffer, aLast);
if (!detected) {
return;
}
DontGuessEncoding();
int32_t source = kCharsetFromFinalAutoDetection;
mFeedChardet = false;
if (mDecodingLocalFileAsUTF8 && detected != ISO_2022_JP_ENCODING) {
return;
}
int32_t source = kCharsetFromAutoDetection;
if (mCharsetSource == kCharsetFromParentForced ||
mCharsetSource == kCharsetFromUserForced) {
source = kCharsetFromUserForcedAutoDetection;
@@ -300,10 +326,23 @@ void nsHtml5StreamParser::FeedJapaneseDetector(Span<const uint8_t> aBuffer,
void nsHtml5StreamParser::FeedDetector(Span<const uint8_t> aBuffer,
bool aLast) {
if (mUseJapaneseDetector) {
if (mEncoding->IsJapaneseLegacy()) {
FeedJapaneseDetector(aBuffer, aLast);
} else if (mEncoding == WINDOWS_1251_ENCODING && mChardet &&
!mDecodingLocalFileAsUTF8) {
if (!aBuffer.IsEmpty()) {
bool dontFeed = false;
mozilla::Unused << mChardet->DoIt((const char*)aBuffer.Elements(),
aBuffer.Length(), &dontFeed);
if (dontFeed) {
mFeedChardet = false;
}
}
if (aLast) {
mozilla::Unused << mChardet->Done();
}
} else {
Unused << mDetector->Feed(aBuffer, aLast);
mFeedChardet = false;
}
}
@@ -349,18 +388,16 @@ nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
Span<const uint8_t> aFromSegment) {
NS_ASSERTION(IsParserThread(), "Wrong thread!");
nsresult rv = NS_OK;
if (mDecodingLocalFileWithoutTokenizing &&
mCharsetSource <= kCharsetFromFileURLGuess) {
if (mDecodingLocalFileAsUTF8 && mCharsetSource <= kCharsetFromFileURLGuess) {
MOZ_ASSERT(mEncoding != UTF_8_ENCODING);
mUnicodeDecoder = UTF_8_ENCODING->NewDecoderWithBOMRemoval();
} else {
if (mCharsetSource >= kCharsetFromFinalAutoDetection) {
if (!(mCharsetSource == kCharsetFromUserForced ||
mCharsetSource == kCharsetFromParentForced)) {
DontGuessEncoding();
}
mDecodingLocalFileWithoutTokenizing = false;
if (mCharsetSource >= kCharsetFromAutoDetection &&
!(mCharsetSource == kCharsetFromUserForced ||
mCharsetSource == kCharsetFromParentForced)) {
mFeedChardet = false;
}
mDecodingLocalFileAsUTF8 = false;
mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
}
if (mSniffingBuffer) {
@@ -376,10 +413,10 @@ nsresult nsHtml5StreamParser::SetupDecodingFromBom(
NotNull<const Encoding*> aEncoding) {
NS_ASSERTION(IsParserThread(), "Wrong thread!");
mEncoding = aEncoding;
mDecodingLocalFileWithoutTokenizing = false;
mDecodingLocalFileAsUTF8 = false;
mUnicodeDecoder = mEncoding->NewDecoderWithoutBOMHandling();
mCharsetSource = kCharsetFromByteOrderMark;
DontGuessEncoding();
mFeedChardet = false;
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
mSniffingBuffer = nullptr;
mMetaScanner = nullptr;
@@ -437,7 +474,7 @@ void nsHtml5StreamParser::SniffBOMlessUTF16BasicLatin(
}
mCharsetSource = kCharsetFromIrreversibleAutoDetection;
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
DontGuessEncoding();
mFeedChardet = false;
mTreeBuilder->MaybeComplainAboutCharset("EncBomlessUtf16", true, 0);
}
@@ -511,23 +548,22 @@ void nsHtml5StreamParser::FinalizeSniffingWithDetector(
FeedDetector(MakeSpan(mSniffingBuffer.get(), mSniffingLength), false);
}
if (mFeedChardet && !aFromSegment.IsEmpty()) {
// Avoid buffer boundary-dependent behavior.
FeedDetector(aFromSegment.To(aCountToSniffingLimit), false);
// Avoid buffer boundary-dependent behavior when
// reparsing is forbidden. If reparse is forbidden,
// act as if we only saw the first 1024 bytes.
// When reparsing isn't forbidden, buffer boundaries
// can have an effect on whether the page is loaded
// once or twice. :-(
FeedDetector(mReparseForbidden ? aFromSegment.To(aCountToSniffingLimit)
: aFromSegment,
false);
}
bool guess = mFeedChardet;
if (mFeedChardet && aEof && aCountToSniffingLimit <= aFromSegment.Length()) {
FeedDetector(Span<const uint8_t>(), true);
if (mFeedChardet && aEof &&
(!mReparseForbidden || aCountToSniffingLimit == aFromSegment.Length())) {
// Don't signal EOF if reparse is forbidden and we didn't pass all input
// to the detector above.
mFeedChardet = false;
}
if (guess) {
GuessEncoding(aEof, (guess == mFeedChardet));
}
if (mReparseForbidden) {
DontGuessEncoding();
}
if (mFeedChardet && !aEof && aCountToSniffingLimit < aFromSegment.Length()) {
// Avoid buffer boundary-dependent behavior.
FeedDetector(aFromSegment.From(aCountToSniffingLimit), false);
FeedDetector(Span<const uint8_t>(), true);
}
}
@@ -738,11 +774,8 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
// Honor override
if (mEncoding->IsJapaneseLegacy()) {
mFeedChardet = true;
mUseJapaneseDetector = true;
FinalizeSniffingWithDetector(aFromSegment, countToSniffingLimit,
false);
} else {
DontGuessEncoding();
}
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
aFromSegment);
@@ -759,10 +792,7 @@ nsresult nsHtml5StreamParser::SniffStreamBytes(
// meta not found, honor override
if (mEncoding->IsJapaneseLegacy()) {
mFeedChardet = true;
mUseJapaneseDetector = true;
FinalizeSniffingWithDetector(aFromSegment, countToSniffingLimit, false);
} else {
DontGuessEncoding();
}
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment);
}
@@ -877,10 +907,14 @@ nsresult nsHtml5StreamParser::WriteStreamBytes(
bool hadErrors;
Tie(result, read, written, hadErrors) =
mUnicodeDecoder->DecodeToUTF16(src, dst, false);
if (!mDecodingLocalFileWithoutTokenizing) {
if (!mDecodingLocalFileAsUTF8) {
OnNewContent(dst.To(written));
}
if (hadErrors && !mHasHadErrors) {
if (mDecodingLocalFileAsUTF8) {
ReDecodeLocalFile();
return NS_OK;
}
mHasHadErrors = true;
if (mEncoding == UTF_8_ENCODING) {
mTreeBuilder->TryToEnableEncodingMenu();
@@ -900,15 +934,9 @@ nsresult nsHtml5StreamParser::WriteStreamBytes(
} else {
MOZ_ASSERT(totalRead == aFromSegment.Length(),
"The Unicode decoder consumed the wrong number of bytes.");
if (mDecodingLocalFileWithoutTokenizing &&
if (mDecodingLocalFileAsUTF8 &&
mLocalFileBytesBuffered == LOCAL_FILE_UTF_8_BUFFER_SIZE) {
auto encoding = mEncoding;
GuessEncoding(false, false);
if (encoding == mEncoding) {
CommitLocalFileToEncoding();
} else {
ReDecodeLocalFile();
}
CommitLocalFileToUTF8();
}
return NS_OK;
}
@@ -916,12 +944,16 @@ nsresult nsHtml5StreamParser::WriteStreamBytes(
}
void nsHtml5StreamParser::ReDecodeLocalFile() {
MOZ_ASSERT(mDecodingLocalFileWithoutTokenizing);
mDecodingLocalFileWithoutTokenizing = false;
MOZ_ASSERT(mDecodingLocalFileAsUTF8);
mDecodingLocalFileAsUTF8 = false;
mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
mHasHadErrors = false;
DontGuessEncoding();
// We need the detector to start with fresh state.
// Turn off ISO-2022-JP detection, because if this doc was
// ISO-2022-JP, it would have already been detected.
mJapaneseDetector = mozilla::JapaneseDetector::Create(false);
mFeedChardet = true;
// Throw away previous decoded data
mLastBuffer = mFirstBuffer;
@@ -935,11 +967,13 @@ void nsHtml5StreamParser::ReDecodeLocalFile() {
}
}
void nsHtml5StreamParser::CommitLocalFileToEncoding() {
MOZ_ASSERT(mDecodingLocalFileWithoutTokenizing);
mDecodingLocalFileWithoutTokenizing = false;
void nsHtml5StreamParser::CommitLocalFileToUTF8() {
MOZ_ASSERT(mDecodingLocalFileAsUTF8);
mDecodingLocalFileAsUTF8 = false;
mFeedChardet = false;
mGuessEncoding = false;
mEncoding = UTF_8_ENCODING;
mCharsetSource = kCharsetFromFileURLGuess;
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
nsHtml5OwningUTF16Buffer* buffer = mFirstBuffer;
while (buffer) {
@@ -970,17 +1004,7 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) {
MOZ_ASSERT(
!mExecutor->HasStarted(),
"Got OnStartRequest at the wrong stage in the executor life cycle.");
MOZ_ASSERT(NS_IsMainThread(), "Wrong thread!");
// To avoid the cost of instantiating the detector when it's not needed,
// let's instantiate only if we make it out of this method with the
// intent to use it.
auto detectorCreator = MakeScopeExit([&] {
if (mFeedChardet && !mUseJapaneseDetector) {
mDetector = mozilla::EncodingDetector::Create();
}
});
NS_ASSERTION(NS_IsMainThread(), "Wrong thread!");
if (mObserver) {
mObserver->OnStartRequest(aRequest);
}
@@ -990,7 +1014,7 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) {
mSkipContentSniffing = loadInfo->GetSkipContentSniffing();
if (mSkipContentSniffing) {
DontGuessEncoding();
mFeedChardet = false;
}
mStreamState = STREAM_BEING_READ;
@@ -1005,7 +1029,7 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) {
mMode == LOAD_AS_DATA ? false : mExecutor->IsScriptEnabled();
mOwner->StartTokenizer(scriptingEnabled);
MOZ_ASSERT(!mDecodingLocalFileWithoutTokenizing);
MOZ_ASSERT(!mDecodingLocalFileAsUTF8);
bool isSrcdoc = false;
nsCOMPtr<nsIChannel> channel;
nsresult rv = GetChannel(getter_AddRefs(channel));
@@ -1023,27 +1047,7 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) {
rv = channel->GetURI(getter_AddRefs(currentURI));
if (NS_SUCCEEDED(rv)) {
nsCOMPtr<nsIURI> innermost = NS_GetInnermostURI(currentURI);
if (innermost->SchemeIs("file")) {
mDecodingLocalFileWithoutTokenizing = true;
} else {
nsAutoCString host;
innermost->GetAsciiHost(host);
if (!host.IsEmpty()) {
// First let's see if the host is DNS-absolute and ends with a
// dot and get rid of that one.
if (host.Last() == '.') {
host.SetLength(host.Length() - 1);
}
int32_t index = host.RFindChar('.');
if (index != kNotFound) {
// We tolerate an IPv4 component as generic "TLD", so don't
// bother checking.
ToLowerCase(
Substring(host, index + 1, host.Length() - (index + 1)),
mTLD);
}
}
}
mDecodingLocalFileAsUTF8 = innermost->SchemeIs("file");
}
}
}
@@ -1112,6 +1116,7 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) {
// This is the old Gecko behavior but the HTML5 spec disagrees.
// Don't reparse on POST.
mReparseForbidden = true;
mFeedChardet = false; // can't restart anyway
}
}
@@ -1138,58 +1143,14 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) {
}
if (mCharsetSource == kCharsetFromParentFrame) {
// Remember this for error reporting.
// Remember this in case chardet overwrites mCharsetSource
mInitialEncodingWasFromParentFrame = true;
}
if (mCharsetSource >= kCharsetFromFinalAutoDetection) {
if ((mCharsetSource == kCharsetFromParentForced ||
mCharsetSource == kCharsetFromUserForced) &&
mEncoding->IsJapaneseLegacy()) {
// Japanese detector only
mUseJapaneseDetector = true;
mGuessEncoding = false;
} else {
DontGuessEncoding();
}
}
// Compute various pref-based special cases
if (!mDecodingLocalFileWithoutTokenizing && mFeedChardet) {
if (StaticPrefs::intl_charset_detector_ng_enabled()) {
if (mTLD.EqualsLiteral("jp")) {
mUseJapaneseDetector =
!StaticPrefs::intl_charset_detector_ng_jp_enabled();
} else if (mTLD.EqualsLiteral("in") &&
mEncoding == WINDOWS_1252_ENCODING &&
!StaticPrefs::intl_charset_detector_ng_in_enabled()) {
// Avoid breaking font hacks that Chrome doesn't break.
DontGuessEncoding();
} else if (mTLD.EqualsLiteral("lk") &&
mEncoding == WINDOWS_1252_ENCODING &&
!StaticPrefs::intl_charset_detector_ng_lk_enabled()) {
// Avoid breaking font hacks that Chrome doesn't break.
DontGuessEncoding();
}
} else {
// If the new detector is turned off in general, we still use it to
// emulate the old Cyrillic detector in cases where the old Cyrillic
// detector would have been enabled.
nsAutoCString detectorName;
Preferences::GetLocalizedCString("intl.charset.detector", detectorName);
bool forceEncodingDetectorToCyrillicOnly =
detectorName.EqualsLiteral("ruprob") ||
detectorName.EqualsLiteral("ukprob");
if (mEncoding->IsJapaneseLegacy()) {
mUseJapaneseDetector = true;
} else if (mEncoding == WINDOWS_1251_ENCODING &&
forceEncodingDetectorToCyrillicOnly) {
mTLD.AssignLiteral("ru"); // Force the detector into Cyrillic mode
// regardless of real TLD
} else {
DontGuessEncoding();
}
}
if (mCharsetSource >= kCharsetFromAutoDetection &&
!(mCharsetSource == kCharsetFromParentForced ||
mCharsetSource == kCharsetFromUserForced)) {
mFeedChardet = false;
}
if (mCharsetSource < kCharsetFromUtf8OnlyMime) {
@@ -1202,10 +1163,10 @@ nsresult nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest) {
// There's no need to remove the BOM manually here, because
// the UTF-8 decoder removes it.
mReparseForbidden = true;
DontGuessEncoding();
mFeedChardet = false;
// Instantiate the converter here to avoid BOM sniffing.
mDecodingLocalFileWithoutTokenizing = false;
mDecodingLocalFileAsUTF8 = false;
mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
return NS_OK;
}
@@ -1269,10 +1230,15 @@ void nsHtml5StreamParser::DoStopRequest() {
bool hadErrors;
Tie(result, read, written, hadErrors) =
mUnicodeDecoder->DecodeToUTF16(src, dst, true);
if (!mDecodingLocalFileWithoutTokenizing) {
if (!mDecodingLocalFileAsUTF8) {
OnNewContent(dst.To(written));
}
if (hadErrors && !mHasHadErrors) {
if (mDecodingLocalFileAsUTF8) {
ReDecodeLocalFile();
DoStopRequest();
return;
}
mHasHadErrors = true;
if (mEncoding == UTF_8_ENCODING) {
mTreeBuilder->TryToEnableEncodingMenu();
@@ -1289,20 +1255,9 @@ void nsHtml5StreamParser::DoStopRequest() {
}
mLastBuffer = (mLastBuffer->next = newBuf.forget());
} else {
if (mDecodingLocalFileWithoutTokenizing) {
if (mDecodingLocalFileAsUTF8) {
MOZ_ASSERT(mLocalFileBytesBuffered < LOCAL_FILE_UTF_8_BUFFER_SIZE);
MOZ_ASSERT(mGuessEncoding);
auto encoding = mEncoding;
GuessEncoding(true, false);
if (encoding == mEncoding) {
CommitLocalFileToEncoding();
} else {
ReDecodeLocalFile();
DoStopRequest();
return;
}
} else if (mGuessEncoding) {
GuessEncoding(true, false);
CommitLocalFileToUTF8();
}
break;
}
@@ -1347,7 +1302,7 @@ nsresult nsHtml5StreamParser::OnStopRequest(nsIRequest* aRequest,
void nsHtml5StreamParser::DoDataAvailableBuffer(
mozilla::Buffer<uint8_t>&& aBuffer) {
if (MOZ_LIKELY(!mDecodingLocalFileWithoutTokenizing)) {
if (MOZ_LIKELY(!mDecodingLocalFileAsUTF8)) {
DoDataAvailable(aBuffer);
return;
}
@@ -1395,7 +1350,7 @@ void nsHtml5StreamParser::DoDataAvailableBuffer(
// Do this clean-up here to avoid use-after-free when
// DoDataAvailable is passed a span pointing into an
// element of mBufferedLocalFileData.
if (!mDecodingLocalFileWithoutTokenizing) {
if (!mDecodingLocalFileAsUTF8) {
mBufferedLocalFileData.Clear();
}
}
@@ -1428,7 +1383,7 @@ void nsHtml5StreamParser::DoDataAvailable(Span<const uint8_t> aBuffer) {
return;
}
if (mDecodingLocalFileWithoutTokenizing) {
if (mDecodingLocalFileAsUTF8) {
return;
}
@@ -1501,7 +1456,7 @@ nsresult nsHtml5StreamParser::OnDataAvailable(nsIRequest* aRequest,
MOZ_ASSERT(IsParserThread(), "Wrong thread!");
mozilla::MutexAutoLock autoLock(mTokenizerMutex);
if (MOZ_UNLIKELY(mDecodingLocalFileWithoutTokenizing)) {
if (MOZ_UNLIKELY(mDecodingLocalFileAsUTF8)) {
// It's a bit sad to potentially buffer the first 1024
// bytes in two places, but it's a lot simpler than trying
// to optimize out that copy. It only happens for local files
@@ -1572,7 +1527,7 @@ const Encoding* nsHtml5StreamParser::PreferredForInternalEncodingDecl(
}
}
mCharsetSource = kCharsetFromMetaTag; // become confident
DontGuessEncoding(); // don't feed chardet when confident
mFeedChardet = false; // don't feed chardet when confident
return nullptr;
}
@@ -1611,7 +1566,7 @@ bool nsHtml5StreamParser::internalEncodingDeclaration(nsHtml5String aEncoding) {
// Avoid having the chardet ask for another restart after this restart
// request.
DontGuessEncoding();
mFeedChardet = false;
mTreeBuilder->NeedsCharsetSwitchTo(WrapNotNull(encoding), kCharsetFromMetaTag,
mTokenizer->getLineNumber());
FlushTreeOpsAndDisarmTimer();
@@ -1648,7 +1603,7 @@ void nsHtml5StreamParser::FlushTreeOpsAndDisarmTimer() {
void nsHtml5StreamParser::ParseAvailableData() {
MOZ_ASSERT(IsParserThread(), "Wrong thread!");
mTokenizerMutex.AssertCurrentThreadOwns();
MOZ_ASSERT(!mDecodingLocalFileWithoutTokenizing);
MOZ_ASSERT(!mDecodingLocalFileAsUTF8);
if (IsTerminatedOrInterrupted()) {
return;