Backed out changeset 77af189b5c49 (bug 1373984) for build bustage in nsHtml5Parser.cpp a=backout CLOSED TREE
MozReview-Commit-ID: 6kBmU71j2To
This commit is contained in:
@@ -152,7 +152,6 @@ nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor,
|
||||
: mSniffingLength(0)
|
||||
, mBomState(eBomState::BOM_SNIFFING_NOT_STARTED)
|
||||
, mCharsetSource(kCharsetUninitialized)
|
||||
, mEncoding(WINDOWS_1252_ENCODING)
|
||||
, mReparseForbidden(false)
|
||||
, mLastBuffer(nullptr) // Will be filled when starting
|
||||
, mExecutor(aExecutor)
|
||||
@@ -254,21 +253,23 @@ nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf)
|
||||
NS_ASSERTION(IsParserThread(), "Wrong thread!");
|
||||
if (aConf == eBestAnswer || aConf == eSureAnswer) {
|
||||
mFeedChardet = false; // just in case
|
||||
auto encoding = Encoding::ForLabelNoReplacement(
|
||||
const Encoding* encoding = Encoding::ForLabelNoReplacement(
|
||||
nsDependentCString(aCharset));
|
||||
if (!encoding) {
|
||||
return NS_OK;
|
||||
}
|
||||
nsAutoCString charset;
|
||||
encoding->Name(charset);
|
||||
if (HasDecoder()) {
|
||||
if (mEncoding == encoding) {
|
||||
if (mCharset.Equals(charset)) {
|
||||
NS_ASSERTION(mCharsetSource < kCharsetFromAutoDetection,
|
||||
"Why are we running chardet at all?");
|
||||
mCharsetSource = kCharsetFromAutoDetection;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
|
||||
} else {
|
||||
// We've already committed to a decoder. Request a reload from the
|
||||
// docshell.
|
||||
mTreeBuilder->NeedsCharsetSwitchTo(WrapNotNull(encoding),
|
||||
mTreeBuilder->NeedsCharsetSwitchTo(charset,
|
||||
kCharsetFromAutoDetection,
|
||||
0);
|
||||
FlushTreeOpsAndDisarmTimer();
|
||||
@@ -277,9 +278,9 @@ nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf)
|
||||
} else {
|
||||
// Got a confident answer from the sniffing buffer. That code will
|
||||
// take care of setting up the decoder.
|
||||
mEncoding = WrapNotNull(encoding);
|
||||
mCharset.Assign(charset);
|
||||
mCharsetSource = kCharsetFromAutoDetection;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
|
||||
}
|
||||
}
|
||||
return NS_OK;
|
||||
@@ -320,7 +321,7 @@ nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(const
|
||||
{
|
||||
NS_ASSERTION(IsParserThread(), "Wrong thread!");
|
||||
nsresult rv = NS_OK;
|
||||
mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
|
||||
mUnicodeDecoder = Encoding::ForName(mCharset)->NewDecoderWithBOMRemoval();
|
||||
if (mSniffingBuffer) {
|
||||
uint32_t writeCount;
|
||||
rv = WriteStreamBytes(mSniffingBuffer.get(), mSniffingLength, &writeCount);
|
||||
@@ -335,14 +336,14 @@ nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(const
|
||||
}
|
||||
|
||||
nsresult
|
||||
nsHtml5StreamParser::SetupDecodingFromBom(NotNull<const Encoding*> aEncoding)
|
||||
nsHtml5StreamParser::SetupDecodingFromBom(const char* aDecoderCharsetName)
|
||||
{
|
||||
NS_ASSERTION(IsParserThread(), "Wrong thread!");
|
||||
mEncoding = aEncoding;
|
||||
mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
|
||||
mCharset.Assign(aDecoderCharsetName);
|
||||
mUnicodeDecoder = Encoding::ForName(mCharset)->NewDecoderWithBOMRemoval();
|
||||
mCharsetSource = kCharsetFromByteOrderMark;
|
||||
mFeedChardet = false;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
|
||||
mSniffingBuffer = nullptr;
|
||||
mMetaScanner = nullptr;
|
||||
mBomState = BOM_SNIFFING_OVER;
|
||||
@@ -397,12 +398,12 @@ nsHtml5StreamParser::SniffBOMlessUTF16BasicLatin(const uint8_t* aFromSegment,
|
||||
}
|
||||
|
||||
if (byteNonZero[0]) {
|
||||
mEncoding = UTF_16LE_ENCODING;
|
||||
mCharset.AssignLiteral("UTF-16LE");
|
||||
} else {
|
||||
mEncoding = UTF_16BE_ENCODING;
|
||||
mCharset.AssignLiteral("UTF-16BE");
|
||||
}
|
||||
mCharsetSource = kCharsetFromIrreversibleAutoDetection;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
|
||||
mFeedChardet = false;
|
||||
mTreeBuilder->MaybeComplainAboutCharset("EncBomlessUtf16",
|
||||
true,
|
||||
@@ -417,9 +418,8 @@ nsHtml5StreamParser::SetEncodingFromExpat(const char16_t* aEncoding)
|
||||
nsDependentString utf16(aEncoding);
|
||||
nsAutoCString utf8;
|
||||
CopyUTF16toUTF8(utf16, utf8);
|
||||
auto encoding = PreferredForInternalEncodingDecl(utf8);
|
||||
if (encoding) {
|
||||
mEncoding = WrapNotNull(encoding);
|
||||
if (PreferredForInternalEncodingDecl(utf8)) {
|
||||
mCharset.Assign(utf8);
|
||||
mCharsetSource = kCharsetFromMetaTag; // closest for XML
|
||||
return;
|
||||
}
|
||||
@@ -428,7 +428,7 @@ nsHtml5StreamParser::SetEncodingFromExpat(const char16_t* aEncoding)
|
||||
// right away and let the encoding be set to UTF-8 which we'd default to
|
||||
// anyway.
|
||||
}
|
||||
mEncoding = UTF_8_ENCODING; // XML defaults to UTF-8 without a BOM
|
||||
mCharset.AssignLiteral("UTF-8"); // XML defaults to UTF-8 without a BOM
|
||||
mCharsetSource = kCharsetFromMetaTag; // means confident
|
||||
}
|
||||
|
||||
@@ -560,7 +560,7 @@ nsHtml5StreamParser::FinalizeSniffing(const uint8_t* aFromSegment, // can be nul
|
||||
// confidently to UTF-8 in this case.
|
||||
// It is also possible that the document has an XML declaration that is
|
||||
// longer than 1024 bytes, but that case is not worth worrying about.
|
||||
mEncoding = UTF_8_ENCODING;
|
||||
mCharset.AssignLiteral("UTF-8");
|
||||
mCharsetSource = kCharsetFromMetaTag; // means confident
|
||||
}
|
||||
|
||||
@@ -612,18 +612,18 @@ nsHtml5StreamParser::FinalizeSniffing(const uint8_t* aFromSegment, // can be nul
|
||||
}
|
||||
if (mCharsetSource == kCharsetUninitialized) {
|
||||
// Hopefully this case is never needed, but dealing with it anyway
|
||||
mEncoding = WINDOWS_1252_ENCODING;
|
||||
mCharset.AssignLiteral("windows-1252");
|
||||
mCharsetSource = kCharsetFromFallback;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
|
||||
} else if (mMode == LOAD_AS_DATA &&
|
||||
mCharsetSource == kCharsetFromFallback) {
|
||||
NS_ASSERTION(mReparseForbidden, "Reparse should be forbidden for XHR");
|
||||
NS_ASSERTION(!mFeedChardet, "Should not feed chardet for XHR");
|
||||
NS_ASSERTION(mEncoding == UTF_8_ENCODING,
|
||||
NS_ASSERTION(mCharset.EqualsLiteral("UTF-8"),
|
||||
"XHR should default to UTF-8");
|
||||
// Now mark charset source as non-weak to signal that we have a decision
|
||||
mCharsetSource = kCharsetFromDocTypeDefault;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
|
||||
}
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount);
|
||||
}
|
||||
@@ -637,9 +637,9 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
|
||||
nsresult rv = NS_OK;
|
||||
uint32_t writeCount;
|
||||
|
||||
// mEncoding and mCharsetSource potentially have come from channel or higher
|
||||
// mCharset and mCharsetSource potentially have come from channel or higher
|
||||
// by now. If we find a BOM, SetupDecodingFromBom() will overwrite them.
|
||||
// If we don't find a BOM, the previously set values of mEncoding and
|
||||
// If we don't find a BOM, the previously set values of mCharset and
|
||||
// mCharsetSource are not modified by the BOM sniffing here.
|
||||
for (uint32_t i = 0; i < aCount && mBomState != BOM_SNIFFING_OVER; i++) {
|
||||
switch (mBomState) {
|
||||
@@ -662,7 +662,7 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
|
||||
break;
|
||||
case SEEN_UTF_16_LE_FIRST_BYTE:
|
||||
if (aFromSegment[i] == 0xFE) {
|
||||
rv = SetupDecodingFromBom(UTF_16LE_ENCODING); // upper case is the raw form
|
||||
rv = SetupDecodingFromBom("UTF-16LE"); // upper case is the raw form
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
uint32_t count = aCount - (i + 1);
|
||||
rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount);
|
||||
@@ -674,7 +674,7 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
|
||||
break;
|
||||
case SEEN_UTF_16_BE_FIRST_BYTE:
|
||||
if (aFromSegment[i] == 0xFF) {
|
||||
rv = SetupDecodingFromBom(UTF_16BE_ENCODING); // upper case is the raw form
|
||||
rv = SetupDecodingFromBom("UTF-16BE"); // upper case is the raw form
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
uint32_t count = aCount - (i + 1);
|
||||
rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount);
|
||||
@@ -693,7 +693,7 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
|
||||
break;
|
||||
case SEEN_UTF_8_SECOND_BYTE:
|
||||
if (aFromSegment[i] == 0xBF) {
|
||||
rv = SetupDecodingFromBom(UTF_8_ENCODING); // upper case is the raw form
|
||||
rv = SetupDecodingFromBom("UTF-8"); // upper case is the raw form
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
uint32_t count = aCount - (i + 1);
|
||||
rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount);
|
||||
@@ -718,13 +718,13 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
|
||||
|
||||
if (mBomState == BOM_SNIFFING_OVER &&
|
||||
mCharsetSource == kCharsetFromChannel) {
|
||||
// There was no BOM and the charset came from channel. mEncoding
|
||||
// There was no BOM and the charset came from channel. mCharset
|
||||
// still contains the charset from the channel as set by an
|
||||
// earlier call to SetDocumentCharset(), since we didn't find a BOM and
|
||||
// overwrite mEncoding. (Note that if the user has overridden the charset,
|
||||
// overwrite mCharset. (Note that if the user has overridden the charset,
|
||||
// we don't come here but check <meta> for XSS-dangerous charsets first.)
|
||||
mFeedChardet = false;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment,
|
||||
aCount, aWriteCount);
|
||||
}
|
||||
@@ -743,14 +743,15 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
|
||||
nsHtml5ByteReadable readable(aFromSegment, aFromSegment +
|
||||
countToSniffingLimit);
|
||||
nsAutoCString charset;
|
||||
auto encoding = mMetaScanner->sniff(&readable);
|
||||
mMetaScanner->sniff(&readable, charset);
|
||||
// Due to the way nsHtml5Portability reports OOM, ask the tree buider
|
||||
nsresult rv;
|
||||
if (NS_FAILED((rv = mTreeBuilder->IsBroken()))) {
|
||||
MarkAsBroken(rv);
|
||||
return rv;
|
||||
}
|
||||
if (encoding) {
|
||||
if (!charset.IsEmpty()) {
|
||||
const Encoding* encoding = Encoding::ForName(charset);
|
||||
// meta scan successful; honor overrides unless meta is XSS-dangerous
|
||||
if ((mCharsetSource == kCharsetFromParentForced ||
|
||||
mCharsetSource == kCharsetFromUserForced) &&
|
||||
@@ -760,10 +761,10 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
|
||||
aFromSegment, aCount, aWriteCount);
|
||||
}
|
||||
mEncoding = WrapNotNull(encoding);
|
||||
mCharset.Assign(charset);
|
||||
mCharsetSource = kCharsetFromMetaPrescan;
|
||||
mFeedChardet = false;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
|
||||
aFromSegment, aCount, aWriteCount);
|
||||
}
|
||||
@@ -781,14 +782,16 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
|
||||
// not the last buffer
|
||||
if (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA) {
|
||||
nsHtml5ByteReadable readable(aFromSegment, aFromSegment + aCount);
|
||||
auto encoding = mMetaScanner->sniff(&readable);
|
||||
nsAutoCString charset;
|
||||
mMetaScanner->sniff(&readable, charset);
|
||||
// Due to the way nsHtml5Portability reports OOM, ask the tree buider
|
||||
nsresult rv;
|
||||
if (NS_FAILED((rv = mTreeBuilder->IsBroken()))) {
|
||||
MarkAsBroken(rv);
|
||||
return rv;
|
||||
}
|
||||
if (encoding) {
|
||||
if (!charset.IsEmpty()) {
|
||||
const Encoding* encoding = Encoding::ForName(charset);
|
||||
// meta scan successful; honor overrides unless meta is XSS-dangerous
|
||||
if ((mCharsetSource == kCharsetFromParentForced ||
|
||||
mCharsetSource == kCharsetFromUserForced) &&
|
||||
@@ -798,10 +801,10 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment,
|
||||
aCount, aWriteCount);
|
||||
}
|
||||
mEncoding = WrapNotNull(encoding);
|
||||
mCharset.Assign(charset);
|
||||
mCharsetSource = kCharsetFromMetaPrescan;
|
||||
mFeedChardet = false;
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment,
|
||||
aCount, aWriteCount);
|
||||
}
|
||||
@@ -1004,7 +1007,7 @@ nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest, nsISupports* aContext)
|
||||
mFeedChardet = false;
|
||||
|
||||
// Instantiate the converter here to avoid BOM sniffing.
|
||||
mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
|
||||
mUnicodeDecoder = Encoding::ForName(mCharset)->NewDecoderWithBOMRemoval();
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
@@ -1228,8 +1231,8 @@ nsHtml5StreamParser::CopySegmentsToParser(nsIInputStream *aInStream,
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
const Encoding*
|
||||
nsHtml5StreamParser::PreferredForInternalEncodingDecl(const nsACString& aEncoding)
|
||||
bool
|
||||
nsHtml5StreamParser::PreferredForInternalEncodingDecl(nsACString& aEncoding)
|
||||
{
|
||||
const Encoding* newEncoding = Encoding::ForLabel(aEncoding);
|
||||
if (!newEncoding) {
|
||||
@@ -1237,7 +1240,7 @@ nsHtml5StreamParser::PreferredForInternalEncodingDecl(const nsACString& aEncodin
|
||||
mTreeBuilder->MaybeComplainAboutCharset("EncMetaUnsupported",
|
||||
true,
|
||||
mTokenizer->getLineNumber());
|
||||
return nullptr;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (newEncoding == UTF_16BE_ENCODING ||
|
||||
@@ -1256,7 +1259,7 @@ nsHtml5StreamParser::PreferredForInternalEncodingDecl(const nsACString& aEncodin
|
||||
newEncoding = WINDOWS_1252_ENCODING;
|
||||
}
|
||||
|
||||
if (newEncoding == mEncoding) {
|
||||
if (newEncoding == Encoding::ForName(mCharset)) {
|
||||
if (mCharsetSource < kCharsetFromMetaPrescan) {
|
||||
if (mInitialEncodingWasFromParentFrame) {
|
||||
mTreeBuilder->MaybeComplainAboutCharset("EncLateMetaFrame",
|
||||
@@ -1270,10 +1273,11 @@ nsHtml5StreamParser::PreferredForInternalEncodingDecl(const nsACString& aEncodin
|
||||
}
|
||||
mCharsetSource = kCharsetFromMetaTag; // become confident
|
||||
mFeedChardet = false; // don't feed chardet when confident
|
||||
return nullptr;
|
||||
return false;
|
||||
}
|
||||
|
||||
return newEncoding;
|
||||
newEncoding->Name(aEncoding);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
@@ -1292,8 +1296,7 @@ nsHtml5StreamParser::internalEncodingDeclaration(nsHtml5String aEncoding)
|
||||
nsAutoCString newEncoding;
|
||||
CopyUTF16toUTF8(newEncoding16, newEncoding);
|
||||
|
||||
auto encoding = PreferredForInternalEncodingDecl(newEncoding);
|
||||
if (!encoding) {
|
||||
if (!PreferredForInternalEncodingDecl(newEncoding)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1311,7 +1314,7 @@ nsHtml5StreamParser::internalEncodingDeclaration(nsHtml5String aEncoding)
|
||||
// Avoid having the chardet ask for another restart after this restart
|
||||
// request.
|
||||
mFeedChardet = false;
|
||||
mTreeBuilder->NeedsCharsetSwitchTo(WrapNotNull(encoding),
|
||||
mTreeBuilder->NeedsCharsetSwitchTo(newEncoding,
|
||||
kCharsetFromMetaTag,
|
||||
mTokenizer->getLineNumber());
|
||||
FlushTreeOpsAndDisarmTimer();
|
||||
|
||||
Reference in New Issue
Block a user