Bug 1373984 - Turn nsIDocument::mCharacterSet into mozilla::NotNull<const mozilla::Encoding*>. r=hsivonen
MozReview-Commit-ID: GF0YXDwfA14
This commit is contained in:
@@ -152,6 +152,7 @@ nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor,
|
||||
: mSniffingLength(0)
|
||||
, mBomState(eBomState::BOM_SNIFFING_NOT_STARTED)
|
||||
, mCharsetSource(kCharsetUninitialized)
|
||||
, mEncoding(WINDOWS_1252_ENCODING)
|
||||
, mReparseForbidden(false)
|
||||
, mLastBuffer(nullptr) // Will be filled when starting
|
||||
, mExecutor(aExecutor)
|
||||
@@ -253,23 +254,21 @@ nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf)
|
||||
NS_ASSERTION(IsParserThread(), "Wrong thread!");
|
||||
if (aConf == eBestAnswer || aConf == eSureAnswer) {
|
||||
mFeedChardet = false; // just in case
|
||||
const Encoding* encoding = Encoding::ForLabelNoReplacement(
|
||||
auto encoding = Encoding::ForLabelNoReplacement(
|
||||
nsDependentCString(aCharset));
|
||||
if (!encoding) {
|
||||
return NS_OK;
|
||||
}
|
||||
nsAutoCString charset;
|
||||
encoding->Name(charset);
|
||||
if (HasDecoder()) {
|
||||
if (mCharset.Equals(charset)) {
|
||||
if (mEncoding == encoding) {
|
||||
NS_ASSERTION(mCharsetSource < kCharsetFromAutoDetection,
|
||||
"Why are we running chardet at all?");
|
||||
mCharsetSource = kCharsetFromAutoDetection;
|
||||
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
} else {
|
||||
// We've already committed to a decoder. Request a reload from the
|
||||
// docshell.
|
||||
mTreeBuilder->NeedsCharsetSwitchTo(charset,
|
||||
mTreeBuilder->NeedsCharsetSwitchTo(WrapNotNull(encoding),
|
||||
kCharsetFromAutoDetection,
|
||||
0);
|
||||
FlushTreeOpsAndDisarmTimer();
|
||||
@@ -278,9 +277,9 @@ nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf)
|
||||
} else {
|
||||
// Got a confident answer from the sniffing buffer. That code will
|
||||
// take care of setting up the decoder.
|
||||
mCharset.Assign(charset);
|
||||
mEncoding = WrapNotNull(encoding);
|
||||
mCharsetSource = kCharsetFromAutoDetection;
|
||||
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
}
|
||||
}
|
||||
return NS_OK;
|
||||
@@ -321,7 +320,7 @@ nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(const
|
||||
{
|
||||
NS_ASSERTION(IsParserThread(), "Wrong thread!");
|
||||
nsresult rv = NS_OK;
|
||||
mUnicodeDecoder = Encoding::ForName(mCharset)->NewDecoderWithBOMRemoval();
|
||||
mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
|
||||
if (mSniffingBuffer) {
|
||||
uint32_t writeCount;
|
||||
rv = WriteStreamBytes(mSniffingBuffer.get(), mSniffingLength, &writeCount);
|
||||
@@ -336,14 +335,14 @@ nsHtml5StreamParser::SetupDecodingAndWriteSniffingBufferAndCurrentSegment(const
|
||||
}
|
||||
|
||||
nsresult
|
||||
nsHtml5StreamParser::SetupDecodingFromBom(const char* aDecoderCharsetName)
|
||||
nsHtml5StreamParser::SetupDecodingFromBom(NotNull<const Encoding*> aEncoding)
|
||||
{
|
||||
NS_ASSERTION(IsParserThread(), "Wrong thread!");
|
||||
mCharset.Assign(aDecoderCharsetName);
|
||||
mUnicodeDecoder = Encoding::ForName(mCharset)->NewDecoderWithBOMRemoval();
|
||||
mEncoding = aEncoding;
|
||||
mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
|
||||
mCharsetSource = kCharsetFromByteOrderMark;
|
||||
mFeedChardet = false;
|
||||
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
mSniffingBuffer = nullptr;
|
||||
mMetaScanner = nullptr;
|
||||
mBomState = BOM_SNIFFING_OVER;
|
||||
@@ -398,12 +397,12 @@ nsHtml5StreamParser::SniffBOMlessUTF16BasicLatin(const uint8_t* aFromSegment,
|
||||
}
|
||||
|
||||
if (byteNonZero[0]) {
|
||||
mCharset.AssignLiteral("UTF-16LE");
|
||||
mEncoding = UTF_16LE_ENCODING;
|
||||
} else {
|
||||
mCharset.AssignLiteral("UTF-16BE");
|
||||
mEncoding = UTF_16BE_ENCODING;
|
||||
}
|
||||
mCharsetSource = kCharsetFromIrreversibleAutoDetection;
|
||||
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
mFeedChardet = false;
|
||||
mTreeBuilder->MaybeComplainAboutCharset("EncBomlessUtf16",
|
||||
true,
|
||||
@@ -418,8 +417,9 @@ nsHtml5StreamParser::SetEncodingFromExpat(const char16_t* aEncoding)
|
||||
nsDependentString utf16(aEncoding);
|
||||
nsAutoCString utf8;
|
||||
CopyUTF16toUTF8(utf16, utf8);
|
||||
if (PreferredForInternalEncodingDecl(utf8)) {
|
||||
mCharset.Assign(utf8);
|
||||
auto encoding = PreferredForInternalEncodingDecl(utf8);
|
||||
if (encoding) {
|
||||
mEncoding = WrapNotNull(encoding);
|
||||
mCharsetSource = kCharsetFromMetaTag; // closest for XML
|
||||
return;
|
||||
}
|
||||
@@ -428,7 +428,7 @@ nsHtml5StreamParser::SetEncodingFromExpat(const char16_t* aEncoding)
|
||||
// right away and let the encoding be set to UTF-8 which we'd default to
|
||||
// anyway.
|
||||
}
|
||||
mCharset.AssignLiteral("UTF-8"); // XML defaults to UTF-8 without a BOM
|
||||
mEncoding = UTF_8_ENCODING; // XML defaults to UTF-8 without a BOM
|
||||
mCharsetSource = kCharsetFromMetaTag; // means confident
|
||||
}
|
||||
|
||||
@@ -560,7 +560,7 @@ nsHtml5StreamParser::FinalizeSniffing(const uint8_t* aFromSegment, // can be nul
|
||||
// confidently to UTF-8 in this case.
|
||||
// It is also possible that the document has an XML declaration that is
|
||||
// longer than 1024 bytes, but that case is not worth worrying about.
|
||||
mCharset.AssignLiteral("UTF-8");
|
||||
mEncoding = UTF_8_ENCODING;
|
||||
mCharsetSource = kCharsetFromMetaTag; // means confident
|
||||
}
|
||||
|
||||
@@ -612,18 +612,18 @@ nsHtml5StreamParser::FinalizeSniffing(const uint8_t* aFromSegment, // can be nul
|
||||
}
|
||||
if (mCharsetSource == kCharsetUninitialized) {
|
||||
// Hopefully this case is never needed, but dealing with it anyway
|
||||
mCharset.AssignLiteral("windows-1252");
|
||||
mEncoding = WINDOWS_1252_ENCODING;
|
||||
mCharsetSource = kCharsetFromFallback;
|
||||
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
} else if (mMode == LOAD_AS_DATA &&
|
||||
mCharsetSource == kCharsetFromFallback) {
|
||||
NS_ASSERTION(mReparseForbidden, "Reparse should be forbidden for XHR");
|
||||
NS_ASSERTION(!mFeedChardet, "Should not feed chardet for XHR");
|
||||
NS_ASSERTION(mCharset.EqualsLiteral("UTF-8"),
|
||||
NS_ASSERTION(mEncoding == UTF_8_ENCODING,
|
||||
"XHR should default to UTF-8");
|
||||
// Now mark charset source as non-weak to signal that we have a decision
|
||||
mCharsetSource = kCharsetFromDocTypeDefault;
|
||||
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
}
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount);
|
||||
}
|
||||
@@ -637,9 +637,9 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
|
||||
nsresult rv = NS_OK;
|
||||
uint32_t writeCount;
|
||||
|
||||
// mCharset and mCharsetSource potentially have come from channel or higher
|
||||
// mEncoding and mCharsetSource potentially have come from channel or higher
|
||||
// by now. If we find a BOM, SetupDecodingFromBom() will overwrite them.
|
||||
// If we don't find a BOM, the previously set values of mCharset and
|
||||
// If we don't find a BOM, the previously set values of mEncoding and
|
||||
// mCharsetSource are not modified by the BOM sniffing here.
|
||||
for (uint32_t i = 0; i < aCount && mBomState != BOM_SNIFFING_OVER; i++) {
|
||||
switch (mBomState) {
|
||||
@@ -662,7 +662,7 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
|
||||
break;
|
||||
case SEEN_UTF_16_LE_FIRST_BYTE:
|
||||
if (aFromSegment[i] == 0xFE) {
|
||||
rv = SetupDecodingFromBom("UTF-16LE"); // upper case is the raw form
|
||||
rv = SetupDecodingFromBom(UTF_16LE_ENCODING); // upper case is the raw form
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
uint32_t count = aCount - (i + 1);
|
||||
rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount);
|
||||
@@ -674,7 +674,7 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
|
||||
break;
|
||||
case SEEN_UTF_16_BE_FIRST_BYTE:
|
||||
if (aFromSegment[i] == 0xFF) {
|
||||
rv = SetupDecodingFromBom("UTF-16BE"); // upper case is the raw form
|
||||
rv = SetupDecodingFromBom(UTF_16BE_ENCODING); // upper case is the raw form
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
uint32_t count = aCount - (i + 1);
|
||||
rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount);
|
||||
@@ -693,7 +693,7 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
|
||||
break;
|
||||
case SEEN_UTF_8_SECOND_BYTE:
|
||||
if (aFromSegment[i] == 0xBF) {
|
||||
rv = SetupDecodingFromBom("UTF-8"); // upper case is the raw form
|
||||
rv = SetupDecodingFromBom(UTF_8_ENCODING); // upper case is the raw form
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
uint32_t count = aCount - (i + 1);
|
||||
rv = WriteStreamBytes(aFromSegment + (i + 1), count, &writeCount);
|
||||
@@ -718,13 +718,13 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
|
||||
|
||||
if (mBomState == BOM_SNIFFING_OVER &&
|
||||
mCharsetSource == kCharsetFromChannel) {
|
||||
// There was no BOM and the charset came from channel. mCharset
|
||||
// There was no BOM and the charset came from channel. mEncoding
|
||||
// still contains the charset from the channel as set by an
|
||||
// earlier call to SetDocumentCharset(), since we didn't find a BOM and
|
||||
// overwrite mCharset. (Note that if the user has overridden the charset,
|
||||
// overwrite mEncoding. (Note that if the user has overridden the charset,
|
||||
// we don't come here but check <meta> for XSS-dangerous charsets first.)
|
||||
mFeedChardet = false;
|
||||
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment,
|
||||
aCount, aWriteCount);
|
||||
}
|
||||
@@ -743,15 +743,14 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
|
||||
nsHtml5ByteReadable readable(aFromSegment, aFromSegment +
|
||||
countToSniffingLimit);
|
||||
nsAutoCString charset;
|
||||
mMetaScanner->sniff(&readable, charset);
|
||||
auto encoding = mMetaScanner->sniff(&readable);
|
||||
// Due to the way nsHtml5Portability reports OOM, ask the tree builder
|
||||
nsresult rv;
|
||||
if (NS_FAILED((rv = mTreeBuilder->IsBroken()))) {
|
||||
MarkAsBroken(rv);
|
||||
return rv;
|
||||
}
|
||||
if (!charset.IsEmpty()) {
|
||||
const Encoding* encoding = Encoding::ForName(charset);
|
||||
if (encoding) {
|
||||
// meta scan successful; honor overrides unless meta is XSS-dangerous
|
||||
if ((mCharsetSource == kCharsetFromParentForced ||
|
||||
mCharsetSource == kCharsetFromUserForced) &&
|
||||
@@ -761,10 +760,10 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
|
||||
aFromSegment, aCount, aWriteCount);
|
||||
}
|
||||
mCharset.Assign(charset);
|
||||
mEncoding = WrapNotNull(encoding);
|
||||
mCharsetSource = kCharsetFromMetaPrescan;
|
||||
mFeedChardet = false;
|
||||
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(
|
||||
aFromSegment, aCount, aWriteCount);
|
||||
}
|
||||
@@ -782,16 +781,14 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
|
||||
// not the last buffer
|
||||
if (mMode == NORMAL || mMode == VIEW_SOURCE_HTML || mMode == LOAD_AS_DATA) {
|
||||
nsHtml5ByteReadable readable(aFromSegment, aFromSegment + aCount);
|
||||
nsAutoCString charset;
|
||||
mMetaScanner->sniff(&readable, charset);
|
||||
auto encoding = mMetaScanner->sniff(&readable);
|
||||
// Due to the way nsHtml5Portability reports OOM, ask the tree builder
|
||||
nsresult rv;
|
||||
if (NS_FAILED((rv = mTreeBuilder->IsBroken()))) {
|
||||
MarkAsBroken(rv);
|
||||
return rv;
|
||||
}
|
||||
if (!charset.IsEmpty()) {
|
||||
const Encoding* encoding = Encoding::ForName(charset);
|
||||
if (encoding) {
|
||||
// meta scan successful; honor overrides unless meta is XSS-dangerous
|
||||
if ((mCharsetSource == kCharsetFromParentForced ||
|
||||
mCharsetSource == kCharsetFromUserForced) &&
|
||||
@@ -801,10 +798,10 @@ nsHtml5StreamParser::SniffStreamBytes(const uint8_t* aFromSegment,
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment,
|
||||
aCount, aWriteCount);
|
||||
}
|
||||
mCharset.Assign(charset);
|
||||
mEncoding = WrapNotNull(encoding);
|
||||
mCharsetSource = kCharsetFromMetaPrescan;
|
||||
mFeedChardet = false;
|
||||
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
|
||||
mTreeBuilder->SetDocumentCharset(mEncoding, mCharsetSource);
|
||||
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment,
|
||||
aCount, aWriteCount);
|
||||
}
|
||||
@@ -1007,7 +1004,7 @@ nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest, nsISupports* aContext)
|
||||
mFeedChardet = false;
|
||||
|
||||
// Instantiate the converter here to avoid BOM sniffing.
|
||||
mUnicodeDecoder = Encoding::ForName(mCharset)->NewDecoderWithBOMRemoval();
|
||||
mUnicodeDecoder = mEncoding->NewDecoderWithBOMRemoval();
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
@@ -1231,8 +1228,8 @@ nsHtml5StreamParser::CopySegmentsToParser(nsIInputStream *aInStream,
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
bool
|
||||
nsHtml5StreamParser::PreferredForInternalEncodingDecl(nsACString& aEncoding)
|
||||
const Encoding*
|
||||
nsHtml5StreamParser::PreferredForInternalEncodingDecl(const nsACString& aEncoding)
|
||||
{
|
||||
const Encoding* newEncoding = Encoding::ForLabel(aEncoding);
|
||||
if (!newEncoding) {
|
||||
@@ -1240,7 +1237,7 @@ nsHtml5StreamParser::PreferredForInternalEncodingDecl(nsACString& aEncoding)
|
||||
mTreeBuilder->MaybeComplainAboutCharset("EncMetaUnsupported",
|
||||
true,
|
||||
mTokenizer->getLineNumber());
|
||||
return false;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (newEncoding == UTF_16BE_ENCODING ||
|
||||
@@ -1259,7 +1256,7 @@ nsHtml5StreamParser::PreferredForInternalEncodingDecl(nsACString& aEncoding)
|
||||
newEncoding = WINDOWS_1252_ENCODING;
|
||||
}
|
||||
|
||||
if (newEncoding == Encoding::ForName(mCharset)) {
|
||||
if (newEncoding == mEncoding) {
|
||||
if (mCharsetSource < kCharsetFromMetaPrescan) {
|
||||
if (mInitialEncodingWasFromParentFrame) {
|
||||
mTreeBuilder->MaybeComplainAboutCharset("EncLateMetaFrame",
|
||||
@@ -1273,11 +1270,10 @@ nsHtml5StreamParser::PreferredForInternalEncodingDecl(nsACString& aEncoding)
|
||||
}
|
||||
mCharsetSource = kCharsetFromMetaTag; // become confident
|
||||
mFeedChardet = false; // don't feed chardet when confident
|
||||
return false;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
newEncoding->Name(aEncoding);
|
||||
return true;
|
||||
return newEncoding;
|
||||
}
|
||||
|
||||
bool
|
||||
@@ -1296,7 +1292,8 @@ nsHtml5StreamParser::internalEncodingDeclaration(nsHtml5String aEncoding)
|
||||
nsAutoCString newEncoding;
|
||||
CopyUTF16toUTF8(newEncoding16, newEncoding);
|
||||
|
||||
if (!PreferredForInternalEncodingDecl(newEncoding)) {
|
||||
auto encoding = PreferredForInternalEncodingDecl(newEncoding);
|
||||
if (!encoding) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1314,7 +1311,7 @@ nsHtml5StreamParser::internalEncodingDeclaration(nsHtml5String aEncoding)
|
||||
// Avoid having the chardet ask for another restart after this restart
|
||||
// request.
|
||||
mFeedChardet = false;
|
||||
mTreeBuilder->NeedsCharsetSwitchTo(newEncoding,
|
||||
mTreeBuilder->NeedsCharsetSwitchTo(WrapNotNull(encoding),
|
||||
kCharsetFromMetaTag,
|
||||
mTokenizer->getLineNumber());
|
||||
FlushTreeOpsAndDisarmTimer();
|
||||
|
||||
Reference in New Issue
Block a user