Bug 620106 - Allow chardet (if enabled) to reload the page during load. r=sicking, a=blocking2.0-final.

This commit is contained in:
Henri Sivonen
2011-02-11 12:33:32 +02:00
parent 865dd5f113
commit 200d06cfa8
11 changed files with 1174 additions and 37 deletions

View File

@@ -206,8 +206,9 @@ nsHtml5StreamParser::nsHtml5StreamParser(nsHtml5TreeOpExecutor* aExecutor,
nsCAutoString detectorContractID;
detectorContractID.AssignLiteral(NS_CHARSET_DETECTOR_CONTRACTID_BASE);
AppendUTF16toUTF8(detectorName, detectorContractID);
if (mChardet = do_CreateInstance(detectorContractID.get())) {
if ((mChardet = do_CreateInstance(detectorContractID.get()))) {
(void) mChardet->Init(this);
mFeedChardet = PR_TRUE;
}
}
@@ -246,9 +247,28 @@ nsHtml5StreamParser::Notify(const char* aCharset, nsDetectionConfident aConf)
{
NS_ASSERTION(IsParserThread(), "Wrong thread!");
if (aConf == eBestAnswer || aConf == eSureAnswer) {
mCharset.Assign(aCharset);
mCharsetSource = kCharsetFromAutoDetection;
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
mFeedChardet = PR_FALSE; // just in case
if (HasDecoder()) {
if (mCharset.Equals(aCharset)) {
NS_ASSERTION(mCharsetSource < kCharsetFromAutoDetection,
"Why are we running chardet at all?");
mCharsetSource = kCharsetFromAutoDetection;
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
} else {
// We've already committed to a decoder. Request a reload from the
// docshell.
nsCAutoString charset(aCharset);
mTreeBuilder->NeedsCharsetSwitchTo(charset, kCharsetFromAutoDetection);
FlushTreeOpsAndDisarmTimer();
Interrupt();
}
} else {
// Got a confident answer from the sniffing buffer. That code will
// take care of setting up the decoder.
mCharset.Assign(aCharset);
mCharsetSource = kCharsetFromAutoDetection;
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
}
}
return NS_OK;
}
@@ -306,6 +326,7 @@ nsHtml5StreamParser::SetupDecodingFromBom(const char* aCharsetName, const char*
mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Recover);
mCharset.Assign(aCharsetName);
mCharsetSource = kCharsetFromByteOrderMark;
mFeedChardet = PR_FALSE;
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
mSniffingBuffer = nsnull;
mMetaScanner = nsnull;
@@ -322,22 +343,39 @@ nsHtml5StreamParser::FinalizeSniffing(const PRUint8* aFromSegment, // can be nul
NS_ASSERTION(IsParserThread(), "Wrong thread!");
// meta scan failed.
if (mCharsetSource >= kCharsetFromHintPrevDoc) {
mFeedChardet = PR_FALSE;
return SetupDecodingAndWriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount);
}
// maybe try chardet now;
if (mChardet) {
if (mFeedChardet) {
PRBool dontFeed;
nsresult rv;
PRBool dontFeed = PR_FALSE;
if (mSniffingBuffer) {
rv = mChardet->DoIt((const char*)mSniffingBuffer.get(), mSniffingLength, &dontFeed);
mFeedChardet = !dontFeed;
NS_ENSURE_SUCCESS(rv, rv);
}
if (!dontFeed && aFromSegment) {
rv = mChardet->DoIt((const char*)aFromSegment, aCountToSniffingLimit, &dontFeed);
if (mFeedChardet && aFromSegment) {
rv = mChardet->DoIt((const char*)aFromSegment,
// Avoid buffer boundary-dependent behavior when
// reparsing is forbidden. If reparse is forbidden,
// act as if we only saw the first 1024 bytes.
// When reparsing isn't forbidden, buffer boundaries
// can have an effect on whether the page is loaded
// once or twice. :-(
mReparseForbidden ? aCountToSniffingLimit : aCount,
&dontFeed);
mFeedChardet = !dontFeed;
NS_ENSURE_SUCCESS(rv, rv);
}
if (mFeedChardet && (!aFromSegment || mReparseForbidden)) {
// mReparseForbidden is checked so that we get to use the sniffing
// buffer with the best guess so far if we aren't allowed to guess
// better later.
mFeedChardet = PR_FALSE;
rv = mChardet->Done();
NS_ENSURE_SUCCESS(rv, rv);
}
rv = mChardet->Done();
NS_ENSURE_SUCCESS(rv, rv);
// fall thru; callback may have changed charset
}
if (mCharsetSource == kCharsetUninitialized) {
@@ -439,6 +477,7 @@ nsHtml5StreamParser::SniffStreamBytes(const PRUint8* aFromSegment,
mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Recover);
// meta scan successful
mCharsetSource = kCharsetFromMetaPrescan;
mFeedChardet = PR_FALSE;
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
mMetaScanner = nsnull;
return WriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount);
@@ -453,6 +492,7 @@ nsHtml5StreamParser::SniffStreamBytes(const PRUint8* aFromSegment,
// meta scan successful
mUnicodeDecoder->SetInputErrorBehavior(nsIUnicodeDecoder::kOnError_Recover);
mCharsetSource = kCharsetFromMetaPrescan;
mFeedChardet = PR_FALSE;
mTreeBuilder->SetDocumentCharset(mCharset, mCharsetSource);
mMetaScanner = nsnull;
return WriteSniffingBufferAndCurrentSegment(aFromSegment, aCount, aWriteCount);
@@ -588,6 +628,10 @@ nsHtml5StreamParser::OnStartRequest(nsIRequest* aRequest, nsISupports* aContext)
mReparseForbidden = PR_TRUE;
}
}
if (mCharsetSource >= kCharsetFromAutoDetection) {
mFeedChardet = PR_FALSE;
}
if (mCharsetSource <= kCharsetFromMetaPrescan) {
// we aren't ready to commit to an encoding yet
@@ -624,6 +668,8 @@ nsHtml5StreamParser::DoStopRequest()
PRUint32 writeCount;
FinalizeSniffing(nsnull, 0, &writeCount, 0);
// dropped nsresult here
} else if (mFeedChardet) {
mChardet->Done();
}
mStreamState = STREAM_ENDED;
@@ -681,8 +727,16 @@ nsHtml5StreamParser::DoDataAvailable(PRUint8* aBuffer, PRUint32 aLength)
}
PRUint32 writeCount;
HasDecoder() ? WriteStreamBytes(aBuffer, aLength, &writeCount) :
SniffStreamBytes(aBuffer, aLength, &writeCount);
if (HasDecoder()) {
if (mFeedChardet) {
PRBool dontFeed;
mChardet->DoIt((const char*)aBuffer, aLength, &dontFeed);
mFeedChardet = !dontFeed;
}
WriteStreamBytes(aBuffer, aLength, &writeCount);
} else {
SniffStreamBytes(aBuffer, aLength, &writeCount);
}
// dropping nsresult here
NS_ASSERTION(writeCount == aLength, "Wrong number of stream bytes written/sniffed.");
@@ -817,7 +871,7 @@ nsHtml5StreamParser::internalEncodingDeclaration(nsString* aEncoding)
return PR_FALSE;
}
mTreeBuilder->NeedsCharsetSwitchTo(preferred);
mTreeBuilder->NeedsCharsetSwitchTo(preferred, kCharsetFromMetaTag);
FlushTreeOpsAndDisarmTimer();
Interrupt();
// the tree op executor will cause the stream parser to terminate