Fix for bug 209220: now that the i18n APIs take ASCII charset names, update consumers to avoid excess conversions. This usually means storing charset names as ASCII and updating a few other APIs.
r=jkeiser, sr=jst and some great comments from jshin@mailaps.org
This commit is contained in:
@@ -305,7 +305,7 @@ nsParser::nsParser() {
|
||||
}
|
||||
#endif
|
||||
|
||||
mCharset.Assign(NS_LITERAL_STRING("ISO-8859-1"));
|
||||
mCharset.Assign(NS_LITERAL_CSTRING("ISO-8859-1"));
|
||||
mParserContext=0;
|
||||
mStreamStatus=0;
|
||||
mCharsetSource=kCharsetUninitialized;
|
||||
@@ -487,7 +487,7 @@ NS_IMETHODIMP_(void) nsParser::SetCommand(eParserCommands aParserCommand)
|
||||
* @return nada
|
||||
*/
|
||||
NS_IMETHODIMP_(void)
|
||||
nsParser::SetDocumentCharset(const nsAString& aCharset, PRInt32 aCharsetSource)
|
||||
nsParser::SetDocumentCharset(const nsACString& aCharset, PRInt32 aCharsetSource)
|
||||
{
|
||||
mCharset = aCharset;
|
||||
mCharsetSource = aCharsetSource;
|
||||
@@ -495,7 +495,7 @@ nsParser::SetDocumentCharset(const nsAString& aCharset, PRInt32 aCharsetSource)
|
||||
mParserContext->mScanner->SetDocumentCharset(aCharset, aCharsetSource);
|
||||
}
|
||||
|
||||
void nsParser::SetSinkCharset(nsAString& aCharset)
|
||||
void nsParser::SetSinkCharset(nsACString& aCharset)
|
||||
{
|
||||
if (mSink) {
|
||||
mSink->SetDocumentCharset(aCharset);
|
||||
@@ -1998,7 +1998,7 @@ static inline PRBool IsSecondMarker(unsigned char aChar)
|
||||
}
|
||||
}
|
||||
|
||||
static PRBool DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen, nsString& oCharset, PRInt32& oCharsetSource) {
|
||||
static PRBool DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen, nsCString& oCharset, PRInt32& oCharsetSource) {
|
||||
oCharsetSource= kCharsetFromAutoDetection;
|
||||
oCharset.Truncate();
|
||||
// See http://www.w3.org/TR/2000/REC-xml-20001006#sec-guessing
|
||||
@@ -2013,26 +2013,26 @@ static PRBool DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen, nsS
|
||||
// 00 00
|
||||
if((0xFE==aBytes[2]) && (0xFF==aBytes[3])) {
|
||||
// 00 00 FE FF UCS-4, big-endian machine (1234 order)
|
||||
oCharset.AssignWithConversion(UCS4_BE);
|
||||
oCharset.Assign(UCS4_BE);
|
||||
} else if((0x00==aBytes[2]) && (0x3C==aBytes[3])) {
|
||||
// 00 00 00 3C UCS-4, big-endian machine (1234 order)
|
||||
oCharset.AssignWithConversion(UCS4_BE);
|
||||
oCharset.Assign(UCS4_BE);
|
||||
} else if((0xFF==aBytes[2]) && (0xFE==aBytes[3])) {
|
||||
// 00 00 FF FE UCS-4, unusual octet order (2143)
|
||||
oCharset.AssignWithConversion(UCS4_2143);
|
||||
oCharset.Assign(UCS4_2143);
|
||||
} else if((0x3C==aBytes[2]) && (0x00==aBytes[3])) {
|
||||
// 00 00 3C 00 UCS-4, unusual octet order (2143)
|
||||
oCharset.AssignWithConversion(UCS4_2143);
|
||||
oCharset.Assign(UCS4_2143);
|
||||
}
|
||||
oCharsetSource = kCharsetFromByteOrderMark;
|
||||
} else if((0x3C==aBytes[1]) && (0x00==aBytes[2])) {
|
||||
// 00 3C 00
|
||||
if(IsSecondMarker(aBytes[3])) {
|
||||
// 00 3C 00 SM UTF-16, big-endian, no Byte Order Mark
|
||||
oCharset.AssignWithConversion(UTF16_BE);
|
||||
oCharset.Assign(UTF16_BE);
|
||||
} else if((0x00==aBytes[3])) {
|
||||
// 00 3C 00 00 UCS-4, unusual octet order (3412)
|
||||
oCharset.AssignWithConversion(UCS4_3412);
|
||||
oCharset.Assign(UCS4_3412);
|
||||
}
|
||||
oCharsetSource = kCharsetFromByteOrderMark;
|
||||
}
|
||||
@@ -2042,10 +2042,10 @@ static PRBool DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen, nsS
|
||||
// 3C 00 XX 00
|
||||
if(IsSecondMarker(aBytes[2])) {
|
||||
// 3C 00 SM 00 UTF-16, little-endian, no Byte Order Mark
|
||||
oCharset.AssignWithConversion(UTF16_LE);
|
||||
oCharset.Assign(UTF16_LE);
|
||||
} else if((0x00==aBytes[2])) {
|
||||
// 3C 00 00 00 UCS-4, little-endian machine (4321 order)
|
||||
oCharset.AssignWithConversion(UCS4_LE);
|
||||
oCharset.Assign(UCS4_LE);
|
||||
}
|
||||
oCharsetSource = kCharsetFromByteOrderMark;
|
||||
// For html, meta tag detector is invoked before this so that we have
|
||||
@@ -2115,7 +2115,7 @@ static PRBool DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen, nsS
|
||||
// encoding value is invalid if it is UTF-16
|
||||
if (count > 0 &&
|
||||
(0 != PL_strcmp("UTF-16", (char*)(aBytes+encStart)))) {
|
||||
oCharset.AssignWithConversion((char*)(aBytes+encStart),count);
|
||||
oCharset.Assign((char*)(aBytes+encStart),count);
|
||||
oCharsetSource = kCharsetFromMetaTag;
|
||||
}
|
||||
encodingFound = PR_TRUE;
|
||||
@@ -2135,7 +2135,7 @@ static PRBool DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen, nsS
|
||||
if((0xBB==aBytes[1]) && (0xBF==aBytes[2])) {
|
||||
// EF BB BF
|
||||
// Win2K UTF-8 BOM
|
||||
oCharset.AssignWithConversion(UTF8);
|
||||
oCharset.Assign(UTF8);
|
||||
oCharsetSource= kCharsetFromByteOrderMark;
|
||||
}
|
||||
break;
|
||||
@@ -2143,10 +2143,10 @@ static PRBool DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen, nsS
|
||||
if(0xFF==aBytes[1]) {
|
||||
if(0x00==aBytes[2] && 0x00==aBytes[3]) {
|
||||
// FE FF 00 00 UCS-4, unusual octet order (3412)
|
||||
oCharset.AssignWithConversion(UCS4_3412);
|
||||
oCharset.Assign(UCS4_3412);
|
||||
} else {
|
||||
// FE FF UTF-16, big-endian
|
||||
oCharset.AssignWithConversion(UTF16_BE);
|
||||
oCharset.Assign(UTF16_BE);
|
||||
}
|
||||
oCharsetSource= kCharsetFromByteOrderMark;
|
||||
}
|
||||
@@ -2155,11 +2155,11 @@ static PRBool DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen, nsS
|
||||
if(0xFE==aBytes[1]) {
|
||||
if(0x00==aBytes[2] && 0x00==aBytes[3])
|
||||
// FF FE 00 00 UTF-32, little-endian
|
||||
oCharset.AssignWithConversion(UCS4_LE);
|
||||
oCharset.Assign(UCS4_LE);
|
||||
else
|
||||
// FF FE
|
||||
// UTF-16, little-endian
|
||||
oCharset.AssignWithConversion(UTF16_LE);
|
||||
oCharset.Assign(UTF16_LE);
|
||||
oCharsetSource= kCharsetFromByteOrderMark;
|
||||
}
|
||||
break;
|
||||
@@ -2181,7 +2181,7 @@ inline const char GetNextChar(nsACString::const_iterator& aStart,
|
||||
PRBool
|
||||
nsParser::DetectMetaTag(const char* aBytes,
|
||||
PRInt32 aLen,
|
||||
nsString& aCharset,
|
||||
nsCString& aCharset,
|
||||
PRInt32& aCharsetSource)
|
||||
{
|
||||
aCharsetSource= kCharsetFromMetaTag;
|
||||
@@ -2280,7 +2280,7 @@ nsParser::DetectMetaTag(const char* aBytes,
|
||||
|
||||
// return true if we successfully got something for charset
|
||||
if (currPos != tokEnd) {
|
||||
aCharset.Assign(NS_ConvertASCIItoUCS2(currPos.get(), tokEnd.get() - currPos.get()));
|
||||
aCharset.Assign(currPos.get(), tokEnd.get() - currPos.get());
|
||||
return PR_TRUE;
|
||||
}
|
||||
|
||||
@@ -2324,7 +2324,7 @@ ParserWriteFunc(nsIInputStream* in,
|
||||
|
||||
if(pws->mNeedCharsetCheck) {
|
||||
PRInt32 guessSource;
|
||||
nsAutoString guess;
|
||||
nsCAutoString guess;
|
||||
nsCAutoString preferred;
|
||||
|
||||
pws->mNeedCharsetCheck = PR_FALSE;
|
||||
@@ -2334,7 +2334,7 @@ ParserWriteFunc(nsIInputStream* in,
|
||||
DetectByteOrderMark((const unsigned char*)buf,
|
||||
theNumRead, guess, guessSource))) {
|
||||
nsCOMPtr<nsICharsetAlias> alias(do_GetService(NS_CHARSETALIAS_CONTRACTID));
|
||||
result = alias->GetPreferred(NS_LossyConvertUCS2toASCII(guess), preferred);
|
||||
result = alias->GetPreferred(guess, preferred);
|
||||
// Only continue if it's a recognized charset and not
|
||||
// one of a designated set that we ignore.
|
||||
if (NS_SUCCEEDED(result) &&
|
||||
@@ -2344,9 +2344,9 @@ ParserWriteFunc(nsIInputStream* in,
|
||||
!preferred.Equals(NS_LITERAL_CSTRING("UTF-16LE")) &&
|
||||
!preferred.Equals(NS_LITERAL_CSTRING("UTF-32BE")) &&
|
||||
!preferred.Equals(NS_LITERAL_CSTRING("UTF-32LE"))))) {
|
||||
guess.Assign(NS_ConvertASCIItoUCS2(preferred));
|
||||
guess = preferred;
|
||||
pws->mParser->SetDocumentCharset(guess, guessSource);
|
||||
pws->mParser->SetSinkCharset(guess);
|
||||
pws->mParser->SetSinkCharset(preferred);
|
||||
nsCOMPtr<nsICachingChannel> channel(do_QueryInterface(pws->mRequest));
|
||||
if (channel) {
|
||||
nsCOMPtr<nsISupports> cacheToken;
|
||||
@@ -2358,7 +2358,7 @@ ParserWriteFunc(nsIInputStream* in,
|
||||
nsresult rv =
|
||||
#endif
|
||||
cacheDescriptor->SetMetaDataElement("charset",
|
||||
NS_ConvertUCS2toUTF8(guess).get());
|
||||
guess.get());
|
||||
NS_ASSERTION(NS_SUCCEEDED(rv),"cannot SetMetaDataElement");
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user