Fix for bug 209220: now that the i18n APIs take ASCII charset names, update consumers to avoid excess conversions. This usually means storing charset names as ASCII and updating a few other APIs.

r=jkeiser, sr=jst and some great comments from jshin@mailaps.org
This commit is contained in:
alecf@flett.org
2003-06-17 16:40:34 +00:00
parent 56753be82c
commit aa24a7bb8e
82 changed files with 400 additions and 425 deletions

View File

@@ -305,7 +305,7 @@ nsParser::nsParser() {
}
#endif
mCharset.Assign(NS_LITERAL_STRING("ISO-8859-1"));
mCharset.Assign(NS_LITERAL_CSTRING("ISO-8859-1"));
mParserContext=0;
mStreamStatus=0;
mCharsetSource=kCharsetUninitialized;
@@ -487,7 +487,7 @@ NS_IMETHODIMP_(void) nsParser::SetCommand(eParserCommands aParserCommand)
* @return nada
*/
NS_IMETHODIMP_(void)
nsParser::SetDocumentCharset(const nsAString& aCharset, PRInt32 aCharsetSource)
nsParser::SetDocumentCharset(const nsACString& aCharset, PRInt32 aCharsetSource)
{
mCharset = aCharset;
mCharsetSource = aCharsetSource;
@@ -495,7 +495,7 @@ nsParser::SetDocumentCharset(const nsAString& aCharset, PRInt32 aCharsetSource)
mParserContext->mScanner->SetDocumentCharset(aCharset, aCharsetSource);
}
void nsParser::SetSinkCharset(nsAString& aCharset)
void nsParser::SetSinkCharset(nsACString& aCharset)
{
if (mSink) {
mSink->SetDocumentCharset(aCharset);
@@ -1998,7 +1998,7 @@ static inline PRBool IsSecondMarker(unsigned char aChar)
}
}
static PRBool DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen, nsString& oCharset, PRInt32& oCharsetSource) {
static PRBool DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen, nsCString& oCharset, PRInt32& oCharsetSource) {
oCharsetSource= kCharsetFromAutoDetection;
oCharset.Truncate();
// See http://www.w3.org/TR/2000/REC-xml-20001006#sec-guessing
@@ -2013,26 +2013,26 @@ static PRBool DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen, nsS
// 00 00
if((0xFE==aBytes[2]) && (0xFF==aBytes[3])) {
// 00 00 FE FF UCS-4, big-endian machine (1234 order)
oCharset.AssignWithConversion(UCS4_BE);
oCharset.Assign(UCS4_BE);
} else if((0x00==aBytes[2]) && (0x3C==aBytes[3])) {
// 00 00 00 3C UCS-4, big-endian machine (1234 order)
oCharset.AssignWithConversion(UCS4_BE);
oCharset.Assign(UCS4_BE);
} else if((0xFF==aBytes[2]) && (0xFE==aBytes[3])) {
// 00 00 FF FE UCS-4, unusual octet order (2143)
oCharset.AssignWithConversion(UCS4_2143);
oCharset.Assign(UCS4_2143);
} else if((0x3C==aBytes[2]) && (0x00==aBytes[3])) {
// 00 00 3C 00 UCS-4, unusual octet order (2143)
oCharset.AssignWithConversion(UCS4_2143);
oCharset.Assign(UCS4_2143);
}
oCharsetSource = kCharsetFromByteOrderMark;
} else if((0x3C==aBytes[1]) && (0x00==aBytes[2])) {
// 00 3C 00
if(IsSecondMarker(aBytes[3])) {
// 00 3C 00 SM UTF-16, big-endian, no Byte Order Mark
oCharset.AssignWithConversion(UTF16_BE);
oCharset.Assign(UTF16_BE);
} else if((0x00==aBytes[3])) {
// 00 3C 00 00 UCS-4, unusual octet order (3412)
oCharset.AssignWithConversion(UCS4_3412);
oCharset.Assign(UCS4_3412);
}
oCharsetSource = kCharsetFromByteOrderMark;
}
@@ -2042,10 +2042,10 @@ static PRBool DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen, nsS
// 3C 00 XX 00
if(IsSecondMarker(aBytes[2])) {
// 3C 00 SM 00 UTF-16, little-endian, no Byte Order Mark
oCharset.AssignWithConversion(UTF16_LE);
oCharset.Assign(UTF16_LE);
} else if((0x00==aBytes[2])) {
// 3C 00 00 00 UCS-4, little-endian machine (4321 order)
oCharset.AssignWithConversion(UCS4_LE);
oCharset.Assign(UCS4_LE);
}
oCharsetSource = kCharsetFromByteOrderMark;
// For html, meta tag detector is invoked before this so that we have
@@ -2115,7 +2115,7 @@ static PRBool DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen, nsS
// encoding value is invalid if it is UTF-16
if (count > 0 &&
(0 != PL_strcmp("UTF-16", (char*)(aBytes+encStart)))) {
oCharset.AssignWithConversion((char*)(aBytes+encStart),count);
oCharset.Assign((char*)(aBytes+encStart),count);
oCharsetSource = kCharsetFromMetaTag;
}
encodingFound = PR_TRUE;
@@ -2135,7 +2135,7 @@ static PRBool DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen, nsS
if((0xBB==aBytes[1]) && (0xBF==aBytes[2])) {
// EF BB BF
// Win2K UTF-8 BOM
oCharset.AssignWithConversion(UTF8);
oCharset.Assign(UTF8);
oCharsetSource= kCharsetFromByteOrderMark;
}
break;
@@ -2143,10 +2143,10 @@ static PRBool DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen, nsS
if(0xFF==aBytes[1]) {
if(0x00==aBytes[2] && 0x00==aBytes[3]) {
// FE FF 00 00 UCS-4, unusual octet order (3412)
oCharset.AssignWithConversion(UCS4_3412);
oCharset.Assign(UCS4_3412);
} else {
// FE FF UTF-16, big-endian
oCharset.AssignWithConversion(UTF16_BE);
oCharset.Assign(UTF16_BE);
}
oCharsetSource= kCharsetFromByteOrderMark;
}
@@ -2155,11 +2155,11 @@ static PRBool DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen, nsS
if(0xFE==aBytes[1]) {
if(0x00==aBytes[2] && 0x00==aBytes[3])
// FF FE 00 00 UTF-32, little-endian
oCharset.AssignWithConversion(UCS4_LE);
oCharset.Assign(UCS4_LE);
else
// FF FE
// UTF-16, little-endian
oCharset.AssignWithConversion(UTF16_LE);
oCharset.Assign(UTF16_LE);
oCharsetSource= kCharsetFromByteOrderMark;
}
break;
@@ -2181,7 +2181,7 @@ inline const char GetNextChar(nsACString::const_iterator& aStart,
PRBool
nsParser::DetectMetaTag(const char* aBytes,
PRInt32 aLen,
nsString& aCharset,
nsCString& aCharset,
PRInt32& aCharsetSource)
{
aCharsetSource= kCharsetFromMetaTag;
@@ -2280,7 +2280,7 @@ nsParser::DetectMetaTag(const char* aBytes,
// return true if we successfully got something for charset
if (currPos != tokEnd) {
aCharset.Assign(NS_ConvertASCIItoUCS2(currPos.get(), tokEnd.get() - currPos.get()));
aCharset.Assign(currPos.get(), tokEnd.get() - currPos.get());
return PR_TRUE;
}
@@ -2324,7 +2324,7 @@ ParserWriteFunc(nsIInputStream* in,
if(pws->mNeedCharsetCheck) {
PRInt32 guessSource;
nsAutoString guess;
nsCAutoString guess;
nsCAutoString preferred;
pws->mNeedCharsetCheck = PR_FALSE;
@@ -2334,7 +2334,7 @@ ParserWriteFunc(nsIInputStream* in,
DetectByteOrderMark((const unsigned char*)buf,
theNumRead, guess, guessSource))) {
nsCOMPtr<nsICharsetAlias> alias(do_GetService(NS_CHARSETALIAS_CONTRACTID));
result = alias->GetPreferred(NS_LossyConvertUCS2toASCII(guess), preferred);
result = alias->GetPreferred(guess, preferred);
// Only continue if it's a recognized charset and not
// one of a designated set that we ignore.
if (NS_SUCCEEDED(result) &&
@@ -2344,9 +2344,9 @@ ParserWriteFunc(nsIInputStream* in,
!preferred.Equals(NS_LITERAL_CSTRING("UTF-16LE")) &&
!preferred.Equals(NS_LITERAL_CSTRING("UTF-32BE")) &&
!preferred.Equals(NS_LITERAL_CSTRING("UTF-32LE"))))) {
guess.Assign(NS_ConvertASCIItoUCS2(preferred));
guess = preferred;
pws->mParser->SetDocumentCharset(guess, guessSource);
pws->mParser->SetSinkCharset(guess);
pws->mParser->SetSinkCharset(preferred);
nsCOMPtr<nsICachingChannel> channel(do_QueryInterface(pws->mRequest));
if (channel) {
nsCOMPtr<nsISupports> cacheToken;
@@ -2358,7 +2358,7 @@ ParserWriteFunc(nsIInputStream* in,
nsresult rv =
#endif
cacheDescriptor->SetMetaDataElement("charset",
NS_ConvertUCS2toUTF8(guess).get());
guess.get());
NS_ASSERTION(NS_SUCCEEDED(rv),"cannot SetMetaDataElement");
}
}