Fix for bug 209220 - now that the i18n APIs take ASCII charset names, update consumers to avoid excess conversions, which usually means storing charset names as ASCII and updating a few other APIs.

r=jkeiser, sr=jst and some great comments from jshin@mailaps.org
2003-06-17 16:40:34 +00:00
parent 56753be82c
commit aa24a7bb8e
82 changed files with 400 additions and 425 deletions
--- a/parser/htmlparser/src/nsParser.cpp
+++ b/parser/htmlparser/src/nsParser.cpp
@@ -305,7 +305,7 @@ nsParser::nsParser() {
  }
 #endif

-  mCharset.Assign(NS_LITERAL_STRING("ISO-8859-1"));
+  mCharset.Assign(NS_LITERAL_CSTRING("ISO-8859-1"));
  mParserContext=0;
  mStreamStatus=0;
  mCharsetSource=kCharsetUninitialized;
@@ -487,7 +487,7 @@ NS_IMETHODIMP_(void) nsParser::SetCommand(eParserCommands aParserCommand)
 *  @return	 nada
 */
 NS_IMETHODIMP_(void)
-nsParser::SetDocumentCharset(const nsAString& aCharset, PRInt32 aCharsetSource)
+nsParser::SetDocumentCharset(const nsACString& aCharset, PRInt32 aCharsetSource)
 {
  mCharset = aCharset;
  mCharsetSource = aCharsetSource; 
@@ -495,7 +495,7 @@ nsParser::SetDocumentCharset(const nsAString& aCharset, PRInt32 aCharsetSource)
     mParserContext->mScanner->SetDocumentCharset(aCharset, aCharsetSource);
 }

-void nsParser::SetSinkCharset(nsAString& aCharset)
+void nsParser::SetSinkCharset(nsACString& aCharset)
 {
  if (mSink) {
    mSink->SetDocumentCharset(aCharset);
@@ -1998,7 +1998,7 @@ static inline PRBool IsSecondMarker(unsigned char aChar)
  }
 }

-static PRBool DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen, nsString& oCharset, PRInt32& oCharsetSource) {
+static PRBool DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen, nsCString& oCharset, PRInt32& oCharsetSource) {
 oCharsetSource= kCharsetFromAutoDetection;
 oCharset.Truncate();
 // See http://www.w3.org/TR/2000/REC-xml-20001006#sec-guessing
@@ -2013,26 +2013,26 @@ static PRBool DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen, nsS
        // 00 00
        if((0xFE==aBytes[2]) && (0xFF==aBytes[3])) {
           // 00 00 FE FF UCS-4, big-endian machine (1234 order)
-           oCharset.AssignWithConversion(UCS4_BE);
+           oCharset.Assign(UCS4_BE);
        } else if((0x00==aBytes[2]) && (0x3C==aBytes[3])) {
           // 00 00 00 3C UCS-4, big-endian machine (1234 order)
-           oCharset.AssignWithConversion(UCS4_BE);
+           oCharset.Assign(UCS4_BE);
        } else if((0xFF==aBytes[2]) && (0xFE==aBytes[3])) {
           // 00 00 FF FE UCS-4, unusual octet order (2143)
-           oCharset.AssignWithConversion(UCS4_2143);
+           oCharset.Assign(UCS4_2143);
        } else if((0x3C==aBytes[2]) && (0x00==aBytes[3])) {
           // 00 00 3C 00 UCS-4, unusual octet order (2143)
-           oCharset.AssignWithConversion(UCS4_2143);
+           oCharset.Assign(UCS4_2143);
        } 
        oCharsetSource = kCharsetFromByteOrderMark;
     } else if((0x3C==aBytes[1]) && (0x00==aBytes[2])) {
        // 00 3C 00
        if(IsSecondMarker(aBytes[3])) {
           // 00 3C 00 SM UTF-16,  big-endian, no Byte Order Mark 
-           oCharset.AssignWithConversion(UTF16_BE); 
+           oCharset.Assign(UTF16_BE); 
        } else if((0x00==aBytes[3])) {
           // 00 3C 00 00 UCS-4, unusual octet order (3412)
-           oCharset.AssignWithConversion(UCS4_3412);
+           oCharset.Assign(UCS4_3412);
        } 
        oCharsetSource = kCharsetFromByteOrderMark;
     }
@@ -2042,10 +2042,10 @@ static PRBool DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen, nsS
        // 3C 00 XX 00
        if(IsSecondMarker(aBytes[2])) {
           // 3C 00 SM 00 UTF-16,  little-endian, no Byte Order Mark 
-           oCharset.AssignWithConversion(UTF16_LE); 
+           oCharset.Assign(UTF16_LE); 
        } else if((0x00==aBytes[2])) {
           // 3C 00 00 00 UCS-4, little-endian machine (4321 order)
-           oCharset.AssignWithConversion(UCS4_LE); 
+           oCharset.Assign(UCS4_LE); 
        } 
        oCharsetSource = kCharsetFromByteOrderMark;
     // For html, meta tag detector is invoked before this so that we have 
@@ -2115,7 +2115,7 @@ static PRBool DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen, nsS
                   // encoding value is invalid if it is UTF-16
                   if (count > 0 && 
                     (0 != PL_strcmp("UTF-16", (char*)(aBytes+encStart)))) {
-                     oCharset.AssignWithConversion((char*)(aBytes+encStart),count);
+                     oCharset.Assign((char*)(aBytes+encStart),count);
                     oCharsetSource = kCharsetFromMetaTag;
                   }
                   encodingFound = PR_TRUE;
@@ -2135,7 +2135,7 @@ static PRBool DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen, nsS
     if((0xBB==aBytes[1]) && (0xBF==aBytes[2])) {
        // EF BB BF
        // Win2K UTF-8 BOM
-        oCharset.AssignWithConversion(UTF8); 
+        oCharset.Assign(UTF8); 
        oCharsetSource= kCharsetFromByteOrderMark;
     }
   break;
@@ -2143,10 +2143,10 @@ static PRBool DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen, nsS
     if(0xFF==aBytes[1]) {
        if(0x00==aBytes[2] && 0x00==aBytes[3]) {
          // FE FF 00 00  UCS-4, unusual octet order (3412)
-          oCharset.AssignWithConversion(UCS4_3412);
+          oCharset.Assign(UCS4_3412);
        } else {
          // FE FF UTF-16, big-endian 
-          oCharset.AssignWithConversion(UTF16_BE); 
+          oCharset.Assign(UTF16_BE); 
        }
        oCharsetSource= kCharsetFromByteOrderMark;
     }
@@ -2155,11 +2155,11 @@ static PRBool DetectByteOrderMark(const unsigned char* aBytes, PRInt32 aLen, nsS
     if(0xFE==aBytes[1]) {
        if(0x00==aBytes[2] && 0x00==aBytes[3]) 
         // FF FE 00 00  UTF-32, little-endian
-           oCharset.AssignWithConversion(UCS4_LE); 
+           oCharset.Assign(UCS4_LE); 
        else
        // FF FE
        // UTF-16, little-endian 
-           oCharset.AssignWithConversion(UTF16_LE); 
+           oCharset.Assign(UTF16_LE); 
        oCharsetSource= kCharsetFromByteOrderMark;
     }
   break;
@@ -2181,7 +2181,7 @@ inline const char GetNextChar(nsACString::const_iterator& aStart,
 PRBool 
 nsParser::DetectMetaTag(const char* aBytes, 
                        PRInt32 aLen, 
-                        nsString& aCharset, 
+                        nsCString& aCharset, 
                        PRInt32& aCharsetSource) 
 {
  aCharsetSource= kCharsetFromMetaTag;
@@ -2280,7 +2280,7 @@ nsParser::DetectMetaTag(const char* aBytes,

    // return true if we successfully got something for charset
    if (currPos != tokEnd) {
-      aCharset.Assign(NS_ConvertASCIItoUCS2(currPos.get(), tokEnd.get() - currPos.get()));
+      aCharset.Assign(currPos.get(), tokEnd.get() - currPos.get());
      return PR_TRUE;
    } 
    
@@ -2324,7 +2324,7 @@ ParserWriteFunc(nsIInputStream* in,

  if(pws->mNeedCharsetCheck) { 
    PRInt32 guessSource;
-    nsAutoString guess;
+    nsCAutoString guess;
    nsCAutoString preferred; 
  
    pws->mNeedCharsetCheck = PR_FALSE; 
@@ -2334,7 +2334,7 @@ ParserWriteFunc(nsIInputStream* in,
        DetectByteOrderMark((const unsigned char*)buf, 
                            theNumRead, guess, guessSource))) { 
      nsCOMPtr<nsICharsetAlias> alias(do_GetService(NS_CHARSETALIAS_CONTRACTID));
-      result = alias->GetPreferred(NS_LossyConvertUCS2toASCII(guess), preferred);
+      result = alias->GetPreferred(guess, preferred);
      // Only continue if it's a recognized charset and not
      // one of a designated set that we ignore.
      if (NS_SUCCEEDED(result) &&
@@ -2344,9 +2344,9 @@ ParserWriteFunc(nsIInputStream* in,
            !preferred.Equals(NS_LITERAL_CSTRING("UTF-16LE")) &&
            !preferred.Equals(NS_LITERAL_CSTRING("UTF-32BE")) &&
            !preferred.Equals(NS_LITERAL_CSTRING("UTF-32LE"))))) {
-        guess.Assign(NS_ConvertASCIItoUCS2(preferred));
+        guess = preferred;
        pws->mParser->SetDocumentCharset(guess, guessSource); 
-        pws->mParser->SetSinkCharset(guess);
+        pws->mParser->SetSinkCharset(preferred);
        nsCOMPtr<nsICachingChannel> channel(do_QueryInterface(pws->mRequest));
        if (channel) {
          nsCOMPtr<nsISupports> cacheToken;
@@ -2358,7 +2358,7 @@ ParserWriteFunc(nsIInputStream* in,
              nsresult rv =
 #endif
                cacheDescriptor->SetMetaDataElement("charset",
-                                                    NS_ConvertUCS2toUTF8(guess).get());
+                                                    guess.get());
              NS_ASSERTION(NS_SUCCEEDED(rv),"cannot SetMetaDataElement");
            }
          }