final features for parsing engine (WIP); lots of bug fixes

1999-05-16 05:27:23 +00:00
parent 8a4afa4c75
commit 4f3a22f635
44 changed files with 2860 additions and 2062 deletions
--- a/parser/htmlparser/src/nsHTMLTokens.cpp
+++ b/parser/htmlparser/src/nsHTMLTokens.cpp
@@ -29,7 +29,6 @@
 #include "nsHTMLTags.h"
 #include "nsHTMLEntities.h"
 #include "nsCRT.h"
-#include "nsStr.h"

 //#define GESS_MACHINE
 #ifdef GESS_MACHINE
@@ -79,7 +78,6 @@ void CHTMLToken::SetStringValue(const char* name){
  }
 }

-
 /*
 *  constructor from tag id
 *  
@@ -272,6 +270,19 @@ void CStartToken::DebugDumpSource(ostream& out) {
    out << ">";
 }

+/*
+ *  
+ *  
+ *  @update  gess 3/25/98
+ *  @param   anOutputString will recieve the result
+ *  @return  nada
+ */
+void CStartToken::GetSource(nsString& anOutputString){
+  anOutputString="<";
+  anOutputString+=mTextValue;
+  if(!mAttributed)
+    anOutputString+=">";
+}

 /*
 *  constructor from tag id
@@ -303,7 +314,6 @@ CEndToken::CEndToken(const nsString& aName) : CHTMLToken(aName) {
 *  @return  error result
 */
 nsresult CEndToken::Consume(PRUnichar aChar, nsScanner& aScanner) {
-
  //if you're here, we've already Consumed the <! chars, and are
   //ready to Consume the rest of the open tag identifier.
   //Stop consuming as soon as you see a space or a '>'.
@@ -388,6 +398,19 @@ void CEndToken::DebugDumpSource(ostream& out) {
  out << "</" << buffer << ">";
 }

+/*
+ *  
+ *  
+ *  @update  gess 3/25/98
+ *  @param   anOutputString will recieve the result
+ *  @return  nada
+ */
+void CEndToken::GetSource(nsString& anOutputString){
+  anOutputString="</";
+  anOutputString+=mTextValue;
+  anOutputString+=">";
+}
+
 /*
 *  default constructor
 *  
@@ -474,6 +497,70 @@ nsresult CTextToken::Consume(PRUnichar aChar, nsScanner& aScanner) {
  return result;
 }

+/*
+ *  Consume as much clear text from scanner as possible.
+ *
+ *  @update  gess 3/25/98
+ *  @param   aChar -- last char consumed from stream
+ *  @param   aScanner -- controller of underlying input source
+ *  @return  error result
+ */
+nsresult CTextToken::ConsumeUntil(PRUnichar aChar,nsScanner& aScanner,nsString& aTerminalString){
+  PRBool      done=PR_FALSE; 
+  nsresult    result=NS_OK; 
+  nsString    temp; 
+  PRUnichar   theChar;
+
+  //We're going to try a new algorithm here. Rather than scan for the matching 
+ //end tag like we used to do, we're now going to scan for whitespace and comments. 
+ //If we find either, just eat them. If we find text or a tag, then go to the 
+ //target endtag, or the start of another comment. 
+
+  static nsAutoString theWhitespace2("\b\t ");
+
+  while((!done) && (NS_OK==result)) { 
+    result=aScanner.GetChar(aChar); 
+    if((NS_OK==result) && (kLessThan==aChar)) { 
+      //we're reading a tag or a comment... 
+      result=aScanner.GetChar(theChar); 
+      if((NS_OK==result) && (kExclamation==theChar)) { 
+        //read a comment... 
+        static CCommentToken theComment; 
+        result=theComment.Consume(aChar,aScanner); 
+        if(NS_OK==result) { 
+          //result=aScanner.SkipWhitespace();
+          //temp.Append("<!");
+          temp.Append(theComment.GetStringValueXXX()); 
+          //temp.Append(">");
+        } 
+      } else { 
+        //read a tag... 
+        temp+=aChar; 
+        temp+=theChar; 
+        result=aScanner.ReadUntil(temp,kGreaterThan,PR_TRUE); 
+      } 
+    } 
+    else if(0<=theWhitespace2.BinarySearch(aChar)) { 
+      static CWhitespaceToken theWS; 
+      result=theWS.Consume(aChar,aScanner); 
+      if(NS_OK==result) { 
+        temp.Append(theWS.GetStringValueXXX()); 
+      } 
+    } 
+    else { 
+      temp+=aChar; 
+      result=aScanner.ReadUntil(temp,kLessThan,PR_FALSE); 
+    } 
+    nsAutoString theRight;
+    temp.Right(theRight,aTerminalString.Length());
+    done=PRBool(0==theRight.Compare(aTerminalString,PR_TRUE)); 
+  }  //while
+  int len=temp.Length(); 
+  temp.Truncate(len-aTerminalString.Length()); 
+  mTextValue=temp; 
+  return result; 
+}
+
 /*
 *  default constructor
 *  
@@ -759,14 +846,16 @@ nsresult CCommentToken::Consume(PRUnichar aChar, nsScanner& aScanner) {
  PRBool theStrictForm=PR_FALSE;
  nsresult result=(theStrictForm) ? ConsumeStrictComment(aChar,aScanner,mTextValue) : ConsumeComment(aChar,aScanner,mTextValue);

+/*
  //this change is here to make the editor teams' life easier.
  //I'm removing the leading and trailing markup...
-/*
+
  if(0==mTextValue.Find("<!"))
    mTextValue.Cut(0,2);  //trim off 1st 2 chars...
  if(kGreaterThan==mTextValue.Last())
    mTextValue.Truncate(mTextValue.Length()-1); //trim off last char
 */
+
  return result;
 }

@@ -860,21 +949,21 @@ nsresult CNewlineToken::Consume(PRUnichar aChar, nsScanner& aScanner) {
  mTextValue=aChar;

    //we already read the \r or \n, let's see what's next!
-  PRUnichar nextChar;
-  nsresult result=aScanner.Peek(nextChar);
+  PRUnichar theChar;
+  nsresult result=aScanner.Peek(theChar);

  if(NS_OK==result) {
    switch(aChar) {
      case kNewLine:
-        if(kCR==nextChar) {
-          result=aScanner.GetChar(nextChar);
-          mTextValue+=nextChar;
+        if(kCR==theChar) {
+          result=aScanner.GetChar(theChar);
+          mTextValue+=theChar;
        }
        break;
      case kCR:
-        if(kNewLine==nextChar) {
-          result=aScanner.GetChar(nextChar);
-          mTextValue+=nextChar;
+        if(kNewLine==theChar) {
+          result=aScanner.GetChar(theChar);
+          mTextValue+=theChar;
        }
        break;
      default:
@@ -968,6 +1057,19 @@ void CAttributeToken::DebugDumpToken(ostream& out) {
  out << buffer << ": " << mTypeID << endl;
 }
 
+/*
+ *  
+ *  
+ *  @update  gess 3/25/98
+ *  @param   anOutputString will recieve the result
+ *  @return  nada
+ */
+void CAttributeToken::GetSource(nsString& anOutputString){
+  anOutputString=mTextKey;
+  anOutputString+="=";
+  anOutputString+=mTextValue;
+  anOutputString+=";";
+}

 /*
 *  This general purpose method is used when you want to
@@ -997,8 +1099,8 @@ nsresult ConsumeQuotedString(PRUnichar aChar,nsString& aString,nsScanner& aScann
  PRUnichar ch=aString.Last();
  if(ch!=aChar)
    aString+=aChar;
-  // aString.ReplaceChar(PRUnichar('\n'),PRUnichar(' '));
-  aString.StripChars("\r"); //per the HTML spec, ignore linefeeds...
+  //aString.ReplaceChar(PRUnichar('\n'),PRUnichar(' '));
+  aString.StripChars("\r\n"); //per the HTML spec, ignore linefeeds...
  return result;
 }

@@ -1270,6 +1372,7 @@ const char*  CEntityToken::GetClassName(void) {
  return "&entity";
 }

+
 /*
 *  
 *  
@@ -1439,6 +1542,19 @@ void CEntityToken::DebugDumpSource(ostream& out) {
  delete[] cp;
 }

+/*
+ *  
+ *  
+ *  @update  gess 3/25/98
+ *  @param   anOutputString will recieve the result
+ *  @return  nada
+ */
+void CEntityToken::GetSource(nsString& anOutputString){
+  anOutputString="&";
+  anOutputString+=anOutputString;
+  anOutputString+=";";
+}
+
 /*
 *  default constructor
 *  
@@ -1591,9 +1707,9 @@ nsresult CSkippedContentToken::Consume(PRUnichar aChar,nsScanner& aScanner) {
      temp+=aChar; 
      result=aScanner.ReadUntil(temp,kLessThan,PR_FALSE); 
    } 
-    nsAutoString temp2;
-    temp.Right(temp2,mTextValue.Length());
-    done=PRBool(0==temp2.Compare(mTextValue.GetUnicode(),PR_TRUE,mTextValue.Length())); 
+    nsAutoString theRight;
+    temp.Right(theRight,mTextValue.Length());
+    done=PRBool(0==theRight.Compare(mTextValue,PR_TRUE)); 
  } 
  int len=temp.Length(); 
  temp.Truncate(len-mTextValue.Length()); 
@@ -1616,6 +1732,16 @@ void CSkippedContentToken::DebugDumpSource(ostream& out) {
    out<<">";
 }

+/*
+ *  
+ *  
+ *  @update  gess 3/25/98
+ *  @param   anOutputString will recieve the result
+ *  @return  nada
+ */
+void CSkippedContentToken::GetSource(nsString& anOutputString){
+  anOutputString="$skipped-content";
+}

 /**
 *