24112 - Fixed the ordering of THEAD, TBODY, and TFOOT 24673 - Fixed the crash by not letting P contain TABLE (per spec). 18308 - Ignoring newlines above BODY 18928 - Fixed by forcing a BODY to open, irrespective of FRAMESET document, if 'text' is present in the document. 23791 - Made ; in entities optional. 24006 - Fixed by setting the alternate end-comment to be '>'. 24275 - Buffering newlines/whitespace only when it's absolutely necessary. 24462 - Fixed by making NOBR a block closure. 21917 - Being careful about ignoring newlines/whitespace for body content. r=rickg 24204 - Fixed by opening up BODY for INPUT (quirks). Also did a minor change in CNavDTD::DidBuildModel(); r=pollmann
1946 lines
48 KiB
C++
1946 lines
48 KiB
C++
|
|
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/*
|
|
* The contents of this file are subject to the Netscape Public
|
|
* License Version 1.1 (the "License"); you may not use this file
|
|
* except in compliance with the License. You may obtain a copy of
|
|
* the License at http://www.mozilla.org/NPL/
|
|
*
|
|
* Software distributed under the License is distributed on an "AS
|
|
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
|
|
* implied. See the License for the specific language governing
|
|
* rights and limitations under the License.
|
|
*
|
|
* The Original Code is mozilla.org code.
|
|
*
|
|
* The Initial Developer of the Original Code is Netscape
|
|
* Communications Corporation. Portions created by Netscape are
|
|
* Copyright (C) 1998 Netscape Communications Corporation. All
|
|
* Rights Reserved.
|
|
*
|
|
* Contributor(s):
|
|
*/
|
|
|
|
#include <ctype.h>
|
|
#include <time.h>
|
|
#include <stdio.h>
|
|
#include "nsScanner.h"
|
|
#include "nsToken.h"
|
|
#include "nsHTMLTokens.h"
|
|
#include "nsIParser.h"
|
|
#include "prtypes.h"
|
|
#include "nsDebug.h"
|
|
#include "nsHTMLTags.h"
|
|
#include "nsHTMLEntities.h"
|
|
#include "nsCRT.h"
|
|
|
|
|
|
|
|
// File-scope string constant "userdefined". No use is visible in this part of
// the file -- presumably the name reported for user-defined (unknown) tags;
// TODO confirm against the use sites.
static const char* gUserdefined = "userdefined";
|
|
|
|
|
|
/**************************************************************
|
|
And now for the token classes...
|
|
**************************************************************/
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
CHTMLToken::CHTMLToken(const nsString& aName,eHTMLTags aTag) : CToken(aName) {
|
|
mTypeID=aTag;
|
|
}
|
|
|
|
/*
|
|
* constructor from tag id
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
CHTMLToken::CHTMLToken(eHTMLTags aTag) : CToken(aTag) {
|
|
|
|
}
|
|
|
|
/**
|
|
* Setter method that changes the string value of this token
|
|
* @update gess5/11/98
|
|
* @param name is a char* value containing new string value
|
|
*/
|
|
void CHTMLToken::SetStringValue(const char* name){
|
|
if(name) {
|
|
mTextValue=name;
|
|
mTypeID = nsHTMLTags::LookupTag(mTextValue);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* constructor from tag id
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
CStartToken::CStartToken(eHTMLTags aTag) : CHTMLToken(aTag) {
|
|
mAttributed=PR_FALSE;
|
|
mEmpty=PR_FALSE;
|
|
mOrigin=-1;
|
|
}
|
|
|
|
/*
|
|
* constructor from tag id
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
CStartToken::CStartToken(nsString& aString,eHTMLTags aTag) : CHTMLToken(aString,aTag) {
|
|
mAttributed=PR_FALSE;
|
|
mEmpty=PR_FALSE;
|
|
mOrigin=-1;
|
|
}
|
|
|
|
|
|
/**
|
|
*
|
|
* @update gess8/4/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
void CStartToken::Reinitialize(PRInt32 aTag, const nsString& aString){
|
|
CToken::Reinitialize(aTag,aString);
|
|
mAttributed=PR_FALSE;
|
|
mUseCount=0; //assume recycling is needed by default.
|
|
mEmpty=PR_FALSE;
|
|
mOrigin=-1;
|
|
mTrailingContent.Truncate();
|
|
}
|
|
|
|
/*
|
|
* This method returns the typeid (the tag type) for this token.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CStartToken::GetTypeID(){
|
|
if(eHTMLTag_unknown==mTypeID) {
|
|
mTypeID = nsHTMLTags::LookupTag(mTextValue);
|
|
}
|
|
return mTypeID;
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CStartToken::GetClassName(void) {
|
|
return "start";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CStartToken::GetTokenType(void) {
|
|
return eToken_start;
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
void CStartToken::SetAttributed(PRBool aValue) {
|
|
mAttributed=aValue;
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRBool CStartToken::IsAttributed(void) {
|
|
return mAttributed;
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
void CStartToken::SetEmpty(PRBool aValue) {
|
|
mEmpty=aValue;
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRBool CStartToken::IsEmpty(void) {
|
|
return mEmpty;
|
|
}
|
|
|
|
|
|
/*
|
|
* Consume the identifier portion of the start tag
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
nsresult CStartToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aMode) {
|
|
|
|
//if you're here, we've already Consumed the < char, and are
|
|
//ready to Consume the rest of the open tag identifier.
|
|
//Stop consuming as soon as you see a space or a '>'.
|
|
//NOTE: We don't Consume the tag attributes here, nor do we eat the ">"
|
|
|
|
mTextValue=aChar;
|
|
nsresult result=aScanner.ReadIdentifier(mTextValue);
|
|
mTypeID = nsHTMLTags::LookupTag(mTextValue);
|
|
|
|
//Good. Now, let's skip whitespace after the identifier,
|
|
//and see if the next char is ">". If so, we have a complete
|
|
//tag without attributes.
|
|
if(NS_OK==result) {
|
|
mOrigin=aScanner.GetOffset(); // We need this position to record the trailing contents of the start token
|
|
result=aScanner.SkipWhitespace();
|
|
mNewlineCount += aScanner.GetNewlinesSkipped();
|
|
if(NS_OK==result) {
|
|
result=aScanner.GetChar(aChar);
|
|
if(NS_OK==result) {
|
|
if(kGreaterThan!=aChar) { //look for '>'
|
|
//push that char back, since we apparently have attributes...
|
|
result=aScanner.PutBack(aChar);
|
|
mAttributed=PR_TRUE;
|
|
} //if
|
|
} //if
|
|
}//if
|
|
}
|
|
return result;
|
|
}
|
|
|
|
|
|
/*
|
|
* Dump contents of this token to givne output stream
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param out -- ostream to output content
|
|
* @return
|
|
*/
|
|
void CStartToken::DebugDumpSource(nsOutputStream& out) {
|
|
char buffer[1000];
|
|
mTextValue.ToCString(buffer,sizeof(buffer));
|
|
out << "<" << buffer;
|
|
if(!mAttributed)
|
|
out << ">";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param anOutputString will recieve the result
|
|
* @return nada
|
|
*/
|
|
void CStartToken::GetSource(nsString& anOutputString){
|
|
anOutputString="<";
|
|
anOutputString+=mTextValue;
|
|
anOutputString+=mTrailingContent;
|
|
}
|
|
|
|
/*
|
|
* constructor from tag id
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
CEndToken::CEndToken(eHTMLTags aTag) : CHTMLToken(aTag) {
|
|
SetStringValue(GetTagName(aTag));
|
|
}
|
|
|
|
|
|
/*
|
|
* default constructor for end token
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- char* containing token name
|
|
* @return
|
|
*/
|
|
CEndToken::CEndToken(const nsString& aName) : CHTMLToken(aName) {
|
|
}
|
|
|
|
/*
|
|
* Consume the identifier portion of the end tag
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
nsresult CEndToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aMode) {
|
|
//if you're here, we've already Consumed the <! chars, and are
|
|
//ready to Consume the rest of the open tag identifier.
|
|
//Stop consuming as soon as you see a space or a '>'.
|
|
//NOTE: We don't Consume the tag attributes here, nor do we eat the ">"
|
|
|
|
mTextValue="";
|
|
nsresult result=aScanner.ReadUntil(mTextValue,kGreaterThan,PR_FALSE);
|
|
|
|
if(NS_OK==result){
|
|
|
|
PRInt32 theIndex=mTextValue.FindCharInSet(" \r\n\t\b",0);
|
|
nsAutoString buffer(mTextValue);
|
|
buffer.Truncate(theIndex);
|
|
mTypeID= nsHTMLTags::LookupTag(buffer);
|
|
result=aScanner.GetChar(aChar); //eat the closing '>;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
|
|
/*
|
|
* Asks the token to determine the <i>HTMLTag type</i> of
|
|
* the token. This turns around and looks up the tag name
|
|
* in the tag dictionary.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return eHTMLTag id of this endtag
|
|
*/
|
|
PRInt32 CEndToken::GetTypeID(){
|
|
if(eHTMLTag_unknown==mTypeID) {
|
|
mTypeID = nsHTMLTags::LookupTag(mTextValue);
|
|
switch(mTypeID) {
|
|
case eHTMLTag_dir:
|
|
case eHTMLTag_menu:
|
|
mTypeID=eHTMLTag_ul;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
return mTypeID;
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CEndToken::GetClassName(void) {
|
|
return "/end";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CEndToken::GetTokenType(void) {
|
|
return eToken_end;
|
|
}
|
|
|
|
/*
|
|
* Dump contents of this token to givne output stream
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param out -- ostream to output content
|
|
* @return
|
|
*/
|
|
void CEndToken::DebugDumpSource(nsOutputStream& out) {
|
|
char buffer[1000];
|
|
mTextValue.ToCString(buffer,sizeof(buffer));
|
|
out << "</" << buffer << ">";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param anOutputString will recieve the result
|
|
* @return nada
|
|
*/
|
|
void CEndToken::GetSource(nsString& anOutputString){
|
|
anOutputString="</";
|
|
anOutputString+=mTextValue;
|
|
anOutputString+=">";
|
|
}
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CTextToken::CTextToken() : CHTMLToken(eHTMLTag_text) {
|
|
}
|
|
|
|
|
|
/*
|
|
* string based constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CTextToken::CTextToken(const nsString& aName) : CHTMLToken(aName) {
|
|
mTypeID=eHTMLTag_text;
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CTextToken::GetClassName(void) {
|
|
return "text";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CTextToken::GetTokenType(void) {
|
|
return eToken_text;
|
|
}
|
|
|
|
/*
|
|
* Consume as much clear text from scanner as possible.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
nsresult CTextToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aMode) {
|
|
static const char* theTerminals="\n\r&<";
|
|
nsresult result=NS_OK;
|
|
PRBool done=PR_FALSE;
|
|
|
|
while((NS_OK==result) && (!done)) {
|
|
result=aScanner.ReadUntil(mTextValue,theTerminals,PR_TRUE,PR_FALSE);
|
|
if(NS_OK==result) {
|
|
result=aScanner.Peek(aChar);
|
|
|
|
if(((kCR==aChar) || (kNewLine==aChar)) && (NS_OK==result)) {
|
|
result=aScanner.GetChar(aChar); //strip off the char
|
|
PRUnichar theNextChar;
|
|
result=aScanner.Peek(theNextChar); //then see what's next.
|
|
switch(aChar) {
|
|
case kCR:
|
|
// result=aScanner.GetChar(aChar);
|
|
if(kLF==theNextChar) {
|
|
result=aScanner.GetChar(theNextChar);
|
|
}
|
|
else if(kCR==theNextChar) {
|
|
result=aScanner.GetChar(theNextChar);
|
|
result=aScanner.Peek(theNextChar); //then see what's next.
|
|
if(kLF==theNextChar) {
|
|
result=aScanner.GetChar(theNextChar);
|
|
}
|
|
mTextValue.Append("\n");
|
|
mNewlineCount++;
|
|
}
|
|
mTextValue.Append("\n");
|
|
mNewlineCount++;
|
|
break;
|
|
case kLF:
|
|
if((kLF==theNextChar) || (kCR==theNextChar)) {
|
|
result=aScanner.GetChar(theNextChar);
|
|
mTextValue.Append("\n");
|
|
mNewlineCount++;
|
|
}
|
|
mTextValue.Append("\n");
|
|
mNewlineCount++;
|
|
break;
|
|
default:
|
|
mTextValue.Append("\n");
|
|
mNewlineCount++;
|
|
break;
|
|
} //switch
|
|
}
|
|
else done=PR_TRUE;
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* Consume as much clear text from scanner as possible.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScanner& aScanner,
|
|
nsString& aTerminalString,PRInt32 aMode,PRBool& aFlushTokens){
|
|
PRBool done=PR_FALSE;
|
|
nsresult result=NS_OK;
|
|
PRUnichar theChar;
|
|
nsAutoString theRight;
|
|
PRInt32 rpos=0;
|
|
|
|
|
|
//We're going to try a new algorithm here. Rather than scan for the matching
|
|
//end tag like we used to do, we're now going to scan for whitespace and comments.
|
|
//If we find either, just eat them. If we find text or a tag, then go to the
|
|
//target endtag, or the start of another comment.
|
|
|
|
|
|
PRInt32 termStrLen=aTerminalString.Length();
|
|
while((!done) && (NS_OK==result)) {
|
|
result=aScanner.GetChar(aChar);
|
|
if((NS_OK==result) && (kLessThan==aChar)) {
|
|
//we're reading a tag or a comment...
|
|
//FYI: <STYLE> and <SCRIPT> should be treated as CDATA. So,
|
|
//don't try to acknowledge "HTML COMMENTS"...just ignore 'em.
|
|
result=aScanner.GetChar(theChar);
|
|
if((NS_OK==result) && (kExclamation==theChar) && (PR_FALSE==aIgnoreComments)) {
|
|
//read a comment...
|
|
static CCommentToken theComment;
|
|
result=theComment.Consume(aChar,aScanner,aMode);
|
|
if(NS_OK==result) {
|
|
//result=aScanner.SkipWhitespace();
|
|
mTextValue.Append(theComment.GetStringValueXXX());
|
|
}
|
|
} else {
|
|
//read a tag...
|
|
mTextValue+=aChar;
|
|
mTextValue+=theChar;
|
|
result=aScanner.ReadUntil(mTextValue,kGreaterThan,PR_TRUE);
|
|
}
|
|
}
|
|
else if(('\b'==aChar) || ('\t'==aChar) || (' '==aChar)) {
|
|
static CWhitespaceToken theWS;
|
|
result=theWS.Consume(aChar,aScanner,aMode);
|
|
if(NS_OK==result) {
|
|
mTextValue.Append(theWS.GetStringValueXXX());
|
|
}
|
|
}
|
|
else {
|
|
mTextValue+=aChar;
|
|
result=aScanner.ReadUntil(mTextValue,kLessThan,PR_FALSE);
|
|
}
|
|
mTextValue.Right(theRight,termStrLen+10); //first, get a wad of chars from the temp string
|
|
rpos=theRight.RFindChar('<'); //now scan for the '<'
|
|
if(-1<rpos) {
|
|
rpos=theRight.RFind(aTerminalString,PR_TRUE);
|
|
if(-1<rpos) {
|
|
nsAutoString temp(theRight);
|
|
temp.Cut(0,rpos);
|
|
if(aMode!=eParseMode_noquirks) {
|
|
temp.StripWhitespace();
|
|
}
|
|
PRUnichar ch=temp.CharAt(aTerminalString.Length());
|
|
rpos=(ch==kGreaterThan)? rpos:kNotFound;
|
|
aFlushTokens=(-1<rpos)?PR_TRUE:PR_FALSE; // We found </SCRIPT>...permit flushing -> Ref: Bug 22485
|
|
}
|
|
}
|
|
done=PRBool(-1<rpos);
|
|
} //while
|
|
if(NS_SUCCEEDED(result)) {
|
|
int len=mTextValue.Length();
|
|
mTextValue.Truncate(len-(theRight.Length()-rpos));
|
|
|
|
// Make aTerminalString contain the name of the end tag ** as seen in **
|
|
// the document and not the made up one.
|
|
theRight.Cut(0,rpos+2);
|
|
theRight.Truncate(theRight.Length()-1);
|
|
aTerminalString = theRight;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update vidur 11/12/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CCDATASectionToken::CCDATASectionToken() : CHTMLToken(eHTMLTag_unknown) {
|
|
}
|
|
|
|
|
|
/*
|
|
* string based constructor
|
|
*
|
|
* @update vidur 11/12/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CCDATASectionToken::CCDATASectionToken(const nsString& aName) : CHTMLToken(aName) {
|
|
mTypeID=eHTMLTag_unknown;
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update vidur 11/12/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CCDATASectionToken::GetClassName(void) {
|
|
return "cdatasection";
|
|
}
|
|
|
|
/*
|
|
*
|
|
* @update vidur 11/12/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CCDATASectionToken::GetTokenType(void) {
|
|
return eToken_cdatasection;
|
|
}
|
|
|
|
/*
|
|
* Consume as much marked test from scanner as possible.
|
|
*
|
|
* @update rgess 12/15/99: had to handle case: "<![ ! IE 5]>", in addition to "<![..[..]]>".
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
nsresult CCDATASectionToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aMode) {
|
|
static const char* theTerminals="\r]";
|
|
nsresult result=NS_OK;
|
|
PRBool done=PR_FALSE;
|
|
|
|
while((NS_OK==result) && (!done)) {
|
|
result=aScanner.ReadUntil(mTextValue,theTerminals,PR_TRUE,PR_FALSE);
|
|
if(NS_OK==result) {
|
|
result=aScanner.Peek(aChar);
|
|
if((kCR==aChar) && (NS_OK==result)) {
|
|
result=aScanner.GetChar(aChar); //strip off the \r
|
|
result=aScanner.Peek(aChar); //then see what's next.
|
|
if(NS_OK==result) {
|
|
switch(aChar) {
|
|
case kCR:
|
|
result=aScanner.GetChar(aChar); //strip off the \r
|
|
mTextValue.Append("\n\n");
|
|
break;
|
|
case kNewLine:
|
|
//which means we saw \r\n, which becomes \n
|
|
result=aScanner.GetChar(aChar); //strip off the \n
|
|
//now fall through on purpose...
|
|
default:
|
|
mTextValue.Append("\n");
|
|
break;
|
|
} //switch
|
|
} //if
|
|
}
|
|
else if (']'==aChar) {
|
|
result=aScanner.GetChar(aChar); //strip off the ]
|
|
mTextValue.Append(aChar);
|
|
result=aScanner.Peek(aChar); //then see what's next.
|
|
if((NS_OK==result) && (kRightSquareBracket==aChar)) {
|
|
result=aScanner.GetChar(aChar); //strip off the second ]
|
|
mTextValue.Append(aChar);
|
|
result=aScanner.Peek(aChar); //then see what's next.
|
|
}
|
|
if((NS_OK==result) && (kGreaterThan==aChar)) {
|
|
result=aScanner.GetChar(aChar); //strip off the >
|
|
done=PR_TRUE;
|
|
}
|
|
}
|
|
else done=PR_TRUE;
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
|
|
/*
|
|
* Default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CCommentToken::CCommentToken() : CHTMLToken(eHTMLTag_comment) {
|
|
}
|
|
|
|
|
|
/*
|
|
* Copy constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
CCommentToken::CCommentToken(const nsString& aName) : CHTMLToken(aName) {
|
|
mTypeID=eHTMLTag_comment;
|
|
}
|
|
|
|
/*
|
|
* This method consumes a comment using the (CORRECT) comment parsing
|
|
* algorithm supplied by W3C.
|
|
*
|
|
* @update gess 01/04/99
|
|
* @param
|
|
* @param
|
|
* @return
|
|
*/
|
|
static
|
|
nsresult ConsumeStrictComment(PRUnichar aChar, nsScanner& aScanner,nsString& aString) {
|
|
nsresult result=NS_OK;
|
|
|
|
/*********************************************************
|
|
NOTE: This algorithm does a fine job of handling comments
|
|
when they're formatted per spec, but if they're not
|
|
we don't handle them well. For example, we gack
|
|
on the following:
|
|
|
|
<!-- xx -- xx -->
|
|
*********************************************************/
|
|
|
|
aString="<!";
|
|
while(NS_OK==result) {
|
|
result=aScanner.GetChar(aChar);
|
|
if(NS_OK==result) {
|
|
aString+=aChar;
|
|
if(kMinus==aChar) {
|
|
result=aScanner.GetChar(aChar);
|
|
if(NS_OK==result) {
|
|
if(kMinus==aChar) {
|
|
//in this case, we're reading a long-form comment <-- xxx -->
|
|
aString+=aChar;
|
|
if(NS_OK==result) {
|
|
PRInt32 findpos=-1;
|
|
nsAutoString temp("");
|
|
//Read to the first ending sequence '--'
|
|
while((kNotFound==findpos) && (NS_OK==result)) {
|
|
result=aScanner.ReadUntil(temp,kMinus,PR_TRUE);
|
|
findpos=temp.RFind("--");
|
|
}
|
|
aString+=temp;
|
|
if(NS_OK==result) {
|
|
if(NS_OK==result) {
|
|
temp="->";
|
|
result=aScanner.ReadUntil(aString,temp,PR_FALSE,PR_FALSE);
|
|
}
|
|
}
|
|
}
|
|
} //
|
|
else break; //go find '>'
|
|
}
|
|
}//if
|
|
else if(kGreaterThan==aChar) {
|
|
return result;
|
|
}
|
|
else break; //go find '>'
|
|
}//if
|
|
}//while
|
|
if(NS_OK==result) {
|
|
//Read up to the closing '>', unless you already did! (such as <!>).
|
|
if(kGreaterThan!=aChar) {
|
|
result=aScanner.ReadUntil(aString,kGreaterThan,PR_TRUE);
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* This method consumes a comment using common (actually non-standard)
|
|
* algorithm that seems to work against the content on the web.
|
|
*
|
|
* @update gess 01/04/99
|
|
* @param
|
|
* @param
|
|
* @return
|
|
*/
|
|
static
|
|
nsresult ConsumeComment(PRUnichar aChar, nsScanner& aScanner,nsString& aString) {
|
|
|
|
|
|
nsresult result=NS_OK;
|
|
|
|
/*********************************************************
|
|
NOTE: This algorithm does a fine job of handling comments
|
|
commonly used, but it doesn't really consume them
|
|
per spec (But then, neither does IE or Nav).
|
|
*********************************************************/
|
|
|
|
aString="<!";
|
|
nsAutoString theRightChars;
|
|
PRInt32 theBestAltPos=kNotFound;
|
|
PRUint32 theStartOffset=0;
|
|
|
|
result=aScanner.GetChar(aChar);
|
|
if(NS_OK==result) {
|
|
aString+=aChar;
|
|
if(kMinus==aChar) {
|
|
result=aScanner.GetChar(aChar);
|
|
if(NS_OK==result) {
|
|
if(kMinus==aChar) {
|
|
//in this case, we're reading a long-form comment <-- xxx -->
|
|
aString+=aChar;
|
|
|
|
PRInt32 findpos=kNotFound;
|
|
while((kNotFound==findpos) && (NS_OK==result)) {
|
|
result=aScanner.ReadUntil(aString,kGreaterThan,PR_TRUE);
|
|
if(NS_OK==result){
|
|
aChar=aString.CharAt(aString.Length()-3);
|
|
if(kMinus==aChar) {
|
|
aChar=aString.CharAt(aString.Length()-2);
|
|
if(kMinus==aChar) return result; // We have found the dflt end comment delimiter ("-->")
|
|
}
|
|
if(kNotFound==theBestAltPos) {
|
|
// If we did not find the dflt then assume that '>' is the end comment
|
|
// until we find '-->'. Nav. Compatibility -- Ref: Bug# 24006
|
|
theBestAltPos=aString.Length();
|
|
theStartOffset=aScanner.GetOffset();
|
|
}
|
|
}
|
|
} //while
|
|
if((kNotFound==findpos) && (!aScanner.IsIncremental())) {
|
|
//if you're here, then we're in a special state.
|
|
//The problem at hand is that we've hit the end of the document without finding the normal endcomment delimiter "-->".
|
|
//In this case, the first thing we try is to see if we found one of the alternate endcomment delimiter ">".
|
|
//If so, rewind just pass than, and use everything up to that point as your comment.
|
|
//If not, the document has no end comment and should be treated as one big comment.
|
|
if(kNotFound<theBestAltPos) {
|
|
aString.Truncate(theBestAltPos);
|
|
aScanner.Mark(theStartOffset);
|
|
result=NS_OK;
|
|
}
|
|
}
|
|
return result;
|
|
|
|
} //if
|
|
}//if
|
|
}//if
|
|
}//if
|
|
if(NS_OK==result) {
|
|
//Read up to the closing '>', unless you already did! (such as <!>).
|
|
if(kGreaterThan!=aChar) {
|
|
result=aScanner.ReadUntil(aString,kGreaterThan,PR_TRUE);
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* Consume the identifier portion of the comment.
|
|
* Note that we've already eaten the "<!" portion.
|
|
*
|
|
* @update gess 1/27/99
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
nsresult CCommentToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aMode) {
|
|
nsresult result=(aMode==eParseMode_noquirks) ? ConsumeStrictComment(aChar,aScanner,mTextValue)
|
|
: ConsumeComment(aChar,aScanner,mTextValue);
|
|
|
|
#if 0
|
|
if(NS_OK==result) {
|
|
//ok then, all is well so strip off the delimiters...
|
|
nsAutoString theLeft("");
|
|
mTextValue.Left(theLeft,2);
|
|
if(theLeft=="<!")
|
|
mTextValue.Cut(0,2);
|
|
if('>'==mTextValue.Last())
|
|
mTextValue.Truncate(mTextValue.Length()-1);
|
|
}
|
|
#endif
|
|
return result;
|
|
}
|
|
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CCommentToken::GetClassName(void){
|
|
return "/**/";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CCommentToken::GetTokenType(void) {
|
|
return eToken_comment;
|
|
}
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CNewlineToken::CNewlineToken() : CHTMLToken(eHTMLTag_newline) {
|
|
}
|
|
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string value to init token name with
|
|
* @return
|
|
*/
|
|
CNewlineToken::CNewlineToken(const nsString& aName) : CHTMLToken(aName) {
|
|
mTypeID=eHTMLTag_newline;
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CNewlineToken::GetClassName(void) {
|
|
return "crlf";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CNewlineToken::GetTokenType(void) {
|
|
return eToken_newline;
|
|
}
|
|
|
|
/**
|
|
* This method retrieves the value of this internal string.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @return nsString reference to internal string value
|
|
*/
|
|
nsString& CNewlineToken::GetStringValueXXX(void) {
|
|
static nsString* theStr=0;
|
|
if(!theStr)
|
|
theStr=new nsString("\n");
|
|
return *theStr;
|
|
}
|
|
|
|
/*
|
|
* Consume as many cr/lf pairs as you can find.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
nsresult CNewlineToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aMode) {
|
|
|
|
#if 1
|
|
mTextValue=kNewLine; //This is what I THINK we should be doing.
|
|
#else
|
|
mTextValue=aChar;
|
|
#endif
|
|
|
|
/*******************************************************************
|
|
|
|
Here's what the HTML spec says about newlines:
|
|
|
|
"A line break is defined to be a carriage return (
),
|
|
a line feed (
), or a carriage return/line feed pair.
|
|
All line breaks constitute white space."
|
|
|
|
*******************************************************************/
|
|
|
|
PRUnichar theChar;
|
|
nsresult result=aScanner.Peek(theChar);
|
|
|
|
if(NS_OK==result) {
|
|
switch(aChar) {
|
|
case kNewLine:
|
|
if(kCR==theChar) {
|
|
result=aScanner.GetChar(theChar);
|
|
mTextValue+=theChar;
|
|
}
|
|
break;
|
|
case kCR:
|
|
//convert CRLF into just CR
|
|
if(kNewLine==theChar) {
|
|
result=aScanner.GetChar(theChar);
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CAttributeToken::CAttributeToken() : CHTMLToken(eHTMLTag_unknown) {
|
|
mHasEqualWithoutValue=PR_FALSE;
|
|
}
|
|
|
|
/*
|
|
* string based constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string value to init token name with
|
|
* @return
|
|
*/
|
|
CAttributeToken::CAttributeToken(const nsString& aName) : CHTMLToken(aName),
|
|
mTextKey() {
|
|
mLastAttribute=PR_FALSE;
|
|
mHasEqualWithoutValue=PR_FALSE;
|
|
}
|
|
|
|
/*
|
|
* construct initializing data to
|
|
* key value pair
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string value to init token name with
|
|
* @return
|
|
*/
|
|
CAttributeToken::CAttributeToken(const nsString& aKey, const nsString& aName) : CHTMLToken(aName) {
|
|
mTextKey = aKey;
|
|
mLastAttribute=PR_FALSE;
|
|
mHasEqualWithoutValue=PR_FALSE;
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @update gess8/4/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
void CAttributeToken::Reinitialize(PRInt32 aTag, const nsString& aString){
|
|
CHTMLToken::Reinitialize(aTag,aString);
|
|
mTextKey.Truncate();
|
|
mLastAttribute=PR_FALSE;
|
|
mHasEqualWithoutValue=PR_FALSE;
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CAttributeToken::GetClassName(void) {
|
|
return "attr";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CAttributeToken::GetTokenType(void) {
|
|
return eToken_attribute;
|
|
}
|
|
|
|
/*
|
|
* Removes non-alpha-non-digit characters from the end of a KEY
|
|
*
|
|
* @update harishd 07/15/99
|
|
* @param
|
|
* @return
|
|
*/
|
|
void CAttributeToken::SanitizeKey() {
|
|
PRInt32 length=mTextKey.Length();
|
|
if(length > 0) {
|
|
PRUnichar theChar=mTextKey.Last();
|
|
while(!nsString::IsAlpha(theChar) && !nsString::IsDigit(theChar)) {
|
|
mTextKey.Truncate(length-1);
|
|
length = mTextKey.Length();
|
|
if(length <= 0) break;
|
|
theChar = mTextKey.Last();
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Dump contents of this token to given output stream
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param out -- ostream to output content
|
|
* @return
|
|
*/
|
|
void CAttributeToken::DebugDumpToken(nsOutputStream& out) {
|
|
char buffer[200];
|
|
mTextKey.ToCString(buffer,sizeof(buffer));
|
|
out << "[" << GetClassName() << "] " << buffer << "=";
|
|
mTextValue.ToCString(buffer,sizeof(buffer));
|
|
out << buffer << ": " << mTypeID << nsEndl;
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param anOutputString will recieve the result
|
|
* @return nada
|
|
*/
|
|
void CAttributeToken::GetSource(nsString& anOutputString){
|
|
anOutputString=mTextKey;
|
|
anOutputString+="=";
|
|
anOutputString+=mTextValue;
|
|
anOutputString+=";";
|
|
}
|
|
|
|
/*
|
|
* This general purpose method is used when you want to
|
|
* consume a known quoted string.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
nsresult ConsumeQuotedString(PRUnichar aChar,nsString& aString,nsScanner& aScanner){
|
|
nsresult result=NS_OK;
|
|
switch(aChar) {
|
|
case kQuote:
|
|
result=aScanner.ReadUntil(aString,kQuote,PR_TRUE);
|
|
if(NS_OK==result)
|
|
result=aScanner.SkipOver(kQuote); //this code is here in case someone mistakenly adds multiple quotes...
|
|
break;
|
|
case kApostrophe:
|
|
result=aScanner.ReadUntil(aString,kApostrophe,PR_TRUE);
|
|
if(NS_OK==result)
|
|
result=aScanner.SkipOver(kApostrophe); //this code is here in case someone mistakenly adds multiple apostrophes...
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
PRUnichar ch=aString.Last();
|
|
if(ch!=aChar)
|
|
aString+=aChar;
|
|
//aString.ReplaceChar(PRUnichar('\n'),PRUnichar(' '));
|
|
aString.StripChars("\r\n"); //per the HTML spec, ignore linefeeds...
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* This general purpose method is used when you want to
|
|
* consume attributed text value.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
static
|
|
nsresult ConsumeAttributeValueText(PRUnichar,nsString& aString,nsScanner& aScanner){
|
|
static const char* theTerminals="\b\t\n\r >";
|
|
nsresult result=aScanner.ReadUntil(aString,theTerminals,PR_TRUE,PR_FALSE);
|
|
|
|
//Let's force quotes if either the first or last char is quoted.
|
|
PRUnichar theLast=aString.Last();
|
|
PRUnichar theFirst=aString.First();
|
|
if(kQuote==theLast) {
|
|
if(kQuote!=theFirst) {
|
|
aString.Insert(kQuote,0);;
|
|
}
|
|
}
|
|
else if(kQuote==theFirst) {
|
|
if(kQuote!=theLast) {
|
|
aString+=kQuote;
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
|
|
/*
|
|
* Consume the key and value portions of the attribute.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
nsresult CAttributeToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aMode) {
|
|
|
|
nsresult result=aScanner.SkipWhitespace(); //skip leading whitespace
|
|
if(NS_OK==result) {
|
|
result=aScanner.Peek(aChar);
|
|
if(NS_OK==result) {
|
|
if(kQuote==aChar) { //if you're here, handle quoted key...
|
|
result=aScanner.GetChar(aChar); //skip the quote sign...
|
|
if(NS_OK==result) {
|
|
result=aScanner.Peek(aChar); //peek ahead to make sure the next char is a legal attr-key
|
|
if(NS_OK==result) {
|
|
if(nsString::IsAlpha(aChar) || nsString::IsDigit(aChar)){
|
|
mTextKey=aChar;
|
|
result=ConsumeQuotedString(aChar,mTextKey,aScanner);
|
|
}
|
|
else {
|
|
return NS_ERROR_HTMLPARSER_BADATTRIBUTE;
|
|
}
|
|
} //if
|
|
}//if
|
|
}
|
|
else if(kHashsign==aChar) {
|
|
result=aScanner.GetChar(aChar); //skip the hash sign...
|
|
if(NS_OK==result) {
|
|
mTextKey=aChar;
|
|
result=aScanner.ReadNumber(mTextKey);
|
|
}
|
|
}
|
|
else {
|
|
//If you're here, handle an unquoted key.
|
|
//Don't forget to reduce entities inline!
|
|
static const char* theTerminals="\b\t\n\r \"<=>";
|
|
result=aScanner.ReadUntil(mTextKey,theTerminals,PR_TRUE,PR_FALSE);
|
|
}
|
|
|
|
//now it's time to Consume the (optional) value...
|
|
if(NS_OK==result) {
|
|
result=aScanner.SkipWhitespace();
|
|
if(NS_OK==result) {
|
|
result=aScanner.Peek(aChar); //Skip ahead until you find an equal sign or a '>'...
|
|
if(NS_OK==result) {
|
|
if(kEqual==aChar){
|
|
result=aScanner.GetChar(aChar); //skip the equal sign...
|
|
if(NS_OK==result) {
|
|
result=aScanner.SkipWhitespace(); //now skip any intervening whitespace
|
|
if(NS_OK==result) {
|
|
result=aScanner.GetChar(aChar); //and grab the next char.
|
|
if(NS_OK==result) {
|
|
if((kQuote==aChar) || (kApostrophe==aChar)) {
|
|
mTextValue=aChar;
|
|
result=ConsumeQuotedString(aChar,mTextValue,aScanner);
|
|
}
|
|
else if(kGreaterThan==aChar){
|
|
mHasEqualWithoutValue=PR_TRUE;
|
|
result=aScanner.PutBack(aChar);
|
|
}
|
|
#if 0
|
|
else if(kAmpersand==aChar) {
|
|
mTextValue=aChar;
|
|
result=aScanner.GetChar(aChar);
|
|
if(NS_OK==result) {
|
|
mTextValue += aChar;
|
|
result=CEntityToken::ConsumeEntity(aChar,mTextValue,aScanner);
|
|
}
|
|
}
|
|
#endif
|
|
else {
|
|
mTextValue=aChar; //it's an alphanum attribute...
|
|
result=ConsumeAttributeValueText(aChar,mTextValue,aScanner);
|
|
}
|
|
}//if
|
|
if(NS_OK==result)
|
|
result=aScanner.SkipWhitespace();
|
|
}//if
|
|
}//if
|
|
}//if
|
|
else {
|
|
//This is where we have to handle fairly busted content.
|
|
//If you're here, it means we saw an attribute name, but couldn't find
|
|
//the following equal sign. <tag NAME=....
|
|
|
|
//Doing this right in all cases is <i>REALLY</i> ugly.
|
|
//My best guess is to grab the next non-ws char. We know it's not '=',
|
|
//so let's see what it is. If it's a '"', then assume we're reading
|
|
//from the middle of the value. Try stripping the quote and continuing...
|
|
|
|
if(kQuote==aChar){
|
|
result=aScanner.SkipOver(aChar); //strip quote.
|
|
}
|
|
}
|
|
}//if
|
|
} //if
|
|
}//if
|
|
if(NS_OK==result) {
|
|
result=aScanner.Peek(aChar);
|
|
mLastAttribute= PRBool((kGreaterThan==aChar) || (kEOF==result));
|
|
}
|
|
} //if
|
|
}//if
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* Dump contents of this token to givne output stream
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param out -- ostream to output content
|
|
* @return
|
|
*/
|
|
void CAttributeToken::DebugDumpSource(nsOutputStream& out) {
|
|
static char buffer[1000];
|
|
mTextKey.ToCString(buffer,sizeof(buffer));
|
|
out << " " << buffer;
|
|
if(mTextValue.Length()){
|
|
mTextValue.ToCString(buffer,sizeof(buffer));
|
|
out << "=" << buffer;
|
|
}
|
|
if(mLastAttribute)
|
|
out<<">";
|
|
}
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CWhitespaceToken::CWhitespaceToken() : CHTMLToken(eHTMLTag_whitespace) {
|
|
}
|
|
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string value to init token name with
|
|
* @return
|
|
*/
|
|
CWhitespaceToken::CWhitespaceToken(const nsString& aName) : CHTMLToken(aName) {
|
|
mTypeID=eHTMLTag_whitespace;
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CWhitespaceToken::GetClassName(void) {
|
|
return "ws";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CWhitespaceToken::GetTokenType(void) {
|
|
return eToken_whitespace;
|
|
}
|
|
|
|
/*
|
|
* This general purpose method is used when you want to
|
|
* consume an aribrary sequence of whitespace.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
nsresult CWhitespaceToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aMode) {
|
|
mTextValue=aChar;
|
|
nsresult result=aScanner.ReadWhitespace(mTextValue);
|
|
if(NS_OK==result) {
|
|
mTextValue.StripChars("\r");
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CEntityToken::CEntityToken() : CHTMLToken(eHTMLTag_entity) {
|
|
}
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string value to init token name with
|
|
* @return
|
|
*/
|
|
CEntityToken::CEntityToken(const nsString& aName) : CHTMLToken(aName) {
|
|
mTypeID=eHTMLTag_entity;
|
|
#ifdef VERBOSE_DEBUG
|
|
if(!VerifyEntityTable()) {
|
|
cout<<"Entity table is invalid!" << endl;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
|
|
/*
|
|
* Consume the rest of the entity. We've already eaten the "&".
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
nsresult CEntityToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aMode) {
|
|
if(aChar)
|
|
mTextValue=aChar;
|
|
nsresult result=ConsumeEntity(aChar,mTextValue,aScanner);
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CEntityToken::GetClassName(void) {
|
|
return "&entity";
|
|
}
|
|
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CEntityToken::GetTokenType(void) {
|
|
return eToken_entity;
|
|
}
|
|
|
|
/*
|
|
* This general purpose method is used when you want to
|
|
* consume an entity &xxxx;. Keep in mind that entities
|
|
* are <i>not</i> reduced inline.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
PRInt32 CEntityToken::ConsumeEntity(PRUnichar aChar,nsString& aString,nsScanner& aScanner){
|
|
PRUnichar theChar=0;
|
|
PRInt32 result=aScanner.Peek(theChar);
|
|
if(NS_OK==result) {
|
|
if(kLeftBrace==aChar) {
|
|
//you're consuming a script entity...
|
|
PRInt32 rightBraceCount = 0;
|
|
PRInt32 leftBraceCount = 1;
|
|
while(leftBraceCount!=rightBraceCount) {
|
|
result=aScanner.GetChar(aChar);
|
|
if(NS_OK!=result) return result;
|
|
aString += aChar;
|
|
if(aChar==kRightBrace)
|
|
rightBraceCount++;
|
|
else if(aChar==kLeftBrace)
|
|
leftBraceCount++;
|
|
}
|
|
result=aScanner.ReadUntil(aString,kSemicolon,PR_FALSE);
|
|
if(NS_OK==result) {
|
|
result=aScanner.GetChar(aChar); // This character should be a semicolon
|
|
if(NS_OK==result) aString += aChar;
|
|
}
|
|
} //if
|
|
else {
|
|
if(kHashsign==aChar) {
|
|
if('X'==(toupper((char)theChar))) {
|
|
result=aScanner.GetChar(theChar);
|
|
aString+=theChar;
|
|
}
|
|
if(NS_OK==result){
|
|
result=aScanner.ReadNumber(aString);
|
|
}
|
|
}
|
|
else result=aScanner.ReadIdentifier(aString,PR_TRUE); // Ref. Bug# 23791 - For setting aIgnore to PR_TRUE.
|
|
if(NS_OK==result) {
|
|
result=aScanner.Peek(theChar);
|
|
if(NS_OK==result) {
|
|
if (kSemicolon == theChar) {
|
|
// consume semicolon that stopped the scan
|
|
aString+=theChar;
|
|
result=aScanner.GetChar(theChar);
|
|
}
|
|
}
|
|
}//if
|
|
} //else
|
|
} //if
|
|
return result;
|
|
}
|
|
|
|
#define PA_REMAP_128_TO_160_ILLEGAL_NCR 1
|
|
|
|
#ifdef PA_REMAP_128_TO_160_ILLEGAL_NCR
|
|
/**
|
|
* Map some illegal but commonly used numeric entities into their
|
|
* appropriate unicode value.
|
|
*/
|
|
#define NOT_USED 0xfffd
|
|
|
|
static PRUint16 PA_HackTable[] = {
|
|
NOT_USED,
|
|
NOT_USED,
|
|
0x201a, /* SINGLE LOW-9 QUOTATION MARK */
|
|
0x0192, /* LATIN SMALL LETTER F WITH HOOK */
|
|
0x201e, /* DOUBLE LOW-9 QUOTATION MARK */
|
|
0x2026, /* HORIZONTAL ELLIPSIS */
|
|
0x2020, /* DAGGER */
|
|
0x2021, /* DOUBLE DAGGER */
|
|
0x02c6, /* MODIFIER LETTER CIRCUMFLEX ACCENT */
|
|
0x2030, /* PER MILLE SIGN */
|
|
0x0160, /* LATIN CAPITAL LETTER S WITH CARON */
|
|
0x2039, /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
|
|
0x0152, /* LATIN CAPITAL LIGATURE OE */
|
|
NOT_USED,
|
|
0x017D, /* CAPITAL Z HACEK */
|
|
NOT_USED,
|
|
NOT_USED,
|
|
0x2018, /* LEFT SINGLE QUOTATION MARK */
|
|
0x2019, /* RIGHT SINGLE QUOTATION MARK */
|
|
0x201c, /* LEFT DOUBLE QUOTATION MARK */
|
|
0x201d, /* RIGHT DOUBLE QUOTATION MARK */
|
|
0x2022, /* BULLET */
|
|
0x2013, /* EN DASH */
|
|
0x2014, /* EM DASH */
|
|
0x02dc, /* SMALL TILDE */
|
|
0x2122, /* TRADE MARK SIGN */
|
|
0x0161, /* LATIN SMALL LETTER S WITH CARON */
|
|
0x203a, /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */
|
|
0x0153, /* LATIN SMALL LIGATURE OE */
|
|
NOT_USED,
|
|
NOT_USED,
|
|
0x0178 /* LATIN CAPITAL LETTER Y WITH DIAERESIS */
|
|
};
|
|
#endif /* PA_REMAP_128_TO_160_ILLEGAL_NCR */
|
|
|
|
|
|
/*
|
|
* This method converts this entity into its underlying
|
|
* unicode equivalent.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aString will hold the resulting string value
|
|
* @return numeric (unichar) value
|
|
*/
|
|
PRInt32 CEntityToken::TranslateToUnicodeStr(nsString& aString) {
|
|
PRInt32 value=0;
|
|
PRInt32 theRadix[2]={16,10};
|
|
|
|
if(mTextValue.Length()>1) {
|
|
PRUnichar theChar0=mTextValue.CharAt(0);
|
|
|
|
if(kHashsign==theChar0) {
|
|
PRInt32 err=0;
|
|
|
|
PRUnichar theChar1=mTextValue.CharAt(1);
|
|
PRBool isDigit1=nsString::IsDigit(theChar1);
|
|
value=mTextValue.ToInteger(&err,theRadix[isDigit1]);
|
|
if(0==err) {
|
|
#ifdef PA_REMAP_128_TO_160_ILLEGAL_NCR
|
|
/* for some illegal, but popular usage */
|
|
if ((value >= 0x0080) && (value <= 0x009f)) {
|
|
value = PA_HackTable[value - 0x0080];
|
|
}
|
|
#endif
|
|
aString.Append(PRUnichar(value));
|
|
}//if
|
|
}
|
|
else{
|
|
value = nsHTMLEntities::EntityToUnicode(mTextValue);
|
|
if(-1<value) {
|
|
//we found a named entity...
|
|
aString=PRUnichar(value);
|
|
}
|
|
}//else
|
|
}//if
|
|
|
|
return value;
|
|
}
|
|
|
|
/*
|
|
* Dump contents of this token to givne output stream
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param out -- ostream to output content
|
|
* @return
|
|
*/
|
|
void CEntityToken::DebugDumpSource(nsOutputStream& out) {
|
|
char* cp=mTextValue.ToNewCString();
|
|
out << "&" << *cp;
|
|
delete[] cp;
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param anOutputString will recieve the result
|
|
* @return nada
|
|
*/
|
|
void CEntityToken::GetSource(nsString& anOutputString){
|
|
anOutputString="&";
|
|
anOutputString+=mTextValue;
|
|
//anOutputString+=";";
|
|
}
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CScriptToken::CScriptToken() : CHTMLToken(eHTMLTag_script) {
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CScriptToken::GetClassName(void) {
|
|
return "script";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CScriptToken::GetTokenType(void) {
|
|
return eToken_script;
|
|
}
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CStyleToken::CStyleToken() : CHTMLToken(eHTMLTag_style) {
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CStyleToken::GetClassName(void) {
|
|
return "style";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CStyleToken::GetTokenType(void) {
|
|
return eToken_style;
|
|
}
|
|
|
|
|
|
/*
|
|
* string based constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string value to init token name with
|
|
* @return
|
|
*/
|
|
CSkippedContentToken::CSkippedContentToken(const nsString& aName) : CAttributeToken(aName) {
|
|
mTextKey = "$skipped-content";/* XXX need a better answer! */
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CSkippedContentToken::GetClassName(void) {
|
|
return "skipped";
|
|
}
|
|
|
|
/*
|
|
* Retrieve the token type as an int.
|
|
* @update gess 3/25/98
|
|
* @return
|
|
*/
|
|
PRInt32 CSkippedContentToken::GetTokenType(void) {
|
|
return eToken_skippedcontent;
|
|
}
|
|
|
|
/*
|
|
* Consume content until you find an end sequence that matches
|
|
* this objects current mTextValue. Note that this is complicated
|
|
* by the fact that you can be parsing content that itself
|
|
* contains quoted content of the same type (like <SCRIPT>).
|
|
* That means we have to look for quote-pairs, and ignore the
|
|
* content inside them.
|
|
*
|
|
* @update gess 7/25/98
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
nsresult CSkippedContentToken::Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode) {
|
|
PRBool done=PR_FALSE;
|
|
nsresult result=NS_OK;
|
|
nsString temp;
|
|
PRUnichar theChar;
|
|
|
|
//We're going to try a new algorithm here. Rather than scan for the matching
|
|
//end tag like we used to do, we're now going to scan for whitespace and comments.
|
|
//If we find either, just eat them. If we find text or a tag, then go to the
|
|
//target endtag, or the start of another comment.
|
|
|
|
while((!done) && (NS_OK==result)) {
|
|
result=aScanner.GetChar(aChar);
|
|
if((NS_OK==result) && (kLessThan==aChar)) {
|
|
//we're reading a tag or a comment...
|
|
result=aScanner.GetChar(theChar);
|
|
if((NS_OK==result) && (kExclamation==theChar)) {
|
|
//read a comment...
|
|
static CCommentToken theComment;
|
|
result=theComment.Consume(aChar,aScanner,aMode);
|
|
if(NS_OK==result) {
|
|
//result=aScanner.SkipWhitespace();
|
|
temp.Append(theComment.GetStringValueXXX());
|
|
}
|
|
} else {
|
|
//read a tag...
|
|
temp+=aChar;
|
|
temp+=theChar;
|
|
result=aScanner.ReadUntil(temp,kGreaterThan,PR_TRUE);
|
|
}
|
|
}
|
|
else if(('\b'==theChar) || ('\t'==theChar) || (' '==theChar)) {
|
|
static CWhitespaceToken theWS;
|
|
result=theWS.Consume(aChar,aScanner,aMode);
|
|
if(NS_OK==result) {
|
|
temp.Append(theWS.GetStringValueXXX());
|
|
}
|
|
}
|
|
else {
|
|
temp+=aChar;
|
|
result=aScanner.ReadUntil(temp,kLessThan,PR_FALSE);
|
|
}
|
|
nsAutoString theRight;
|
|
temp.Right(theRight,mTextValue.Length());
|
|
done=PRBool(0==theRight.Compare(mTextValue,PR_TRUE));
|
|
}
|
|
int len=temp.Length();
|
|
temp.Truncate(len-mTextValue.Length());
|
|
mTextKey=temp;
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* Dump contents of this token to givne output stream
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param out -- ostream to output content
|
|
* @return
|
|
*/
|
|
void CSkippedContentToken::DebugDumpSource(nsOutputStream& out) {
|
|
static char buffer[1000];
|
|
mTextKey.ToCString(buffer,sizeof(buffer));
|
|
out << " " << buffer;
|
|
if(mLastAttribute)
|
|
out<<">";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param anOutputString will recieve the result
|
|
* @return nada
|
|
*/
|
|
void CSkippedContentToken::GetSource(nsString& anOutputString){
|
|
anOutputString="$skipped-content";
|
|
}
|
|
|
|
/**
|
|
*
|
|
* @update gess4/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* GetTagName(PRInt32 aTag) {
|
|
const nsCString& result = nsHTMLTags::GetStringValue((nsHTMLTag) aTag);
|
|
if (0 == result.Length()) {
|
|
if(aTag>=eHTMLTag_userdefined)
|
|
return gUserdefined;
|
|
else return 0;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
|
|
/**
|
|
*
|
|
*
|
|
* @update gess 9/23/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
CInstructionToken::CInstructionToken() : CHTMLToken(eHTMLTag_unknown) {
|
|
}
|
|
|
|
/**
|
|
*
|
|
*
|
|
* @update gess 9/23/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
CInstructionToken::CInstructionToken(const nsString& aString) : CHTMLToken(aString) {
|
|
}
|
|
|
|
/**
|
|
*
|
|
*
|
|
* @update gess 9/23/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
nsresult CInstructionToken::Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode){
|
|
mTextValue="<?";
|
|
nsresult result=aScanner.ReadUntil(mTextValue,kGreaterThan,PR_TRUE);
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
*
|
|
*
|
|
* @update gess 9/23/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CInstructionToken::GetClassName(void){
|
|
return "instruction";
|
|
}
|
|
|
|
/**
|
|
*
|
|
*
|
|
* @update gess 9/23/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CInstructionToken::GetTokenType(void){
|
|
return eToken_instruction;
|
|
}
|
|
|
|
|
|
/**
 *  Construct an error token around aError. The destructor deletes the
 *  stored pointer.
 *
 *  @param   aError -- error object to store in this token
 */
CErrorToken::CErrorToken(nsParserError *aError)
  : CHTMLToken(eHTMLTag_unknown)
{
  mError = aError;
}
|
|
|
|
/**
 *  Destructor; deletes the error object held by this token.
 */
CErrorToken::~CErrorToken()
{
  delete mError;
}
|
|
|
|
/**
 *  Retrieve the token type as an int.
 *
 *  @return  eToken_error
 */
PRInt32 CErrorToken::GetTokenType(void){
  return eToken_error;
}
|
|
|
|
/**
 *  Retrieve the short class name used for this token kind.
 *
 *  @return  the literal "error"
 */
const char* CErrorToken::GetClassName(void){
  return "error";
}
|
|
|
|
/**
 *  Replace the error object stored in this token.
 *
 *  NOTE(review): the previously stored pointer is overwritten without
 *  being deleted, while the destructor deletes mError -- confirm whether
 *  callers release the old error themselves, otherwise it leaks.
 *
 *  @param   aError -- error object to store in this token
 */
void CErrorToken::SetError(nsParserError *aError) {
  mError = aError;
}
|
|
|
|
/**
 *  Retrieve the error object stored in this token.
 *
 *  @return  the stored error pointer (read-only view)
 */
const nsParserError * CErrorToken::GetError(void)
{
  return mError;
}
|
|
|
|
// Doctype decl token
|
|
|
|
/**
 *  Tag based constructor; forwards aTag to CHTMLToken.
 *
 *  @param   aTag -- tag id to init the token with
 */
CDoctypeDeclToken::CDoctypeDeclToken(eHTMLTags aTag)
  : CHTMLToken(aTag)
{
}
|
|
|
|
/**
|
|
* This method consumes a doctype element.
|
|
* Note: I'm rewriting this method to seek to the first <, since quotes can really screw us up.
|
|
*
|
|
* @update gess 9/23/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
nsresult CDoctypeDeclToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aMode) {
|
|
|
|
mTextValue="<!";
|
|
|
|
nsresult result=aScanner.ReadUntil(mTextValue,'<',PR_FALSE);
|
|
return result;
|
|
}
|
|
|
|
/**
 *  Retrieve the short class name used for this token kind.
 *
 *  @return  the literal "doctype"
 */
const char* CDoctypeDeclToken::GetClassName(void) {
  return "doctype";
}
|
|
|
|
/**
 *  Retrieve the token type as an int.
 *
 *  @return  eToken_doctypeDecl
 */
PRInt32 CDoctypeDeclToken::GetTokenType(void) {
  return eToken_doctypeDecl;
}
|