Files
tubestation/parser/htmlparser/src/nsHTMLTokens.cpp
2001-01-12 19:50:25 +00:00

2149 lines
54 KiB
C++

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/*
* The contents of this file are subject to the Netscape Public
* License Version 1.1 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.mozilla.org/NPL/
*
* Software distributed under the License is distributed on an "AS
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
* implied. See the License for the specific language governing
* rights and limitations under the License.
*
* The Original Code is mozilla.org code.
*
* The Initial Developer of the Original Code is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All
* Rights Reserved.
*
* Contributor(s):
*/
#include <ctype.h>
#include <time.h>
#include <stdio.h>
#include "nsScanner.h"
#include "nsToken.h"
#include "nsHTMLTokens.h"
#include "nsIParser.h"
#include "prtypes.h"
#include "nsDebug.h"
#include "nsHTMLTags.h"
#include "nsHTMLEntities.h"
#include "nsCRT.h"
#include "nsReadableUtils.h"
#include "nsScanner.h"
static const char* gUserdefined = "userdefined";
/**************************************************************
And now for the token classes...
**************************************************************/
/*
* constructor from tag id
*
* @update gess 3/25/98
* @param
* @return
*/
CHTMLToken::CHTMLToken(eHTMLTags aTag) : CToken(aTag) {
}
CHTMLToken::~CHTMLToken() {
}
/*
* constructor from tag id
*
* @update gess 3/25/98
* @param
* @return
*/
CStartToken::CStartToken(eHTMLTags aTag) : CHTMLToken(aTag) {
mAttributed=PR_FALSE;
mEmpty=PR_FALSE;
mOrigin=-1;
mContainerInfo=eFormUnknown;
}
CStartToken::CStartToken(const nsAReadableString& aName) : CHTMLToken(eHTMLTag_unknown) {
mAttributed=PR_FALSE;
mEmpty=PR_FALSE;
mOrigin=-1;
mContainerInfo=eFormUnknown;
mTextValue.Assign(aName);
}
CStartToken::CStartToken(const nsAReadableString& aName,eHTMLTags aTag) : CHTMLToken(aTag) {
mAttributed=PR_FALSE;
mEmpty=PR_FALSE;
mOrigin=-1;
mContainerInfo=eFormUnknown;
mTextValue.Assign(aName);
}
nsresult CStartToken::GetIDAttributeAtom(nsIAtom** aResult)
{
NS_ENSURE_ARG_POINTER(aResult);
*aResult = mIDAttributeAtom;
NS_IF_ADDREF(*aResult);
return NS_OK;
}
nsresult CStartToken::SetIDAttributeAtom(nsIAtom* aID)
{
NS_ENSURE_ARG(aID);
mIDAttributeAtom = aID;
return NS_OK;
}
/*
* This method returns the typeid (the tag type) for this token.
*
* @update gess 3/25/98
* @param
* @return
*/
PRInt32 CStartToken::GetTypeID(){
if(eHTMLTag_unknown==mTypeID) {
mTypeID = nsHTMLTags::LookupTag(mTextValue);
}
return mTypeID;
}
/*
*
*
* @update gess 3/25/98
* @param
* @return
*/
const char* CStartToken::GetClassName(void) {
return "start";
}
/*
*
*
* @update gess 3/25/98
* @param
* @return
*/
PRInt32 CStartToken::GetTokenType(void) {
return eToken_start;
}
/*
*
*
* @update gess 3/25/98
* @param
* @return
*/
void CStartToken::SetAttributed(PRBool aValue) {
mAttributed=aValue;
}
/*
*
*
* @update gess 3/25/98
* @param
* @return
*/
PRBool CStartToken::IsAttributed(void) {
return mAttributed;
}
/*
*
*
* @update gess 3/25/98
* @param
* @return
*/
void CStartToken::SetEmpty(PRBool aValue) {
mEmpty=aValue;
}
/*
*
*
* @update gess 3/25/98
* @param
* @return
*/
PRBool CStartToken::IsEmpty(void) {
return mEmpty;
}
/*
* Consume the identifier portion of the start tag
*
* @update gess 3/25/98
* @param aChar -- last char consumed from stream
* @param aScanner -- controller of underlying input source
* @param aMode -- 1=HTML; 0=text (or other ML)
* @return error result
*/
nsresult CStartToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aMode) {
//if you're here, we've already Consumed the < char, and are
//ready to Consume the rest of the open tag identifier.
//Stop consuming as soon as you see a space or a '>'.
//NOTE: We don't Consume the tag attributes here, nor do we eat the ">"
nsresult result=NS_OK;
if(1==aMode) {
nsAutoString theSubstr;
result=aScanner.GetIdentifier(theSubstr,PR_TRUE);
mTypeID = (PRInt32)nsHTMLTags::LookupTag(theSubstr);
if(eHTMLTag_userdefined==mTypeID) {
mTextValue=theSubstr;
}
}
else {
//added PR_TRUE to readId() call below to fix bug 46083. The problem was that the tag given
//was written <title_> but since we didn't respect the '_', we only saw <title>. Then
//we searched for end title, which never comes (they give </title_>).
result=aScanner.ReadIdentifier(mTextValue,PR_TRUE);
mTypeID = nsHTMLTags::LookupTag(mTextValue);
}
return result;
}
/*
* Dump contents of this token to givne output stream
*
* @update gess 3/25/98
* @param out -- ostream to output content
* @return
*/
void CStartToken::DebugDumpSource(nsOutputStream& out) {
char buffer[1000];
mTextValue.ToCString(buffer,sizeof(buffer));
out << "<" << buffer;
if(!mAttributed)
out << ">";
}
const nsAReadableString& CStartToken::GetStringValue()
{
if((eHTMLTag_unknown<mTypeID) && (mTypeID<eHTMLTag_text)) {
if(!mTextValue.Length()) {
mTextValue.AssignWithConversion(nsHTMLTags::GetStringValue((nsHTMLTag) mTypeID));
}
}
return mTextValue;
}
/*
*
*
* @update gess 3/25/98
* @param anOutputString will recieve the result
* @return nada
*/
void CStartToken::GetSource(nsString& anOutputString){
anOutputString.AppendWithConversion("<");
/*
* Watch out for Bug 15204
*/
if(mTrailingContent.Length()>0)
anOutputString=mTrailingContent;
else {
if(mTextValue.Length()>0)
anOutputString.Append(mTextValue);
else
anOutputString.AssignWithConversion(GetTagName(mTypeID));
anOutputString.AppendWithConversion('>');
}
}
/*
*
*
* @update harishd 03/23/00
* @param result appended to the output string.
* @return nada
*/
void CStartToken::AppendSource(nsString& anOutputString){
anOutputString.AppendWithConversion("<");
/*
* Watch out for Bug 15204
*/
if(mTrailingContent.Length()>0)
anOutputString+=mTrailingContent;
else {
if(mTextValue.Length()>0)
anOutputString+=mTextValue;
else
anOutputString.AppendWithConversion(GetTagName(mTypeID));
anOutputString.AppendWithConversion('>');
}
}
/*
* constructor from tag id
*
* @update gess 3/25/98
* @param
* @return
*/
CEndToken::CEndToken(eHTMLTags aTag) : CHTMLToken(aTag) {
}
CEndToken::CEndToken(const nsAReadableString& aName,eHTMLTags aTag) : CHTMLToken(aTag) {
mTextValue.Assign(aName);
}
/*
* Consume the identifier portion of the end tag
*
* @update gess 3/25/98
* @param aChar -- last char consumed from stream
* @param aScanner -- controller of underlying input source
* @param aMode -- 1=HTML; 0=text (or other ML)
* @return error result
*/
nsresult CEndToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aMode) {
//if you're here, we've already Consumed the <! chars, and are
//ready to Consume the rest of the open tag identifier.
//Stop consuming as soon as you see a space or a '>'.
//NOTE: We don't Consume the tag attributes here, nor do we eat the ">"
nsresult result=NS_OK;
nsAutoString buffer;
PRInt32 offset;
if(1==aMode) {
nsAutoString theSubstr;
result=aScanner.ReadUntil(theSubstr,kGreaterThan,PR_FALSE);
if (NS_FAILED(result)) {
return result;
}
offset = theSubstr.FindCharInSet(" \r\n\t\b",0);
if (offset != kNotFound) {
theSubstr.Left(buffer, offset);
mTypeID = nsHTMLTags::LookupTag(buffer);
}
else {
mTypeID = nsHTMLTags::LookupTag(theSubstr);
}
if(eHTMLTag_userdefined==mTypeID) {
mTextValue=theSubstr;
}
}
else {
mTextValue.SetLength(0);
result=aScanner.ReadUntil(mTextValue,kGreaterThan,PR_FALSE);
if (NS_FAILED(result)) {
return result;
}
mTypeID = eHTMLTag_userdefined;
}
result=aScanner.GetChar(aChar); //eat the closing '>;
return result;
}
/*
* Asks the token to determine the <i>HTMLTag type</i> of
* the token. This turns around and looks up the tag name
* in the tag dictionary.
*
* @update gess 3/25/98
* @param
* @return eHTMLTag id of this endtag
*/
PRInt32 CEndToken::GetTypeID(){
if(eHTMLTag_unknown==mTypeID) {
mTypeID = nsHTMLTags::LookupTag(mTextValue);
switch(mTypeID) {
case eHTMLTag_dir:
case eHTMLTag_menu:
mTypeID=eHTMLTag_ul;
break;
default:
break;
}
}
return mTypeID;
}
/*
*
*
* @update gess 3/25/98
* @param
* @return
*/
const char* CEndToken::GetClassName(void) {
return "/end";
}
/*
*
*
* @update gess 3/25/98
* @param
* @return
*/
PRInt32 CEndToken::GetTokenType(void) {
return eToken_end;
}
/*
* Dump contents of this token to givne output stream
*
* @update gess 3/25/98
* @param out -- ostream to output content
* @return
*/
void CEndToken::DebugDumpSource(nsOutputStream& out) {
char buffer[1000];
mTextValue.ToCString(buffer,sizeof(buffer));
out << "</" << buffer << ">";
}
const nsAReadableString& CEndToken::GetStringValue()
{
if((eHTMLTag_unknown<mTypeID) && (mTypeID<eHTMLTag_text)) {
if(!mTextValue.Length()) {
mTextValue.AssignWithConversion(nsHTMLTags::GetStringValue((nsHTMLTag) mTypeID));
}
}
return mTextValue;
}
/*
*
*
* @update gess 3/25/98
* @param anOutputString will recieve the result
* @return nada
*/
void CEndToken::GetSource(nsString& anOutputString){
anOutputString.AppendWithConversion("</");
if(mTextValue.Length()>0)
anOutputString.Append(mTextValue);
else
anOutputString.AppendWithConversion(GetTagName(mTypeID));
anOutputString.AppendWithConversion(">");
}
/*
*
*
* @update harishd 03/23/00
* @param result appended to the output string.
* @return nada
*/
void CEndToken::AppendSource(nsString& anOutputString){
anOutputString.AppendWithConversion("</");
if(mTextValue.Length()>0)
anOutputString.Append(mTextValue);
else
anOutputString.AppendWithConversion(GetTagName(mTypeID));
anOutputString.AppendWithConversion(">");
}
/*
* default constructor
*
* @update gess 3/25/98
* @param aName -- string to init token name with
* @return
*/
CTextToken::CTextToken() : CHTMLToken(eHTMLTag_text) {
}
/*
* string based constructor
*
* @update gess 3/25/98
* @param aName -- string to init token name with
* @return
*/
CTextToken::CTextToken(const nsAReadableString& aName) : CHTMLToken(eHTMLTag_text) {
mTextValue.Rebind(aName);
}
/*
*
*
* @update gess 3/25/98
* @param
* @return
*/
const char* CTextToken::GetClassName(void) {
return "text";
}
/*
*
*
* @update gess 3/25/98
* @param
* @return
*/
PRInt32 CTextToken::GetTokenType(void) {
return eToken_text;
}
PRInt32 CTextToken::GetTextLength(void) {
return mTextValue.Length();
}
/*
* Consume as much clear text from scanner as possible.
*
* @update gess 3/25/98
* @param aChar -- last char consumed from stream
* @param aScanner -- controller of underlying input source
* @return error result
*/
nsresult CTextToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aMode) {;
static nsString theTerminals = NS_ConvertToString("\n\r&<",4);
nsresult result=NS_OK;
PRBool done=PR_FALSE;
nsReadingIterator<PRUnichar> origin, start, end;
// Start scanning after the first character, because we know it to
// be part of this text token (we wouldn't have come here if it weren't)
aScanner.CurrentPosition(origin);
start = origin;
start++;
aScanner.SetPosition(start);
while((NS_OK==result) && (!done)) {
result=aScanner.ReadUntil(start, end, theTerminals,PR_FALSE);
if(NS_OK==result) {
result=aScanner.Peek(aChar);
if(((kCR==aChar) || (kNewLine==aChar)) && (NS_OK==result)) {
result=aScanner.GetChar(aChar); //strip off the char
PRUnichar theNextChar;
result=aScanner.Peek(theNextChar); //then see what's next.
switch(aChar) {
case kCR:
// result=aScanner.GetChar(aChar);
if(kLF==theNextChar) {
// If the "\r" is followed by a "\n", don't replace it and
// let it be ignored by the layout system
end += 2;
result=aScanner.GetChar(theNextChar);
}
else {
// If it standalone, replace the "\r" with a "\n" so that
// it will be considered by the layout system
aScanner.ReplaceCharacter(end, kLF);
end++;
}
mNewlineCount++;
break;
case kLF:
end++;
mNewlineCount++;
break;
} //switch
}
else done=PR_TRUE;
}
}
aScanner.BindSubstring(mTextValue, origin, end);
return result;
}
/*
* Consume as much clear text from scanner as possible.
*
* @update gess 3/25/98
* @param aChar -- last char consumed from stream
* @param aScanner -- controller of underlying input source
* @return error result
*/
nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScanner& aScanner,
nsString& aTerminalString,PRInt32 aMode,PRBool& aFlushTokens){
nsresult result=NS_OK;
nsReadingIterator<PRUnichar> theStartOffset, theCurrOffset, theTermStrPos, theStartCommentPos, theAltTermStrPos, endPos;
PRBool done=PR_FALSE;
PRBool theLastIteration=PR_FALSE;
PRInt32 termStrLen=aTerminalString.Length();
aScanner.CurrentPosition(theStartOffset);
theCurrOffset = theStartOffset;
aScanner.EndReading(endPos);
theTermStrPos = theStartCommentPos = theAltTermStrPos = endPos;
// ALGORITHM: *** The performance is based on correctness of the document ***
// 1. Look for a '<' character. This could be
// a) Start of a comment (<!--), b) Start of the terminal string, or c) a start of a tag.
// We are interested in a) and b). c) is ignored because in CDATA we don't care for tags.
// NOTE: Technically speaking in CDATA we should ignore the comments too!! But for compatibility
// we don't.
// 2. Having the offset, for '<', search for the terminal string from there on and record its offset.
// 3. From the same '<' offset also search for start of a comment '<!--'. If found search for
// end comment '-->' between the terminal string and '<!--'. If you did not find the end
// comment, then we have a malformed document, i.e., this section has a prematured terminal string
// Ex. <SCRIPT><!-- document.write('</SCRIPT>') //--> </SCRIPT>. But anyway record terminal string's
// offset and update the current offset to the terminal string (prematured) offset and goto step 1.
// 4. Amen...If you found a terminal string and '-->'. Otherwise goto step 1.
// 5. If the end of the document is reached and if we still don't have the condition in step 4. then
// assume that the prematured terminal string is the actual terminal string and goto step 1. This
// will be our last iteration.
// When is the disaster enabled?
// a) when the buffer runs out ot data.
// b) when the terminal string is not found.
PRBool disaster = PR_FALSE;
while((result == NS_OK) && !done) {
if (FindCharInReadable(PRUnichar(kLessThan), theCurrOffset, endPos)) {
nsReadingIterator<PRUnichar> tempOffset = theCurrOffset;
while(1) {
if (FindCharInReadable(PRUnichar(kGreaterThan), tempOffset, endPos)) {
// Make a copy of the (presumed) end tag and
// do a case-insensitive comparision
nsAutoString str;
nsReadingIterator<PRUnichar> start(tempOffset), end(tempOffset);
start.advance(-termStrLen);
CopyUnicodeTo(start, end, str);
if (str.EqualsIgnoreCase(aTerminalString)) {
theTermStrPos = tempOffset;
theTermStrPos.advance(-termStrLen);
break;
}
tempOffset.advance(1);
}
else {
// Ran out of data and haven't found the terminal string yet.
// Note: If a bogus terminal string is found it would have
// been stored in theAltTermStrPos; Bug: 64576
theTermStrPos=endPos;
break; // we have reached the end of the document
}
}
if (theTermStrPos != endPos) {
if((aMode != eDTDMode_strict) && (aMode != eDTDMode_transitional) &&
!theLastIteration && !aIgnoreComments) {
nsReadingIterator<PRUnichar> endComment(theCurrOffset);
endComment += 5;
if ((theStartCommentPos == endPos) &&
FindInReadable(NS_LITERAL_STRING("<!--"), theCurrOffset, endComment)) {
theStartCommentPos = theCurrOffset;
}
if (theStartCommentPos != endPos) {
// Search for --> between <!-- and </TERMINALSTRING>.
theCurrOffset = theStartCommentPos;
nsReadingIterator<PRUnichar> terminal(theTermStrPos);
if (!RFindInReadable(NS_LITERAL_STRING("-->"),
theCurrOffset, terminal)) {
// If you're here it means that we have a bogus terminal string.
// Even though it is bogus, the position of the terminal string
// could be helpful in case we hit the rock bottom.
theAltTermStrPos = theTermStrPos;
// We did not find '-->' so keep searching for terminal string.
theCurrOffset = theTermStrPos;
theCurrOffset.advance(termStrLen);
continue;
}
}
}
disaster=PR_FALSE;
aScanner.BindSubstring(mTextValue, theStartOffset, theTermStrPos);
theTermStrPos.advance(termStrLen+1);
aScanner.SetPosition(theTermStrPos);
// We found </SCRIPT>...permit flushing -> Ref: Bug 22485
aFlushTokens=PR_TRUE;
done = PR_TRUE;
}
else {
disaster = PR_TRUE;
}
}
else {
disaster = PR_TRUE;
}
if(disaster) {
if(!aScanner.IsIncremental()) {
if(theAltTermStrPos != endPos) {
// If you're here it means..we hit the rock bottom and therefore switch to plan B.
theCurrOffset = theAltTermStrPos;
theLastIteration = PR_TRUE;
}
else {
aTerminalString.Cut(0,2);
done = PR_TRUE; // Do this to fix Bug. 35456
}
}
else
result=kEOF;
}
}
return result;
}
void CTextToken::CopyTo(nsAWritableString& aStr)
{
aStr.Assign(mTextValue);
}
const nsAReadableString& CTextToken::GetStringValue(void)
{
return mTextValue;
}
void CTextToken::Bind(nsScanner* aScanner, nsReadingIterator<PRUnichar>& aStart, nsReadingIterator<PRUnichar>& aEnd)
{
aScanner->BindSubstring(mTextValue, aStart, aEnd);
}
void CTextToken::Bind(const nsAReadableString& aStr)
{
mTextValue.Rebind(aStr);
}
/*
* default constructor
*
* @update vidur 11/12/98
* @param aName -- string to init token name with
* @return
*/
CCDATASectionToken::CCDATASectionToken() : CHTMLToken(eHTMLTag_unknown) {
}
/*
* string based constructor
*
* @update vidur 11/12/98
* @param aName -- string to init token name with
* @return
*/
CCDATASectionToken::CCDATASectionToken(const nsAReadableString& aName) : CHTMLToken(eHTMLTag_unknown) {
mTextValue.Assign(aName);
}
/*
*
*
* @update vidur 11/12/98
* @param
* @return
*/
const char* CCDATASectionToken::GetClassName(void) {
return "cdatasection";
}
/*
*
* @update vidur 11/12/98
* @param
* @return
*/
PRInt32 CCDATASectionToken::GetTokenType(void) {
return eToken_cdatasection;
}
/*
* Consume as much marked test from scanner as possible.
*
* @update rgess 12/15/99: had to handle case: "<![ ! IE 5]>", in addition to "<![..[..]]>".
* @param aChar -- last char consumed from stream
* @param aScanner -- controller of underlying input source
* @return error result
*/
nsresult CCDATASectionToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aMode) {
static const char* theTerminals="\r]";
nsresult result=NS_OK;
PRBool done=PR_FALSE;
while((NS_OK==result) && (!done)) {
result=aScanner.ReadUntil(mTextValue,theTerminals,PR_FALSE);
if(NS_OK==result) {
result=aScanner.Peek(aChar);
if((kCR==aChar) && (NS_OK==result)) {
result=aScanner.GetChar(aChar); //strip off the \r
result=aScanner.Peek(aChar); //then see what's next.
if(NS_OK==result) {
switch(aChar) {
case kCR:
result=aScanner.GetChar(aChar); //strip off the \r
mTextValue.AppendWithConversion("\n\n");
break;
case kNewLine:
//which means we saw \r\n, which becomes \n
result=aScanner.GetChar(aChar); //strip off the \n
//now fall through on purpose...
default:
mTextValue.AppendWithConversion("\n");
break;
} //switch
} //if
}
else if (']'==aChar) {
result=aScanner.GetChar(aChar); //strip off the ]
mTextValue.Append(aChar);
result=aScanner.Peek(aChar); //then see what's next.
if((NS_OK==result) && (kRightSquareBracket==aChar)) {
result=aScanner.GetChar(aChar); //strip off the second ]
mTextValue.Append(aChar);
result=aScanner.Peek(aChar); //then see what's next.
}
if((NS_OK==result) && (kGreaterThan==aChar)) {
result=aScanner.GetChar(aChar); //strip off the >
done=PR_TRUE;
}
}
else done=PR_TRUE;
}
}
return result;
}
const nsAReadableString& CCDATASectionToken::GetStringValue(void)
{
return mTextValue;
}
/*
* Default constructor
*
* @update gess 3/25/98
* @param aName -- string to init token name with
* @return
*/
CCommentToken::CCommentToken() : CHTMLToken(eHTMLTag_comment) {
}
/*
* Copy constructor
*
* @update gess 3/25/98
* @param
* @return
*/
CCommentToken::CCommentToken(const nsAReadableString& aName) : CHTMLToken(eHTMLTag_comment) {
mTextValue.Assign(aName);
}
/*
* This method consumes a comment using the (CORRECT) comment parsing
* algorithm supplied by W3C.
*
* @update gess 01/04/99
* @param
* @param
* @return
*/
static
nsresult ConsumeStrictComment(PRUnichar aChar, nsScanner& aScanner,nsString& aString) {
nsresult result=NS_OK;
/*********************************************************
NOTE: This algorithm does a fine job of handling comments
when they're formatted per spec, but if they're not
we don't handle them well. For example, we gack
on the following:
<!-- xx -- xx -->
*********************************************************/
aString.AssignWithConversion("<!");
while(NS_OK==result) {
result=aScanner.GetChar(aChar);
if(NS_OK==result) {
aString+=aChar;
if(kMinus==aChar) {
result=aScanner.GetChar(aChar);
if(NS_OK==result) {
if(kMinus==aChar) {
//in this case, we're reading a long-form comment <-- xxx -->
aString+=aChar;
if(NS_OK==result) {
PRInt32 findpos=-1;
nsAutoString temp;
//Read to the first ending sequence '--'
while((kNotFound==findpos) && (NS_OK==result)) {
result=aScanner.ReadUntil(temp,kMinus,PR_TRUE);
findpos=temp.RFind("--");
}
aString+=temp;
if(NS_OK==result) {
if(NS_OK==result) {
temp.AssignWithConversion("->");
result=aScanner.ReadUntil(aString,temp,PR_FALSE);
}
}
}
} //
else break; //go find '>'
}
}//if
else if(kGreaterThan==aChar) {
return result;
}
else break; //go find '>'
}//if
}//while
if(NS_OK==result) {
//Read up to the closing '>', unless you already did! (such as <!>).
if(kGreaterThan!=aChar) {
result=aScanner.ReadUntil(aString,kGreaterThan,PR_TRUE);
}
}
return result;
}
/*
* This method consumes a comment using common (actually non-standard)
* algorithm that seems to work against the content on the web.
*
* @update gess 01/04/99
* @param
* @param
* @return
*/
static
nsresult ConsumeComment(PRUnichar aChar, nsScanner& aScanner,nsString& aString) {
nsresult result=NS_OK;
/*********************************************************
NOTE: This algorithm does a fine job of handling comments
commonly used, but it doesn't really consume them
per spec (But then, neither does IE or Nav).
*********************************************************/
nsReadingIterator<PRUnichar> theStartOffset, theCurrOffset, theBestAltPos, endPos;
aScanner.EndReading(endPos);
theBestAltPos = endPos;
aScanner.CurrentPosition(theStartOffset);
result=aScanner.GetChar(aChar);
if(NS_OK==result) {
if(kMinus==aChar) {
result=aScanner.GetChar(aChar);
if(NS_OK==result) {
if(kMinus==aChar) {
//in this case, we're reading a long-form comment <-- xxx -->
theCurrOffset = theStartOffset;
while((NS_OK==result)) {
if (FindCharInReadable(PRUnichar(kGreaterThan), theCurrOffset, endPos)) {
theCurrOffset++;
nsReadingIterator<PRUnichar> temp = theCurrOffset;
temp.advance(-3);
aChar=*temp;
if(kMinus==aChar) {
temp++;
aChar=*temp;
if(kMinus==aChar) {
theStartOffset -= 2; // Include "<!" also..
CopyUnicodeTo(theStartOffset, theCurrOffset, aString);
aScanner.SetPosition(theCurrOffset);
return result; // We have found the dflt end comment delimiter ("-->")
}
}
if(theBestAltPos == endPos) {
// If we did not find the dflt then assume that '>' is the end comment
// until we find '-->'. Nav. Compatibility -- Ref: Bug# 24006
theBestAltPos=theCurrOffset;
}
}
else {
result=kEOF;
}
} //while
if((endPos==theCurrOffset) && (!aScanner.IsIncremental())) {
//if you're here, then we're in a special state.
//The problem at hand is that we've hit the end of the document without finding the normal endcomment delimiter "-->".
//In this case, the first thing we try is to see if we found one of the alternate endcomment delimiter ">".
//If so, rewind just pass than, and use everything up to that point as your comment.
//If not, the document has no end comment and should be treated as one big comment.
if(endPos != theBestAltPos) {
theStartOffset -= 2;// Include "<!" also..
CopyUnicodeTo(theStartOffset, theBestAltPos, aString);
aScanner.SetPosition(theBestAltPos);
result=NS_OK;
}
}
return result;
} //if
}//if
}//if
}//if
if(NS_OK==result) {
//Read up to the closing '>', unless you already did! (such as <!>).
if(kGreaterThan!=aChar) {
aString.AppendWithConversion("<!- ");
result=aScanner.ReadUntil(aString,kGreaterThan,PR_TRUE);
}
}
return result;
}
/*
* Consume the identifier portion of the comment.
* Note that we've already eaten the "<!" portion.
*
* @update gess 16June2000
* @param aChar -- last char consumed from stream
* @param aScanner -- controller of underlying input source
* @return error result
*/
nsresult CCommentToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aMode) {
nsresult result=PR_TRUE;
switch(aMode) {
#if 0 //set to 1 if you want strict comments... bug 53011...
case eDTDMode_strict:
result=ConsumeStrictComment(aChar,aScanner,mTextValue);
break;
#endif
case eDTDMode_transitional:
default:
result=ConsumeComment(aChar,aScanner,mTextValue);
break;
} //switch
return result;
}
const nsAReadableString& CCommentToken::GetStringValue(void)
{
return mTextValue;
}
/*
*
*
* @update gess 3/25/98
* @param
* @return
*/
const char* CCommentToken::GetClassName(void){
return "/**/";
}
/*
*
*
* @update gess 3/25/98
* @param
* @return
*/
PRInt32 CCommentToken::GetTokenType(void) {
return eToken_comment;
}
/*
* default constructor
*
* @update gess 3/25/98
* @param aName -- string to init token name with
* @return
*/
CNewlineToken::CNewlineToken() : CHTMLToken(eHTMLTag_newline) {
}
/*
*
*
* @update gess 3/25/98
* @param
* @return
*/
const char* CNewlineToken::GetClassName(void) {
return "crlf";
}
/*
*
*
* @update gess 3/25/98
* @param
* @return
*/
PRInt32 CNewlineToken::GetTokenType(void) {
return eToken_newline;
}
static nsSlidingSubstring* gNewlineStr;
void CNewlineToken::AllocNewline()
{
gNewlineStr = new nsSlidingSubstring(NS_LITERAL_STRING("\n"));
}
void CNewlineToken::FreeNewline()
{
if (gNewlineStr) {
delete gNewlineStr;
gNewlineStr = nsnull;
}
}
/**
* This method retrieves the value of this internal string.
*
* @update gess 3/25/98
* @return nsString reference to internal string value
*/
const nsAReadableString& CNewlineToken::GetStringValue(void) {
return *gNewlineStr;
}
/*
* Consume as many cr/lf pairs as you can find.
*
* @update gess 3/25/98
* @param aChar -- last char consumed from stream
* @param aScanner -- controller of underlying input source
* @return error result
*/
nsresult CNewlineToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aMode) {
/*******************************************************************
Here's what the HTML spec says about newlines:
"A line break is defined to be a carriage return (&#x000D;),
a line feed (&#x000A;), or a carriage return/line feed pair.
All line breaks constitute white space."
*******************************************************************/
PRUnichar theChar;
nsresult result=aScanner.Peek(theChar);
if(NS_OK==result) {
switch(aChar) {
case kNewLine:
if(kCR==theChar) {
result=aScanner.GetChar(theChar);
}
break;
case kCR:
//convert CRLF into just CR
if(kNewLine==theChar) {
result=aScanner.GetChar(theChar);
}
break;
default:
break;
}
}
return result;
}
/*
* default constructor
*
* @update gess 3/25/98
* @param aName -- string to init token name with
* @return
*/
CAttributeToken::CAttributeToken() : CHTMLToken(eHTMLTag_unknown) {
mLastAttribute=PR_FALSE;
mHasEqualWithoutValue=PR_FALSE;
}
/*
* string based constructor
*
* @update gess 3/25/98
* @param aName -- string value to init token name with
* @return
*/
CAttributeToken::CAttributeToken(const nsAReadableString& aName) : CHTMLToken(eHTMLTag_unknown) {
mTextValue.Assign(aName);
mLastAttribute=PR_FALSE;
mHasEqualWithoutValue=PR_FALSE;
}
/*
* construct initializing data to
* key value pair
*
* @update gess 3/25/98
* @param aName -- string value to init token name with
* @return
*/
CAttributeToken::CAttributeToken(const nsAReadableString& aKey, const nsAReadableString& aName) : CHTMLToken(eHTMLTag_unknown) {
mTextValue.Assign(aName);
mTextKey.Rebind(aKey);
mLastAttribute=PR_FALSE;
mHasEqualWithoutValue=PR_FALSE;
}
/*
*
*
* @update gess 3/25/98
* @param
* @return
*/
const char* CAttributeToken::GetClassName(void) {
return "attr";
}
/*
*
*
* @update gess 3/25/98
* @param
* @return
*/
PRInt32 CAttributeToken::GetTokenType(void) {
return eToken_attribute;
}
/*
* Removes non-alpha-non-digit characters from the end of a KEY
*
* @update harishd 07/15/99
* @param
* @return
*/
void CAttributeToken::SanitizeKey() {
PRInt32 length=mTextKey.Length();
if(length > 0) {
nsReadingIterator<PRUnichar> iter, begin, end;
mTextKey.BeginReading(begin);
mTextKey.EndReading(end);
iter = end;
// Look for the first legal character starting from
// the end of the string
do {
--iter;
} while (!nsCRT::IsAsciiAlpha(*iter) &&
!nsCRT::IsAsciiDigit(*iter) &&
(iter != begin));
// If there were any illegal characters, just copy out the
// legal part
if (iter != --end) {
nsAutoString str;
CopyUnicodeTo(begin, iter, str);
mTextKey.Rebind(str);
}
}
return;
}
/*
* Dump contents of this token to given output stream
*
* @update gess 3/25/98
* @param out -- ostream to output content
* @return
*/
void CAttributeToken::DebugDumpToken(nsOutputStream& out) {
char buffer[200];
nsCAutoString str;
CopyUCS2toASCII(mTextKey, str);
out << "[" << GetClassName() << "] " << str.GetBuffer() << "=";
mTextValue.ToCString(buffer,sizeof(buffer));
out << buffer << ": " << mTypeID << nsEndl;
}
const nsAReadableString& CAttributeToken::GetStringValue(void)
{
return mTextValue;
}
/*
*
*
* @update rickg 6June2000
* @param anOutputString will recieve the result
* @return nada
*/
void CAttributeToken::GetSource(nsString& anOutputString){
anOutputString.Truncate();
AppendSource(anOutputString);
}
/*
*
*
* @update rickg 6June2000
* @param result appended to the output string.
* @return nada
*/
void CAttributeToken::AppendSource(nsString& anOutputString){
anOutputString.Append(mTextKey);
if(mTextValue.Length() || mHasEqualWithoutValue)
anOutputString.AppendWithConversion("=");
anOutputString.Append(mTextValue);
// anOutputString.AppendWithConversion(";");
}
/*
* This general purpose method is used when you want to
* consume a known quoted string.
*
* @update gess 3/25/98
* @param aChar -- last char consumed from stream
* @param aScanner -- controller of underlying input source
* @return error result
*/
nsresult ConsumeQuotedString(PRUnichar aChar,nsString& aString,nsScanner& aScanner){
nsresult result=NS_OK;
nsReadingIterator<PRUnichar> theOffset;
aScanner.CurrentPosition(theOffset);
switch(aChar) {
case kQuote:
result=aScanner.ReadUntil(aString,kQuote,PR_TRUE);
if(NS_OK==result)
result=aScanner.SkipOver(kQuote); //this code is here in case someone mistakenly adds multiple quotes...
break;
case kApostrophe:
result=aScanner.ReadUntil(aString,kApostrophe,PR_TRUE);
if(NS_OK==result)
result=aScanner.SkipOver(kApostrophe); //this code is here in case someone mistakenly adds multiple apostrophes...
break;
default:
break;
}
// Ref: Bug 35806
// A back up measure when disaster strikes...
// Ex <table> <tr d="><td>hello</td></tr></table>
PRUnichar ch=aString.Last();
if(ch!=aChar) {
if(!aScanner.IsIncremental() && result==kEOF) {
aScanner.SetPosition(theOffset, PR_FALSE, PR_TRUE);
aString.Assign(aChar);
result=kBadStringLiteral;
}
else {
aString+=aChar;
}
}
return result;
}
/*
* This general purpose method is used when you want to
* consume attributed text value.
*
* @update gess 3/25/98
* @param aChar -- last char consumed from stream
* @param aScanner -- controller of underlying input source
* @return error result
*/
static
nsresult ConsumeAttributeValueText(PRUnichar,nsString& aString,nsScanner& aScanner){
static nsString theTerminals = NS_ConvertToString("\b\t\n\r >",6);
nsresult result=aScanner.ReadUntil(aString,theTerminals,PR_FALSE);
//Let's force quotes if either the first or last char is quoted.
PRUnichar theLast=aString.Last();
PRUnichar theFirst=aString.First();
if(kQuote==theLast) {
if(kQuote!=theFirst) {
aString.Insert(kQuote,0);;
}
}
else if(kQuote==theFirst) {
if(kQuote!=theLast) {
aString+=kQuote;
}
}
return result;
}
/*
* Consume the key and value portions of the attribute.
*
* @update rickg 03.23.2000
* @param aChar -- last char consumed from stream
* @param aScanner -- controller of underlying input source
* @param aRetainWhitespace -- 0=discard, 1=retain
* @return error result
*/
nsresult CAttributeToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aRetainWhitespace) {
nsresult result;
//I changed a bit of this method to use aRetainWhitespace so that we do the right
//thing in viewsource. The ws/cr/lf sequences are now maintained, and viewsource looks good.
nsReadingIterator<PRUnichar> wsstart, wsend;
result=(aRetainWhitespace) ? aScanner.ReadWhitespace(wsstart, wsend) : aScanner.SkipWhitespace();
if(NS_OK==result) {
result=aScanner.Peek(aChar);
if(NS_OK==result) {
nsReadingIterator<PRUnichar> start, end;
if((kHashsign==aChar) || (nsCRT::IsAsciiDigit(aChar))){
result=aScanner.ReadNumber(start, end);
}
else {
//If you're here, handle an unquoted key.
static nsAutoString theTerminals = NS_ConvertToString("\b\t\n\r \"=>",8);
result=aScanner.ReadUntil(start,end,theTerminals,PR_FALSE);
}
if (!aRetainWhitespace) {
aScanner.BindSubstring(mTextKey, start, end);
}
//now it's time to Consume the (optional) value...
if(NS_OK==result) {
if (aRetainWhitespace) {
aScanner.ReadWhitespace(start, wsend);
aScanner.BindSubstring(mTextKey, wsstart, wsend);
}
else {
aScanner.SkipWhitespace();
}
if(NS_OK==result) {
result=aScanner.Peek(aChar); //Skip ahead until you find an equal sign or a '>'...
if(NS_OK==result) {
if(kEqual==aChar){
result=aScanner.GetChar(aChar); //skip the equal sign...
if(NS_OK==result) {
result=(aRetainWhitespace) ? aScanner.ReadWhitespace(mTextValue) : aScanner.SkipWhitespace();
if(NS_OK==result) {
result=aScanner.Peek(aChar); //and grab the next char.
if(NS_OK==result) {
if((kQuote==aChar) || (kApostrophe==aChar)) {
aScanner.GetChar(aChar);
mTextValue.Append(aChar);
result=ConsumeQuotedString(aChar,mTextValue,aScanner);
if(result==kBadStringLiteral) {
result=ConsumeAttributeValueText(aChar,mTextValue,aScanner);
}
// According to spec. we ( who? ) should ignore linefeeds. But look,
// even the carriage return was getting stripped ( wonder why! ) -
// Ref. to bug 15204. Okay, so the spec. told us to ignore linefeeds,
// bug then what about bug 47535 ? Should we preserve everything then?
// Well, let's make it so! Commenting out the next two lines..
/*if(!aRetainWhitespace)
mTextValue.StripChars("\r\n"); //per the HTML spec, ignore linefeeds...
*/
}
else if(kGreaterThan==aChar){
mHasEqualWithoutValue=PR_TRUE;
}
else if(kAmpersand==aChar) {
// XXX - Discard script entity for now....except in
// view-source
aScanner.GetChar(aChar);
PRBool discard=!aRetainWhitespace;
mTextValue.Append(aChar);
result=aScanner.GetChar(aChar);
if(NS_OK==result) {
mTextValue.Append(aChar);
result=CEntityToken::ConsumeEntity(aChar,mTextValue,aScanner);
}
if(discard) mTextValue.Truncate();
}
else {
aScanner.GetChar(aChar);
mTextValue.Append(aChar); //it's an alphanum attribute...
result=ConsumeAttributeValueText(aChar,mTextValue,aScanner);
}
}//if
if(NS_OK==result) {
result=(aRetainWhitespace) ? aScanner.ReadWhitespace(mTextValue) : aScanner.SkipWhitespace();
}
}//if
}//if
}//if
else {
//This is where we have to handle fairly busted content.
//If you're here, it means we saw an attribute name, but couldn't find
//the following equal sign. <tag NAME=....
//Doing this right in all cases is <i>REALLY</i> ugly.
//My best guess is to grab the next non-ws char. We know it's not '=',
//so let's see what it is. If it's a '"', then assume we're reading
//from the middle of the value. Try stripping the quote and continuing...
if(kQuote==aChar){
result=aScanner.SkipOver(aChar); //strip quote.
}
}
}//if
} //if
}//if (consume optional value)
if(NS_OK==result) {
result=aScanner.Peek(aChar);
mLastAttribute= PRBool((kGreaterThan==aChar) || (kEOF==result));
}
} //if
}//if
return result;
}
/*
* Dump contents of this token to givne output stream
*
* @update gess 3/25/98
* @param out -- ostream to output content
* @return
*/
void CAttributeToken::DebugDumpSource(nsOutputStream& out) {
static char buffer[1000];
nsCAutoString str;
CopyUCS2toASCII(mTextKey, str);
out << " " << str.GetBuffer();
if(mTextValue.Length()){
mTextValue.ToCString(buffer,sizeof(buffer));
out << "=" << buffer;
}
if(mLastAttribute)
out<<">";
}
void CAttributeToken::SetKey(const nsAReadableString& aKey)
{
mTextKey.Rebind(aKey);
}
void CAttributeToken::BindKey(nsScanner* aScanner,
nsReadingIterator<PRUnichar>& aStart,
nsReadingIterator<PRUnichar>& aEnd)
{
aScanner->BindSubstring(mTextKey, aStart, aEnd);
}
/*
* default constructor
*
* @update gess 3/25/98
* @param aName -- string to init token name with
* @return
*/
CWhitespaceToken::CWhitespaceToken() : CHTMLToken(eHTMLTag_whitespace) {
}
/*
* default constructor
*
* @update gess 3/25/98
* @param aName -- string value to init token name with
* @return
*/
CWhitespaceToken::CWhitespaceToken(const nsAReadableString& aName) : CHTMLToken(eHTMLTag_whitespace) {
mTextValue.Assign(aName);
}
/*
*
*
* @update gess 3/25/98
* @param
* @return
*/
const char* CWhitespaceToken::GetClassName(void) {
return "ws";
}
/*
*
*
* @update gess 3/25/98
* @param
* @return
*/
PRInt32 CWhitespaceToken::GetTokenType(void) {
return eToken_whitespace;
}
/*
* This general purpose method is used when you want to
* consume an aribrary sequence of whitespace.
*
* @update gess 3/25/98
* @param aChar -- last char consumed from stream
* @param aScanner -- controller of underlying input source
* @return error result
*/
nsresult CWhitespaceToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aMode) {
mTextValue.Assign(aChar);
nsresult result=aScanner.ReadWhitespace(mTextValue);
if(NS_OK==result) {
mTextValue.StripChar(kCR);
}
return result;
}
const nsAReadableString& CWhitespaceToken::GetStringValue(void)
{
return mTextValue;
}
/*
* default constructor
*
* @update gess 3/25/98
* @param aName -- string to init token name with
* @return
*/
CEntityToken::CEntityToken() : CHTMLToken(eHTMLTag_entity) {
}
/*
* default constructor
*
* @update gess 3/25/98
* @param aName -- string value to init token name with
* @return
*/
CEntityToken::CEntityToken(const nsAReadableString& aName) : CHTMLToken(eHTMLTag_entity) {
mTextValue.Assign(aName);
#ifdef VERBOSE_DEBUG
if(!VerifyEntityTable()) {
cout<<"Entity table is invalid!" << endl;
}
#endif
}
/*
* Consume the rest of the entity. We've already eaten the "&".
*
* @update gess 3/25/98
* @param aChar -- last char consumed from stream
* @param aScanner -- controller of underlying input source
* @return error result
*/
nsresult CEntityToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aMode) {
if(aChar)
mTextValue.Assign(aChar);
nsresult result=ConsumeEntity(aChar,mTextValue,aScanner);
return result;
}
/*
*
*
* @update gess 3/25/98
* @param
* @return
*/
const char* CEntityToken::GetClassName(void) {
return "&entity";
}
/*
*
*
* @update gess 3/25/98
* @param
* @return
*/
PRInt32 CEntityToken::GetTokenType(void) {
return eToken_entity;
}
/*
* This general purpose method is used when you want to
* consume an entity &xxxx;. Keep in mind that entities
* are <i>not</i> reduced inline.
*
* @update gess 3/25/98
* @param aChar -- last char consumed from stream
* @param aScanner -- controller of underlying input source
* @return error result
*/
PRInt32 CEntityToken::ConsumeEntity(PRUnichar aChar,nsString& aString,nsScanner& aScanner){
PRUnichar theChar=0;
PRInt32 result=aScanner.Peek(theChar);
if(NS_OK==result) {
if(kLeftBrace==aChar) {
//you're consuming a script entity...
PRInt32 rightBraceCount = 0;
PRInt32 leftBraceCount = 1;
while(leftBraceCount!=rightBraceCount) {
result=aScanner.GetChar(aChar);
if(NS_OK!=result) return result;
aString += aChar;
if(aChar==kRightBrace)
rightBraceCount++;
else if(aChar==kLeftBrace)
leftBraceCount++;
}
result=aScanner.ReadUntil(aString,kSemicolon,PR_FALSE);
if(NS_OK==result) {
result=aScanner.GetChar(aChar); // This character should be a semicolon
if(NS_OK==result) aString += aChar;
}
} //if
else {
if(kHashsign==aChar) {
if('X'==(toupper((char)theChar))) {
result=aScanner.GetChar(theChar);
aString+=theChar;
}
if(NS_OK==result){
result=aScanner.ReadNumber(aString);
}
}
else result=aScanner.ReadIdentifier(aString,PR_TRUE); // Ref. Bug# 23791 - For setting aIgnore to PR_TRUE.
if(NS_OK==result) {
result=aScanner.Peek(theChar);
if(NS_OK==result) {
if (kSemicolon == theChar) {
// consume semicolon that stopped the scan
aString+=theChar;
result=aScanner.GetChar(theChar);
}
}
}//if
} //else
} //if
return result;
}
#define PA_REMAP_128_TO_160_ILLEGAL_NCR 1
#ifdef PA_REMAP_128_TO_160_ILLEGAL_NCR
/**
* Map some illegal but commonly used numeric entities into their
* appropriate unicode value.
*/
#define NOT_USED 0xfffd
static PRUint16 PA_HackTable[] = {
NOT_USED,
NOT_USED,
0x201a, /* SINGLE LOW-9 QUOTATION MARK */
0x0192, /* LATIN SMALL LETTER F WITH HOOK */
0x201e, /* DOUBLE LOW-9 QUOTATION MARK */
0x2026, /* HORIZONTAL ELLIPSIS */
0x2020, /* DAGGER */
0x2021, /* DOUBLE DAGGER */
0x02c6, /* MODIFIER LETTER CIRCUMFLEX ACCENT */
0x2030, /* PER MILLE SIGN */
0x0160, /* LATIN CAPITAL LETTER S WITH CARON */
0x2039, /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
0x0152, /* LATIN CAPITAL LIGATURE OE */
NOT_USED,
0x017D, /* CAPITAL Z HACEK */
NOT_USED,
NOT_USED,
0x2018, /* LEFT SINGLE QUOTATION MARK */
0x2019, /* RIGHT SINGLE QUOTATION MARK */
0x201c, /* LEFT DOUBLE QUOTATION MARK */
0x201d, /* RIGHT DOUBLE QUOTATION MARK */
0x2022, /* BULLET */
0x2013, /* EN DASH */
0x2014, /* EM DASH */
0x02dc, /* SMALL TILDE */
0x2122, /* TRADE MARK SIGN */
0x0161, /* LATIN SMALL LETTER S WITH CARON */
0x203a, /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */
0x0153, /* LATIN SMALL LIGATURE OE */
NOT_USED,
NOT_USED,
0x0178 /* LATIN CAPITAL LETTER Y WITH DIAERESIS */
};
#endif /* PA_REMAP_128_TO_160_ILLEGAL_NCR */
/*
* This method converts this entity into its underlying
* unicode equivalent.
*
* @update gess 3/25/98
* @param aString will hold the resulting string value
* @return numeric (unichar) value
*/
PRInt32 CEntityToken::TranslateToUnicodeStr(nsString& aString) {
PRInt32 value=0;
if(mTextValue.Length()>1) {
PRUnichar theChar0=mTextValue.CharAt(0);
if(kHashsign==theChar0) {
PRInt32 err=0;
value=mTextValue.ToInteger(&err,kAutoDetect);
if(0==err) {
#ifdef PA_REMAP_128_TO_160_ILLEGAL_NCR
/* for some illegal, but popular usage */
if ((value >= 0x0080) && (value <= 0x009f)) {
value = PA_HackTable[value - 0x0080];
}
#endif
aString.Append(PRUnichar(value));
}//if
}
else{
value = nsHTMLEntities::EntityToUnicode(mTextValue);
if(-1<value) {
//we found a named entity...
aString.Assign(PRUnichar(value));
}
}//else
}//if
return value;
}
/*
* Dump contents of this token to givne output stream
*
* @update gess 3/25/98
* @param out -- ostream to output content
* @return
*/
void CEntityToken::DebugDumpSource(nsOutputStream& out) {
char* cp=mTextValue.ToNewCString();
out << "&" << *cp;
delete[] cp;
}
const nsAReadableString& CEntityToken::GetStringValue(void)
{
return mTextValue;
}
/*
*
*
* @update gess 3/25/98
* @param anOutputString will recieve the result
* @return nada
*/
void CEntityToken::GetSource(nsString& anOutputString){
anOutputString.AppendWithConversion("&");
anOutputString+=mTextValue;
//anOutputString+=";";
}
/*
*
*
* @update harishd 03/23/00
* @param result appended to the output string.
* @return nada
*/
void CEntityToken::AppendSource(nsString& anOutputString){
anOutputString.AppendWithConversion("&");
anOutputString+=mTextValue;
//anOutputString+=";";
}
/*
* default constructor
*
* @update gess 3/25/98
* @param aName -- string to init token name with
* @return
*/
CScriptToken::CScriptToken() : CHTMLToken(eHTMLTag_script) {
}
/*
* default constructor
*
* @update gess 3/25/98
* @param aName -- string to init token name with
* @return
*/
CScriptToken::CScriptToken(const nsAReadableString& aString) : CHTMLToken(eHTMLTag_script) {
mTextValue.Assign(aString);
}
/*
*
*
* @update gess 3/25/98
* @param
* @return
*/
const char* CScriptToken::GetClassName(void) {
return "script";
}
/*
*
*
* @update gess 3/25/98
* @param
* @return
*/
PRInt32 CScriptToken::GetTokenType(void) {
return eToken_script;
}
const nsAReadableString& CScriptToken::GetStringValue(void)
{
return mTextValue;
}
/*
* default constructor
*
* @update gess 3/25/98
* @param aName -- string to init token name with
* @return
*/
CStyleToken::CStyleToken() : CHTMLToken(eHTMLTag_style) {
}
CStyleToken::CStyleToken(const nsAReadableString& aString) : CHTMLToken(eHTMLTag_style) {
mTextValue.Assign(aString);
}
/*
*
*
* @update gess 3/25/98
* @param
* @return
*/
const char* CStyleToken::GetClassName(void) {
return "style";
}
/*
*
*
* @update gess 3/25/98
* @param
* @return
*/
PRInt32 CStyleToken::GetTokenType(void) {
return eToken_style;
}
const nsAReadableString& CStyleToken::GetStringValue(void)
{
return mTextValue;
}
/**
*
* @update gess4/25/98
* @param
* @return
*/
const char* GetTagName(PRInt32 aTag) {
const nsCString& result = nsHTMLTags::GetStringValue((nsHTMLTag) aTag);
if (0 == result.Length()) {
if(aTag>=eHTMLTag_userdefined)
return gUserdefined;
else return 0;
}
return result;
}
/**
*
*
* @update gess 9/23/98
* @param
* @return
*/
CInstructionToken::CInstructionToken() : CHTMLToken(eHTMLTag_unknown) {
}
/**
*
*
* @update gess 9/23/98
* @param
* @return
*/
CInstructionToken::CInstructionToken(const nsAReadableString& aString) : CHTMLToken(eHTMLTag_unknown) {
mTextValue.Assign(aString);
}
/**
*
*
* @update gess 9/23/98
* @param
* @return
*/
nsresult CInstructionToken::Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aMode){
mTextValue.AssignWithConversion("<?");
nsresult result=aScanner.ReadUntil(mTextValue,kGreaterThan,PR_TRUE);
return result;
}
/**
*
*
* @update gess 9/23/98
* @param
* @return
*/
const char* CInstructionToken::GetClassName(void){
return "instruction";
}
/**
*
*
* @update gess 9/23/98
* @param
* @return
*/
PRInt32 CInstructionToken::GetTokenType(void){
return eToken_instruction;
}
const nsAReadableString& CInstructionToken::GetStringValue(void)
{
return mTextValue;
}
CErrorToken::CErrorToken(nsParserError *aError) : CHTMLToken(eHTMLTag_unknown)
{
mError = aError;
}
CErrorToken::~CErrorToken()
{
delete mError;
}
PRInt32 CErrorToken::GetTokenType(void){
return eToken_error;
}
const char* CErrorToken::GetClassName(void){
return "error";
}
void CErrorToken::SetError(nsParserError *aError) {
mError = aError;
}
const nsParserError * CErrorToken::GetError(void)
{
return mError;
}
const nsAReadableString& CErrorToken::GetStringValue(void)
{
return mTextValue;
}
// Doctype decl token
CDoctypeDeclToken::CDoctypeDeclToken(eHTMLTags aTag)
: CHTMLToken(aTag) {
}
CDoctypeDeclToken::CDoctypeDeclToken(const nsAReadableString& aString,eHTMLTags aTag)
: CHTMLToken(aTag), mTextValue(aString) {
}
/**
* This method consumes a doctype element.
* Note: I'm rewriting this method to seek to the first <, since quotes can really screw us up.
*
* @update gess 9/23/98
* @param
* @return
*/
nsresult CDoctypeDeclToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aMode) {
nsresult result =NS_OK;
nsReadingIterator<PRUnichar> theCurrOffset, endPos;
aScanner.CurrentPosition(theCurrOffset);
aScanner.EndReading(endPos);
FindCharInReadable(PRUnichar(kLessThan), theCurrOffset, endPos);
mTextValue.AssignWithConversion("<!");
if(theCurrOffset != endPos) {
result=aScanner.ReadUntil(mTextValue,'<',PR_FALSE);
}
else {
result=aScanner.ReadUntil(mTextValue,'>',PR_TRUE);
}
return result;
}
const char* CDoctypeDeclToken::GetClassName(void) {
return "doctype";
}
PRInt32 CDoctypeDeclToken::GetTokenType(void) {
return eToken_doctypeDecl;
}
const nsAReadableString& CDoctypeDeclToken::GetStringValue(void)
{
return mTextValue;
}
void CDoctypeDeclToken::SetStringValue(const nsAReadableString& aStr)
{
mTextValue.Assign(aStr);
}