/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* * The contents of this file are subject to the Netscape Public * License Version 1.1 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.mozilla.org/NPL/ * * Software distributed under the License is distributed on an "AS * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or * implied. See the License for the specific language governing * rights and limitations under the License. * * The Original Code is mozilla.org code. * * The Initial Developer of the Original Code is Netscape * Communications Corporation. Portions created by Netscape are * Copyright (C) 1998 Netscape Communications Corporation. All * Rights Reserved. * * Contributor(s): */ #include #include #include #include "nsScanner.h" #include "nsToken.h" #include "nsHTMLTokens.h" #include "nsIParser.h" #include "prtypes.h" #include "nsDebug.h" #include "nsHTMLTags.h" #include "nsHTMLEntities.h" #include "nsCRT.h" static const char* gUserdefined = "userdefined"; /************************************************************** And now for the token classes... **************************************************************/ /* * default constructor * * @update gess 3/25/98 * @param * @return */ CHTMLToken::CHTMLToken(const nsString& aName,eHTMLTags aTag) : CToken(aName) { mTypeID=aTag; } /* * constructor from tag id * * @update gess 3/25/98 * @param * @return */ CHTMLToken::CHTMLToken(eHTMLTags aTag) : CToken(aTag) { } /** * Setter method that changes the string value of this token * @update gess5/11/98 * @param name is a char* value containing new string value */ void CHTMLToken::SetStringValue(const char* name){ if(name) { mTextValue=name; mTypeID = nsHTMLTags::LookupTag(mTextValue); } } /* * constructor from tag id * * @update gess 3/25/98 * @param * @return */ CStartToken::CStartToken(eHTMLTags aTag) : CHTMLToken(aTag) { mAttributed=PR_FALSE; mEmpty=PR_FALSE; mOrigin=-1; } /* * constructor from tag id * * @update gess 3/25/98 * @param * @return */ CStartToken::CStartToken(nsString& aString,eHTMLTags aTag) : CHTMLToken(aString,aTag) { mAttributed=PR_FALSE; mEmpty=PR_FALSE; mOrigin=-1; } /** * * @update gess8/4/98 * @param * @return */ void CStartToken::Reinitialize(PRInt32 aTag, const nsString& aString){ CToken::Reinitialize(aTag,aString); mAttributed=PR_FALSE; mUseCount=0; //assume recycling is needed by default. mEmpty=PR_FALSE; mOrigin=-1; mTrailingContent.Truncate(); } /* * This method returns the typeid (the tag type) for this token. * * @update gess 3/25/98 * @param * @return */ PRInt32 CStartToken::GetTypeID(){ if(eHTMLTag_unknown==mTypeID) { mTypeID = nsHTMLTags::LookupTag(mTextValue); } return mTypeID; } /* * * * @update gess 3/25/98 * @param * @return */ const char* CStartToken::GetClassName(void) { return "start"; } /* * * * @update gess 3/25/98 * @param * @return */ PRInt32 CStartToken::GetTokenType(void) { return eToken_start; } /* * * * @update gess 3/25/98 * @param * @return */ void CStartToken::SetAttributed(PRBool aValue) { mAttributed=aValue; } /* * * * @update gess 3/25/98 * @param * @return */ PRBool CStartToken::IsAttributed(void) { return mAttributed; } /* * * * @update gess 3/25/98 * @param * @return */ void CStartToken::SetEmpty(PRBool aValue) { mEmpty=aValue; } /* * * * @update gess 3/25/98 * @param * @return */ PRBool CStartToken::IsEmpty(void) { return mEmpty; } /* * Consume the identifier portion of the start tag * * @update gess 3/25/98 * @param aChar -- last char consumed from stream * @param aScanner -- controller of underlying input source * @return error result */ nsresult CStartToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aMode) { //if you're here, we've already Consumed the < char, and are //ready to Consume the rest of the open tag identifier. //Stop consuming as soon as you see a space or a '>'. //NOTE: We don't Consume the tag attributes here, nor do we eat the ">" mTextValue=aChar; nsresult result=aScanner.ReadIdentifier(mTextValue); mTypeID = nsHTMLTags::LookupTag(mTextValue); //Good. Now, let's skip whitespace after the identifier, //and see if the next char is ">". If so, we have a complete //tag without attributes. if(NS_OK==result) { mOrigin=aScanner.GetOffset(); // We need this position to record the trailing contents of the start token result=aScanner.SkipWhitespace(); mNewlineCount += aScanner.GetNewlinesSkipped(); if(NS_OK==result) { result=aScanner.GetChar(aChar); if(NS_OK==result) { if(kGreaterThan!=aChar) { //look for '>' //push that char back, since we apparently have attributes... result=aScanner.PutBack(aChar); mAttributed=PR_TRUE; } //if } //if }//if } return result; } /* * Dump contents of this token to givne output stream * * @update gess 3/25/98 * @param out -- ostream to output content * @return */ void CStartToken::DebugDumpSource(nsOutputStream& out) { char buffer[1000]; mTextValue.ToCString(buffer,sizeof(buffer)); out << "<" << buffer; if(!mAttributed) out << ">"; } /* * * * @update gess 3/25/98 * @param anOutputString will recieve the result * @return nada */ void CStartToken::GetSource(nsString& anOutputString){ anOutputString="<"; anOutputString+=mTextValue; anOutputString+=mTrailingContent; } /* * constructor from tag id * * @update gess 3/25/98 * @param * @return */ CEndToken::CEndToken(eHTMLTags aTag) : CHTMLToken(aTag) { SetStringValue(GetTagName(aTag)); } /* * default constructor for end token * * @update gess 3/25/98 * @param aName -- char* containing token name * @return */ CEndToken::CEndToken(const nsString& aName) : CHTMLToken(aName) { } /* * Consume the identifier portion of the end tag * * @update gess 3/25/98 * @param aChar -- last char consumed from stream * @param aScanner -- controller of underlying input source * @return error result */ nsresult CEndToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aMode) { //if you're here, we've already Consumed the '. //NOTE: We don't Consume the tag attributes here, nor do we eat the ">" mTextValue=""; nsresult result=aScanner.ReadUntil(mTextValue,kGreaterThan,PR_FALSE); if(NS_OK==result){ PRInt32 theIndex=mTextValue.FindCharInSet(" \r\n\t\b",0); nsAutoString buffer(mTextValue); buffer.Truncate(theIndex); mTypeID= nsHTMLTags::LookupTag(buffer); result=aScanner.GetChar(aChar); //eat the closing '>; } return result; } /* * Asks the token to determine the HTMLTag type of * the token. This turns around and looks up the tag name * in the tag dictionary. * * @update gess 3/25/98 * @param * @return eHTMLTag id of this endtag */ PRInt32 CEndToken::GetTypeID(){ if(eHTMLTag_unknown==mTypeID) { mTypeID = nsHTMLTags::LookupTag(mTextValue); switch(mTypeID) { case eHTMLTag_dir: case eHTMLTag_menu: mTypeID=eHTMLTag_ul; break; default: break; } } return mTypeID; } /* * * * @update gess 3/25/98 * @param * @return */ const char* CEndToken::GetClassName(void) { return "/end"; } /* * * * @update gess 3/25/98 * @param * @return */ PRInt32 CEndToken::GetTokenType(void) { return eToken_end; } /* * Dump contents of this token to givne output stream * * @update gess 3/25/98 * @param out -- ostream to output content * @return */ void CEndToken::DebugDumpSource(nsOutputStream& out) { char buffer[1000]; mTextValue.ToCString(buffer,sizeof(buffer)); out << ""; } /* * * * @update gess 3/25/98 * @param anOutputString will recieve the result * @return nada */ void CEndToken::GetSource(nsString& anOutputString){ anOutputString=""; } /* * default constructor * * @update gess 3/25/98 * @param aName -- string to init token name with * @return */ CTextToken::CTextToken() : CHTMLToken(eHTMLTag_text) { } /* * string based constructor * * @update gess 3/25/98 * @param aName -- string to init token name with * @return */ CTextToken::CTextToken(const nsString& aName) : CHTMLToken(aName) { mTypeID=eHTMLTag_text; } /* * * * @update gess 3/25/98 * @param * @return */ const char* CTextToken::GetClassName(void) { return "text"; } /* * * * @update gess 3/25/98 * @param * @return */ PRInt32 CTextToken::GetTokenType(void) { return eToken_text; } /* * Consume as much clear text from scanner as possible. * * @update gess 3/25/98 * @param aChar -- last char consumed from stream * @param aScanner -- controller of underlying input source * @return error result */ nsresult CTextToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aMode) { static const char* theTerminals="\n\r&<"; nsresult result=NS_OK; PRBool done=PR_FALSE; while((NS_OK==result) && (!done)) { result=aScanner.ReadUntil(mTextValue,theTerminals,PR_TRUE,PR_FALSE); if(NS_OK==result) { result=aScanner.Peek(aChar); if(((kCR==aChar) || (kNewLine==aChar)) && (NS_OK==result)) { result=aScanner.GetChar(aChar); //strip off the char PRUnichar theNextChar; result=aScanner.Peek(theNextChar); //then see what's next. switch(aChar) { case kCR: // result=aScanner.GetChar(aChar); if(kLF==theNextChar) { result=aScanner.GetChar(theNextChar); } else if(kCR==theNextChar) { result=aScanner.GetChar(theNextChar); result=aScanner.Peek(theNextChar); //then see what's next. if(kLF==theNextChar) { result=aScanner.GetChar(theNextChar); } mTextValue.Append("\n"); mNewlineCount++; } mTextValue.Append("\n"); mNewlineCount++; break; case kLF: if((kLF==theNextChar) || (kCR==theNextChar)) { result=aScanner.GetChar(theNextChar); mTextValue.Append("\n"); mNewlineCount++; } mTextValue.Append("\n"); mNewlineCount++; break; default: mTextValue.Append("\n"); mNewlineCount++; break; } //switch } else done=PR_TRUE; } } return result; } /* * Consume as much clear text from scanner as possible. * * @update gess 3/25/98 * @param aChar -- last char consumed from stream * @param aScanner -- controller of underlying input source * @return error result */ nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScanner& aScanner, nsString& aTerminalString,PRInt32 aMode,PRBool& aFlushTokens){ PRBool done=PR_FALSE; nsresult result=NS_OK; PRUnichar theChar; nsAutoString theRight; PRInt32 rpos=0; //We're going to try a new algorithm here. Rather than scan for the matching //end tag like we used to do, we're now going to scan for whitespace and comments. //If we find either, just eat them. If we find text or a tag, then go to the //target endtag, or the start of another comment. PRInt32 termStrLen=aTerminalString.Length(); while((!done) && (NS_OK==result)) { result=aScanner.GetChar(aChar); if((NS_OK==result) && (kLessThan==aChar)) { //we're reading a tag or a comment... //FYI: