/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* * The contents of this file are subject to the Netscape Public License * Version 1.0 (the "NPL"); you may not use this file except in * compliance with the NPL. You may obtain a copy of the NPL at * http://www.mozilla.org/NPL/ * * Software distributed under the NPL is distributed on an "AS IS" basis, * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL * for the specific language governing rights and limitations under the * NPL. * * The Initial Developer of this code under the NPL is Netscape * Communications Corporation. Portions created by Netscape are * Copyright (C) 1998 Netscape Communications Corporation. All Rights * Reserved. */ #include #include #include #include "nsScanner.h" #include "nsToken.h" #include "nsHTMLTokens.h" #include "nsIParser.h" #include "prtypes.h" #include "nsDebug.h" #include "nsHTMLTags.h" #include "nsHTMLEntities.h" #include "nsCRT.h" //#define GESS_MACHINE #ifdef GESS_MACHINE #include "nsEntityEx.cpp" #endif static const char* gUserdefined = "userdefined"; const PRInt32 kMAXNAMELEN=10; /************************************************************** And now for the token classes... **************************************************************/ /* * default constructor * * @update gess 3/25/98 * @param * @return */ CHTMLToken::CHTMLToken(const nsString& aName,eHTMLTags aTag) : CToken(aName) { mTypeID=aTag; } /* * constructor from tag id * * @update gess 3/25/98 * @param * @return */ CHTMLToken::CHTMLToken(eHTMLTags aTag) : CToken(aTag) { } /** * Setter method that changes the string value of this token * @update gess5/11/98 * @param name is a char* value containing new string value */ void CHTMLToken::SetStringValue(const char* name){ if(name) { mTextValue=name; mTypeID = NS_TagToEnum(name); } } /* * constructor from tag id * * @update gess 3/25/98 * @param * @return */ CStartToken::CStartToken(eHTMLTags aTag) : CHTMLToken(aTag) { mAttributed=PR_FALSE; mEmpty=PR_FALSE; } /* * constructor from tag id * * @update gess 3/25/98 * @param * @return */ CStartToken::CStartToken(nsString& aString,eHTMLTags aTag) : CHTMLToken(aString,aTag) { mAttributed=PR_FALSE; mEmpty=PR_FALSE; } /** * * @update gess8/4/98 * @param * @return */ void CStartToken::Reinitialize(PRInt32 aTag, const nsString& aString){ CToken::Reinitialize(aTag,aString); mAttributed=PR_FALSE; mEmpty=PR_FALSE; } /* * This method returns the typeid (the tag type) for this token. * * @update gess 3/25/98 * @param * @return */ PRInt32 CStartToken::GetTypeID(){ if(eHTMLTag_unknown==mTypeID) { nsAutoString tmp(mTextValue); char cbuf[20]; tmp.ToCString(cbuf, sizeof(cbuf)); mTypeID = NS_TagToEnum(cbuf); } return mTypeID; } /* * * * @update gess 3/25/98 * @param * @return */ const char* CStartToken::GetClassName(void) { return "start"; } /* * * * @update gess 3/25/98 * @param * @return */ PRInt32 CStartToken::GetTokenType(void) { return eToken_start; } /* * * * @update gess 3/25/98 * @param * @return */ void CStartToken::SetAttributed(PRBool aValue) { mAttributed=aValue; } /* * * * @update gess 3/25/98 * @param * @return */ PRBool CStartToken::IsAttributed(void) { return mAttributed; } /* * * * @update gess 3/25/98 * @param * @return */ void CStartToken::SetEmpty(PRBool aValue) { mEmpty=aValue; } /* * * * @update gess 3/25/98 * @param * @return */ PRBool CStartToken::IsEmpty(void) { return mEmpty; } static nsString& GetIdentChars(void) { static nsString gIdentChars("-0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz"); return gIdentChars; } static nsString& GetNumericChars(void) { static nsString gNumChars("0123456789ABCDEFabcdef"); return gNumChars; } /* * Consume the identifier portion of the start tag * * @update gess 3/25/98 * @param aChar -- last char consumed from stream * @param aScanner -- controller of underlying input source * @return error result */ nsresult CStartToken::Consume(PRUnichar aChar, nsScanner& aScanner) { //if you're here, we've already Consumed the < char, and are //ready to Consume the rest of the open tag identifier. //Stop consuming as soon as you see a space or a '>'. //NOTE: We don't Consume the tag attributes here, nor do we eat the ">" mTextValue=aChar; nsresult result=aScanner.ReadWhile(mTextValue,GetIdentChars(),PR_TRUE,PR_FALSE); char buffer[300]; mTextValue.ToCString(buffer,sizeof(buffer)-1); mTypeID = NS_TagToEnum(buffer); if(eHTMLTag_image==mTypeID){ mTypeID=eHTMLTag_img; } //Good. Now, let's skip whitespace after the identifier, //and see if the next char is ">". If so, we have a complete //tag without attributes. if(NS_OK==result) { result=aScanner.SkipWhitespace(); if(NS_OK==result) { result=aScanner.GetChar(aChar); if(NS_OK==result) { if(kGreaterThan!=aChar) { //look for '>' //push that char back, since we apparently have attributes... result=aScanner.PutBack(aChar); mAttributed=PR_TRUE; } //if } //if }//if } return result; } /* * Dump contents of this token to givne output stream * * @update gess 3/25/98 * @param out -- ostream to output content * @return */ void CStartToken::DebugDumpSource(ostream& out) { char buffer[1000]; mTextValue.ToCString(buffer,sizeof(buffer)-1); out << "<" << buffer; if(!mAttributed) out << ">"; } /* * * * @update gess 3/25/98 * @param anOutputString will recieve the result * @return nada */ void CStartToken::GetSource(nsString& anOutputString){ anOutputString="<"; anOutputString+=mTextValue; if(!mAttributed) anOutputString+=">"; } /* * constructor from tag id * * @update gess 3/25/98 * @param * @return */ CEndToken::CEndToken(eHTMLTags aTag) : CHTMLToken(aTag) { SetStringValue(GetTagName(aTag)); } /* * default constructor for end token * * @update gess 3/25/98 * @param aName -- char* containing token name * @return */ CEndToken::CEndToken(const nsString& aName) : CHTMLToken(aName) { } /* * Consume the identifier portion of the end tag * * @update gess 3/25/98 * @param aChar -- last char consumed from stream * @param aScanner -- controller of underlying input source * @return error result */ nsresult CEndToken::Consume(PRUnichar aChar, nsScanner& aScanner) { //if you're here, we've already Consumed the '. //NOTE: We don't Consume the tag attributes here, nor do we eat the ">" mTextValue=""; nsresult result=aScanner.ReadUntil(mTextValue,kGreaterThan,PR_FALSE); if(NS_OK==result){ char buffer[20]; PRInt32 theIndex=mTextValue.FindCharInSet(" \r\n\t\b",0); PRInt32 theMaxLen=(kNotFound==theIndex) ? sizeof(buffer)-1 : theIndex; mTextValue.ToCString(buffer,theMaxLen+1); buffer[theMaxLen]=0; mTypeID= NS_TagToEnum(buffer); result=aScanner.GetChar(aChar); //eat the closing '>; } return result; } /* * Asks the token to determine the HTMLTag type of * the token. This turns around and looks up the tag name * in the tag dictionary. * * @update gess 3/25/98 * @param * @return eHTMLTag id of this endtag */ PRInt32 CEndToken::GetTypeID(){ if(eHTMLTag_unknown==mTypeID) { nsAutoString tmp(mTextValue); char cbuf[200]; tmp.ToCString(cbuf, sizeof(cbuf)); mTypeID = NS_TagToEnum(cbuf); switch(mTypeID) { case eHTMLTag_dir: case eHTMLTag_menu: mTypeID=eHTMLTag_ul; break; default: break; } } return mTypeID; } /* * * * @update gess 3/25/98 * @param * @return */ const char* CEndToken::GetClassName(void) { return "/end"; } /* * * * @update gess 3/25/98 * @param * @return */ PRInt32 CEndToken::GetTokenType(void) { return eToken_end; } /* * Dump contents of this token to givne output stream * * @update gess 3/25/98 * @param out -- ostream to output content * @return */ void CEndToken::DebugDumpSource(ostream& out) { char buffer[1000]; mTextValue.ToCString(buffer,sizeof(buffer)-1); out << ""; } /* * * * @update gess 3/25/98 * @param anOutputString will recieve the result * @return nada */ void CEndToken::GetSource(nsString& anOutputString){ anOutputString=""; } /* * default constructor * * @update gess 3/25/98 * @param aName -- string to init token name with * @return */ CTextToken::CTextToken() : CHTMLToken(eHTMLTag_text) { } /* * string based constructor * * @update gess 3/25/98 * @param aName -- string to init token name with * @return */ CTextToken::CTextToken(const nsString& aName) : CHTMLToken(aName) { mTypeID=eHTMLTag_text; } /* * * * @update gess 3/25/98 * @param * @return */ const char* CTextToken::GetClassName(void) { return "text"; } /* * * * @update gess 3/25/98 * @param * @return */ PRInt32 CTextToken::GetTokenType(void) { return eToken_text; } /* * Consume as much clear text from scanner as possible. * * @update gess 3/25/98 * @param aChar -- last char consumed from stream * @param aScanner -- controller of underlying input source * @return error result */ nsresult CTextToken::Consume(PRUnichar aChar, nsScanner& aScanner) { static nsAutoString terminals("&<\r\n"); nsresult result=NS_OK; PRBool done=PR_FALSE; while((NS_OK==result) && (!done)) { result=aScanner.ReadUntil(mTextValue,terminals,PR_FALSE,PR_FALSE); if(NS_OK==result) { result=aScanner.Peek(aChar); if((kCR==aChar) && (NS_OK==result)) { result=aScanner.GetChar(aChar); //strip off the \r result=aScanner.Peek(aChar); //then see what's next. if(NS_OK==result) { switch(aChar) { case kCR: result=aScanner.GetChar(aChar); //strip off the \r mTextValue.Append("\n\n"); break; case kNewLine: //which means we saw \r\n, which becomes \n result=aScanner.GetChar(aChar); //strip off the \n //now fall through on purpose... default: mTextValue.Append("\n"); break; }//switch }//if } else done=PR_TRUE; } } return result; } /* * Consume as much clear text from scanner as possible. * * @update gess 3/25/98 * @param aChar -- last char consumed from stream * @param aScanner -- controller of underlying input source * @return error result */ nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScanner& aScanner,nsString& aTerminalString){ PRBool done=PR_FALSE; nsresult result=NS_OK; nsString temp; PRUnichar theChar; nsAutoString theRight; PRInt32 rpos=0; //We're going to try a new algorithm here. Rather than scan for the matching //end tag like we used to do, we're now going to scan for whitespace and comments. //If we find either, just eat them. If we find text or a tag, then go to the //target endtag, or the start of another comment. static nsAutoString theWhitespace2("\b\t "); static nsAutoString theTerminals("\"\'<"); PRInt32 termStrLen=aTerminalString.Length(); while((!done) && (NS_OK==result)) { result=aScanner.GetChar(aChar); if((NS_OK==result) && (kLessThan==aChar)) { //we're reading a tag or a comment... //FYI: