2508 lines
67 KiB
C++
2508 lines
67 KiB
C++
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/* ***** BEGIN LICENSE BLOCK *****
|
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
*
|
|
* The contents of this file are subject to the Mozilla Public License Version
|
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
* the License. You may obtain a copy of the License at
|
|
* http://www.mozilla.org/MPL/
|
|
*
|
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
* for the specific language governing rights and limitations under the
|
|
* License.
|
|
*
|
|
* The Original Code is mozilla.org code.
|
|
*
|
|
* The Initial Developer of the Original Code is
|
|
* Netscape Communications Corporation.
|
|
* Portions created by the Initial Developer are Copyright (C) 1998
|
|
* the Initial Developer. All Rights Reserved.
|
|
*
|
|
* Contributor(s):
|
|
*
|
|
* Alternatively, the contents of this file may be used under the terms of
|
|
* either of the GNU General Public License Version 2 or later (the "GPL"),
|
|
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
* of those above. If you wish to allow use of your version of this file only
|
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
* use your version of this file under the terms of the MPL, indicate your
|
|
* decision by deleting the provisions above and replace them with the notice
|
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
* the provisions above, a recipient may use your version of this file under
|
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
*
|
|
* ***** END LICENSE BLOCK ***** */
|
|
|
|
#include <ctype.h>
|
|
#include <time.h>
|
|
#include <stdio.h>
|
|
#include "nsScanner.h"
|
|
#include "nsToken.h"
|
|
#include "nsIAtom.h"
|
|
#include "nsHTMLTokens.h"
|
|
#include "prtypes.h"
|
|
#include "nsDebug.h"
|
|
#include "nsHTMLTags.h"
|
|
#include "nsHTMLEntities.h"
|
|
#include "nsCRT.h"
|
|
#include "nsReadableUtils.h"
|
|
#include "nsUnicharUtils.h"
|
|
#include "nsScanner.h"
|
|
|
|
|
|
// Zero-terminated PRUnichar spelling of "userdefined".
static const PRUnichar sUserdefined[] = {
  'u', 's', 'e', 'r', 'd', 'e', 'f', 'i', 'n', 'e', 'd',
  0
};

// Zero-terminated character set: '&', backspace, tab, LF, CR, space and '>'.
// NOTE(review): judging by the name these are the characters that end an
// attribute value; the code that consumes the table is not in this part of
// the file -- confirm against its users.
static const PRUnichar kAttributeTerminalChars[] = {
  PRUnichar('&'),
  PRUnichar('\b'),
  PRUnichar('\t'),
  PRUnichar('\n'),
  PRUnichar('\r'),
  PRUnichar(' '),
  PRUnichar('>'),
  PRUnichar(0)
};
|
|
|
|
|
|
/**************************************************************
|
|
And now for the token classes...
|
|
**************************************************************/
|
|
|
|
/*
|
|
* constructor from tag id
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
CHTMLToken::CHTMLToken(eHTMLTags aTag) : CToken(aTag) {
|
|
}
|
|
|
|
|
|
/**
 * Destructor; performs no work of its own.
 */
CHTMLToken::~CHTMLToken()
{
}
|
|
|
|
/*
|
|
* constructor from tag id
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
CStartToken::CStartToken(eHTMLTags aTag) : CHTMLToken(aTag) {
|
|
mEmpty=PR_FALSE;
|
|
mContainerInfo=eFormUnknown;
|
|
#ifdef DEBUG
|
|
mAttributed = PR_FALSE;
|
|
#endif
|
|
}
|
|
|
|
/**
 * Construct a start token from a tag name only.
 *
 * The base class is given eHTMLTag_unknown; GetTypeID() resolves the real
 * id from the stored name on first use.
 *
 * @param aName  tag name copied into mTextValue
 */
CStartToken::CStartToken(const nsAString& aName)
  : CHTMLToken(eHTMLTag_unknown)
{
  mContainerInfo = eFormUnknown;
  mEmpty = PR_FALSE;
  mTextValue.Assign(aName);
#ifdef DEBUG
  mAttributed = PR_FALSE;
#endif
}
|
|
|
|
/**
 * Construct a start token from both a tag name and a tag id.
 *
 * @param aName  tag name copied into mTextValue
 * @param aTag   tag id forwarded to the CHTMLToken base constructor
 */
CStartToken::CStartToken(const nsAString& aName, eHTMLTags aTag)
  : CHTMLToken(aTag)
{
  mContainerInfo = eFormUnknown;
  mEmpty = PR_FALSE;
  mTextValue.Assign(aName);
#ifdef DEBUG
  mAttributed = PR_FALSE;
#endif
}
|
|
|
|
/**
 * Hand out the atom stored by SetIDAttributeAtom().
 *
 * @param aResult  out: receives mIDAttributeAtom with an added reference
 *                 (NS_IF_ADDREF tolerates a null atom); must not be null
 * @return NS_OK, or the NS_ENSURE_ARG_POINTER failure code when aResult is
 *         null
 */
nsresult CStartToken::GetIDAttributeAtom(nsIAtom** aResult)
{
  NS_ENSURE_ARG_POINTER(aResult);

  *aResult = mIDAttributeAtom;
  NS_IF_ADDREF(*aResult);
  return NS_OK;
}
|
|
|
|
|
|
/**
 * Remember the atom later returned by GetIDAttributeAtom().
 *
 * @param aID  atom to store; a null value is rejected by NS_ENSURE_ARG
 * @return NS_OK, or the NS_ENSURE_ARG failure code when aID is null
 */
nsresult CStartToken::SetIDAttributeAtom(nsIAtom* aID)
{
  NS_ENSURE_ARG(aID);

  mIDAttributeAtom = aID;
  return NS_OK;
}
|
|
|
|
|
|
/*
|
|
* This method returns the typeid (the tag type) for this token.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CStartToken::GetTypeID(){
|
|
if(eHTMLTag_unknown==mTypeID) {
|
|
mTypeID = nsHTMLTags::LookupTag(mTextValue);
|
|
}
|
|
return mTypeID;
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CStartToken::GetClassName(void) {
|
|
return "start";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CStartToken::GetTokenType(void) {
|
|
return eToken_start;
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
void CStartToken::SetEmpty(PRBool aValue) {
|
|
mEmpty=aValue;
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRBool CStartToken::IsEmpty(void) {
|
|
return mEmpty;
|
|
}
|
|
|
|
|
|
/*
|
|
* Consume the identifier portion of the start tag
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @param aFlag - contains information such as |dtd mode|view mode|doctype|etc...
|
|
* @return error result
|
|
*/
|
|
nsresult CStartToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {
|
|
|
|
//if you're here, we've already Consumed the < char, and are
|
|
//ready to Consume the rest of the open tag identifier.
|
|
//Stop consuming as soon as you see a space or a '>'.
|
|
//NOTE: We don't Consume the tag attributes here, nor do we eat the ">"
|
|
|
|
nsresult result=NS_OK;
|
|
if (aFlag & NS_IPARSER_FLAG_HTML) {
|
|
nsAutoString theSubstr;
|
|
result=aScanner.GetIdentifier(theSubstr,PR_TRUE);
|
|
mTypeID = (PRInt32)nsHTMLTags::LookupTag(theSubstr);
|
|
// Save the original tag string if this is user-defined or if we
|
|
// are viewing source
|
|
if(eHTMLTag_userdefined==mTypeID || (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
|
|
mTextValue=theSubstr;
|
|
}
|
|
}
|
|
else {
|
|
//added PR_TRUE to readId() call below to fix bug 46083. The problem was that the tag given
|
|
//was written <title_> but since we didn't respect the '_', we only saw <title>. Then
|
|
//we searched for end title, which never comes (they give </title_>).
|
|
|
|
result=aScanner.ReadIdentifier(mTextValue,PR_TRUE);
|
|
mTypeID = nsHTMLTags::LookupTag(mTextValue);
|
|
}
|
|
|
|
if (NS_SUCCEEDED(result) && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
|
|
result = aScanner.SkipWhitespace(mNewlineCount);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
|
|
/**
 * Return the tag name of this start token.
 *
 * If no name has been stored yet and the tag id lies strictly between
 * eHTMLTag_unknown and eHTMLTag_text, the name is first filled in from the
 * tag table.
 *
 * @return reference to mTextValue
 */
const nsAString& CStartToken::GetStringValue()
{
  const PRBool idInTagTable =
    (eHTMLTag_unknown < mTypeID) && (mTypeID < eHTMLTag_text);

  if (idInTagTable && mTextValue.IsEmpty()) {
    mTextValue.Assign(nsHTMLTags::GetStringValue((nsHTMLTag) mTypeID));
  }

  return mTextValue;
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param anOutputString will recieve the result
|
|
* @return nada
|
|
*/
|
|
void CStartToken::GetSource(nsString& anOutputString){
|
|
anOutputString.Truncate();
|
|
AppendSourceTo(anOutputString);
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update harishd 03/23/00
|
|
* @param result appended to the output string.
|
|
* @return nada
|
|
*/
|
|
void CStartToken::AppendSourceTo(nsAString& anOutputString){
|
|
anOutputString.Append(PRUnichar('<'));
|
|
/*
|
|
* Watch out for Bug 15204
|
|
*/
|
|
if(!mTrailingContent.IsEmpty())
|
|
anOutputString.Append(mTrailingContent);
|
|
else {
|
|
if(!mTextValue.IsEmpty())
|
|
anOutputString.Append(mTextValue);
|
|
else
|
|
anOutputString.Append(GetTagName(mTypeID));
|
|
anOutputString.Append(PRUnichar('>'));
|
|
}
|
|
}
|
|
|
|
/*
|
|
* constructor from tag id
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
CEndToken::CEndToken(eHTMLTags aTag) : CHTMLToken(aTag) {
|
|
}
|
|
|
|
/**
 * Construct an end token from a tag name only; the tag id starts out as
 * eHTMLTag_unknown and is resolved by GetTypeID().
 *
 * @param aName  tag name copied into mTextValue
 */
CEndToken::CEndToken(const nsAString& aName)
  : CHTMLToken(eHTMLTag_unknown)
{
  mTextValue.Assign(aName);
}
|
|
|
|
/**
 * Construct an end token from both a tag name and a tag id.
 *
 * @param aName  tag name copied into mTextValue
 * @param aTag   tag id forwarded to the CHTMLToken base constructor
 */
CEndToken::CEndToken(const nsAString& aName, eHTMLTags aTag)
  : CHTMLToken(aTag)
{
  mTextValue.Assign(aName);
}
|
|
|
|
/*
|
|
* Consume the identifier portion of the end tag
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @param aFlag - contains information such as |dtd mode|view mode|doctype|etc...
|
|
* @return error result
|
|
*/
|
|
nsresult CEndToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag)
|
|
{
|
|
nsresult result = NS_OK;
|
|
if (aFlag & NS_IPARSER_FLAG_HTML) {
|
|
nsAutoString theSubstr;
|
|
result=aScanner.GetIdentifier(theSubstr,PR_TRUE);
|
|
NS_ENSURE_SUCCESS(result, result);
|
|
|
|
mTypeID = (PRInt32)nsHTMLTags::LookupTag(theSubstr);
|
|
// Save the original tag string if this is user-defined or if we
|
|
// are viewing source
|
|
if(eHTMLTag_userdefined==mTypeID ||
|
|
(aFlag & (NS_IPARSER_FLAG_VIEW_SOURCE | NS_IPARSER_FLAG_PRESERVE_CONTENT))) {
|
|
mTextValue=theSubstr;
|
|
}
|
|
}
|
|
else {
|
|
result = aScanner.ReadIdentifier(mTextValue,PR_TRUE);
|
|
NS_ENSURE_SUCCESS(result, result);
|
|
|
|
mTypeID = nsHTMLTags::LookupTag(mTextValue);
|
|
}
|
|
|
|
if (!(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
|
|
result = aScanner.SkipWhitespace(mNewlineCount);
|
|
NS_ENSURE_SUCCESS(result, result);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
|
|
/*
|
|
* Asks the token to determine the <i>HTMLTag type</i> of
|
|
* the token. This turns around and looks up the tag name
|
|
* in the tag dictionary.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return eHTMLTag id of this endtag
|
|
*/
|
|
PRInt32 CEndToken::GetTypeID(){
|
|
if(eHTMLTag_unknown==mTypeID) {
|
|
mTypeID = nsHTMLTags::LookupTag(mTextValue);
|
|
switch(mTypeID) {
|
|
case eHTMLTag_dir:
|
|
case eHTMLTag_menu:
|
|
mTypeID=eHTMLTag_ul;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
return mTypeID;
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CEndToken::GetClassName(void) {
|
|
return "/end";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CEndToken::GetTokenType(void) {
|
|
return eToken_end;
|
|
}
|
|
|
|
/**
 * Return the tag name of this end token.
 *
 * If no name has been stored yet and the tag id lies strictly between
 * eHTMLTag_unknown and eHTMLTag_text, the name is first filled in from the
 * tag table.
 *
 * @return reference to mTextValue
 */
const nsAString& CEndToken::GetStringValue()
{
  const PRBool idInTagTable =
    (eHTMLTag_unknown < mTypeID) && (mTypeID < eHTMLTag_text);

  if (idInTagTable && mTextValue.IsEmpty()) {
    mTextValue.Assign(nsHTMLTags::GetStringValue((nsHTMLTag) mTypeID));
  }

  return mTextValue;
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param anOutputString will recieve the result
|
|
* @return nada
|
|
*/
|
|
void CEndToken::GetSource(nsString& anOutputString){
|
|
anOutputString.Truncate();
|
|
AppendSourceTo(anOutputString);
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update harishd 03/23/00
|
|
* @param result appended to the output string.
|
|
* @return nada
|
|
*/
|
|
void CEndToken::AppendSourceTo(nsAString& anOutputString){
|
|
anOutputString.Append(NS_LITERAL_STRING("</"));
|
|
if(!mTextValue.IsEmpty())
|
|
anOutputString.Append(mTextValue);
|
|
else
|
|
anOutputString.Append(GetTagName(mTypeID));
|
|
anOutputString.Append(PRUnichar('>'));
|
|
}
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CTextToken::CTextToken() : CHTMLToken(eHTMLTag_text) {
|
|
}
|
|
|
|
|
|
/*
|
|
* string based constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CTextToken::CTextToken(const nsAString& aName) : CHTMLToken(eHTMLTag_text) {
|
|
mTextValue.Rebind(aName);
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CTextToken::GetClassName(void) {
|
|
return "text";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CTextToken::GetTokenType(void) {
|
|
return eToken_text;
|
|
}
|
|
|
|
/**
 * Length of the text currently bound to this token.
 *
 * @return mTextValue.Length()
 */
PRInt32 CTextToken::GetTextLength(void)
{
  return mTextValue.Length();
}
|
|
|
|
/*
|
|
* Consume as much clear text from scanner as possible.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
nsresult CTextToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {
|
|
static const PRUnichar theTerminalsChars[] =
|
|
{ PRUnichar('\n'), PRUnichar('\r'), PRUnichar('&'), PRUnichar('<'),
|
|
PRUnichar(0) };
|
|
static const nsReadEndCondition theEndCondition(theTerminalsChars);
|
|
nsresult result=NS_OK;
|
|
PRBool done=PR_FALSE;
|
|
nsScannerIterator origin, start, end;
|
|
|
|
// Start scanning after the first character, because we know it to
|
|
// be part of this text token (we wouldn't have come here if it weren't)
|
|
aScanner.CurrentPosition(origin);
|
|
start = origin;
|
|
++start;
|
|
aScanner.SetPosition(start);
|
|
aScanner.EndReading(end);
|
|
|
|
while((NS_OK==result) && (!done)) {
|
|
result=aScanner.ReadUntil(start, end, theEndCondition, PR_FALSE);
|
|
if(NS_OK==result) {
|
|
result=aScanner.Peek(aChar);
|
|
|
|
if(((kCR==aChar) || (kNewLine==aChar)) && (NS_OK==result)) {
|
|
result=aScanner.GetChar(aChar); //strip off the char
|
|
PRUnichar theNextChar;
|
|
result=aScanner.Peek(theNextChar); //then see what's next.
|
|
switch(aChar) {
|
|
case kCR:
|
|
// result=aScanner.GetChar(aChar);
|
|
if(kLF==theNextChar) {
|
|
// If the "\r" is followed by a "\n", don't replace it and
|
|
// let it be ignored by the layout system
|
|
end.advance(2);
|
|
result=aScanner.GetChar(theNextChar);
|
|
}
|
|
else {
|
|
// If it standalone, replace the "\r" with a "\n" so that
|
|
// it will be considered by the layout system
|
|
aScanner.ReplaceCharacter(end, kLF);
|
|
++end;
|
|
}
|
|
++mNewlineCount;
|
|
break;
|
|
case kLF:
|
|
++end;
|
|
++mNewlineCount;
|
|
break;
|
|
} //switch
|
|
}
|
|
else done=PR_TRUE;
|
|
}
|
|
}
|
|
|
|
aScanner.BindSubstring(mTextValue, origin, end);
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* Consume as much clear text from scanner as possible.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScanner& aScanner,
|
|
nsString& aEndTagName,PRInt32 aFlag,PRBool& aFlushTokens){
|
|
nsresult result=NS_OK;
|
|
nsScannerIterator theStartOffset, theCurrOffset, theTermStrPos, theStartCommentPos, theAltTermStrPos, endPos;
|
|
PRBool done=PR_FALSE;
|
|
PRBool theLastIteration=PR_FALSE;
|
|
|
|
aScanner.CurrentPosition(theStartOffset);
|
|
theCurrOffset = theStartOffset;
|
|
aScanner.EndReading(endPos);
|
|
theTermStrPos = theStartCommentPos = theAltTermStrPos = endPos;
|
|
|
|
// ALGORITHM: *** The performance is based on correctness of the document ***
|
|
// 1. Look for a '<' character. This could be
|
|
// a) Start of a comment (<!--), b) Start of the terminal string, or c) a start of a tag.
|
|
// We are interested in a) and b). c) is ignored because in CDATA we don't care for tags.
|
|
// NOTE: Technically speaking in CDATA we should ignore the comments too!! But for compatibility
|
|
// we don't.
|
|
// 2. Having the offset, for '<', search for the terminal string from there on and record its offset.
|
|
// 3. From the same '<' offset also search for start of a comment '<!--'. If found search for
|
|
// end comment '-->' between the terminal string and '<!--'. If you did not find the end
|
|
// comment, then we have a malformed document, i.e., this section has a prematured terminal string
|
|
// Ex. <SCRIPT><!-- document.write('</SCRIPT>') //--> </SCRIPT>. But anyway record terminal string's
|
|
// offset and update the current offset to the terminal string (prematured) offset and goto step 1.
|
|
// 4. Amen...If you found a terminal string and '-->'. Otherwise goto step 1.
|
|
// 5. If the end of the document is reached and if we still don't have the condition in step 4. then
|
|
// assume that the prematured terminal string is the actual terminal string and goto step 1. This
|
|
// will be our last iteration.
|
|
|
|
const NS_NAMED_LITERAL_STRING(ltslash, "</");
|
|
const nsString theTerminalString = ltslash + aEndTagName;
|
|
|
|
PRUint32 termStrLen=theTerminalString.Length();
|
|
while((result == NS_OK) && !done) {
|
|
PRBool found = PR_FALSE;
|
|
nsScannerIterator gtOffset,ltOffset = theCurrOffset;
|
|
while (FindCharInReadable(PRUnichar(kLessThan), ltOffset, endPos) &&
|
|
((PRUint32)ltOffset.size_forward() >= termStrLen ||
|
|
Distance(ltOffset, endPos) >= termStrLen)) {
|
|
// Make a copy of the (presumed) end tag and
|
|
// do a case-insensitive comparison
|
|
|
|
nsScannerIterator start(ltOffset), end(ltOffset);
|
|
end.advance(termStrLen);
|
|
|
|
if (CaseInsensitiveFindInReadable(theTerminalString,start,end) &&
|
|
end != endPos && (*end == '>' || *end == ' ' ||
|
|
*end == '\t' || *end == '\n' ||
|
|
*end == '\r' || *end == '\b')) {
|
|
gtOffset = end;
|
|
if (FindCharInReadable(PRUnichar(kGreaterThan), gtOffset, endPos)) {
|
|
found = PR_TRUE;
|
|
theTermStrPos = start;
|
|
}
|
|
break;
|
|
}
|
|
ltOffset.advance(1);
|
|
}
|
|
|
|
if (found && theTermStrPos != endPos) {
|
|
if(!(aFlag & NS_IPARSER_FLAG_STRICT_MODE) &&
|
|
!theLastIteration && !aIgnoreComments) {
|
|
nsScannerIterator endComment(ltOffset);
|
|
endComment.advance(5);
|
|
|
|
if ((theStartCommentPos == endPos) &&
|
|
FindInReadable(NS_LITERAL_STRING("<!--"), theCurrOffset, endComment)) {
|
|
theStartCommentPos = theCurrOffset;
|
|
}
|
|
|
|
if (theStartCommentPos != endPos) {
|
|
// Search for --> between <!-- and </TERMINALSTRING>.
|
|
theCurrOffset = theStartCommentPos;
|
|
nsScannerIterator terminal(theTermStrPos);
|
|
if (!RFindInReadable(NS_LITERAL_STRING("-->"),
|
|
theCurrOffset, terminal)) {
|
|
// If you're here it means that we have a bogus terminal string.
|
|
// Even though it is bogus, the position of the terminal string
|
|
// could be helpful in case we hit the rock bottom.
|
|
theAltTermStrPos = theTermStrPos;
|
|
|
|
// We did not find '-->' so keep searching for terminal string.
|
|
theCurrOffset = theTermStrPos;
|
|
theCurrOffset.advance(termStrLen);
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Make sure to preserve the end tag's representation if needed
|
|
if(aFlag & (NS_IPARSER_FLAG_VIEW_SOURCE | NS_IPARSER_FLAG_PRESERVE_CONTENT)) {
|
|
CopyUnicodeTo(ltOffset.advance(2),gtOffset,aEndTagName);
|
|
}
|
|
|
|
aScanner.BindSubstring(mTextValue, theStartOffset, theTermStrPos);
|
|
aScanner.SetPosition(gtOffset.advance(1));
|
|
|
|
// We found </SCRIPT>...permit flushing -> Ref: Bug 22485
|
|
aFlushTokens=PR_TRUE;
|
|
done = PR_TRUE;
|
|
}
|
|
else {
|
|
// We end up here if:
|
|
// a) when the buffer runs out ot data.
|
|
// b) when the terminal string is not found.
|
|
if(!aScanner.IsIncremental()) {
|
|
if(theAltTermStrPos != endPos) {
|
|
// If you're here it means..we hit the rock bottom and therefore switch to plan B.
|
|
theCurrOffset = theAltTermStrPos;
|
|
theLastIteration = PR_TRUE;
|
|
}
|
|
else {
|
|
done = PR_TRUE; // Do this to fix Bug. 35456
|
|
}
|
|
}
|
|
else {
|
|
result=kEOF;
|
|
}
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/**
 * Copy the whole text of this token into a string via CopyUnicodeTo().
 *
 * @param aStr  destination string
 */
void CTextToken::CopyTo(nsAString& aStr)
{
  nsScannerIterator textBegin, textEnd;

  mTextValue.BeginReading(textBegin);
  mTextValue.EndReading(textEnd);

  CopyUnicodeTo(textBegin, textEnd, aStr);
}
|
|
|
|
/**
 * Return the text of this token.
 *
 * @return mTextValue viewed through AsString()
 */
const nsAString& CTextToken::GetStringValue(void)
{
  return mTextValue.AsString();
}
|
|
|
|
/**
 * Bind this token's text to a range of the scanner's buffer.
 *
 * @param aScanner  scanner that performs the binding; dereferenced without
 *                  a null check
 * @param aStart    start of the range
 * @param aEnd      end of the range
 */
void CTextToken::Bind(nsScanner* aScanner, nsScannerIterator& aStart, nsScannerIterator& aEnd)
{
  aScanner->BindSubstring(mTextValue, aStart, aEnd);
}
|
|
|
|
void CTextToken::Bind(const nsAString& aStr)
|
|
{
|
|
mTextValue.Rebind(aStr);
|
|
}
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update vidur 11/12/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CCDATASectionToken::CCDATASectionToken(eHTMLTags aTag) : CHTMLToken(aTag) {
|
|
}
|
|
|
|
|
|
/*
|
|
* string based constructor
|
|
*
|
|
* @update vidur 11/12/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CCDATASectionToken::CCDATASectionToken(const nsAString& aName) : CHTMLToken(eHTMLTag_unknown) {
|
|
mTextValue.Assign(aName);
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update vidur 11/12/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CCDATASectionToken::GetClassName(void) {
|
|
return "cdatasection";
|
|
}
|
|
|
|
/*
|
|
*
|
|
* @update vidur 11/12/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CCDATASectionToken::GetTokenType(void) {
|
|
return eToken_cdatasection;
|
|
}
|
|
|
|
/*
|
|
* Consume as much marked test from scanner as possible.
|
|
*
|
|
* @update rgess 12/15/99: had to handle case: "<![ ! IE 5]>", in addition to "<![..[..]]>".
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
nsresult CCDATASectionToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {
|
|
static const PRUnichar theTerminalsChars[] =
|
|
{ PRUnichar('\r'), PRUnichar('\n'), PRUnichar(']'), PRUnichar(0) };
|
|
static const nsReadEndCondition theEndCondition(theTerminalsChars);
|
|
nsresult result=NS_OK;
|
|
PRBool done=PR_FALSE;
|
|
|
|
while((NS_OK==result) && (!done)) {
|
|
result=aScanner.ReadUntil(mTextValue,theEndCondition,PR_FALSE);
|
|
if(NS_OK==result) {
|
|
result=aScanner.Peek(aChar);
|
|
if((kCR==aChar) && (NS_OK==result)) {
|
|
result=aScanner.GetChar(aChar); //strip off the \r
|
|
result=aScanner.Peek(aChar); //then see what's next.
|
|
if(NS_OK==result) {
|
|
switch(aChar) {
|
|
case kCR:
|
|
result=aScanner.GetChar(aChar); //strip off the \r
|
|
mTextValue.Append(NS_LITERAL_STRING("\n\n"));
|
|
mNewlineCount += 2;
|
|
break;
|
|
case kNewLine:
|
|
//which means we saw \r\n, which becomes \n
|
|
result=aScanner.GetChar(aChar); //strip off the \n
|
|
//now fall through on purpose...
|
|
default:
|
|
mTextValue.Append(NS_LITERAL_STRING("\n"));
|
|
mNewlineCount++;
|
|
break;
|
|
} //switch
|
|
} //if
|
|
}
|
|
else if (kNewLine == aChar) {
|
|
result=aScanner.GetChar(aChar);
|
|
mTextValue.Append(aChar);
|
|
++mNewlineCount;
|
|
}
|
|
else if (kRightSquareBracket == aChar) {
|
|
result=aScanner.GetChar(aChar); //strip off the ]
|
|
mTextValue.Append(aChar);
|
|
result=aScanner.Peek(aChar); //then see what's next.
|
|
if((NS_OK==result) && (kRightSquareBracket==aChar)) {
|
|
result=aScanner.GetChar(aChar); //strip off the second ]
|
|
mTextValue.Append(aChar);
|
|
}
|
|
// The goal here is to not lose data from the page when encountering
|
|
// markup like: <![endif]-->. This means that in normal parsing, we
|
|
// allow ']' to end the marked section and just drop everything between
|
|
// it an the '>'. In view-source mode, we cannot drop things on the
|
|
// floor like that. In fact, to make view-source of XML with script in
|
|
// CDATA sections at all bearable, we need to somewhat enforce the ']>'
|
|
// terminator for marked sections. So make the tokenization somewhat
|
|
// different when in view-source _and_ dealing with a CDATA section.
|
|
PRBool inCDATA = (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) &&
|
|
StringBeginsWith(mTextValue, NS_LITERAL_STRING("[CDATA["));
|
|
if (inCDATA) {
|
|
result = aScanner.Peek(aChar);
|
|
} else {
|
|
nsAutoString dummy; // skip any bad data
|
|
result=aScanner.ReadUntil(dummy,kGreaterThan,PR_FALSE);
|
|
}
|
|
if (NS_OK==result &&
|
|
(!inCDATA || kGreaterThan == aChar)) {
|
|
result=aScanner.GetChar(aChar); //strip off the >
|
|
done=PR_TRUE;
|
|
}
|
|
}
|
|
else done=PR_TRUE;
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/**
 * Return the text collected for this marked section.
 *
 * @return reference to mTextValue
 */
const nsAString& CCDATASectionToken::GetStringValue(void)
{
  return mTextValue;
}
|
|
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CMarkupDeclToken::CMarkupDeclToken() : CHTMLToken(eHTMLTag_markupDecl) {
|
|
}
|
|
|
|
|
|
/*
|
|
* string based constructor
|
|
*
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CMarkupDeclToken::CMarkupDeclToken(const nsAString& aName) : CHTMLToken(eHTMLTag_markupDecl) {
|
|
mTextValue.Rebind(aName);
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CMarkupDeclToken::GetClassName(void) {
|
|
return "markupdeclaration";
|
|
}
|
|
|
|
/*
|
|
*
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CMarkupDeclToken::GetTokenType(void) {
|
|
return eToken_markupDecl;
|
|
}
|
|
|
|
/*
|
|
* Consume as much declaration from scanner as possible.
|
|
* Declaration is a markup declaration of ELEMENT, ATTLIST, ENTITY or
|
|
* NOTATION, which can span multiple lines and ends in >.
|
|
*
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
nsresult CMarkupDeclToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {
|
|
static const PRUnichar theTerminalsChars[] =
|
|
{ PRUnichar('\n'), PRUnichar('\r'), PRUnichar('\''), PRUnichar('"'),
|
|
PRUnichar('>'),
|
|
PRUnichar(0) };
|
|
static const nsReadEndCondition theEndCondition(theTerminalsChars);
|
|
nsresult result=NS_OK;
|
|
PRBool done=PR_FALSE;
|
|
PRUnichar quote=0;
|
|
|
|
nsScannerIterator origin, start, end;
|
|
aScanner.CurrentPosition(origin);
|
|
start = origin;
|
|
|
|
while((NS_OK==result) && (!done)) {
|
|
aScanner.SetPosition(start);
|
|
result=aScanner.ReadUntil(start, end, theEndCondition, PR_FALSE);
|
|
if(NS_OK==result) {
|
|
result=aScanner.Peek(aChar);
|
|
|
|
if(NS_OK==result) {
|
|
PRUnichar theNextChar=0;
|
|
if ((kCR==aChar) || (kNewLine==aChar)) {
|
|
result=aScanner.GetChar(aChar); //strip off the char
|
|
result=aScanner.Peek(theNextChar); //then see what's next.
|
|
}
|
|
switch(aChar) {
|
|
case kCR:
|
|
// result=aScanner.GetChar(aChar);
|
|
if(kLF==theNextChar) {
|
|
// If the "\r" is followed by a "\n", don't replace it and
|
|
// let it be ignored by the layout system
|
|
end.advance(2);
|
|
result=aScanner.GetChar(theNextChar);
|
|
}
|
|
else {
|
|
// If it standalone, replace the "\r" with a "\n" so that
|
|
// it will be considered by the layout system
|
|
aScanner.ReplaceCharacter(end, kLF);
|
|
++end;
|
|
}
|
|
++mNewlineCount;
|
|
break;
|
|
case kLF:
|
|
++end;
|
|
++mNewlineCount;
|
|
break;
|
|
case '\'':
|
|
case '"':
|
|
++end;
|
|
if (quote) {
|
|
if (quote == aChar) {
|
|
quote = 0;
|
|
}
|
|
} else {
|
|
quote = aChar;
|
|
}
|
|
break;
|
|
case kGreaterThan:
|
|
if (quote) {
|
|
++end;
|
|
} else {
|
|
start = end;
|
|
++start; // Note that start is wrong after this, we just avoid temp var
|
|
aScanner.SetPosition(start); // Skip the >
|
|
done=PR_TRUE;
|
|
}
|
|
break;
|
|
default:
|
|
NS_ABORT_IF_FALSE(0,"should not happen, switch is missing cases?");
|
|
break;
|
|
} //switch
|
|
start = end;
|
|
}
|
|
else done=PR_TRUE;
|
|
} // if read until !ok
|
|
} // while
|
|
|
|
aScanner.BindSubstring(mTextValue, origin, end);
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
 * Return the text of this declaration.
 *
 * @return mTextValue viewed through AsString()
 */
const nsAString& CMarkupDeclToken::GetStringValue(void)
{
  return mTextValue.AsString();
}
|
|
|
|
|
|
/*
|
|
* Default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CCommentToken::CCommentToken() : CHTMLToken(eHTMLTag_comment) {
|
|
}
|
|
|
|
|
|
/*
|
|
* Copy constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
CCommentToken::CCommentToken(const nsAString& aName) : CHTMLToken(eHTMLTag_comment) {
|
|
mComment.Rebind(aName);
|
|
}
|
|
|
|
/**
 * Append the complete comment declaration (mCommentDecl) to a string.
 *
 * @param anOutputString  string the declaration is appended to
 */
void CCommentToken::AppendSourceTo(nsAString& anOutputString)
{
  AppendUnicodeTo(mCommentDecl, anOutputString);
}
|
|
|
|
/**
 * Check whether a '>' follows before the next "--" pair.
 *
 * Scans forward from aCurrent. The scan gives up at aEnd or as soon as two
 * consecutive '-' characters have been seen.
 *
 * @param aCurrent  position to start scanning from
 * @param aEnd      end of the readable range
 * @param aGt       out: position of the '>' when one is found; untouched
 *                  otherwise
 * @return PR_TRUE when a '>' was found, PR_FALSE otherwise
 */
static PRBool IsCommentEnd(
  const nsScannerIterator& aCurrent,
  const nsScannerIterator& aEnd,
  nsScannerIterator& aGt)
{
  PRInt32 consecutiveDashes = 0;

  for (nsScannerIterator pos = aCurrent;
       (pos != aEnd) && (consecutiveDashes != 2);
       ++pos) {
    if (*pos == kGreaterThan) {
      aGt = pos;
      return PR_TRUE;
    }
    consecutiveDashes = (*pos == PRUnichar('-')) ? consecutiveDashes + 1 : 0;
  }

  return PR_FALSE;
}
|
|
|
|
/**
 * Consume a comment of the form <!--[... -- ... -- ...]*-->.
 *
 * NOTE: This algorithm does a fine job of handling comments when they are
 * formatted per spec, but if they are not we don't handle them well.
 *
 * On entry the scanner sits just after "<!". On success mCommentDecl is
 * bound to the whole declaration (from "<!" through '>'), mComment to the
 * comment data when there is any, and the scanner is moved past the '>'.
 *
 * @param aScanner  controller of the underlying input source
 * @return NS_OK when a comment was consumed; kEOF when no end was found and
 *         the scanner is incremental; NS_OK (with nothing bound and the
 *         scanner not moved) when no end was found otherwise
 */
nsresult CCommentToken::ConsumeStrictComment(nsScanner& aScanner)
{
  nsScannerIterator bufferEnd, cursor, closeAngle, declStart;
  aScanner.EndReading(bufferEnd);
  aScanner.CurrentPosition(cursor);

  // Stays equal to bufferEnd until an opening "--" has been seen.
  nsScannerIterator dataStart = bufferEnd;

  declStart = cursor;
  declStart.advance(-2); // back over "<!"

  // Regular comment must start with <!--
  if (cursor != bufferEnd && *cursor == kMinus &&
      ++cursor != bufferEnd && *cursor == kMinus &&
      ++cursor != bufferEnd) {
    nsScannerIterator searchEnd = bufferEnd;
    PRBool dashPairsBalanced = PR_FALSE;
    static NS_NAMED_LITERAL_STRING(dashes,"--");
    dataStart = cursor;

    while (FindInReadable(dashes, cursor, searchEnd)) {
      cursor.advance(2);

      // We need to match '--' with '--'.
      dashPairsBalanced = !dashPairsBalanced;

      if (!dashPairsBalanced || !IsCommentEnd(cursor, bufferEnd, closeAngle)) {
        // Continue after the last '--'.
        searchEnd = bufferEnd;
        continue;
      }

      // Done: step back onto the closing "--" so it is not part of the data.
      cursor.advance(-2);
      if (dataStart != cursor) { // protects from <!---->
        aScanner.BindSubstring(mComment, dataStart, cursor);
      }
      aScanner.BindSubstring(mCommentDecl, declStart, ++closeAngle);
      aScanner.SetPosition(closeAngle);
      return NS_OK;
    }
  }

  // If dataStart == bufferEnd, we did not find the opening '--'.
  if (dataStart == bufferEnd) {
    // This might have been an empty comment: <!>
    // Or it could have been something completely bogus like:
    // <!This is foobar>
    // Both cases are handled below.
    aScanner.CurrentPosition(cursor);
    dataStart = cursor;
    if (FindCharInReadable('>', cursor, bufferEnd)) {
      aScanner.BindSubstring(mComment, dataStart, cursor);
      aScanner.BindSubstring(mCommentDecl, declStart, ++cursor);
      aScanner.SetPosition(cursor);
      return NS_OK;
    }
  }

  if (aScanner.IsIncremental()) {
    // We saw the beginning of a comment but not yet the end, and the page
    // is still loading. Returning kEOF makes the caller unwind, wait for
    // more content, and try again.
    // XXX For performance reasons we should cache where we were, and
    // continue from there on the next call.
    return kEOF; // not really an nsresult, but...
  }

  // XXX We should return kNotAComment, parse the comment open as text, and
  // parse the rest of the document normally. Now we ALMOST do that: <! is
  // missing from the content model.
  return NS_OK;
}
|
|
|
|
/*
 * Quirks-mode comment scanner.  The shape it accepts is roughly
 *
 *     <![-[-]] ... [[-]-|--!]>
 *
 * NOTE: this does a fine job on the comments commonly seen in the wild, but
 * it does not really consume them per spec (then again, neither does IE or
 * Nav).
 *
 * On entry the "<!" has always already been consumed.
 *
 * @param  aScanner -- controller of underlying input source
 * @return NS_OK once mComment / mCommentDecl have been bound and the scanner
 *         has been repositioned; kEOF when no terminator is available yet.
 */
nsresult CCommentToken::ConsumeQuirksComment(nsScanner& aScanner)
{
  nsScannerIterator bufferEnd, cursor;
  aScanner.EndReading(bufferEnd);
  aScanner.CurrentPosition(cursor);

  nsScannerIterator dataStart     = cursor;     // first char of the comment text
  nsScannerIterator lastOpenMinus = bufferEnd;  // last '-' of the opening run
  nsScannerIterator firstGt       = bufferEnd;  // first '>' seen (fallback end)
  nsScannerIterator declStart     = cursor;
  declStart.advance(-2); // back up over "<!"

  // Skip over possible leading minuses.
  if (cursor != bufferEnd && *cursor == kMinus) {
    lastOpenMinus = cursor;
    ++cursor;
    ++dataStart;

    if (cursor != bufferEnd && *cursor == kMinus) {
      // "<!--" : long form comment.
      lastOpenMinus = cursor;
      ++cursor;
      ++dataStart;

      nsScannerIterator searchEnd = bufferEnd;
      nsScannerIterator closeGt   = bufferEnd;

      // Examine every '>' until one is preceded by an acceptable terminator.
      while (FindCharInReadable(kGreaterThan, cursor, searchEnd)) {
        closeGt = cursor;
        if (firstGt == bufferEnd) {
          firstGt = closeGt;
        }

        --cursor;
        PRBool terminated = PR_FALSE;
        if (cursor == lastOpenMinus) {
          // The '>' directly follows the opening minuses.
          terminated = PR_TRUE;
        } else if (*cursor == kMinus) { // ->
          --cursor;
          if (cursor != lastOpenMinus && *cursor == kMinus) { // -->
            terminated = PR_TRUE;
            --cursor;
          }
        } else if (*cursor == '!') { // !>
          --cursor;
          if (cursor != lastOpenMinus && *cursor == kMinus) { // -!>
            --cursor;
            if (cursor != lastOpenMinus && *cursor == kMinus) { // --!>
              --cursor;
              terminated = PR_TRUE;
            }
          }
        }

        if (!terminated) {
          // Try again starting after this '>'.
          cursor = ++closeGt;
          searchEnd = bufferEnd;
          continue;
        }

        // Done.
        if (lastOpenMinus != cursor) { // protects from <!---->
          aScanner.BindSubstring(mComment, dataStart, ++cursor);
        }
        aScanner.BindSubstring(mCommentDecl, declStart, ++closeGt);
        aScanner.SetPosition(closeGt);
        return NS_OK;
      }

      if (aScanner.IsIncremental()) {
        // We saw the beginning of a comment but not yet its end, and the page
        // is still loading.  Returning kEOF makes the caller unwind, wait for
        // more content, and try again.
        // XXX For performance reasons we should cache where we were, and
        // continue from there on the next call.
        return kEOF; // not really an nsresult, but...
      }

      // Special state: end of document reached without finding the normal
      // end-of-comment delimiter "-->".  If an alternate delimiter ">" was
      // seen, rewind to just past the first one and use everything up to it
      // as the comment.  Otherwise the document has no end comment and the
      // whole remainder is treated as one big comment.
      closeGt = firstGt;
      if (dataStart != closeGt) { // protects from <!-->
        aScanner.BindSubstring(mComment, dataStart, closeGt);
      }
      if (closeGt != bufferEnd) {
        ++closeGt;
      }
      aScanner.BindSubstring(mCommentDecl, declStart, closeGt);
      aScanner.SetPosition(closeGt);
      return NS_OK;
    }
  }

  // This could be the short form of a comment: find the first '>'.
  cursor = dataStart;
  if (!FindCharInReadable(kGreaterThan, cursor, bufferEnd)) {
    return kEOF; // not really an nsresult, but...
  }

  nsScannerIterator closeGt = cursor;
  if (cursor != dataStart) {
    // Back up over a trailing "-", "--", "!", "-!" or "--!" so that it is not
    // included in the comment text.
    --cursor;
    if (cursor != dataStart) {
      if (*cursor == kMinus) { // ->
        --cursor;
        if (cursor != dataStart && *cursor == kMinus) { // -->
          --cursor;
        }
      } else if (*cursor == '!') { // !>
        --cursor;
        if (cursor != dataStart && *cursor == kMinus) { // -!>
          --cursor;
          if (cursor != dataStart && *cursor == kMinus) { // --!>
            --cursor;
          }
        }
      }
    }
  }

  if (cursor != closeGt) {
    aScanner.BindSubstring(mComment, dataStart, ++cursor);
  }
  aScanner.BindSubstring(mCommentDecl, declStart, ++closeGt);
  aScanner.SetPosition(closeGt);
  return NS_OK;
}
|
|
|
|
/*
|
|
* Consume the identifier portion of the comment.
|
|
* Note that we've already eaten the "<!" portion.
|
|
*
|
|
* @update gess 16June2000
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
nsresult CCommentToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {
|
|
nsresult result=PR_TRUE;
|
|
|
|
if (aFlag & NS_IPARSER_FLAG_STRICT_MODE) {
|
|
//Enabling strict comment parsing for Bug 53011 and 2749 contradicts!!!!
|
|
result = ConsumeStrictComment(aScanner);
|
|
}
|
|
else {
|
|
result = ConsumeQuirksComment(aScanner);
|
|
}
|
|
|
|
if (NS_SUCCEEDED(result)) {
|
|
mNewlineCount = !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) ? mCommentDecl.CountChar(kNewLine) : -1;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
 * @return the comment text bound by the last successful Consume()
 *         (the mComment substring, viewed as a string)
 */
const nsAString& CCommentToken::GetStringValue()
{
  return mComment.AsString();
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CCommentToken::GetClassName(void){
|
|
return "/**/";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CCommentToken::GetTokenType(void) {
|
|
return eToken_comment;
|
|
}
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CNewlineToken::CNewlineToken() : CHTMLToken(eHTMLTag_newline) {
|
|
}
|
|
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CNewlineToken::GetClassName(void) {
|
|
return "crlf";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CNewlineToken::GetTokenType(void) {
|
|
return eToken_newline;
|
|
}
|
|
|
|
|
|
static nsScannerSubstring* gNewlineStr;
|
|
void CNewlineToken::AllocNewline()
|
|
{
|
|
gNewlineStr = new nsScannerSubstring(NS_LITERAL_STRING("\n"));
|
|
}
|
|
|
|
void CNewlineToken::FreeNewline()
|
|
{
|
|
if (gNewlineStr) {
|
|
delete gNewlineStr;
|
|
gNewlineStr = nsnull;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* This method retrieves the value of this internal string.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @return nsString reference to internal string value
|
|
*/
|
|
const nsAString& CNewlineToken::GetStringValue(void) {
|
|
return gNewlineStr->AsString();
|
|
}
|
|
|
|
/*
|
|
* Consume as many cr/lf pairs as you can find.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
nsresult CNewlineToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {
|
|
|
|
/*******************************************************************
|
|
|
|
Here's what the HTML spec says about newlines:
|
|
|
|
"A line break is defined to be a carriage return (
),
|
|
a line feed (
), or a carriage return/line feed pair.
|
|
All line breaks constitute white space."
|
|
|
|
*******************************************************************/
|
|
|
|
PRUnichar theChar;
|
|
nsresult result=aScanner.Peek(theChar);
|
|
|
|
if(NS_OK==result) {
|
|
switch(aChar) {
|
|
case kNewLine:
|
|
if(kCR==theChar) {
|
|
result=aScanner.GetChar(theChar);
|
|
}
|
|
break;
|
|
case kCR:
|
|
//convert CRLF into just CR
|
|
if(kNewLine==theChar) {
|
|
result=aScanner.GetChar(theChar);
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
mNewlineCount = 1;
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CAttributeToken::CAttributeToken() : CHTMLToken(eHTMLTag_unknown) {
|
|
mHasEqualWithoutValue=PR_FALSE;
|
|
#ifdef DEBUG
|
|
mLastAttribute = PR_FALSE;
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* string based constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string value to init token name with
|
|
* @return
|
|
*/
|
|
CAttributeToken::CAttributeToken(const nsAString& aName) : CHTMLToken(eHTMLTag_unknown) {
|
|
mTextValue.Assign(aName);
|
|
mHasEqualWithoutValue=PR_FALSE;
|
|
#ifdef DEBUG
|
|
mLastAttribute = PR_FALSE;
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* construct initializing data to
|
|
* key value pair
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string value to init token name with
|
|
* @return
|
|
*/
|
|
CAttributeToken::CAttributeToken(const nsAString& aKey, const nsAString& aName) : CHTMLToken(eHTMLTag_unknown) {
|
|
mTextValue.Assign(aName);
|
|
mTextKey.Rebind(aKey);
|
|
mHasEqualWithoutValue=PR_FALSE;
|
|
#ifdef DEBUG
|
|
mLastAttribute = PR_FALSE;
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CAttributeToken::GetClassName(void) {
|
|
return "attr";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CAttributeToken::GetTokenType(void) {
|
|
return eToken_attribute;
|
|
}
|
|
|
|
/*
|
|
* Removes non-alpha-non-digit characters from the end of a KEY
|
|
*
|
|
* @update harishd 07/15/99
|
|
* @param
|
|
* @return
|
|
*/
|
|
void CAttributeToken::SanitizeKey() {
|
|
PRInt32 length=mTextKey.Length();
|
|
if(length > 0) {
|
|
nsScannerIterator iter, begin, end;
|
|
mTextKey.BeginReading(begin);
|
|
mTextKey.EndReading(end);
|
|
iter = end;
|
|
|
|
// Look for the first legal character starting from
|
|
// the end of the string
|
|
do {
|
|
--iter;
|
|
} while (!nsCRT::IsAsciiAlpha(*iter) &&
|
|
!nsCRT::IsAsciiDigit(*iter) &&
|
|
(iter != begin));
|
|
|
|
// If there were any illegal characters, just copy out the
|
|
// legal part
|
|
if (iter != --end) {
|
|
nsAutoString buf;
|
|
CopyUnicodeTo(begin, ++iter, buf);
|
|
mTextKey.Rebind(buf);
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
/*
 * @return the attribute key (mTextKey viewed as a string)
 */
const nsAString& CAttributeToken::GetKey()
{
  return mTextKey.AsString();
}
|
|
|
|
/*
 * @return the attribute value text
 */
const nsAString& CAttributeToken::GetStringValue()
{
  return mTextValue;
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update rickg 6June2000
|
|
* @param anOutputString will recieve the result
|
|
* @return nada
|
|
*/
|
|
void CAttributeToken::GetSource(nsString& anOutputString){
|
|
anOutputString.Truncate();
|
|
AppendSourceTo(anOutputString);
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update rickg 6June2000
|
|
* @param result appended to the output string.
|
|
* @return nada
|
|
*/
|
|
void CAttributeToken::AppendSourceTo(nsAString& anOutputString){
|
|
AppendUnicodeTo(mTextKey, anOutputString);
|
|
if(mTextValue.Length() || mHasEqualWithoutValue)
|
|
anOutputString.Append(NS_LITERAL_STRING("="));
|
|
anOutputString.Append(mTextValue);
|
|
// anOutputString.Append(NS_LITERAL_STRING(";"));
|
|
}
|
|
|
|
static void AppendNCR(nsString& aString, PRInt32 aNCRValue);
|
|
/*
|
|
* @param aScanner -- controller of underlying input source
|
|
* @param aFlag -- If NS_IPARSER_FLAG_VIEW_SOURCE do not reduce entities...
|
|
* @return error result
|
|
*
|
|
*/
|
|
static
|
|
nsresult ConsumeAttributeEntity(nsString& aString,
|
|
nsScanner& aScanner,
|
|
PRInt32 aFlag)
|
|
{
|
|
|
|
nsresult result=NS_OK;
|
|
|
|
PRUnichar ch;
|
|
result=aScanner.Peek(ch, 1);
|
|
|
|
if (NS_SUCCEEDED(result)) {
|
|
PRUnichar amp=0;
|
|
PRInt32 theNCRValue=0;
|
|
nsAutoString entity;
|
|
|
|
if (nsCRT::IsAsciiAlpha(ch) && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
|
|
result=CEntityToken::ConsumeEntity(ch,entity,aScanner);
|
|
if (NS_SUCCEEDED(result)) {
|
|
theNCRValue = nsHTMLEntities::EntityToUnicode(entity);
|
|
PRUnichar theTermChar=entity.Last();
|
|
// If an entity value is greater than 255 then:
|
|
// Nav 4.x does not treat it as an entity,
|
|
// IE treats it as an entity if terminated with a semicolon.
|
|
// Resembling IE!!
|
|
if(theNCRValue < 0 || (theNCRValue > 255 && theTermChar != ';')) {
|
|
// Looks like we're not dealing with an entity
|
|
aString.Append(kAmpersand);
|
|
aString.Append(entity);
|
|
}
|
|
else {
|
|
// A valid entity so reduce it.
|
|
aString.Append(PRUnichar(theNCRValue));
|
|
}
|
|
}
|
|
}
|
|
else if (ch==kHashsign && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
|
|
result=CEntityToken::ConsumeEntity(ch,entity,aScanner);
|
|
if (NS_SUCCEEDED(result)) {
|
|
if (result == NS_HTMLTOKENS_NOT_AN_ENTITY) {
|
|
// Looked like an entity but it's not
|
|
aScanner.GetChar(amp);
|
|
aString.Append(amp);
|
|
result = NS_OK; // just being safe..
|
|
}
|
|
else {
|
|
PRInt32 err;
|
|
theNCRValue=entity.ToInteger(&err,kAutoDetect);
|
|
AppendNCR(aString, theNCRValue);
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
// What we thought as entity is not really an entity...
|
|
aScanner.GetChar(amp);
|
|
aString.Append(amp);
|
|
}//if
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* This general purpose method is used when you want to
|
|
* consume attributed text value.
|
|
* Note: It also reduces entities within attributes.
|
|
*
|
|
* @param aNewlineCount -- the newline count to increment when hitting newlines
|
|
* @param aScanner -- controller of underlying input source
|
|
* @param aTerminalChars -- characters that stop consuming attribute.
|
|
* @param aAllowNewlines -- whether to allow newlines in the value.
|
|
* XXX it would be nice to roll this info into
|
|
* aTerminalChars somehow....
|
|
* @param aFlag - contains information such as |dtd mode|view mode|doctype|etc...
|
|
* @return error result
|
|
*/
|
|
static
|
|
nsresult ConsumeAttributeValueText(nsString& aString,
|
|
PRInt32& aNewlineCount,
|
|
nsScanner& aScanner,
|
|
const nsReadEndCondition& aEndCondition,
|
|
PRBool aAllowNewlines,
|
|
PRInt32 aFlag)
|
|
{
|
|
nsresult result = NS_OK;
|
|
PRBool done = PR_FALSE;
|
|
|
|
do {
|
|
result = aScanner.ReadUntil(aString,aEndCondition,PR_FALSE);
|
|
if(NS_SUCCEEDED(result)) {
|
|
PRUnichar ch;
|
|
aScanner.Peek(ch);
|
|
if(ch == kAmpersand) {
|
|
result = ConsumeAttributeEntity(aString,aScanner,aFlag);
|
|
}
|
|
else if(ch == kCR && aAllowNewlines) {
|
|
aScanner.GetChar(ch);
|
|
result = aScanner.Peek(ch);
|
|
if (NS_SUCCEEDED(result)) {
|
|
if(ch == kNewLine) {
|
|
aString.Append(NS_LITERAL_STRING("\r\n"));
|
|
aScanner.GetChar(ch);
|
|
}
|
|
else {
|
|
aString.Append(PRUnichar('\r'));
|
|
}
|
|
++aNewlineCount;
|
|
}
|
|
}
|
|
else if(ch == kNewLine && aAllowNewlines) {
|
|
aScanner.GetChar(ch);
|
|
aString.Append(PRUnichar('\n'));
|
|
++aNewlineCount;
|
|
}
|
|
else {
|
|
done = PR_TRUE;
|
|
}
|
|
}
|
|
} while (NS_SUCCEEDED(result) && !done);
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* This general purpose method is used when you want to
|
|
* consume a known quoted string.
|
|
*
|
|
* @param aScanner -- controller of underlying input source
|
|
* @param aTerminalChars -- characters that stop consuming attribute.
|
|
* @param aFlag - contains information such as |dtd mode|view mode|doctype|etc...
|
|
* @return error result
|
|
*/
|
|
static
|
|
nsresult ConsumeQuotedString(PRUnichar aChar,
|
|
nsString& aString,
|
|
PRInt32& aNewlineCount,
|
|
nsScanner& aScanner,
|
|
PRInt32 aFlag)
|
|
{
|
|
NS_ASSERTION(aChar==kQuote || aChar==kApostrophe,"char is neither quote nor apostrophe");
|
|
|
|
static const PRUnichar theTerminalCharsQuote[] = {
|
|
PRUnichar(kQuote), PRUnichar('&'), PRUnichar(kCR),
|
|
PRUnichar(kNewLine), PRUnichar(0) };
|
|
static const PRUnichar theTerminalCharsApostrophe[] = {
|
|
PRUnichar(kApostrophe), PRUnichar('&'), PRUnichar(kCR),
|
|
PRUnichar(kNewLine), PRUnichar(0) };
|
|
static const nsReadEndCondition
|
|
theTerminateConditionQuote(theTerminalCharsQuote);
|
|
static const nsReadEndCondition
|
|
theTerminateConditionApostrophe(theTerminalCharsApostrophe);
|
|
|
|
// Assume Quote to init to something
|
|
const nsReadEndCondition *terminateCondition = &theTerminateConditionQuote;
|
|
if (aChar==kApostrophe)
|
|
terminateCondition = &theTerminateConditionApostrophe;
|
|
|
|
nsresult result=NS_OK;
|
|
nsScannerIterator theOffset;
|
|
aScanner.CurrentPosition(theOffset);
|
|
|
|
result=ConsumeAttributeValueText(aString,aNewlineCount,aScanner,
|
|
*terminateCondition,PR_TRUE,aFlag);
|
|
|
|
if(NS_SUCCEEDED(result)) {
|
|
result = aScanner.SkipOver(aChar); // aChar should be " or '
|
|
}
|
|
|
|
// Ref: Bug 35806
|
|
// A back up measure when disaster strikes...
|
|
// Ex <table> <tr d="><td>hello</td></tr></table>
|
|
if(!aString.IsEmpty() && aString.Last()!=aChar &&
|
|
!aScanner.IsIncremental() && result==kEOF) {
|
|
static const nsReadEndCondition
|
|
theAttributeTerminator(kAttributeTerminalChars);
|
|
aString.Truncate();
|
|
aScanner.SetPosition(theOffset, PR_FALSE, PR_TRUE);
|
|
result=ConsumeAttributeValueText(aString,aNewlineCount,aScanner,
|
|
theAttributeTerminator,PR_FALSE,aFlag);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* Consume the key and value portions of the attribute.
|
|
*
|
|
* @update rickg 03.23.2000
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @param aFlag - contains information such as |dtd mode|view mode|doctype|etc...
|
|
* @return error result
|
|
*/
|
|
nsresult CAttributeToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {
|
|
|
|
nsresult result;
|
|
|
|
//I changed a bit of this method to use aRetain so that we do the right
|
|
//thing in viewsource. The ws/cr/lf sequences are now maintained, and viewsource looks good.
|
|
|
|
nsScannerIterator wsstart, wsend;
|
|
|
|
if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
|
|
result = aScanner.ReadWhitespace(wsstart, wsend, mNewlineCount);
|
|
}
|
|
else {
|
|
result = aScanner.SkipWhitespace(mNewlineCount);
|
|
}
|
|
|
|
if (NS_OK==result) {
|
|
static const PRUnichar theTerminalsChars[] =
|
|
{ PRUnichar(' '), PRUnichar('"'),
|
|
PRUnichar('='), PRUnichar('\n'),
|
|
PRUnichar('\r'), PRUnichar('\t'),
|
|
PRUnichar('>'), PRUnichar('<'),
|
|
PRUnichar('\b'), PRUnichar(0) };
|
|
static const nsReadEndCondition theEndCondition(theTerminalsChars);
|
|
|
|
nsScannerIterator start, end;
|
|
result=aScanner.ReadUntil(start,end,theEndCondition,PR_FALSE);
|
|
|
|
if (!(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
|
|
aScanner.BindSubstring(mTextKey, start, end);
|
|
}
|
|
|
|
//now it's time to Consume the (optional) value...
|
|
if (NS_OK==result) {
|
|
if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
|
|
result = aScanner.ReadWhitespace(start, wsend, mNewlineCount);
|
|
aScanner.BindSubstring(mTextKey, wsstart, wsend);
|
|
}
|
|
else {
|
|
result = aScanner.SkipWhitespace(mNewlineCount);
|
|
}
|
|
|
|
if (NS_OK==result) {
|
|
result=aScanner.Peek(aChar); //Skip ahead until you find an equal sign or a '>'...
|
|
if (NS_OK==result) {
|
|
if (kEqual==aChar){
|
|
result=aScanner.GetChar(aChar); //skip the equal sign...
|
|
if (NS_OK==result) {
|
|
if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
|
|
result = aScanner.ReadWhitespace(mTextValue, mNewlineCount);
|
|
}
|
|
else {
|
|
result = aScanner.SkipWhitespace(mNewlineCount);
|
|
}
|
|
|
|
if (NS_OK==result) {
|
|
result=aScanner.Peek(aChar); //and grab the next char.
|
|
if (NS_OK==result) {
|
|
if ((kQuote==aChar) || (kApostrophe==aChar)) {
|
|
aScanner.GetChar(aChar);
|
|
result=ConsumeQuotedString(aChar,mTextValue,mNewlineCount,
|
|
aScanner,aFlag);
|
|
if (NS_SUCCEEDED(result) && (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
|
|
mTextValue.Insert(aChar,0);
|
|
mTextValue.Append(aChar);
|
|
}
|
|
// According to spec. we ( who? ) should ignore linefeeds. But look,
|
|
// even the carriage return was getting stripped ( wonder why! ) -
|
|
// Ref. to bug 15204. Okay, so the spec. told us to ignore linefeeds,
|
|
// bug then what about bug 47535 ? Should we preserve everything then?
|
|
// Well, let's make it so! Commenting out the next two lines..
|
|
/*if(!aRetain)
|
|
mTextValue.StripChars("\r\n"); //per the HTML spec, ignore linefeeds...
|
|
*/
|
|
}
|
|
else if (kGreaterThan==aChar){
|
|
mHasEqualWithoutValue=PR_TRUE;
|
|
}
|
|
else {
|
|
static const nsReadEndCondition
|
|
theAttributeTerminator(kAttributeTerminalChars);
|
|
result=ConsumeAttributeValueText(mTextValue,
|
|
mNewlineCount,
|
|
aScanner,
|
|
theAttributeTerminator,
|
|
PR_FALSE,
|
|
aFlag);
|
|
}
|
|
}//if
|
|
if (NS_OK==result) {
|
|
if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
|
|
result = aScanner.ReadWhitespace(mTextValue, mNewlineCount);
|
|
}
|
|
else {
|
|
result = aScanner.SkipWhitespace(mNewlineCount);
|
|
}
|
|
}
|
|
}//if
|
|
}//if
|
|
}//if
|
|
else {
|
|
//This is where we have to handle fairly busted content.
|
|
//If you're here, it means we saw an attribute name, but couldn't find
|
|
//the following equal sign. <tag NAME=....
|
|
|
|
//Doing this right in all cases is <i>REALLY</i> ugly.
|
|
//My best guess is to grab the next non-ws char. We know it's not '=',
|
|
//so let's see what it is. If it's a '"', then assume we're reading
|
|
//from the middle of the value. Try stripping the quote and continuing...
|
|
if (kQuote==aChar){
|
|
result=aScanner.SkipOver(aChar); //strip quote.
|
|
}
|
|
}
|
|
}//if
|
|
} //if
|
|
}//if (consume optional value)
|
|
|
|
if (NS_OK==result) {
|
|
result=aScanner.Peek(aChar);
|
|
#ifdef DEBUG
|
|
mLastAttribute = (kGreaterThan == aChar || kEOF == result);
|
|
#endif
|
|
}
|
|
}//if
|
|
return result;
|
|
}
|
|
|
|
void CAttributeToken::SetKey(const nsAString& aKey)
|
|
{
|
|
mTextKey.Rebind(aKey);
|
|
}
|
|
|
|
/*
 * Binds the attribute key to the range [aStart, aEnd) of the scanner's
 * buffer.
 *
 * @param  aScanner -- scanner that owns the range; must not be null
 * @param  aStart -- start of the key
 * @param  aEnd -- end of the key
 */
void CAttributeToken::BindKey(nsScanner* aScanner,
                              nsScannerIterator& aStart,
                              nsScannerIterator& aEnd)
{
  aScanner->BindSubstring(mTextKey, aStart, aEnd);
}
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CWhitespaceToken::CWhitespaceToken() : CHTMLToken(eHTMLTag_whitespace) {
|
|
}
|
|
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string value to init token name with
|
|
* @return
|
|
*/
|
|
CWhitespaceToken::CWhitespaceToken(const nsAString& aName) : CHTMLToken(eHTMLTag_whitespace) {
|
|
mTextValue.Assign(aName);
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CWhitespaceToken::GetClassName(void) {
|
|
return "ws";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CWhitespaceToken::GetTokenType(void) {
|
|
return eToken_whitespace;
|
|
}
|
|
|
|
/*
|
|
* This general purpose method is used when you want to
|
|
* consume an aribrary sequence of whitespace.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
nsresult CWhitespaceToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {
|
|
mTextValue.Assign(aChar);
|
|
nsresult result=aScanner.ReadWhitespace(mTextValue, mNewlineCount);
|
|
if(NS_OK==result) {
|
|
mTextValue.StripChar(kCR);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/*
 * @return the whitespace text held by this token
 */
const nsAString& CWhitespaceToken::GetStringValue()
{
  return mTextValue;
}
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CEntityToken::CEntityToken() : CHTMLToken(eHTMLTag_entity) {
|
|
}
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string value to init token name with
|
|
* @return
|
|
*/
|
|
CEntityToken::CEntityToken(const nsAString& aName) : CHTMLToken(eHTMLTag_entity) {
|
|
mTextValue.Assign(aName);
|
|
#ifdef VERBOSE_DEBUG
|
|
if(!VerifyEntityTable()) {
|
|
cout<<"Entity table is invalid!" << endl;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
|
|
/*
|
|
* Consume the rest of the entity. We've already eaten the "&".
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
nsresult CEntityToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {
|
|
nsresult result=ConsumeEntity(aChar,mTextValue,aScanner);
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CEntityToken::GetClassName(void) {
|
|
return "&entity";
|
|
}
|
|
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CEntityToken::GetTokenType(void) {
|
|
return eToken_entity;
|
|
}
|
|
|
|
/*
|
|
* This general purpose method is used when you want to
|
|
* consume an entity &xxxx;. Keep in mind that entities
|
|
* are <i>not</i> reduced inline.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
nsresult
|
|
CEntityToken::ConsumeEntity(PRUnichar aChar,
|
|
nsString& aString,
|
|
nsScanner& aScanner) {
|
|
nsresult result=NS_OK;
|
|
if(kLeftBrace==aChar) {
|
|
//you're consuming a script entity...
|
|
aScanner.GetChar(aChar); // Consume &
|
|
|
|
PRInt32 rightBraceCount = 0;
|
|
PRInt32 leftBraceCount = 0;
|
|
|
|
do {
|
|
result=aScanner.GetChar(aChar);
|
|
|
|
if (NS_FAILED(result)) {
|
|
return result;
|
|
}
|
|
|
|
aString.Append(aChar);
|
|
if(aChar==kRightBrace)
|
|
++rightBraceCount;
|
|
else if(aChar==kLeftBrace)
|
|
++leftBraceCount;
|
|
} while(leftBraceCount!=rightBraceCount);
|
|
} //if
|
|
else {
|
|
PRUnichar theChar=0;
|
|
if (kHashsign==aChar) {
|
|
result = aScanner.Peek(theChar,2);
|
|
|
|
if (NS_FAILED(result)) {
|
|
if (kEOF == result && !aScanner.IsIncremental()) {
|
|
// If this is the last buffer then we are certainly
|
|
// not dealing with an entity. That's, there are
|
|
// no more characters after &#. Bug 188278.
|
|
return NS_HTMLTOKENS_NOT_AN_ENTITY;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
if (nsCRT::IsAsciiDigit(theChar)) {
|
|
aScanner.GetChar(aChar); // Consume &
|
|
aScanner.GetChar(aChar); // Consume #
|
|
aString.Assign(aChar);
|
|
result=aScanner.ReadNumber(aString,10);
|
|
}
|
|
else if (theChar == 'x' || theChar == 'X') {
|
|
aScanner.GetChar(aChar); // Consume &
|
|
aScanner.GetChar(aChar); // Consume #
|
|
aScanner.GetChar(theChar); // Consume x
|
|
aString.Assign(aChar);
|
|
aString.Append(theChar);
|
|
result=aScanner.ReadNumber(aString,16);
|
|
}
|
|
else {
|
|
return NS_HTMLTOKENS_NOT_AN_ENTITY;
|
|
}
|
|
}
|
|
else {
|
|
result = aScanner.Peek(theChar,1);
|
|
|
|
if (NS_FAILED(result)) {
|
|
return result;
|
|
}
|
|
|
|
if(nsCRT::IsAsciiAlpha(theChar) ||
|
|
theChar == '_' ||
|
|
theChar == ':') {
|
|
aScanner.GetChar(aChar); // Consume &
|
|
result=aScanner.ReadIdentifier(aString,PR_TRUE); // Ref. Bug# 23791 - For setting aIgnore to PR_TRUE.
|
|
}
|
|
else {
|
|
return NS_HTMLTOKENS_NOT_AN_ENTITY;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (NS_FAILED(result)) {
|
|
return result;
|
|
}
|
|
|
|
result=aScanner.Peek(aChar);
|
|
|
|
if (NS_FAILED(result)) {
|
|
return result;
|
|
}
|
|
|
|
if (aChar == kSemicolon) {
|
|
// consume semicolon that stopped the scan
|
|
aString.Append(aChar);
|
|
result=aScanner.GetChar(aChar);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
#define PA_REMAP_128_TO_160_ILLEGAL_NCR 1
|
|
|
|
#ifdef PA_REMAP_128_TO_160_ILLEGAL_NCR
|
|
/**
|
|
* Map some illegal but commonly used numeric entities into their
|
|
* appropriate unicode value.
|
|
*/
|
|
#define NOT_USED 0xfffd
|
|
|
|
static const PRUint16 PA_HackTable[] = {
|
|
0x20ac, /* EURO SIGN */
|
|
NOT_USED,
|
|
0x201a, /* SINGLE LOW-9 QUOTATION MARK */
|
|
0x0192, /* LATIN SMALL LETTER F WITH HOOK */
|
|
0x201e, /* DOUBLE LOW-9 QUOTATION MARK */
|
|
0x2026, /* HORIZONTAL ELLIPSIS */
|
|
0x2020, /* DAGGER */
|
|
0x2021, /* DOUBLE DAGGER */
|
|
0x02c6, /* MODIFIER LETTER CIRCUMFLEX ACCENT */
|
|
0x2030, /* PER MILLE SIGN */
|
|
0x0160, /* LATIN CAPITAL LETTER S WITH CARON */
|
|
0x2039, /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
|
|
0x0152, /* LATIN CAPITAL LIGATURE OE */
|
|
NOT_USED,
|
|
0x017D, /* LATIN CAPITAL LETTER Z WITH CARON */
|
|
NOT_USED,
|
|
NOT_USED,
|
|
0x2018, /* LEFT SINGLE QUOTATION MARK */
|
|
0x2019, /* RIGHT SINGLE QUOTATION MARK */
|
|
0x201c, /* LEFT DOUBLE QUOTATION MARK */
|
|
0x201d, /* RIGHT DOUBLE QUOTATION MARK */
|
|
0x2022, /* BULLET */
|
|
0x2013, /* EN DASH */
|
|
0x2014, /* EM DASH */
|
|
0x02dc, /* SMALL TILDE */
|
|
0x2122, /* TRADE MARK SIGN */
|
|
0x0161, /* LATIN SMALL LETTER S WITH CARON */
|
|
0x203a, /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */
|
|
0x0153, /* LATIN SMALL LIGATURE OE */
|
|
NOT_USED,
|
|
0x017E, /* LATIN SMALL LETTER Z WITH CARON */
|
|
0x0178 /* LATIN CAPITAL LETTER Y WITH DIAERESIS */
|
|
};
|
|
#endif /* PA_REMAP_128_TO_160_ILLEGAL_NCR */
|
|
|
|
/*
 * Appends the character for a numeric character reference to aString.
 * When PA_REMAP_128_TO_160_ILLEGAL_NCR is defined, values 0x80..0x9F are
 * first remapped through PA_HackTable.  Values outside the BMP are appended
 * as a high/low surrogate pair.
 *
 * @param  aString -- string to append to
 * @param  aNCRValue -- numeric value of the reference
 */
static void AppendNCR(nsString& aString, PRInt32 aNCRValue)
{
#ifdef PA_REMAP_128_TO_160_ILLEGAL_NCR
  /* for some illegal, but popular usage */
  const PRBool needsRemap = (aNCRValue >= 0x0080) && (aNCRValue <= 0x009f);
  if (needsRemap) {
    aNCRValue = PA_HackTable[aNCRValue - 0x0080];
  }
#endif

  if (IS_IN_BMP(aNCRValue)) {
    aString.Append(PRUnichar(aNCRValue));
    return;
  }

  aString.Append(PRUnichar(H_SURROGATE(aNCRValue)));
  aString.Append(PRUnichar(L_SURROGATE(aNCRValue)));
}
|
|
|
|
/*
|
|
* This method converts this entity into its underlying
|
|
* unicode equivalent.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aString will hold the resulting string value
|
|
* @return numeric (unichar) value
|
|
*/
|
|
PRInt32 CEntityToken::TranslateToUnicodeStr(nsString& aString) {
|
|
PRInt32 value=0;
|
|
|
|
if(mTextValue.Length()>1) {
|
|
PRUnichar theChar0=mTextValue.CharAt(0);
|
|
|
|
if(kHashsign==theChar0) {
|
|
PRInt32 err=0;
|
|
|
|
value=mTextValue.ToInteger(&err,kAutoDetect);
|
|
|
|
if(0==err) {
|
|
AppendNCR(aString, value);
|
|
}
|
|
}
|
|
else{
|
|
value = nsHTMLEntities::EntityToUnicode(mTextValue);
|
|
if(-1<value) {
|
|
//we found a named entity...
|
|
aString.Assign(PRUnichar(value));
|
|
}
|
|
}//else
|
|
}//if
|
|
|
|
return value;
|
|
}
|
|
|
|
|
|
/*
 * @return the entity text held by this token (without the leading '&')
 */
const nsAString& CEntityToken::GetStringValue()
{
  return mTextValue;
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param anOutputString will recieve the result
|
|
* @return nada
|
|
*/
|
|
void CEntityToken::GetSource(nsString& anOutputString){
|
|
anOutputString.Append(NS_LITERAL_STRING("&"));
|
|
anOutputString+=mTextValue;
|
|
//anOutputString+=";";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update harishd 03/23/00
|
|
* @param result appended to the output string.
|
|
* @return nada
|
|
*/
|
|
void CEntityToken::AppendSourceTo(nsAString& anOutputString){
|
|
anOutputString.Append(NS_LITERAL_STRING("&"));
|
|
anOutputString+=mTextValue;
|
|
//anOutputString+=";";
|
|
}
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CScriptToken::CScriptToken() : CHTMLToken(eHTMLTag_script) {
|
|
}
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CScriptToken::CScriptToken(const nsAString& aString) : CHTMLToken(eHTMLTag_script) {
|
|
mTextValue.Assign(aString);
|
|
}
|
|
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CScriptToken::GetClassName(void) {
|
|
return "script";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CScriptToken::GetTokenType(void) {
|
|
return eToken_script;
|
|
}
|
|
|
|
/*
 * @return the script text held by this token
 */
const nsAString& CScriptToken::GetStringValue()
{
  return mTextValue;
}
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CStyleToken::CStyleToken() : CHTMLToken(eHTMLTag_style) {
|
|
}
|
|
|
|
/*
 * String based constructor.
 *
 * @param  aString -- string copied into the token's text value
 */
CStyleToken::CStyleToken(const nsAString& aString)
  : CHTMLToken(eHTMLTag_style)
{
  mTextValue.Assign(aString);
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CStyleToken::GetClassName(void) {
|
|
return "style";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CStyleToken::GetTokenType(void) {
|
|
return eToken_style;
|
|
}
|
|
|
|
/*
 * @return the style text held by this token
 */
const nsAString& CStyleToken::GetStringValue()
{
  return mTextValue;
}
|
|
|
|
|
|
/**
|
|
*
|
|
* @update gess4/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const PRUnichar* GetTagName(PRInt32 aTag)
|
|
{
|
|
const PRUnichar *result = nsHTMLTags::GetStringValue((nsHTMLTag) aTag);
|
|
|
|
if (result) {
|
|
return result;
|
|
}
|
|
|
|
if(aTag >= eHTMLTag_userdefined)
|
|
return sUserdefined;
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
|
|
*
|
|
*
|
|
* @update gess 9/23/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
CInstructionToken::CInstructionToken() : CHTMLToken(eHTMLTag_instruction) {
|
|
}
|
|
|
|
/**
|
|
*
|
|
*
|
|
* @update gess 9/23/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
CInstructionToken::CInstructionToken(const nsAString& aString) : CHTMLToken(eHTMLTag_unknown) {
|
|
mTextValue.Assign(aString);
|
|
}
|
|
|
|
/**
|
|
*
|
|
*
|
|
* @update gess 9/23/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
nsresult CInstructionToken::Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aFlag){
|
|
mTextValue.Assign(NS_LITERAL_STRING("<?"));
|
|
nsresult result=aScanner.ReadUntil(mTextValue,kGreaterThan,PR_TRUE);
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
*
|
|
*
|
|
* @update gess 9/23/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CInstructionToken::GetClassName(void){
|
|
return "instruction";
|
|
}
|
|
|
|
/**
|
|
*
|
|
*
|
|
* @update gess 9/23/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CInstructionToken::GetTokenType(void){
|
|
return eToken_instruction;
|
|
}
|
|
|
|
/**
 *  Gives read access to the text held by this instruction token.
 *
 *  @return  reference to mTextValue
 */
const nsAString& CInstructionToken::GetStringValue() {
  return mTextValue;
}
|
|
|
|
|
|
/**
 *  Constructs an error token wrapping the given parser error.
 *  The base CHTMLToken is initialised with eHTMLTag_unknown. The pointer is
 *  stored as-is; it is later deleted by the destructor.
 *
 *  @param   aError -- error object to store in mError
 */
CErrorToken::CErrorToken(nsParserError *aError)
  : CHTMLToken(eHTMLTag_unknown),
    mError(aError)
{
}
|
|
|
|
/**
 *  Destructor: releases the stored parser error with delete.
 */
CErrorToken::~CErrorToken() {
  delete mError;
}
|
|
|
|
/**
 *  Reports the token type of this token.
 *
 *  @return  eToken_error
 */
PRInt32 CErrorToken::GetTokenType()
{
  return eToken_error;
}
|
|
|
|
/**
 *  Reports the class name of this token.
 *
 *  @return  the constant string "error"
 */
const char* CErrorToken::GetClassName()
{
  return "error";
}
|
|
|
|
/**
 *  Replaces the parser error held by this token.
 *
 *  The destructor deletes mError, so the token owns whatever pointer it
 *  holds. The previously held error is therefore deleted here before it is
 *  overwritten; otherwise it would be leaked. Re-setting the same pointer
 *  is a no-op so that the live object is not freed.
 *
 *  @param   aError -- new error object (may be null); the token takes
 *                     ownership of it
 */
void CErrorToken::SetError(nsParserError *aError) {
  if (mError != aError) {
    delete mError;
    mError = aError;
  }
}
|
|
|
|
/**
 *  Gives read-only access to the stored parser error.
 *
 *  @return  the pointer held in mError
 */
const nsParserError* CErrorToken::GetError() {
  return mError;
}
|
|
|
|
/**
 *  Gives read access to the text held by this error token.
 *
 *  @return  reference to mTextValue
 */
const nsAString& CErrorToken::GetStringValue() {
  return mTextValue;
}
|
|
|
|
// Doctype decl token
|
|
|
|
/**
 *  Constructs a doctype declaration token for the given tag id.
 *
 *  @param   aTag -- tag id forwarded to the CHTMLToken base constructor
 */
CDoctypeDeclToken::CDoctypeDeclToken(eHTMLTags aTag) : CHTMLToken(aTag)
{
}
|
|
|
|
/**
 *  Constructs a doctype declaration token with initial text.
 *
 *  @param   aString -- text used to initialise mTextValue
 *  @param   aTag    -- tag id forwarded to the CHTMLToken base constructor
 */
CDoctypeDeclToken::CDoctypeDeclToken(const nsAString& aString, eHTMLTags aTag)
  : CHTMLToken(aTag),
    mTextValue(aString)
{
}
|
|
|
|
/**
|
|
* This method consumes a doctype element.
|
|
* Note: I'm rewriting this method to seek to the first <, since quotes can really screw us up.
|
|
*
|
|
* @update gess 9/23/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
nsresult CDoctypeDeclToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {
|
|
|
|
static const PRUnichar terminalChars[] =
|
|
{ PRUnichar('>'), PRUnichar('<'),
|
|
PRUnichar(0)
|
|
};
|
|
static const nsReadEndCondition theEndCondition(terminalChars);
|
|
|
|
nsScannerIterator start, end;
|
|
|
|
aScanner.CurrentPosition(start);
|
|
aScanner.EndReading(end);
|
|
|
|
nsresult result=aScanner.ReadUntil(start, end, theEndCondition, PR_FALSE);
|
|
|
|
if (NS_SUCCEEDED(result)) {
|
|
PRUnichar ch;
|
|
aScanner.Peek(ch);
|
|
if (ch == kGreaterThan) {
|
|
// Include '>' but not '<' since '<'
|
|
// could belong to another tag.
|
|
aScanner.GetChar(ch);
|
|
end.advance(1);
|
|
}
|
|
}
|
|
else if (!aScanner.IsIncremental()) {
|
|
// We have reached the document end but haven't
|
|
// found either a '<' or a '>'. Therefore use
|
|
// whatever we have.
|
|
result = NS_OK;
|
|
}
|
|
|
|
if (NS_SUCCEEDED(result)) {
|
|
start.advance(-2); // Make sure to consume <!
|
|
CopyUnicodeTo(start,end,mTextValue);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
 *  Reports the class name of this token.
 *
 *  @return  the constant string "doctype"
 */
const char* CDoctypeDeclToken::GetClassName()
{
  return "doctype";
}
|
|
|
|
/**
 *  Reports the token type of this token.
 *
 *  @return  eToken_doctypeDecl
 */
PRInt32 CDoctypeDeclToken::GetTokenType()
{
  return eToken_doctypeDecl;
}
|
|
|
|
/**
 *  Gives read access to the text held by this doctype token.
 *
 *  @return  reference to mTextValue
 */
const nsAString& CDoctypeDeclToken::GetStringValue() {
  return mTextValue;
}
|
|
|
|
void CDoctypeDeclToken::SetStringValue(const nsAString& aStr)
|
|
{
|
|
mTextValue.Assign(aStr);
|
|
}
|