Moved nsFileSpec and related classes into this obsolete library. Moved nsRegistry and related libreg functionality into the obsolete library. Updated many callers using the obsolete nsFile spec to use nsIFile and Necko to do file IO. Combined the following DLLs (source -> dest) uriloader -> docshell shistory -> docshell jsurl -> jsdom gkview -> gklayout Moved nsAdapterEnumerator out of xpcom/ds and into mailnews, since they're the only consumer Modifed the xpt_link tool so that you can specify a �only include� cid list that can mask CID�s that you are not interested in. Added build options: Prevent the building of xpinstall (--disable-xpinstall) Prevent the building js component loader (--disable-jsloader) A build option to only build a single profile (--enable-single-profile) A build flag to only built the required xpfe components (--disable-xpfe-components). Removal or hiding of unused functions and classes including nsEscape*, nsDequeIterator, nsRecyclingAllocatorImpl, nsDiscriminatedUnion, nsOpaqueKey, nsCRT::strlen, NS_NewCommandLineService Bug 194240, r/sr = darin, alec.
2455 lines
64 KiB
C++
2455 lines
64 KiB
C++
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/* ***** BEGIN LICENSE BLOCK *****
|
|
* Version: NPL 1.1/GPL 2.0/LGPL 2.1
|
|
*
|
|
* The contents of this file are subject to the Netscape Public License
|
|
* Version 1.1 (the "License"); you may not use this file except in
|
|
* compliance with the License. You may obtain a copy of the License at
|
|
* http://www.mozilla.org/NPL/
|
|
*
|
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
* for the specific language governing rights and limitations under the
|
|
* License.
|
|
*
|
|
* The Original Code is mozilla.org code.
|
|
*
|
|
* The Initial Developer of the Original Code is
|
|
* Netscape Communications Corporation.
|
|
* Portions created by the Initial Developer are Copyright (C) 1998
|
|
* the Initial Developer. All Rights Reserved.
|
|
*
|
|
* Contributor(s):
|
|
*
|
|
*
|
|
* Alternatively, the contents of this file may be used under the terms of
|
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
* of those above. If you wish to allow use of your version of this file only
|
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
* use your version of this file under the terms of the NPL, indicate your
|
|
* decision by deleting the provisions above and replace them with the notice
|
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
* the provisions above, a recipient may use your version of this file under
|
|
* the terms of any one of the NPL, the GPL or the LGPL.
|
|
*
|
|
* ***** END LICENSE BLOCK ***** */
|
|
|
|
#include <ctype.h>
|
|
#include <time.h>
|
|
#include <stdio.h>
|
|
#include "nsScanner.h"
|
|
#include "nsToken.h"
|
|
#include "nsIAtom.h"
|
|
#include "nsHTMLTokens.h"
|
|
#include "prtypes.h"
|
|
#include "nsDebug.h"
|
|
#include "nsHTMLTags.h"
|
|
#include "nsHTMLEntities.h"
|
|
#include "nsCRT.h"
|
|
#include "nsReadableUtils.h"
|
|
#include "nsUnicharUtils.h"
|
|
#include "nsScanner.h"
|
|
|
|
|
|
static const PRUnichar sUserdefined[] = {'u', 's', 'e', 'r', 'd', 'e', 'f',
|
|
'i', 'n', 'e', 'd'};
|
|
|
|
static const PRUnichar kAttributeTerminalChars[] = {
|
|
PRUnichar('&'), PRUnichar('\b'), PRUnichar('\t'),
|
|
PRUnichar('\n'), PRUnichar('\r'), PRUnichar(' '),
|
|
PRUnichar('>'),
|
|
PRUnichar(0)
|
|
};
|
|
|
|
|
|
/**************************************************************
|
|
And now for the token classes...
|
|
**************************************************************/
|
|
|
|
/*
|
|
* constructor from tag id
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
CHTMLToken::CHTMLToken(eHTMLTags aTag) : CToken(aTag) {
|
|
}
|
|
|
|
|
|
CHTMLToken::~CHTMLToken() {
|
|
|
|
}
|
|
|
|
/*
|
|
* constructor from tag id
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
CStartToken::CStartToken(eHTMLTags aTag) : CHTMLToken(aTag) {
|
|
mEmpty=PR_FALSE;
|
|
mContainerInfo=eFormUnknown;
|
|
#ifdef DEBUG
|
|
mAttributed = PR_FALSE;
|
|
#endif
|
|
}
|
|
|
|
CStartToken::CStartToken(const nsAString& aName) : CHTMLToken(eHTMLTag_unknown) {
|
|
mEmpty=PR_FALSE;
|
|
mContainerInfo=eFormUnknown;
|
|
mTextValue.Assign(aName);
|
|
#ifdef DEBUG
|
|
mAttributed = PR_FALSE;
|
|
#endif
|
|
}
|
|
|
|
CStartToken::CStartToken(const nsAString& aName,eHTMLTags aTag) : CHTMLToken(aTag) {
|
|
mEmpty=PR_FALSE;
|
|
mContainerInfo=eFormUnknown;
|
|
mTextValue.Assign(aName);
|
|
#ifdef DEBUG
|
|
mAttributed = PR_FALSE;
|
|
#endif
|
|
}
|
|
|
|
nsresult CStartToken::GetIDAttributeAtom(nsIAtom** aResult)
|
|
{
|
|
NS_ENSURE_ARG_POINTER(aResult);
|
|
*aResult = mIDAttributeAtom;
|
|
NS_IF_ADDREF(*aResult);
|
|
|
|
return NS_OK;
|
|
}
|
|
|
|
|
|
nsresult CStartToken::SetIDAttributeAtom(nsIAtom* aID)
|
|
{
|
|
NS_ENSURE_ARG(aID);
|
|
mIDAttributeAtom = aID;
|
|
|
|
return NS_OK;
|
|
}
|
|
|
|
|
|
/*
|
|
* This method returns the typeid (the tag type) for this token.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CStartToken::GetTypeID(){
|
|
if(eHTMLTag_unknown==mTypeID) {
|
|
mTypeID = nsHTMLTags::LookupTag(mTextValue);
|
|
}
|
|
return mTypeID;
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CStartToken::GetClassName(void) {
|
|
return "start";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CStartToken::GetTokenType(void) {
|
|
return eToken_start;
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
void CStartToken::SetEmpty(PRBool aValue) {
|
|
mEmpty=aValue;
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRBool CStartToken::IsEmpty(void) {
|
|
return mEmpty;
|
|
}
|
|
|
|
|
|
/*
|
|
* Consume the identifier portion of the start tag
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @param aFlag - contains information such as |dtd mode|view mode|doctype|etc...
|
|
* @return error result
|
|
*/
|
|
nsresult CStartToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {
|
|
|
|
//if you're here, we've already Consumed the < char, and are
|
|
//ready to Consume the rest of the open tag identifier.
|
|
//Stop consuming as soon as you see a space or a '>'.
|
|
//NOTE: We don't Consume the tag attributes here, nor do we eat the ">"
|
|
|
|
nsresult result=NS_OK;
|
|
if (aFlag & NS_IPARSER_FLAG_HTML) {
|
|
nsAutoString theSubstr;
|
|
result=aScanner.GetIdentifier(theSubstr,PR_TRUE);
|
|
mTypeID = (PRInt32)nsHTMLTags::LookupTag(theSubstr);
|
|
if(eHTMLTag_userdefined==mTypeID) {
|
|
mTextValue=theSubstr;
|
|
}
|
|
}
|
|
else {
|
|
//added PR_TRUE to readId() call below to fix bug 46083. The problem was that the tag given
|
|
//was written <title_> but since we didn't respect the '_', we only saw <title>. Then
|
|
//we searched for end title, which never comes (they give </title_>).
|
|
|
|
result=aScanner.ReadIdentifier(mTextValue,PR_TRUE);
|
|
mTypeID = nsHTMLTags::LookupTag(mTextValue);
|
|
}
|
|
|
|
if (NS_SUCCEEDED(result) && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
|
|
result = aScanner.SkipWhitespace(mNewlineCount);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
|
|
const nsAString& CStartToken::GetStringValue()
|
|
{
|
|
if((eHTMLTag_unknown<mTypeID) && (mTypeID<eHTMLTag_text)) {
|
|
if(!mTextValue.Length()) {
|
|
mTextValue.Assign(nsHTMLTags::GetStringValue((nsHTMLTag) mTypeID));
|
|
}
|
|
}
|
|
return mTextValue;
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param anOutputString will recieve the result
|
|
* @return nada
|
|
*/
|
|
void CStartToken::GetSource(nsString& anOutputString){
|
|
anOutputString.Append(PRUnichar('<'));
|
|
/*
|
|
* Watch out for Bug 15204
|
|
*/
|
|
if(mTrailingContent.Length()>0)
|
|
anOutputString=mTrailingContent;
|
|
else {
|
|
if(mTextValue.Length()>0)
|
|
anOutputString.Append(mTextValue);
|
|
else
|
|
anOutputString.Assign(GetTagName(mTypeID));
|
|
anOutputString.Append(PRUnichar('>'));
|
|
}
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update harishd 03/23/00
|
|
* @param result appended to the output string.
|
|
* @return nada
|
|
*/
|
|
void CStartToken::AppendSourceTo(nsAString& anOutputString){
|
|
anOutputString.Append(PRUnichar('<'));
|
|
/*
|
|
* Watch out for Bug 15204
|
|
*/
|
|
if(mTrailingContent.Length()>0)
|
|
anOutputString+=mTrailingContent;
|
|
else {
|
|
if(mTextValue.Length()>0)
|
|
anOutputString+=mTextValue;
|
|
else
|
|
anOutputString.Append(GetTagName(mTypeID));
|
|
anOutputString.Append(PRUnichar('>'));
|
|
}
|
|
}
|
|
|
|
/*
|
|
* constructor from tag id
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
CEndToken::CEndToken(eHTMLTags aTag) : CHTMLToken(aTag) {
|
|
}
|
|
|
|
CEndToken::CEndToken(const nsAString& aName) : CHTMLToken(eHTMLTag_unknown) {
|
|
mTextValue.Assign(aName);
|
|
}
|
|
|
|
CEndToken::CEndToken(const nsAString& aName,eHTMLTags aTag) : CHTMLToken(aTag) {
|
|
mTextValue.Assign(aName);
|
|
}
|
|
|
|
/*
|
|
* Consume the identifier portion of the end tag
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @param aFlag - contains information such as |dtd mode|view mode|doctype|etc...
|
|
* @return error result
|
|
*/
|
|
nsresult CEndToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag)
|
|
{
|
|
nsresult result = NS_OK;
|
|
if (aFlag & NS_IPARSER_FLAG_HTML) {
|
|
nsAutoString theSubstr;
|
|
result=aScanner.GetIdentifier(theSubstr,PR_TRUE);
|
|
NS_ENSURE_SUCCESS(result, result);
|
|
|
|
mTypeID = (PRInt32)nsHTMLTags::LookupTag(theSubstr);
|
|
if(eHTMLTag_userdefined==mTypeID) {
|
|
mTextValue=theSubstr;
|
|
}
|
|
}
|
|
else {
|
|
result = aScanner.ReadIdentifier(mTextValue,PR_TRUE);
|
|
NS_ENSURE_SUCCESS(result, result);
|
|
|
|
mTypeID = nsHTMLTags::LookupTag(mTextValue);
|
|
}
|
|
|
|
if (!(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
|
|
result = aScanner.SkipWhitespace(mNewlineCount);
|
|
NS_ENSURE_SUCCESS(result, result);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
|
|
/*
|
|
* Asks the token to determine the <i>HTMLTag type</i> of
|
|
* the token. This turns around and looks up the tag name
|
|
* in the tag dictionary.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return eHTMLTag id of this endtag
|
|
*/
|
|
PRInt32 CEndToken::GetTypeID(){
|
|
if(eHTMLTag_unknown==mTypeID) {
|
|
mTypeID = nsHTMLTags::LookupTag(mTextValue);
|
|
switch(mTypeID) {
|
|
case eHTMLTag_dir:
|
|
case eHTMLTag_menu:
|
|
mTypeID=eHTMLTag_ul;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
return mTypeID;
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CEndToken::GetClassName(void) {
|
|
return "/end";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CEndToken::GetTokenType(void) {
|
|
return eToken_end;
|
|
}
|
|
|
|
const nsAString& CEndToken::GetStringValue()
|
|
{
|
|
if((eHTMLTag_unknown<mTypeID) && (mTypeID<eHTMLTag_text)) {
|
|
if(!mTextValue.Length()) {
|
|
mTextValue.Assign(nsHTMLTags::GetStringValue((nsHTMLTag) mTypeID));
|
|
}
|
|
}
|
|
return mTextValue;
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param anOutputString will recieve the result
|
|
* @return nada
|
|
*/
|
|
void CEndToken::GetSource(nsString& anOutputString){
|
|
anOutputString.Append(NS_LITERAL_STRING("</"));
|
|
if(mTextValue.Length()>0)
|
|
anOutputString.Append(mTextValue);
|
|
else
|
|
anOutputString.Append(GetTagName(mTypeID));
|
|
anOutputString.Append(NS_LITERAL_STRING(">"));
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update harishd 03/23/00
|
|
* @param result appended to the output string.
|
|
* @return nada
|
|
*/
|
|
void CEndToken::AppendSourceTo(nsAString& anOutputString){
|
|
anOutputString.Append(NS_LITERAL_STRING("</"));
|
|
if(mTextValue.Length()>0)
|
|
anOutputString.Append(mTextValue);
|
|
else
|
|
anOutputString.Append(GetTagName(mTypeID));
|
|
anOutputString.Append(NS_LITERAL_STRING(">"));
|
|
}
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CTextToken::CTextToken() : CHTMLToken(eHTMLTag_text) {
|
|
}
|
|
|
|
|
|
/*
|
|
* string based constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CTextToken::CTextToken(const nsAString& aName) : CHTMLToken(eHTMLTag_text) {
|
|
mTextValue.Rebind(aName);
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CTextToken::GetClassName(void) {
|
|
return "text";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CTextToken::GetTokenType(void) {
|
|
return eToken_text;
|
|
}
|
|
|
|
PRInt32 CTextToken::GetTextLength(void) {
|
|
return mTextValue.Length();
|
|
}
|
|
|
|
/*
|
|
* Consume as much clear text from scanner as possible.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
nsresult CTextToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {
|
|
static const PRUnichar theTerminalsChars[] =
|
|
{ PRUnichar('\n'), PRUnichar('\r'), PRUnichar('&'), PRUnichar('<'),
|
|
PRUnichar(0) };
|
|
static const nsReadEndCondition theEndCondition(theTerminalsChars);
|
|
nsresult result=NS_OK;
|
|
PRBool done=PR_FALSE;
|
|
nsReadingIterator<PRUnichar> origin, start, end;
|
|
|
|
// Start scanning after the first character, because we know it to
|
|
// be part of this text token (we wouldn't have come here if it weren't)
|
|
aScanner.CurrentPosition(origin);
|
|
start = origin;
|
|
++start;
|
|
aScanner.SetPosition(start);
|
|
aScanner.EndReading(end);
|
|
|
|
while((NS_OK==result) && (!done)) {
|
|
result=aScanner.ReadUntil(start, end, theEndCondition, PR_FALSE);
|
|
if(NS_OK==result) {
|
|
result=aScanner.Peek(aChar);
|
|
|
|
if(((kCR==aChar) || (kNewLine==aChar)) && (NS_OK==result)) {
|
|
result=aScanner.GetChar(aChar); //strip off the char
|
|
PRUnichar theNextChar;
|
|
result=aScanner.Peek(theNextChar); //then see what's next.
|
|
switch(aChar) {
|
|
case kCR:
|
|
// result=aScanner.GetChar(aChar);
|
|
if(kLF==theNextChar) {
|
|
// If the "\r" is followed by a "\n", don't replace it and
|
|
// let it be ignored by the layout system
|
|
end.advance(2);
|
|
result=aScanner.GetChar(theNextChar);
|
|
}
|
|
else {
|
|
// If it standalone, replace the "\r" with a "\n" so that
|
|
// it will be considered by the layout system
|
|
aScanner.ReplaceCharacter(end, kLF);
|
|
++end;
|
|
}
|
|
++mNewlineCount;
|
|
break;
|
|
case kLF:
|
|
++end;
|
|
++mNewlineCount;
|
|
break;
|
|
} //switch
|
|
}
|
|
else done=PR_TRUE;
|
|
}
|
|
}
|
|
|
|
aScanner.BindSubstring(mTextValue, origin, end);
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* Consume as much clear text from scanner as possible.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
nsresult CTextToken::ConsumeUntil(PRUnichar aChar,PRBool aIgnoreComments,nsScanner& aScanner,
|
|
nsString& aEndTagName,PRInt32 aFlag,PRBool& aFlushTokens){
|
|
nsresult result=NS_OK;
|
|
nsReadingIterator<PRUnichar> theStartOffset, theCurrOffset, theTermStrPos, theStartCommentPos, theAltTermStrPos, endPos;
|
|
PRBool done=PR_FALSE;
|
|
PRBool theLastIteration=PR_FALSE;
|
|
|
|
aScanner.CurrentPosition(theStartOffset);
|
|
theCurrOffset = theStartOffset;
|
|
aScanner.EndReading(endPos);
|
|
theTermStrPos = theStartCommentPos = theAltTermStrPos = endPos;
|
|
|
|
// ALGORITHM: *** The performance is based on correctness of the document ***
|
|
// 1. Look for a '<' character. This could be
|
|
// a) Start of a comment (<!--), b) Start of the terminal string, or c) a start of a tag.
|
|
// We are interested in a) and b). c) is ignored because in CDATA we don't care for tags.
|
|
// NOTE: Technically speaking in CDATA we should ignore the comments too!! But for compatibility
|
|
// we don't.
|
|
// 2. Having the offset, for '<', search for the terminal string from there on and record its offset.
|
|
// 3. From the same '<' offset also search for start of a comment '<!--'. If found search for
|
|
// end comment '-->' between the terminal string and '<!--'. If you did not find the end
|
|
// comment, then we have a malformed document, i.e., this section has a prematured terminal string
|
|
// Ex. <SCRIPT><!-- document.write('</SCRIPT>') //--> </SCRIPT>. But anyway record terminal string's
|
|
// offset and update the current offset to the terminal string (prematured) offset and goto step 1.
|
|
// 4. Amen...If you found a terminal string and '-->'. Otherwise goto step 1.
|
|
// 5. If the end of the document is reached and if we still don't have the condition in step 4. then
|
|
// assume that the prematured terminal string is the actual terminal string and goto step 1. This
|
|
// will be our last iteration.
|
|
|
|
const NS_NAMED_LITERAL_STRING(ltslash, "</");
|
|
const nsAString& theTerminalString = ltslash + aEndTagName;
|
|
|
|
PRUint32 termStrLen=theTerminalString.Length();
|
|
while((result == NS_OK) && !done) {
|
|
PRBool found = PR_FALSE;
|
|
nsReadingIterator<PRUnichar> gtOffset,ltOffset = theCurrOffset;
|
|
while (FindCharInReadable(PRUnichar(kLessThan), ltOffset, endPos) &&
|
|
Distance(ltOffset, endPos) >= termStrLen) {
|
|
// Make a copy of the (presumed) end tag and
|
|
// do a case-insensitive comparison
|
|
|
|
nsReadingIterator<PRUnichar> start(ltOffset), end(ltOffset);
|
|
end.advance(termStrLen);
|
|
|
|
if (CaseInsensitiveFindInReadable(theTerminalString,start,end) &&
|
|
end != endPos && (*end == '>' || *end == ' ' ||
|
|
*end == '\t' || *end == '\n' ||
|
|
*end == '\r' || *end == '\b')) {
|
|
gtOffset = end;
|
|
if (FindCharInReadable(PRUnichar(kGreaterThan), gtOffset, endPos)) {
|
|
found = PR_TRUE;
|
|
theTermStrPos = start;
|
|
}
|
|
break;
|
|
}
|
|
ltOffset.advance(1);
|
|
}
|
|
|
|
if (found && theTermStrPos != endPos) {
|
|
if(!(aFlag & NS_IPARSER_FLAG_STRICT_MODE) &&
|
|
!theLastIteration && !aIgnoreComments) {
|
|
nsReadingIterator<PRUnichar> endComment(ltOffset);
|
|
endComment.advance(5);
|
|
|
|
if ((theStartCommentPos == endPos) &&
|
|
FindInReadable(NS_LITERAL_STRING("<!--"), theCurrOffset, endComment)) {
|
|
theStartCommentPos = theCurrOffset;
|
|
}
|
|
|
|
if (theStartCommentPos != endPos) {
|
|
// Search for --> between <!-- and </TERMINALSTRING>.
|
|
theCurrOffset = theStartCommentPos;
|
|
nsReadingIterator<PRUnichar> terminal(theTermStrPos);
|
|
if (!RFindInReadable(NS_LITERAL_STRING("-->"),
|
|
theCurrOffset, terminal)) {
|
|
// If you're here it means that we have a bogus terminal string.
|
|
// Even though it is bogus, the position of the terminal string
|
|
// could be helpful in case we hit the rock bottom.
|
|
theAltTermStrPos = theTermStrPos;
|
|
|
|
// We did not find '-->' so keep searching for terminal string.
|
|
theCurrOffset = theTermStrPos;
|
|
theCurrOffset.advance(termStrLen);
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Make sure to preserve the end tag's representation in viewsource
|
|
if(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
|
|
CopyUnicodeTo(ltOffset.advance(2),gtOffset,aEndTagName);
|
|
}
|
|
|
|
aScanner.BindSubstring(mTextValue, theStartOffset, theTermStrPos);
|
|
aScanner.SetPosition(gtOffset.advance(1));
|
|
|
|
// We found </SCRIPT>...permit flushing -> Ref: Bug 22485
|
|
aFlushTokens=PR_TRUE;
|
|
done = PR_TRUE;
|
|
}
|
|
else {
|
|
// We end up here if:
|
|
// a) when the buffer runs out ot data.
|
|
// b) when the terminal string is not found.
|
|
if(!aScanner.IsIncremental()) {
|
|
if(theAltTermStrPos != endPos) {
|
|
// If you're here it means..we hit the rock bottom and therefore switch to plan B.
|
|
theCurrOffset = theAltTermStrPos;
|
|
theLastIteration = PR_TRUE;
|
|
}
|
|
else {
|
|
done = PR_TRUE; // Do this to fix Bug. 35456
|
|
}
|
|
}
|
|
else {
|
|
result=kEOF;
|
|
}
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
void CTextToken::CopyTo(nsAString& aStr)
|
|
{
|
|
aStr.Assign(mTextValue);
|
|
}
|
|
|
|
const nsAString& CTextToken::GetStringValue(void)
|
|
{
|
|
return mTextValue;
|
|
}
|
|
|
|
void CTextToken::Bind(nsScanner* aScanner, nsReadingIterator<PRUnichar>& aStart, nsReadingIterator<PRUnichar>& aEnd)
|
|
{
|
|
aScanner->BindSubstring(mTextValue, aStart, aEnd);
|
|
}
|
|
|
|
void CTextToken::Bind(const nsAString& aStr)
|
|
{
|
|
mTextValue.Rebind(aStr);
|
|
}
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update vidur 11/12/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CCDATASectionToken::CCDATASectionToken() : CHTMLToken(eHTMLTag_unknown) {
|
|
}
|
|
|
|
|
|
/*
|
|
* string based constructor
|
|
*
|
|
* @update vidur 11/12/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CCDATASectionToken::CCDATASectionToken(const nsAString& aName) : CHTMLToken(eHTMLTag_unknown) {
|
|
mTextValue.Assign(aName);
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update vidur 11/12/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CCDATASectionToken::GetClassName(void) {
|
|
return "cdatasection";
|
|
}
|
|
|
|
/*
|
|
*
|
|
* @update vidur 11/12/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CCDATASectionToken::GetTokenType(void) {
|
|
return eToken_cdatasection;
|
|
}
|
|
|
|
/*
|
|
* Consume as much marked test from scanner as possible.
|
|
*
|
|
* @update rgess 12/15/99: had to handle case: "<![ ! IE 5]>", in addition to "<![..[..]]>".
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
nsresult CCDATASectionToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {
|
|
static const PRUnichar theTerminalsChars[] =
|
|
{ PRUnichar('\r'), PRUnichar(']'), PRUnichar(0) };
|
|
static const nsReadEndCondition theEndCondition(theTerminalsChars);
|
|
nsresult result=NS_OK;
|
|
PRBool done=PR_FALSE;
|
|
|
|
while((NS_OK==result) && (!done)) {
|
|
result=aScanner.ReadUntil(mTextValue,theEndCondition,PR_FALSE);
|
|
if(NS_OK==result) {
|
|
result=aScanner.Peek(aChar);
|
|
if((kCR==aChar) && (NS_OK==result)) {
|
|
result=aScanner.GetChar(aChar); //strip off the \r
|
|
result=aScanner.Peek(aChar); //then see what's next.
|
|
if(NS_OK==result) {
|
|
switch(aChar) {
|
|
case kCR:
|
|
result=aScanner.GetChar(aChar); //strip off the \r
|
|
mTextValue.Append(NS_LITERAL_STRING("\n\n"));
|
|
break;
|
|
case kNewLine:
|
|
//which means we saw \r\n, which becomes \n
|
|
result=aScanner.GetChar(aChar); //strip off the \n
|
|
//now fall through on purpose...
|
|
default:
|
|
mTextValue.Append(NS_LITERAL_STRING("\n"));
|
|
break;
|
|
} //switch
|
|
} //if
|
|
}
|
|
else if (']'==aChar) {
|
|
result=aScanner.GetChar(aChar); //strip off the ]
|
|
mTextValue.Append(aChar);
|
|
result=aScanner.Peek(aChar); //then see what's next.
|
|
if((NS_OK==result) && (kRightSquareBracket==aChar)) {
|
|
result=aScanner.GetChar(aChar); //strip off the second ]
|
|
mTextValue.Append(aChar);
|
|
}
|
|
nsAutoString dummy; // skip any bad data
|
|
result=aScanner.ReadUntil(dummy,kGreaterThan,PR_FALSE);
|
|
if (NS_OK==result) {
|
|
result=aScanner.GetChar(aChar); //strip off the >
|
|
done=PR_TRUE;
|
|
}
|
|
}
|
|
else done=PR_TRUE;
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
const nsAString& CCDATASectionToken::GetStringValue(void)
|
|
{
|
|
return mTextValue;
|
|
}
|
|
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CMarkupDeclToken::CMarkupDeclToken() : CHTMLToken(eHTMLTag_markupDecl) {
|
|
}
|
|
|
|
|
|
/*
|
|
* string based constructor
|
|
*
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CMarkupDeclToken::CMarkupDeclToken(const nsAString& aName) : CHTMLToken(eHTMLTag_markupDecl) {
|
|
mTextValue.Rebind(aName);
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CMarkupDeclToken::GetClassName(void) {
|
|
return "markupdeclaration";
|
|
}
|
|
|
|
/*
|
|
*
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CMarkupDeclToken::GetTokenType(void) {
|
|
return eToken_markupDecl;
|
|
}
|
|
|
|
/*
|
|
* Consume as much declaration from scanner as possible.
|
|
* Declaration is a markup declaration of ELEMENT, ATTLIST, ENTITY or
|
|
* NOTATION, which can span multiple lines and ends in >.
|
|
*
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
nsresult CMarkupDeclToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {
|
|
static const PRUnichar theTerminalsChars[] =
|
|
{ PRUnichar('\n'), PRUnichar('\r'), PRUnichar('\''), PRUnichar('"'),
|
|
PRUnichar('>'),
|
|
PRUnichar(0) };
|
|
static const nsReadEndCondition theEndCondition(theTerminalsChars);
|
|
nsresult result=NS_OK;
|
|
PRBool done=PR_FALSE;
|
|
PRUnichar quote=0;
|
|
|
|
nsReadingIterator<PRUnichar> origin, start, end;
|
|
aScanner.CurrentPosition(origin);
|
|
start = origin;
|
|
|
|
while((NS_OK==result) && (!done)) {
|
|
aScanner.SetPosition(start);
|
|
result=aScanner.ReadUntil(start, end, theEndCondition, PR_FALSE);
|
|
if(NS_OK==result) {
|
|
result=aScanner.Peek(aChar);
|
|
|
|
if(NS_OK==result) {
|
|
PRUnichar theNextChar=0;
|
|
if ((kCR==aChar) || (kNewLine==aChar)) {
|
|
result=aScanner.GetChar(aChar); //strip off the char
|
|
result=aScanner.Peek(theNextChar); //then see what's next.
|
|
}
|
|
switch(aChar) {
|
|
case kCR:
|
|
// result=aScanner.GetChar(aChar);
|
|
if(kLF==theNextChar) {
|
|
// If the "\r" is followed by a "\n", don't replace it and
|
|
// let it be ignored by the layout system
|
|
end.advance(2);
|
|
result=aScanner.GetChar(theNextChar);
|
|
}
|
|
else {
|
|
// If it standalone, replace the "\r" with a "\n" so that
|
|
// it will be considered by the layout system
|
|
aScanner.ReplaceCharacter(end, kLF);
|
|
++end;
|
|
}
|
|
++mNewlineCount;
|
|
break;
|
|
case kLF:
|
|
++end;
|
|
++mNewlineCount;
|
|
break;
|
|
case '\'':
|
|
case '"':
|
|
++end;
|
|
if (quote) {
|
|
if (quote == aChar) {
|
|
quote = 0;
|
|
}
|
|
} else {
|
|
quote = aChar;
|
|
}
|
|
break;
|
|
case kGreaterThan:
|
|
if (quote) {
|
|
++end;
|
|
} else {
|
|
start = end;
|
|
++start; // Note that start is wrong after this, we just avoid temp var
|
|
aScanner.SetPosition(start); // Skip the >
|
|
done=PR_TRUE;
|
|
}
|
|
break;
|
|
default:
|
|
NS_ABORT_IF_FALSE(0,"should not happen, switch is missing cases?");
|
|
break;
|
|
} //switch
|
|
start = end;
|
|
}
|
|
else done=PR_TRUE;
|
|
} // if read until !ok
|
|
} // while
|
|
|
|
aScanner.BindSubstring(mTextValue, origin, end);
|
|
|
|
return result;
|
|
}
|
|
|
|
const nsAString& CMarkupDeclToken::GetStringValue(void)
|
|
{
|
|
return mTextValue;
|
|
}
|
|
|
|
|
|
/*
|
|
* Default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CCommentToken::CCommentToken() : CHTMLToken(eHTMLTag_comment) {
|
|
}
|
|
|
|
|
|
/*
|
|
* Copy constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
CCommentToken::CCommentToken(const nsAString& aName) : CHTMLToken(eHTMLTag_comment) {
|
|
mComment.Rebind(aName);
|
|
}
|
|
|
|
void CCommentToken::AppendSourceTo(nsAString& anOutputString){
|
|
anOutputString.Append(mCommentDecl);
|
|
}
|
|
|
|
static PRBool IsCommentEnd(
|
|
const nsReadingIterator<PRUnichar>& aCurrent,
|
|
const nsReadingIterator<PRUnichar>& aEnd,
|
|
nsReadingIterator<PRUnichar>& aGt)
|
|
{
|
|
nsReadingIterator<PRUnichar> current = aCurrent;
|
|
PRInt32 dashes = 0;
|
|
|
|
while ((current != aEnd) && (dashes != 2)) {
|
|
if (*current == kGreaterThan) {
|
|
aGt = current;
|
|
return PR_TRUE;
|
|
}
|
|
if (*current == PRUnichar('-')) {
|
|
++dashes;
|
|
} else {
|
|
dashes = 0;
|
|
}
|
|
++current;
|
|
}
|
|
|
|
return PR_FALSE;
|
|
}
|
|
|
|
nsresult CCommentToken::ConsumeStrictComment(nsScanner& aScanner)
|
|
{
|
|
// <!--[... -- ... -- ...]*-->
|
|
/*********************************************************
|
|
NOTE: This algorithm does a fine job of handling comments
|
|
when they're formatted per spec, but if they're not
|
|
we don't handle them well.
|
|
*********************************************************/
|
|
nsReadingIterator<PRUnichar> end, current, gt, lt;
|
|
aScanner.EndReading(end);
|
|
aScanner.CurrentPosition(current);
|
|
|
|
nsReadingIterator<PRUnichar> beginData = end;
|
|
|
|
lt = current;
|
|
lt.advance(-2); // <!
|
|
|
|
// Regular comment must start with <!--
|
|
if (current != end && *current == kMinus &&
|
|
++current != end && *current == kMinus &&
|
|
++current != end) {
|
|
nsReadingIterator<PRUnichar> currentEnd = end;
|
|
PRBool balancedComment = PR_FALSE;
|
|
static NS_NAMED_LITERAL_STRING(dashes,"--");
|
|
beginData = current;
|
|
|
|
while (FindInReadable(dashes, current, currentEnd)) {
|
|
current.advance(2);
|
|
|
|
balancedComment = !balancedComment; // We need to match '--' with '--'
|
|
|
|
if (balancedComment && IsCommentEnd(current, end, gt)) {
|
|
// done
|
|
current.advance(-2);
|
|
if (beginData != current) { // protects from <!---->
|
|
aScanner.BindSubstring(mComment, beginData, current);
|
|
}
|
|
aScanner.BindSubstring(mCommentDecl, lt, ++gt);
|
|
aScanner.SetPosition(gt);
|
|
return NS_OK;
|
|
} else {
|
|
// Continue after the last '--'
|
|
currentEnd = end;
|
|
}
|
|
}
|
|
}
|
|
|
|
// If beginData == end, we did not find opening '--'
|
|
if (beginData == end) {
|
|
// This might have been empty comment: <!>
|
|
// Or it could have been something completely bogus like: <!This is foobar>
|
|
// Handle both cases below
|
|
aScanner.CurrentPosition(current);
|
|
beginData = current;
|
|
if (FindCharInReadable('>', current, end)) {
|
|
aScanner.BindSubstring(mComment, beginData, current);
|
|
aScanner.BindSubstring(mCommentDecl, lt, ++current);
|
|
aScanner.SetPosition(current);
|
|
return NS_OK;
|
|
}
|
|
}
|
|
|
|
if (aScanner.IsIncremental()) {
|
|
// We got here because we saw the beginning of a comment,
|
|
// but not yet the end, and we are still loading the page. In that
|
|
// case the return value here will cause us to unwind,
|
|
// wait for more content, and try again.
|
|
// XXX For performance reasons we should cache where we were, and
|
|
// continue from there for next call
|
|
return kEOF; // not really an nsresult, but...
|
|
}
|
|
|
|
// XXX We should return kNotAComment, parse comment open as text, and parse
|
|
// the rest of the document normally. Now we ALMOST do that: <! is
|
|
// missing from the content model.
|
|
return NS_OK;
|
|
}
|
|
|
|
nsresult CCommentToken::ConsumeQuirksComment(nsScanner& aScanner)
|
|
{
|
|
// <![-[-]] ... [[-]-|--!]>
|
|
/*********************************************************
|
|
NOTE: This algorithm does a fine job of handling comments
|
|
commonly used, but it doesn't really consume them
|
|
per spec (But then, neither does IE or Nav).
|
|
*********************************************************/
|
|
nsReadingIterator<PRUnichar> end, current;
|
|
aScanner.EndReading(end);
|
|
aScanner.CurrentPosition(current);
|
|
nsReadingIterator<PRUnichar> beginData = current,
|
|
beginLastMinus = end,
|
|
bestAltCommentEnd = end,
|
|
lt = current;
|
|
lt.advance(-2); // <!
|
|
|
|
// When we get here, we have always already consumed <!
|
|
// Skip over possible leading minuses
|
|
if (current != end && *current == kMinus) {
|
|
beginLastMinus = current;
|
|
++current;
|
|
++beginData;
|
|
if (current != end && *current == kMinus) { // <!--
|
|
beginLastMinus = current;
|
|
++current;
|
|
++beginData;
|
|
// Long form comment
|
|
|
|
nsReadingIterator<PRUnichar> currentEnd = end, gt = end;
|
|
|
|
// Find the end of the comment
|
|
while (FindCharInReadable(kGreaterThan, current, currentEnd)) {
|
|
gt = current;
|
|
if (bestAltCommentEnd == end) {
|
|
bestAltCommentEnd = gt;
|
|
}
|
|
--current;
|
|
PRBool goodComment = PR_FALSE;
|
|
if (current != beginLastMinus && *current == kMinus) { // ->
|
|
--current;
|
|
if (current != beginLastMinus && *current == kMinus) { // -->
|
|
goodComment = PR_TRUE;
|
|
--current;
|
|
}
|
|
} else if (current != beginLastMinus && *current == '!') {
|
|
--current;
|
|
if (current != beginLastMinus && *current == kMinus) {
|
|
--current;
|
|
if (current != beginLastMinus && *current == kMinus) { // --!>
|
|
--current;
|
|
goodComment = PR_TRUE;
|
|
}
|
|
}
|
|
} else if (current == beginLastMinus) {
|
|
goodComment = PR_TRUE;
|
|
}
|
|
|
|
if (goodComment) {
|
|
// done
|
|
if (beginLastMinus != current) { // protects from <!---->
|
|
aScanner.BindSubstring(mComment, beginData, ++current);
|
|
}
|
|
aScanner.BindSubstring(mCommentDecl, lt, ++gt);
|
|
aScanner.SetPosition(gt);
|
|
return NS_OK;
|
|
} else {
|
|
// try again starting after the last '>'
|
|
current = ++gt;
|
|
currentEnd = end;
|
|
}
|
|
} //while
|
|
|
|
if (aScanner.IsIncremental()) {
|
|
// We got here because we saw the beginning of a comment,
|
|
// but not yet the end, and we are still loading the page. In that
|
|
// case the return value here will cause us to unwind,
|
|
// wait for more content, and try again.
|
|
// XXX For performance reasons we should cache where we were, and
|
|
// continue from there for next call
|
|
return kEOF; // not really an nsresult, but...
|
|
}
|
|
|
|
// If you're here, then we're in a special state.
|
|
// The problem at hand is that we've hit the end of the document without finding the normal endcomment delimiter "-->".
|
|
// In this case, the first thing we try is to see if we found an alternate endcomment delimiter ">".
|
|
// If so, rewind just pass that, and use everything up to that point as your comment.
|
|
// If not, the document has no end comment and should be treated as one big comment.
|
|
gt = bestAltCommentEnd;
|
|
if (beginData != gt) { // protects from <!-->
|
|
aScanner.BindSubstring(mComment, beginData, gt);
|
|
}
|
|
if (gt != end) {
|
|
++gt;
|
|
}
|
|
aScanner.BindSubstring(mCommentDecl, lt, gt);
|
|
aScanner.SetPosition(gt);
|
|
return NS_OK;
|
|
}
|
|
}
|
|
|
|
// This could be short form of comment
|
|
// Find the end of the comment
|
|
current = beginData;
|
|
if (FindCharInReadable(kGreaterThan, current, end)) {
|
|
nsReadingIterator<PRUnichar> gt = current;
|
|
if (current != beginData) {
|
|
--current;
|
|
if (current != beginData && *current == kMinus) { // ->
|
|
--current;
|
|
if (current != beginData && *current == kMinus) { // -->
|
|
--current;
|
|
}
|
|
} else if (current != beginData && *current == '!') { // !>
|
|
--current;
|
|
if (current != beginData && *current == kMinus) { // -!>
|
|
--current;
|
|
if (current != beginData && *current == kMinus) { // --!>
|
|
--current;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (current != gt) {
|
|
aScanner.BindSubstring(mComment, beginData, ++current);
|
|
}
|
|
aScanner.BindSubstring(mCommentDecl, lt, ++gt);
|
|
aScanner.SetPosition(gt);
|
|
return NS_OK;
|
|
}
|
|
|
|
return kEOF; // not really an nsresult, but...
|
|
}
|
|
|
|
/*
|
|
* Consume the identifier portion of the comment.
|
|
* Note that we've already eaten the "<!" portion.
|
|
*
|
|
* @update gess 16June2000
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
nsresult CCommentToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {
|
|
nsresult result=PR_TRUE;
|
|
|
|
if (aFlag & NS_IPARSER_FLAG_STRICT_MODE) {
|
|
//Enabling strict comment parsing for Bug 53011 and 2749 contradicts!!!!
|
|
result = ConsumeStrictComment(aScanner);
|
|
}
|
|
else {
|
|
result = ConsumeQuirksComment(aScanner);
|
|
}
|
|
|
|
if (NS_SUCCEEDED(result)) {
|
|
mNewlineCount = !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) ? mCommentDecl.CountChar(kNewLine) : -1;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
const nsAString& CCommentToken::GetStringValue(void)
|
|
{
|
|
return mComment;
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CCommentToken::GetClassName(void){
|
|
return "/**/";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CCommentToken::GetTokenType(void) {
|
|
return eToken_comment;
|
|
}
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CNewlineToken::CNewlineToken() : CHTMLToken(eHTMLTag_newline) {
|
|
}
|
|
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CNewlineToken::GetClassName(void) {
|
|
return "crlf";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CNewlineToken::GetTokenType(void) {
|
|
return eToken_newline;
|
|
}
|
|
|
|
|
|
static nsSlidingSubstring* gNewlineStr;
|
|
void CNewlineToken::AllocNewline()
|
|
{
|
|
gNewlineStr = new nsSlidingSubstring(NS_LITERAL_STRING("\n"));
|
|
}
|
|
|
|
void CNewlineToken::FreeNewline()
|
|
{
|
|
if (gNewlineStr) {
|
|
delete gNewlineStr;
|
|
gNewlineStr = nsnull;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* This method retrieves the value of this internal string.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @return nsString reference to internal string value
|
|
*/
|
|
const nsAString& CNewlineToken::GetStringValue(void) {
|
|
return *gNewlineStr;
|
|
}
|
|
|
|
/*
|
|
* Consume as many cr/lf pairs as you can find.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
nsresult CNewlineToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {
|
|
|
|
/*******************************************************************
|
|
|
|
Here's what the HTML spec says about newlines:
|
|
|
|
"A line break is defined to be a carriage return (
),
|
|
a line feed (
), or a carriage return/line feed pair.
|
|
All line breaks constitute white space."
|
|
|
|
*******************************************************************/
|
|
|
|
PRUnichar theChar;
|
|
nsresult result=aScanner.Peek(theChar);
|
|
|
|
if(NS_OK==result) {
|
|
switch(aChar) {
|
|
case kNewLine:
|
|
if(kCR==theChar) {
|
|
result=aScanner.GetChar(theChar);
|
|
}
|
|
break;
|
|
case kCR:
|
|
//convert CRLF into just CR
|
|
if(kNewLine==theChar) {
|
|
result=aScanner.GetChar(theChar);
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
mNewlineCount = 1;
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CAttributeToken::CAttributeToken() : CHTMLToken(eHTMLTag_unknown) {
|
|
mHasEqualWithoutValue=PR_FALSE;
|
|
#ifdef DEBUG
|
|
mLastAttribute = PR_FALSE;
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* string based constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string value to init token name with
|
|
* @return
|
|
*/
|
|
CAttributeToken::CAttributeToken(const nsAString& aName) : CHTMLToken(eHTMLTag_unknown) {
|
|
mTextValue.Assign(aName);
|
|
mHasEqualWithoutValue=PR_FALSE;
|
|
#ifdef DEBUG
|
|
mLastAttribute = PR_FALSE;
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
* construct initializing data to
|
|
* key value pair
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string value to init token name with
|
|
* @return
|
|
*/
|
|
CAttributeToken::CAttributeToken(const nsAString& aKey, const nsAString& aName) : CHTMLToken(eHTMLTag_unknown) {
|
|
mTextValue.Assign(aName);
|
|
mTextKey.Rebind(aKey);
|
|
mHasEqualWithoutValue=PR_FALSE;
|
|
#ifdef DEBUG
|
|
mLastAttribute = PR_FALSE;
|
|
#endif
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CAttributeToken::GetClassName(void) {
|
|
return "attr";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CAttributeToken::GetTokenType(void) {
|
|
return eToken_attribute;
|
|
}
|
|
|
|
/*
|
|
* Removes non-alpha-non-digit characters from the end of a KEY
|
|
*
|
|
* @update harishd 07/15/99
|
|
* @param
|
|
* @return
|
|
*/
|
|
void CAttributeToken::SanitizeKey() {
|
|
PRInt32 length=mTextKey.Length();
|
|
if(length > 0) {
|
|
nsReadingIterator<PRUnichar> iter, begin, end;
|
|
mTextKey.BeginReading(begin);
|
|
mTextKey.EndReading(end);
|
|
iter = end;
|
|
|
|
// Look for the first legal character starting from
|
|
// the end of the string
|
|
do {
|
|
--iter;
|
|
} while (!nsCRT::IsAsciiAlpha(*iter) &&
|
|
!nsCRT::IsAsciiDigit(*iter) &&
|
|
(iter != begin));
|
|
|
|
// If there were any illegal characters, just copy out the
|
|
// legal part
|
|
if (iter != --end) {
|
|
nsAutoString str;
|
|
CopyUnicodeTo(begin, ++iter, str);
|
|
mTextKey.Rebind(str);
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
const nsAString& CAttributeToken::GetStringValue(void)
|
|
{
|
|
return mTextValue;
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update rickg 6June2000
|
|
* @param anOutputString will recieve the result
|
|
* @return nada
|
|
*/
|
|
void CAttributeToken::GetSource(nsString& anOutputString){
|
|
anOutputString.Truncate();
|
|
AppendSourceTo(anOutputString);
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update rickg 6June2000
|
|
* @param result appended to the output string.
|
|
* @return nada
|
|
*/
|
|
void CAttributeToken::AppendSourceTo(nsAString& anOutputString){
|
|
anOutputString.Append(mTextKey);
|
|
if(mTextValue.Length() || mHasEqualWithoutValue)
|
|
anOutputString.Append(NS_LITERAL_STRING("="));
|
|
anOutputString.Append(mTextValue);
|
|
// anOutputString.Append(NS_LITERAL_STRING(";"));
|
|
}
|
|
|
|
static void AppendNCR(nsString& aString, PRInt32 aNCRValue);
|
|
/*
|
|
* @param aScanner -- controller of underlying input source
|
|
* @param aFlag -- If NS_IPARSER_FLAG_VIEW_SOURCE do not reduce entities...
|
|
* @return error result
|
|
*
|
|
*/
|
|
static
|
|
nsresult ConsumeAttributeEntity(nsString& aString,
|
|
nsScanner& aScanner,
|
|
PRInt32 aFlag)
|
|
{
|
|
|
|
nsresult result=NS_OK;
|
|
|
|
PRUnichar ch;
|
|
result=aScanner.Peek(ch, 1);
|
|
|
|
if (NS_SUCCEEDED(result)) {
|
|
PRUnichar amp=0;
|
|
PRInt32 theNCRValue=0;
|
|
nsAutoString entity;
|
|
|
|
if (nsCRT::IsAsciiAlpha(ch) && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
|
|
result=CEntityToken::ConsumeEntity(ch,entity,aScanner);
|
|
if (NS_SUCCEEDED(result)) {
|
|
theNCRValue = nsHTMLEntities::EntityToUnicode(entity);
|
|
PRUnichar theTermChar=entity.Last();
|
|
// If an entity value is greater than 255 then:
|
|
// Nav 4.x does not treat it as an entity,
|
|
// IE treats it as an entity if terminated with a semicolon.
|
|
// Resembling IE!!
|
|
if(theNCRValue < 0 || (theNCRValue > 255 && theTermChar != ';')) {
|
|
// Looks like we're not dealing with an entity
|
|
aString.Append(kAmpersand);
|
|
aString.Append(entity);
|
|
}
|
|
else {
|
|
// A valid entity so reduce it.
|
|
aString.Append(PRUnichar(theNCRValue));
|
|
}
|
|
}
|
|
}
|
|
else if (ch==kHashsign && !(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
|
|
result=CEntityToken::ConsumeEntity(ch,entity,aScanner);
|
|
if (NS_SUCCEEDED(result)) {
|
|
if (result == NS_HTMLTOKENS_NOT_AN_ENTITY) {
|
|
// Looked like an entity but it's not
|
|
aScanner.GetChar(amp);
|
|
aString.Append(amp);
|
|
result = NS_OK; // just being safe..
|
|
}
|
|
else {
|
|
PRInt32 err;
|
|
theNCRValue=entity.ToInteger(&err,kAutoDetect);
|
|
AppendNCR(aString, theNCRValue);
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
// What we thought as entity is not really an entity...
|
|
aScanner.GetChar(amp);
|
|
aString.Append(amp);
|
|
}//if
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* This general purpose method is used when you want to
|
|
* consume attributed text value.
|
|
* Note: It also reduces entities within attributes.
|
|
*
|
|
* @param aScanner -- controller of underlying input source
|
|
* @param aTerminalChars -- characters that stop consuming attribute.
|
|
* @param aFlag - contains information such as |dtd mode|view mode|doctype|etc...
|
|
* @return error result
|
|
*/
|
|
static
|
|
nsresult ConsumeAttributeValueText(nsString& aString,
|
|
nsScanner& aScanner,
|
|
const nsReadEndCondition& aEndCondition,
|
|
PRInt32 aFlag)
|
|
{
|
|
nsresult result = NS_OK;
|
|
PRBool done = PR_FALSE;
|
|
|
|
do {
|
|
result = aScanner.ReadUntil(aString,aEndCondition,PR_FALSE);
|
|
if(NS_SUCCEEDED(result)) {
|
|
PRUnichar ch;
|
|
aScanner.Peek(ch);
|
|
if(ch == kAmpersand) {
|
|
result = ConsumeAttributeEntity(aString,aScanner,aFlag);
|
|
}
|
|
else {
|
|
done = PR_TRUE;
|
|
}
|
|
}
|
|
} while (NS_SUCCEEDED(result) && !done);
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* This general purpose method is used when you want to
|
|
* consume a known quoted string.
|
|
*
|
|
* @param aScanner -- controller of underlying input source
|
|
* @param aTerminalChars -- characters that stop consuming attribute.
|
|
* @param aFlag - contains information such as |dtd mode|view mode|doctype|etc...
|
|
* @return error result
|
|
*/
|
|
static
|
|
nsresult ConsumeQuotedString(PRUnichar aChar,
|
|
nsString& aString,
|
|
nsScanner& aScanner,
|
|
PRInt32 aFlag)
|
|
{
|
|
NS_ASSERTION(aChar==kQuote || aChar==kApostrophe,"char is neither quote nor apostrophe");
|
|
|
|
static const PRUnichar theTerminalCharsQuote[] = {
|
|
PRUnichar(kQuote), PRUnichar('&'), PRUnichar(0) };
|
|
static const PRUnichar theTerminalCharsApostrophe[] = {
|
|
PRUnichar(kApostrophe), PRUnichar('&'), PRUnichar(0) };
|
|
static const nsReadEndCondition
|
|
theTerminateConditionQuote(theTerminalCharsQuote);
|
|
static const nsReadEndCondition
|
|
theTerminateConditionApostrophe(theTerminalCharsApostrophe);
|
|
|
|
// Assume Quote to init to something
|
|
const nsReadEndCondition *terminateCondition = &theTerminateConditionQuote;
|
|
if (aChar==kApostrophe)
|
|
terminateCondition = &theTerminateConditionApostrophe;
|
|
|
|
nsresult result=NS_OK;
|
|
nsReadingIterator<PRUnichar> theOffset;
|
|
aScanner.CurrentPosition(theOffset);
|
|
|
|
result=ConsumeAttributeValueText(aString,aScanner,*terminateCondition,aFlag);
|
|
|
|
if(NS_SUCCEEDED(result)) {
|
|
result = aScanner.SkipOver(aChar); // aChar should be " or '
|
|
}
|
|
|
|
// Ref: Bug 35806
|
|
// A back up measure when disaster strikes...
|
|
// Ex <table> <tr d="><td>hello</td></tr></table>
|
|
if(!aString.IsEmpty() && aString.Last()!=aChar &&
|
|
!aScanner.IsIncremental() && result==kEOF) {
|
|
static const nsReadEndCondition
|
|
theAttributeTerminator(kAttributeTerminalChars);
|
|
aString.Truncate();
|
|
aScanner.SetPosition(theOffset, PR_FALSE, PR_TRUE);
|
|
result=ConsumeAttributeValueText(aString,aScanner,theAttributeTerminator,aFlag);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* Consume the key and value portions of the attribute.
|
|
*
|
|
* @update rickg 03.23.2000
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @param aFlag - contains information such as |dtd mode|view mode|doctype|etc...
|
|
* @return error result
|
|
*/
|
|
nsresult CAttributeToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {
|
|
|
|
nsresult result;
|
|
|
|
//I changed a bit of this method to use aRetain so that we do the right
|
|
//thing in viewsource. The ws/cr/lf sequences are now maintained, and viewsource looks good.
|
|
|
|
nsReadingIterator<PRUnichar> wsstart, wsend;
|
|
|
|
if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
|
|
result = aScanner.ReadWhitespace(wsstart, wsend, mNewlineCount);
|
|
}
|
|
else {
|
|
result = aScanner.SkipWhitespace(mNewlineCount);
|
|
}
|
|
|
|
if (NS_OK==result) {
|
|
static const PRUnichar theTerminalsChars[] =
|
|
{ PRUnichar(' '), PRUnichar('"'),
|
|
PRUnichar('='), PRUnichar('\n'),
|
|
PRUnichar('\r'), PRUnichar('\t'),
|
|
PRUnichar('>'), PRUnichar('<'),
|
|
PRUnichar('\b'), PRUnichar(0) };
|
|
static const nsReadEndCondition theEndCondition(theTerminalsChars);
|
|
|
|
nsReadingIterator<PRUnichar> start, end;
|
|
result=aScanner.ReadUntil(start,end,theEndCondition,PR_FALSE);
|
|
|
|
if (!(aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
|
|
aScanner.BindSubstring(mTextKey, start, end);
|
|
}
|
|
|
|
//now it's time to Consume the (optional) value...
|
|
if (NS_OK==result) {
|
|
if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
|
|
result = aScanner.ReadWhitespace(start, wsend, mNewlineCount);
|
|
aScanner.BindSubstring(mTextKey, wsstart, wsend);
|
|
}
|
|
else {
|
|
result = aScanner.SkipWhitespace(mNewlineCount);
|
|
}
|
|
|
|
if (NS_OK==result) {
|
|
result=aScanner.Peek(aChar); //Skip ahead until you find an equal sign or a '>'...
|
|
if (NS_OK==result) {
|
|
if (kEqual==aChar){
|
|
result=aScanner.GetChar(aChar); //skip the equal sign...
|
|
if (NS_OK==result) {
|
|
if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
|
|
result = aScanner.ReadWhitespace(mTextValue, mNewlineCount);
|
|
}
|
|
else {
|
|
result = aScanner.SkipWhitespace(mNewlineCount);
|
|
}
|
|
|
|
if (NS_OK==result) {
|
|
result=aScanner.Peek(aChar); //and grab the next char.
|
|
if (NS_OK==result) {
|
|
if ((kQuote==aChar) || (kApostrophe==aChar)) {
|
|
aScanner.GetChar(aChar);
|
|
result=ConsumeQuotedString(aChar,mTextValue,aScanner,aFlag);
|
|
if (NS_SUCCEEDED(result) && (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE)) {
|
|
mTextValue.Insert(aChar,0);
|
|
mTextValue.Append(aChar);
|
|
}
|
|
// According to spec. we ( who? ) should ignore linefeeds. But look,
|
|
// even the carriage return was getting stripped ( wonder why! ) -
|
|
// Ref. to bug 15204. Okay, so the spec. told us to ignore linefeeds,
|
|
// bug then what about bug 47535 ? Should we preserve everything then?
|
|
// Well, let's make it so! Commenting out the next two lines..
|
|
/*if(!aRetain)
|
|
mTextValue.StripChars("\r\n"); //per the HTML spec, ignore linefeeds...
|
|
*/
|
|
}
|
|
else if (kGreaterThan==aChar){
|
|
mHasEqualWithoutValue=PR_TRUE;
|
|
}
|
|
else {
|
|
static const nsReadEndCondition
|
|
theAttributeTerminator(kAttributeTerminalChars);
|
|
result=ConsumeAttributeValueText(mTextValue,
|
|
aScanner,
|
|
theAttributeTerminator,
|
|
aFlag);
|
|
}
|
|
}//if
|
|
if (NS_OK==result) {
|
|
if (aFlag & NS_IPARSER_FLAG_VIEW_SOURCE) {
|
|
result = aScanner.ReadWhitespace(mTextValue, mNewlineCount);
|
|
}
|
|
else {
|
|
result = aScanner.SkipWhitespace(mNewlineCount);
|
|
}
|
|
}
|
|
}//if
|
|
}//if
|
|
}//if
|
|
else {
|
|
//This is where we have to handle fairly busted content.
|
|
//If you're here, it means we saw an attribute name, but couldn't find
|
|
//the following equal sign. <tag NAME=....
|
|
|
|
//Doing this right in all cases is <i>REALLY</i> ugly.
|
|
//My best guess is to grab the next non-ws char. We know it's not '=',
|
|
//so let's see what it is. If it's a '"', then assume we're reading
|
|
//from the middle of the value. Try stripping the quote and continuing...
|
|
if (kQuote==aChar){
|
|
result=aScanner.SkipOver(aChar); //strip quote.
|
|
}
|
|
}
|
|
}//if
|
|
} //if
|
|
}//if (consume optional value)
|
|
|
|
if (NS_OK==result) {
|
|
result=aScanner.Peek(aChar);
|
|
#ifdef DEBUG
|
|
mLastAttribute = (kGreaterThan == aChar || kEOF == result);
|
|
#endif
|
|
}
|
|
}//if
|
|
return result;
|
|
}
|
|
|
|
void CAttributeToken::SetKey(const nsAString& aKey)
|
|
{
|
|
mTextKey.Rebind(aKey);
|
|
}
|
|
|
|
void CAttributeToken::BindKey(nsScanner* aScanner,
|
|
nsReadingIterator<PRUnichar>& aStart,
|
|
nsReadingIterator<PRUnichar>& aEnd)
|
|
{
|
|
aScanner->BindSubstring(mTextKey, aStart, aEnd);
|
|
}
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CWhitespaceToken::CWhitespaceToken() : CHTMLToken(eHTMLTag_whitespace) {
|
|
}
|
|
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string value to init token name with
|
|
* @return
|
|
*/
|
|
CWhitespaceToken::CWhitespaceToken(const nsAString& aName) : CHTMLToken(eHTMLTag_whitespace) {
|
|
mTextValue.Assign(aName);
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CWhitespaceToken::GetClassName(void) {
|
|
return "ws";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CWhitespaceToken::GetTokenType(void) {
|
|
return eToken_whitespace;
|
|
}
|
|
|
|
/*
|
|
* This general purpose method is used when you want to
|
|
* consume an aribrary sequence of whitespace.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
nsresult CWhitespaceToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {
|
|
mTextValue.Assign(aChar);
|
|
nsresult result=aScanner.ReadWhitespace(mTextValue, mNewlineCount);
|
|
if(NS_OK==result) {
|
|
mTextValue.StripChar(kCR);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
const nsAString& CWhitespaceToken::GetStringValue(void)
|
|
{
|
|
return mTextValue;
|
|
}
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CEntityToken::CEntityToken() : CHTMLToken(eHTMLTag_entity) {
|
|
}
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string value to init token name with
|
|
* @return
|
|
*/
|
|
CEntityToken::CEntityToken(const nsAString& aName) : CHTMLToken(eHTMLTag_entity) {
|
|
mTextValue.Assign(aName);
|
|
#ifdef VERBOSE_DEBUG
|
|
if(!VerifyEntityTable()) {
|
|
cout<<"Entity table is invalid!" << endl;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
|
|
/*
|
|
* Consume the rest of the entity. We've already eaten the "&".
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
nsresult CEntityToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {
|
|
nsresult result=ConsumeEntity(aChar,mTextValue,aScanner);
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CEntityToken::GetClassName(void) {
|
|
return "&entity";
|
|
}
|
|
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CEntityToken::GetTokenType(void) {
|
|
return eToken_entity;
|
|
}
|
|
|
|
/*
|
|
* This general purpose method is used when you want to
|
|
* consume an entity &xxxx;. Keep in mind that entities
|
|
* are <i>not</i> reduced inline.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*/
|
|
nsresult
|
|
CEntityToken::ConsumeEntity(PRUnichar aChar,
|
|
nsString& aString,
|
|
nsScanner& aScanner) {
|
|
nsresult result=NS_OK;
|
|
if(kLeftBrace==aChar) {
|
|
//you're consuming a script entity...
|
|
aScanner.GetChar(aChar); // Consume &
|
|
|
|
PRInt32 rightBraceCount = 0;
|
|
PRInt32 leftBraceCount = 0;
|
|
|
|
do {
|
|
result=aScanner.GetChar(aChar);
|
|
|
|
if (NS_FAILED(result)) {
|
|
return result;
|
|
}
|
|
|
|
aString.Append(aChar);
|
|
if(aChar==kRightBrace)
|
|
++rightBraceCount;
|
|
else if(aChar==kLeftBrace)
|
|
++leftBraceCount;
|
|
} while(leftBraceCount!=rightBraceCount);
|
|
} //if
|
|
else {
|
|
PRUnichar theChar=0;
|
|
if (kHashsign==aChar) {
|
|
result = aScanner.Peek(theChar,2);
|
|
|
|
if (NS_FAILED(result)) {
|
|
if (kEOF == result && !aScanner.IsIncremental()) {
|
|
// If this is the last buffer then we are certainly
|
|
// not dealing with an entity. That's, there are
|
|
// no more characters after &#. Bug 188278.
|
|
return NS_HTMLTOKENS_NOT_AN_ENTITY;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
if (nsCRT::IsAsciiDigit(theChar)) {
|
|
aScanner.GetChar(aChar); // Consume &
|
|
aScanner.GetChar(aChar); // Consume #
|
|
aString.Assign(aChar);
|
|
result=aScanner.ReadNumber(aString,10);
|
|
}
|
|
else if (theChar == 'x' || theChar == 'X') {
|
|
aScanner.GetChar(aChar); // Consume &
|
|
aScanner.GetChar(aChar); // Consume #
|
|
aScanner.GetChar(theChar); // Consume x
|
|
aString.Assign(aChar);
|
|
aString.Append(theChar);
|
|
result=aScanner.ReadNumber(aString,16);
|
|
}
|
|
else {
|
|
return NS_HTMLTOKENS_NOT_AN_ENTITY;
|
|
}
|
|
}
|
|
else {
|
|
result = aScanner.Peek(theChar,1);
|
|
|
|
if (NS_FAILED(result)) {
|
|
return result;
|
|
}
|
|
|
|
if(nsCRT::IsAsciiAlpha(theChar) ||
|
|
theChar == '_' ||
|
|
theChar == ':') {
|
|
aScanner.GetChar(aChar); // Consume &
|
|
result=aScanner.ReadIdentifier(aString,PR_TRUE); // Ref. Bug# 23791 - For setting aIgnore to PR_TRUE.
|
|
}
|
|
else {
|
|
return NS_HTMLTOKENS_NOT_AN_ENTITY;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (NS_FAILED(result)) {
|
|
return result;
|
|
}
|
|
|
|
result=aScanner.Peek(aChar);
|
|
|
|
if (NS_FAILED(result)) {
|
|
return result;
|
|
}
|
|
|
|
if (aChar == kSemicolon) {
|
|
// consume semicolon that stopped the scan
|
|
aString.Append(aChar);
|
|
result=aScanner.GetChar(aChar);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
#define PA_REMAP_128_TO_160_ILLEGAL_NCR 1
|
|
|
|
#ifdef PA_REMAP_128_TO_160_ILLEGAL_NCR
|
|
/**
|
|
* Map some illegal but commonly used numeric entities into their
|
|
* appropriate unicode value.
|
|
*/
|
|
#define NOT_USED 0xfffd
|
|
|
|
static PRUint16 PA_HackTable[] = {
|
|
0x20ac, /* EURO SIGN */
|
|
NOT_USED,
|
|
0x201a, /* SINGLE LOW-9 QUOTATION MARK */
|
|
0x0192, /* LATIN SMALL LETTER F WITH HOOK */
|
|
0x201e, /* DOUBLE LOW-9 QUOTATION MARK */
|
|
0x2026, /* HORIZONTAL ELLIPSIS */
|
|
0x2020, /* DAGGER */
|
|
0x2021, /* DOUBLE DAGGER */
|
|
0x02c6, /* MODIFIER LETTER CIRCUMFLEX ACCENT */
|
|
0x2030, /* PER MILLE SIGN */
|
|
0x0160, /* LATIN CAPITAL LETTER S WITH CARON */
|
|
0x2039, /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
|
|
0x0152, /* LATIN CAPITAL LIGATURE OE */
|
|
NOT_USED,
|
|
0x017D, /* LATIN CAPITAL LETTER Z WITH CARON */
|
|
NOT_USED,
|
|
NOT_USED,
|
|
0x2018, /* LEFT SINGLE QUOTATION MARK */
|
|
0x2019, /* RIGHT SINGLE QUOTATION MARK */
|
|
0x201c, /* LEFT DOUBLE QUOTATION MARK */
|
|
0x201d, /* RIGHT DOUBLE QUOTATION MARK */
|
|
0x2022, /* BULLET */
|
|
0x2013, /* EN DASH */
|
|
0x2014, /* EM DASH */
|
|
0x02dc, /* SMALL TILDE */
|
|
0x2122, /* TRADE MARK SIGN */
|
|
0x0161, /* LATIN SMALL LETTER S WITH CARON */
|
|
0x203a, /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */
|
|
0x0153, /* LATIN SMALL LIGATURE OE */
|
|
NOT_USED,
|
|
0x017E, /* LATIN SMALL LETTER Z WITH CARON */
|
|
0x0178 /* LATIN CAPITAL LETTER Y WITH DIAERESIS */
|
|
};
|
|
#endif /* PA_REMAP_128_TO_160_ILLEGAL_NCR */
|
|
|
|
static void AppendNCR(nsString& aString, PRInt32 aNCRValue)
|
|
{
|
|
#ifdef PA_REMAP_128_TO_160_ILLEGAL_NCR
|
|
/* for some illegal, but popular usage */
|
|
if ((aNCRValue >= 0x0080) && (aNCRValue <= 0x009f)) {
|
|
aNCRValue = PA_HackTable[aNCRValue - 0x0080];
|
|
}
|
|
#endif
|
|
|
|
if (IS_IN_BMP(aNCRValue))
|
|
aString.Append(PRUnichar(aNCRValue));
|
|
else {
|
|
aString.Append(PRUnichar(H_SURROGATE(aNCRValue)));
|
|
aString.Append(PRUnichar(L_SURROGATE(aNCRValue)));
|
|
}
|
|
}
|
|
|
|
/*
|
|
* This method converts this entity into its underlying
|
|
* unicode equivalent.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aString will hold the resulting string value
|
|
* @return numeric (unichar) value
|
|
*/
|
|
PRInt32 CEntityToken::TranslateToUnicodeStr(nsString& aString) {
|
|
PRInt32 value=0;
|
|
|
|
if(mTextValue.Length()>1) {
|
|
PRUnichar theChar0=mTextValue.CharAt(0);
|
|
|
|
if(kHashsign==theChar0) {
|
|
PRInt32 err=0;
|
|
|
|
value=mTextValue.ToInteger(&err,kAutoDetect);
|
|
|
|
if(0==err) {
|
|
AppendNCR(aString, value);
|
|
}
|
|
}
|
|
else{
|
|
value = nsHTMLEntities::EntityToUnicode(mTextValue);
|
|
if(-1<value) {
|
|
//we found a named entity...
|
|
aString.Assign(PRUnichar(value));
|
|
}
|
|
}//else
|
|
}//if
|
|
|
|
return value;
|
|
}
|
|
|
|
|
|
const nsAString& CEntityToken::GetStringValue(void)
|
|
{
|
|
return mTextValue;
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param anOutputString will recieve the result
|
|
* @return nada
|
|
*/
|
|
void CEntityToken::GetSource(nsString& anOutputString){
|
|
anOutputString.Append(NS_LITERAL_STRING("&"));
|
|
anOutputString+=mTextValue;
|
|
//anOutputString+=";";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update harishd 03/23/00
|
|
* @param result appended to the output string.
|
|
* @return nada
|
|
*/
|
|
void CEntityToken::AppendSourceTo(nsAString& anOutputString){
|
|
anOutputString.Append(NS_LITERAL_STRING("&"));
|
|
anOutputString+=mTextValue;
|
|
//anOutputString+=";";
|
|
}
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CScriptToken::CScriptToken() : CHTMLToken(eHTMLTag_script) {
|
|
}
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CScriptToken::CScriptToken(const nsAString& aString) : CHTMLToken(eHTMLTag_script) {
|
|
mTextValue.Assign(aString);
|
|
}
|
|
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CScriptToken::GetClassName(void) {
|
|
return "script";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CScriptToken::GetTokenType(void) {
|
|
return eToken_script;
|
|
}
|
|
|
|
const nsAString& CScriptToken::GetStringValue(void)
|
|
{
|
|
return mTextValue;
|
|
}
|
|
|
|
/*
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*/
|
|
CStyleToken::CStyleToken() : CHTMLToken(eHTMLTag_style) {
|
|
}
|
|
|
|
CStyleToken::CStyleToken(const nsAString& aString) : CHTMLToken(eHTMLTag_style) {
|
|
mTextValue.Assign(aString);
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CStyleToken::GetClassName(void) {
|
|
return "style";
|
|
}
|
|
|
|
/*
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CStyleToken::GetTokenType(void) {
|
|
return eToken_style;
|
|
}
|
|
|
|
const nsAString& CStyleToken::GetStringValue(void)
|
|
{
|
|
return mTextValue;
|
|
}
|
|
|
|
|
|
/**
|
|
*
|
|
* @update gess4/25/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const PRUnichar* GetTagName(PRInt32 aTag)
|
|
{
|
|
const PRUnichar *result = nsHTMLTags::GetStringValue((nsHTMLTag) aTag);
|
|
|
|
if (result) {
|
|
return result;
|
|
}
|
|
|
|
if(aTag >= eHTMLTag_userdefined)
|
|
return sUserdefined;
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
|
|
*
|
|
*
|
|
* @update gess 9/23/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
CInstructionToken::CInstructionToken() : CHTMLToken(eHTMLTag_instruction) {
|
|
}
|
|
|
|
/**
|
|
*
|
|
*
|
|
* @update gess 9/23/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
CInstructionToken::CInstructionToken(const nsAString& aString) : CHTMLToken(eHTMLTag_unknown) {
|
|
mTextValue.Assign(aString);
|
|
}
|
|
|
|
/**
|
|
*
|
|
*
|
|
* @update gess 9/23/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
nsresult CInstructionToken::Consume(PRUnichar aChar,nsScanner& aScanner,PRInt32 aFlag){
|
|
mTextValue.Assign(NS_LITERAL_STRING("<?"));
|
|
nsresult result=aScanner.ReadUntil(mTextValue,kGreaterThan,PR_TRUE);
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
*
|
|
*
|
|
* @update gess 9/23/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
const char* CInstructionToken::GetClassName(void){
|
|
return "instruction";
|
|
}
|
|
|
|
/**
|
|
*
|
|
*
|
|
* @update gess 9/23/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
PRInt32 CInstructionToken::GetTokenType(void){
|
|
return eToken_instruction;
|
|
}
|
|
|
|
const nsAString& CInstructionToken::GetStringValue(void)
|
|
{
|
|
return mTextValue;
|
|
}
|
|
|
|
|
|
CErrorToken::CErrorToken(nsParserError *aError) : CHTMLToken(eHTMLTag_unknown)
|
|
{
|
|
mError = aError;
|
|
}
|
|
|
|
CErrorToken::~CErrorToken()
|
|
{
|
|
delete mError;
|
|
}
|
|
|
|
PRInt32 CErrorToken::GetTokenType(void){
|
|
return eToken_error;
|
|
}
|
|
|
|
const char* CErrorToken::GetClassName(void){
|
|
return "error";
|
|
}
|
|
|
|
void CErrorToken::SetError(nsParserError *aError) {
|
|
mError = aError;
|
|
}
|
|
|
|
const nsParserError * CErrorToken::GetError(void)
|
|
{
|
|
return mError;
|
|
}
|
|
|
|
const nsAString& CErrorToken::GetStringValue(void)
|
|
{
|
|
return mTextValue;
|
|
}
|
|
|
|
// Doctype decl token
|
|
|
|
CDoctypeDeclToken::CDoctypeDeclToken(eHTMLTags aTag)
|
|
: CHTMLToken(aTag) {
|
|
}
|
|
|
|
CDoctypeDeclToken::CDoctypeDeclToken(const nsAString& aString,eHTMLTags aTag)
|
|
: CHTMLToken(aTag), mTextValue(aString) {
|
|
}
|
|
|
|
/**
|
|
* This method consumes a doctype element.
|
|
* Note: I'm rewriting this method to seek to the first <, since quotes can really screw us up.
|
|
*
|
|
* @update gess 9/23/98
|
|
* @param
|
|
* @return
|
|
*/
|
|
nsresult CDoctypeDeclToken::Consume(PRUnichar aChar, nsScanner& aScanner,PRInt32 aFlag) {
|
|
|
|
static const PRUnichar terminalChars[] =
|
|
{ PRUnichar('>'), PRUnichar('<'),
|
|
PRUnichar(0)
|
|
};
|
|
static const nsReadEndCondition theEndCondition(terminalChars);
|
|
|
|
nsReadingIterator<PRUnichar> start, end;
|
|
|
|
aScanner.CurrentPosition(start);
|
|
aScanner.EndReading(end);
|
|
|
|
nsresult result=aScanner.ReadUntil(start, end, theEndCondition, PR_FALSE);
|
|
|
|
if (NS_SUCCEEDED(result)) {
|
|
PRUnichar ch;
|
|
aScanner.Peek(ch);
|
|
if (ch == kGreaterThan) {
|
|
// Include '>' but not '<' since '<'
|
|
// could belong to another tag.
|
|
aScanner.GetChar(ch);
|
|
end.advance(1);
|
|
}
|
|
}
|
|
else if (!aScanner.IsIncremental()) {
|
|
// We have reached the document end but haven't
|
|
// found either a '<' or a '>'. Therefore use
|
|
// whatever we have.
|
|
result = NS_OK;
|
|
}
|
|
|
|
if (NS_SUCCEEDED(result)) {
|
|
start.advance(-2); // Make sure to consume <!
|
|
CopyUnicodeTo(start,end,mTextValue);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
const char* CDoctypeDeclToken::GetClassName(void) {
|
|
return "doctype";
|
|
}
|
|
|
|
PRInt32 CDoctypeDeclToken::GetTokenType(void) {
|
|
return eToken_doctypeDecl;
|
|
}
|
|
|
|
const nsAString& CDoctypeDeclToken::GetStringValue(void)
|
|
{
|
|
return mTextValue;
|
|
}
|
|
|
|
void CDoctypeDeclToken::SetStringValue(const nsAString& aStr)
|
|
{
|
|
mTextValue.Assign(aStr);
|
|
}
|