1308 lines
40 KiB
C++
1308 lines
40 KiB
C++
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/*
|
|
* The contents of this file are subject to the Netscape Public License
|
|
* Version 1.0 (the "NPL"); you may not use this file except in
|
|
* compliance with the NPL. You may obtain a copy of the NPL at
|
|
* http://www.mozilla.org/NPL/
|
|
*
|
|
* Software distributed under the NPL is distributed on an "AS IS" basis,
|
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the NPL
|
|
* for the specific language governing rights and limitations under the
|
|
* NPL.
|
|
*
|
|
* The Initial Developer of this code under the NPL is Netscape
|
|
* Communications Corporation. Portions created by Netscape are
|
|
* Copyright (C) 1998 Netscape Communications Corporation. All Rights
|
|
* Reserved.
|
|
*/
|
|
|
|
#include <ctype.h>
|
|
#include <time.h>
|
|
#include <stdio.h>
|
|
#include "nsScanner.h"
|
|
#include "nsToken.h"
|
|
#include "nsHTMLTokens.h"
|
|
#include "nsParserTypes.h"
|
|
#include "prtypes.h"
|
|
#include "nsDebug.h"
|
|
|
|
//#define GESS_MACHINE
|
|
#ifdef GESS_MACHINE
|
|
#include "nsEntityEx.cpp"
|
|
#endif
|
|
|
|
static nsString gIdentChars("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-");
|
|
static nsString gAttrTextChars("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-%.");
|
|
static nsString gAlphaChars("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ");
|
|
static nsAutoString gDigits("0123456789");
|
|
static nsAutoString gWhitespace(" \n\r\t\b");
|
|
static nsAutoString gOperatorChars("/?.<>[]{}~^+=-!%&*(),|:");
|
|
|
|
//debug error messages...
|
|
static const char* kNullScanner = "Error: Scanner is null.";
|
|
|
|
const PRInt32 kMAXNAMELEN=10;
|
|
struct StrToUnicodeStruct
|
|
{
|
|
char fName[kMAXNAMELEN+1];
|
|
PRInt32 fValue;
|
|
};
|
|
|
|
|
|
// KEEP THIS LIST SORTED!
|
|
// NOTE: This names table is sorted in ascii collating order. If you
|
|
// add a new entry, make sure you put it in the right spot otherwise
|
|
// the binary search code above will break!
|
|
static StrToUnicodeStruct gStrToUnicodeTable[] =
|
|
{
|
|
{"AElig", 0x00c6}, {"AMP", 0x0026}, {"Aacute",0x00c1},
|
|
{"Acirc", 0x00c2}, {"Agrave",0x00c0}, {"Aring", 0x00c5},
|
|
{"Atilde",0x00c3}, {"Auml", 0x00c4}, {"COPY", 0x00a9},
|
|
{"Ccedil",0x00c7}, {"ETH", 0x00d0}, {"Eacute",0x00c9},
|
|
{"Ecirc", 0x00ca}, {"Egrave",0x00c8}, {"Euml", 0x00cb},
|
|
{"GT", 0x003e}, {"Iacute",0x00cd}, {"Icirc", 0x00ce},
|
|
{"Igrave",0x00cc}, {"Iuml", 0x00cf}, {"LT", 0x003c},
|
|
{"Ntilde",0x00d1}, {"Oacute",0x00d3}, {"Ocirc", 0x00d4},
|
|
{"Ograve",0x00d2}, {"Oslash",0x00d8}, {"Otilde",0x00d5},
|
|
{"Ouml", 0x00d6}, {"QUOT", 0x0022}, {"REG", 0x00ae},
|
|
{"THORN", 0x00de}, {"Uacute",0x00da}, {"Ucirc", 0x00db},
|
|
{"Ugrave",0x00d9}, {"Uuml", 0x00dc}, {"Yacute",0x00dd},
|
|
{"aacute",0x00e1}, {"acirc", 0x00e2}, {"acute", 0x00b4},
|
|
{"aelig", 0x00e6}, {"agrave",0x00e0}, {"amp", 0x0026},
|
|
{"aring", 0x00e5}, {"atilde",0x00e3}, {"auml", 0x00e4},
|
|
{"brvbar",0x00a6}, {"ccedil",0x00e7}, {"cedil", 0x00b8},
|
|
{"cent", 0x00a2}, {"copy", 0x00a9}, {"curren",0x00a4},
|
|
{"deg", 0x00b0}, {"divide",0x00f7}, {"eacute",0x00e9},
|
|
{"ecirc", 0x00ea}, {"egrave",0x00e8}, {"eth", 0x00f0},
|
|
{"euml", 0x00eb}, {"frac12",0x00bd}, {"frac14",0x00bc},
|
|
{"frac34",0x00be}, {"gt", 0x003e}, {"iacute",0x00ed},
|
|
{"icirc", 0x00ee}, {"iexcl", 0x00a1}, {"igrave",0x00ec},
|
|
{"iquest",0x00bf}, {"iuml", 0x00ef}, {"laquo", 0x00ab},
|
|
{"lt", 0x003c}, {"macr", 0x00af}, {"micro", 0x00b5},
|
|
{"middot",0x00b7}, {"nbsp", 0x00a0}, {"not", 0x00ac},
|
|
{"ntilde",0x00f1}, {"oacute",0x00f3}, {"ocirc", 0x00f4},
|
|
{"ograve",0x00f2}, {"ordf", 0x00aa}, {"ordm", 0x00ba},
|
|
{"oslash",0x00f8}, {"otilde",0x00f5}, {"ouml", 0x00f6},
|
|
{"para", 0x00b6}, {"plusmn",0x00b1}, {"pound", 0x00a3},
|
|
{"quot", 0x0022}, {"raquo", 0x00bb}, {"reg", 0x00ae},
|
|
{"sect", 0x00a7}, {"shy", 0x00ad}, {"sup1", 0x00b9},
|
|
{"sup2", 0x00b2}, {"sup3", 0x00b3}, {"szlig", 0x00df},
|
|
{"thorn", 0x00fe}, {"times", 0x00d7}, {"uacute",0x00fa},
|
|
{"ucirc", 0x00fb}, {"ugrave",0x00f9}, {"uml", 0x00a8},
|
|
{"uuml", 0x00fc}, {"yacute",0x00fd}, {"yen", 0x00a5},
|
|
{"yuml", 0x00ff}
|
|
};
|
|
|
|
|
|
|
|
struct HTMLTagEntry
|
|
{
|
|
char fName[12];
|
|
eHTMLTags fTagID;
|
|
};
|
|
|
|
|
|
// KEEP THIS LIST SORTED!
|
|
// NOTE: This table is sorted in ascii collating order. If you
|
|
// add a new entry, make sure you put it in the right spot otherwise
|
|
// the binary search code above will break!
|
|
HTMLTagEntry gHTMLTagTable[] =
|
|
{
|
|
{"!DOCTYPE", eHTMLTag_doctype}, {"A", eHTMLTag_a},
|
|
{"ACRONYM", eHTMLTag_acronym}, {"ADDRESS", eHTMLTag_address},
|
|
{"APPLET", eHTMLTag_applet}, {"AREA", eHTMLTag_area},
|
|
|
|
{"B", eHTMLTag_bold}, {"BASE", eHTMLTag_base},
|
|
{"BASEFONT", eHTMLTag_basefont}, {"BDO", eHTMLTag_bdo},
|
|
{"BIG", eHTMLTag_big}, {"BLINK", eHTMLTag_blink},
|
|
{"BLOCKQUOTE",eHTMLTag_blockquote}, {"BODY", eHTMLTag_body},
|
|
{"BR", eHTMLTag_br}, {"BUTTON", eHTMLTag_button},
|
|
|
|
{"CAPTION", eHTMLTag_caption}, {"CENTER", eHTMLTag_center},
|
|
{"CERTIFICATE",eHTMLTag_certificate},
|
|
{"CITE", eHTMLTag_cite}, {"CODE", eHTMLTag_code},
|
|
{"COL", eHTMLTag_col}, {"COLGROUP", eHTMLTag_colgroup},
|
|
|
|
{"DD", eHTMLTag_dd}, {"DEL", eHTMLTag_del},
|
|
{"DFN", eHTMLTag_dfn}, {"DIR", eHTMLTag_dir},
|
|
{"DIV", eHTMLTag_div}, {"DL", eHTMLTag_dl},
|
|
{"DT", eHTMLTag_dt},
|
|
|
|
{"EM", eHTMLTag_em}, {"EMBED", eHTMLTag_embed},
|
|
|
|
{"FIELDSET", eHTMLTag_fieldset}, {"FONT", eHTMLTag_font},
|
|
{"FOOTER", eHTMLTag_footer}, {"FORM", eHTMLTag_form},
|
|
{"FRAME", eHTMLTag_frame}, {"FRAMESET", eHTMLTag_frameset},
|
|
|
|
{"H1", eHTMLTag_h1}, {"H2", eHTMLTag_h2},
|
|
{"H3", eHTMLTag_h3}, {"H4", eHTMLTag_h4},
|
|
{"H5", eHTMLTag_h5}, {"H6", eHTMLTag_h6},
|
|
{"HEAD", eHTMLTag_head}, {"HEADER", eHTMLTag_header},
|
|
{"HR", eHTMLTag_hr}, {"HTML", eHTMLTag_html},
|
|
|
|
{"I", eHTMLTag_italic}, {"IFRAME", eHTMLTag_iframe},
|
|
{"ILAYER", eHTMLTag_ilayer}, {"IMG", eHTMLTag_img},
|
|
{"INPUT", eHTMLTag_input}, {"INS", eHTMLTag_ins},
|
|
{"ISINDEX", eHTMLTag_isindex},
|
|
|
|
{"KBD", eHTMLTag_kbd}, {"KEYGEN", eHTMLTag_keygen},
|
|
|
|
{"LABEL", eHTMLTag_label}, {"LAYER", eHTMLTag_layer},
|
|
{"LEGEND", eHTMLTag_legend}, {"LI", eHTMLTag_listitem},
|
|
{"LINK", eHTMLTag_link}, {"LISTING", eHTMLTag_listing},
|
|
|
|
{"MAP", eHTMLTag_map}, {"MARQUEE", eHTMLTag_marquee},
|
|
{"MENU", eHTMLTag_menu}, {"META", eHTMLTag_meta},
|
|
|
|
{"NEWLINE", eHTMLTag_newline},
|
|
|
|
{"NOEMBED", eHTMLTag_noembed}, {"NOFRAMES", eHTMLTag_noframes},
|
|
{"NOLAYER", eHTMLTag_nolayer}, {"NOSCRIPT", eHTMLTag_noscript},
|
|
|
|
{"OBJECT", eHTMLTag_object}, {"OL", eHTMLTag_ol},
|
|
{"OPTION", eHTMLTag_option},
|
|
|
|
{"P", eHTMLTag_paragraph}, {"PARAM", eHTMLTag_param},
|
|
{"PLAINTEXT", eHTMLTag_plaintext},
|
|
|
|
{"PRE", eHTMLTag_pre},
|
|
|
|
{"Q", eHTMLTag_quotation},
|
|
|
|
{"S", eHTMLTag_strike}, {"SAMP", eHTMLTag_samp},
|
|
{"SCRIPT", eHTMLTag_script}, {"SELECT", eHTMLTag_select},
|
|
{"SERVER", eHTMLTag_server}, {"SMALL", eHTMLTag_small},
|
|
{"SPACER", eHTMLTag_spacer},
|
|
{"SPAN", eHTMLTag_span}, {"STRIKE", eHTMLTag_strike},
|
|
{"STRONG", eHTMLTag_strong}, {"STYLE", eHTMLTag_style},
|
|
{"SUB", eHTMLTag_sub}, {"SUP", eHTMLTag_sup},
|
|
|
|
{"TABLE", eHTMLTag_table}, {"TBODY", eHTMLTag_tbody},
|
|
{"TD", eHTMLTag_td}, {"TEXTAREA", eHTMLTag_textarea},
|
|
{"TFOOT", eHTMLTag_tfoot}, {"TH", eHTMLTag_th},
|
|
{"THEAD", eHTMLTag_thead}, {"TITLE", eHTMLTag_title},
|
|
{"TR", eHTMLTag_tr}, {"TT", eHTMLTag_tt},
|
|
|
|
{"U", eHTMLTag_u}, {"UL", eHTMLTag_ul},
|
|
{"VAR", eHTMLTag_var}, {"WBR", eHTMLTag_wbr},
|
|
{"WS", eHTMLTag_whitespace},
|
|
|
|
|
|
};
|
|
|
|
|
|
struct HTMLAttrEntry
|
|
{
|
|
char fName[11];
|
|
eHTMLAttributes fAttrID;
|
|
};
|
|
|
|
HTMLAttrEntry gHTMLAttributeTable[] =
|
|
{
|
|
{"ABBREV", eHTMLAttr_abbrev}, {"ABOVE", eHTMLAttr_above},
|
|
{"ALT", eHTMLAttr_alt}, {"ARRAY", eHTMLAttr_array},
|
|
{"AU", eHTMLAttr_author},
|
|
|
|
{"BACKGROUND",eHTMLAttr_background}, {"BANNER", eHTMLAttr_banner},
|
|
{"BELOW", eHTMLAttr_below}, {"BGSOUND", eHTMLAttr_bgsound},
|
|
{"BOX", eHTMLAttr_box}, {"BT", eHTMLAttr_bt},
|
|
|
|
{"CLASS", eHTMLAttr_class}, {"COMMENT", eHTMLAttr_comment},
|
|
{"CREDIT", eHTMLAttr_credit},
|
|
|
|
{"DIR", eHTMLAttr_dir},
|
|
|
|
{"FIG", eHTMLAttr_figure}, {"FIGURE", eHTMLAttr_figure},
|
|
{"FOOTNOTE", eHTMLAttr_footnote},
|
|
|
|
{"HEIGHT", eHTMLAttr_height},
|
|
|
|
{"ID", eHTMLAttr_id},
|
|
|
|
{"LANG", eHTMLAttr_lang},
|
|
|
|
{"MATH", eHTMLAttr_math},
|
|
|
|
{"NAME", eHTMLAttr_name}, {"NEXTID", eHTMLAttr_nextid},
|
|
{"NOBR", eHTMLAttr_nobreak},
|
|
|
|
{"NOTE", eHTMLAttr_note},
|
|
|
|
{"OPTION", eHTMLAttr_option}, {"OVERLAY", eHTMLAttr_overlay},
|
|
|
|
{"PERSON", eHTMLAttr_person}, {"PUBLIC", eHTMLAttr_public},
|
|
|
|
{"RANGE", eHTMLAttr_range}, {"ROOT", eHTMLAttr_root},
|
|
|
|
{"SGML", eHTMLAttr_sgml}, {"SQRT", eHTMLAttr_sqrt},
|
|
{"SRC", eHTMLAttr_src}, {"STYLE", eHTMLAttr_style},
|
|
|
|
{"TEXT", eHTMLAttr_text}, {"TITLE", eHTMLAttr_title},
|
|
|
|
{"WBR", eHTMLAttr_wordbreak}, {"WIDTH", eHTMLAttr_width},
|
|
|
|
{"XMP", eHTMLAttr_xmp}
|
|
};
|
|
|
|
|
|
/**-------------------------------------------------------
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
CHTMLToken::CHTMLToken(const nsString& aName) : CToken(aName) {
|
|
mTagType=eHTMLTag_unknown;
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
eHTMLTags CHTMLToken::GetHTMLTag() {
|
|
return mTagType;
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
void CHTMLToken::SetHTMLTag(eHTMLTags aTagType) {
|
|
mTagType=aTagType;
|
|
return;
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
CStartToken::CStartToken(const nsString& aName) : CHTMLToken(aName) {
|
|
mAttributed=PR_FALSE;
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
* default destructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
eHTMLTags CStartToken::GetHTMLTag(){
|
|
if(eHTMLTag_unknown==mTagType)
|
|
mTagType=DetermineHTMLTagType(mTextValue);
|
|
return mTagType;
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
const char* CStartToken::GetClassName(void) {
|
|
return "start";
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
PRInt32 CStartToken::GetTokenType(void) {
|
|
return eToken_start;
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
void CStartToken::SetAttributed(PRBool aValue) {
|
|
mAttributed=aValue;
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
PRBool CStartToken::IsAttributed(void) {
|
|
return mAttributed;
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
* Consume the identifier portion of the start tag
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*------------------------------------------------------*/
|
|
PRInt32 CStartToken::Consume(PRUnichar aChar, CScanner& aScanner) {
|
|
|
|
//if you're here, we've already Consumed the < char, and are
|
|
//ready to Consume the rest of the open tag identifier.
|
|
//Stop consuming as soon as you see a space or a '>'.
|
|
//NOTE: We don't Consume the tag attributes here, nor do we eat the ">"
|
|
|
|
mTextValue=aChar;
|
|
PRInt32 result=aScanner.ReadWhile(mTextValue,gIdentChars,PR_FALSE);
|
|
|
|
//Good. Now, let's skip whitespace after the identifier,
|
|
//and see if the next char is ">". If so, we have a complete
|
|
//tag without attributes.
|
|
|
|
aScanner.SkipWhite();
|
|
result=aScanner.GetChar(aChar);
|
|
if(kGreaterThan!=aChar) { //look for '>'
|
|
//push that char back, since we apparently have attributes...
|
|
aScanner.PutBack(aChar);
|
|
mAttributed=PR_TRUE;
|
|
}
|
|
return result;
|
|
};
|
|
|
|
|
|
/**-------------------------------------------------------
|
|
* Dump contents of this token to givne output stream
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param out -- ostream to output content
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
void CStartToken::DebugDumpSource(ostream& out) {
|
|
char* cp=mTextValue.ToNewCString();
|
|
out << "<" << *cp;
|
|
if(!mAttributed)
|
|
out << ">";
|
|
delete cp;
|
|
}
|
|
|
|
|
|
/**-------------------------------------------------------
|
|
* default constructor for end token
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- char* containing token name
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
CEndToken::CEndToken(const nsString& aName) : CHTMLToken(aName) {
|
|
mOrdinalValue=eToken_end;
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
* Consume the identifier portion of the end tag
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*------------------------------------------------------*/
|
|
PRInt32 CEndToken::Consume(PRUnichar aChar, CScanner& aScanner) {
|
|
|
|
//if you're here, we've already Consumed the <! chars, and are
|
|
//ready to Consume the rest of the open tag identifier.
|
|
//Stop consuming as soon as you see a space or a '>'.
|
|
//NOTE: We don't Consume the tag attributes here, nor do we eat the ">"
|
|
|
|
mTextValue="";
|
|
static nsAutoString terminals(">");
|
|
PRInt32 result=aScanner.ReadUntil(mTextValue,terminals,PR_FALSE);
|
|
aScanner.GetChar(aChar); //eat the closing '>;
|
|
return result;
|
|
};
|
|
|
|
|
|
/**-------------------------------------------------------
|
|
* Asks the token to determine the <i>HTMLTag type</i> of
|
|
* the token. This turns around and looks up the tag name
|
|
* in the tag dictionary.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return eHTMLTag id of this endtag
|
|
*------------------------------------------------------*/
|
|
eHTMLTags CEndToken::GetHTMLTag(){
|
|
if(eHTMLTag_unknown==mTagType)
|
|
mTagType=DetermineHTMLTagType(mTextValue);
|
|
return mTagType;
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
const char* CEndToken::GetClassName(void) {
|
|
return "/end";
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
PRInt32 CEndToken::GetTokenType(void) {
|
|
return eToken_end;
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
* Dump contents of this token to givne output stream
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param out -- ostream to output content
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
void CEndToken::DebugDumpSource(ostream& out) {
|
|
char* cp=mTextValue.ToNewCString();
|
|
out << "</" << *cp << ">";
|
|
delete cp;
|
|
}
|
|
|
|
|
|
/**-------------------------------------------------------
|
|
* Default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string to init token name with
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
CTextToken::CTextToken(const nsString& aName) : CHTMLToken(aName) {
|
|
mOrdinalValue=eToken_text;
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
const char* CTextToken::GetClassName(void) {
|
|
return "text";
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
PRInt32 CTextToken::GetTokenType(void) {
|
|
return eToken_text;
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
* Consume as much clear text from scanner as possible.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*------------------------------------------------------*/
|
|
PRInt32 CTextToken::Consume(PRUnichar aChar, CScanner& aScanner) {
|
|
static nsAutoString terminals("&<\r\n");
|
|
PRInt32 result=aScanner.ReadUntil(mTextValue,terminals,PR_FALSE);
|
|
return result;
|
|
};
|
|
|
|
|
|
/**-------------------------------------------------------
|
|
* Default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
CCommentToken::CCommentToken(const nsString& aName) : CHTMLToken(aName) {
|
|
mOrdinalValue=eToken_comment;
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
* Consume the identifier portion of the comment.
|
|
* Note that we've already eaten the "<!" portion.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*------------------------------------------------------*/
|
|
PRInt32 CCommentToken::Consume(PRUnichar aChar, CScanner& aScanner) {
|
|
|
|
PRUnichar ch,ch2;
|
|
|
|
static nsAutoString terminals(">");
|
|
|
|
aScanner.GetChar(ch);
|
|
PRInt32 result=aScanner.GetChar(ch);
|
|
mTextValue="<!";
|
|
if(kMinus==ch) {
|
|
aScanner.GetChar(ch2);
|
|
if(kMinus==ch2) {
|
|
//in this case, we're reading a long-form comment <-- xxx -->
|
|
mTextValue+="--";
|
|
PRInt32 findpos=-1;
|
|
while((findpos==kNotFound) && (!result)) {
|
|
result=aScanner.ReadUntil(mTextValue,terminals,PR_TRUE);
|
|
findpos=mTextValue.RFind("-->");
|
|
}
|
|
return result;
|
|
}
|
|
}
|
|
//if you're here, we're consuming a "short-form" comment
|
|
mTextValue+=ch;
|
|
result=aScanner.ReadUntil(mTextValue,terminals,PR_TRUE);
|
|
return result;
|
|
};
|
|
|
|
/**-------------------------------------------------------
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
const char* CCommentToken::GetClassName(void){
|
|
return "/**/";
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
PRInt32 CCommentToken::GetTokenType(void) {
|
|
return eToken_comment;
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string value to init token name with
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
CNewlineToken::CNewlineToken(const nsString& aName) : CHTMLToken(aName) {
|
|
mOrdinalValue=eToken_newline;
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
const char* CNewlineToken::GetClassName(void) {
|
|
return "crlf";
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
PRInt32 CNewlineToken::GetTokenType(void) {
|
|
return eToken_newline;
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
* Consume as many cr/lf pairs as you can find.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*------------------------------------------------------*/
|
|
PRInt32 CNewlineToken::Consume(PRUnichar aChar, CScanner& aScanner) {
|
|
mTextValue=aChar;
|
|
static nsAutoString crlfChars("\r\n");
|
|
PRInt32 result=aScanner.ReadWhile(mTextValue,crlfChars,PR_FALSE);
|
|
mTextValue.StripChars("\r");
|
|
return result;
|
|
};
|
|
|
|
/**-------------------------------------------------------
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string value to init token name with
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
CAttributeToken::CAttributeToken(const nsString& aName) : CHTMLToken(aName),
|
|
mTextKey() {
|
|
mLastAttribute=PR_FALSE;
|
|
mOrdinalValue=eToken_attribute;
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
const char* CAttributeToken::GetClassName(void) {
|
|
return "attr";
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
PRInt32 CAttributeToken::GetTokenType(void) {
|
|
return eToken_attribute;
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
* Dump contents of this token to givne output stream
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param out -- ostream to output content
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
void CAttributeToken::DebugDumpToken(ostream& out) {
|
|
char* cp=mTextKey.ToNewCString();
|
|
out << "[" << GetClassName() << "] " << *cp << "=";
|
|
delete cp;
|
|
char* cp2=mTextValue.ToNewCString();
|
|
out << *cp2 << ": " << mOrdinalValue << endl;
|
|
}
|
|
|
|
|
|
/**-------------------------------------------------------
|
|
* This general purpose method is used when you want to
|
|
* consume a known quoted string.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*------------------------------------------------------*/
|
|
PRInt32 ConsumeQuotedString(PRUnichar aChar,nsString& aString,CScanner& aScanner){
|
|
static nsAutoString terminals1(">'");
|
|
static nsAutoString terminals2(">\"");
|
|
|
|
PRInt32 result=kNotFound;
|
|
switch(aChar) {
|
|
case kQuote:
|
|
result=aScanner.ReadUntil(aString,terminals2,PR_TRUE);
|
|
break;
|
|
case kApostrophe:
|
|
result=aScanner.ReadUntil(aString,terminals1,PR_TRUE);
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
PRUnichar ch=aString.Last();
|
|
if(ch!=aChar)
|
|
aString+=aChar;
|
|
return result;
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
* This general purpose method is used when you want to
|
|
* consume attributed text value.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*------------------------------------------------------*/
|
|
PRInt32 ConsumeAttributeValueText(PRUnichar aChar,nsString& aString,CScanner& aScanner){
|
|
|
|
PRInt32 result=kNotFound;
|
|
static nsAutoString terminals(" \t\b\r\n>");
|
|
result=aScanner.ReadUntil(aString,terminals,PR_FALSE);
|
|
return result;
|
|
}
|
|
|
|
|
|
/**-------------------------------------------------------
|
|
* Consume the key and value portions of the attribute.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*------------------------------------------------------*/
|
|
PRInt32 CAttributeToken::Consume(PRUnichar aChar, CScanner& aScanner) {
|
|
|
|
aScanner.SkipWhite(); //skip leading whitespace
|
|
PRInt32 result=aScanner.Peek(aChar);
|
|
if(kEOF!=result) {
|
|
if(kQuote==aChar) { //if you're here, handle quoted key...
|
|
aScanner.GetChar(aChar); //skip the quote sign...
|
|
mTextKey=aChar;
|
|
result=ConsumeQuotedString(aChar,mTextKey,aScanner);
|
|
}
|
|
else if(kHashsign==aChar) {
|
|
aScanner.GetChar(aChar); //skip the hash sign...
|
|
mTextKey=aChar;
|
|
result=aScanner.ReadWhile(mTextKey,gDigits,PR_TRUE);
|
|
}
|
|
else {
|
|
//If you're here, handle an unquoted key.
|
|
//Don't forget to reduce entities inline!
|
|
static nsAutoString terminals(" >=\t\b\r\n\"");
|
|
result=aScanner.ReadUntil(mTextKey,terminals,PR_FALSE);
|
|
}
|
|
|
|
//now it's time to Consume the (optional) value...
|
|
if(!(result=aScanner.SkipWhite())) {
|
|
if(!(result=aScanner.Peek(aChar))) {
|
|
if(kEqual==aChar){
|
|
aScanner.GetChar(aChar); //skip the equal sign...
|
|
aScanner.SkipWhite(); //now skip any intervening whitespace
|
|
aScanner.GetChar(aChar); //and grab the next char.
|
|
|
|
if((kQuote==aChar) || (kApostrophe==aChar)) {
|
|
mTextValue=aChar;
|
|
result=ConsumeQuotedString(aChar,mTextValue,aScanner);
|
|
}
|
|
else {
|
|
mTextValue=aChar; //it's an alphanum attribute...
|
|
result=ConsumeAttributeValueText(aChar,mTextValue,aScanner);
|
|
}
|
|
|
|
aScanner.SkipWhite();
|
|
}
|
|
}
|
|
}
|
|
aScanner.Peek(aChar);
|
|
mLastAttribute= PRBool((kGreaterThan==aChar) || (kEOF==result));
|
|
}
|
|
return result;
|
|
};
|
|
|
|
/**-------------------------------------------------------
|
|
* Dump contents of this token to givne output stream
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param out -- ostream to output content
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
void CAttributeToken::DebugDumpSource(ostream& out) {
|
|
char* cp=mTextKey.ToNewCString();
|
|
out << " " << *cp;
|
|
delete cp;
|
|
if(mTextValue.Length()) {
|
|
cp=mTextValue.ToNewCString();
|
|
out << "=" << *cp;
|
|
delete cp;
|
|
}
|
|
if(mLastAttribute)
|
|
out<<">";
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string value to init token name with
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
CWhitespaceToken::CWhitespaceToken(const nsString& aName) : CHTMLToken(aName) {
|
|
mOrdinalValue=eToken_whitespace;
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
const char* CWhitespaceToken::GetClassName(void) {
|
|
return "ws";
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
PRInt32 CWhitespaceToken::GetTokenType(void) {
|
|
return eToken_whitespace;
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
* This general purpose method is used when you want to
|
|
* consume an aribrary sequence of whitespace.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*------------------------------------------------------*/
|
|
PRInt32 CWhitespaceToken::Consume(PRUnichar aChar, CScanner& aScanner) {
|
|
|
|
mTextValue=aChar;
|
|
PRInt32 result=aScanner.ReadWhile(mTextValue,gWhitespace,PR_FALSE);
|
|
mTextValue.StripChars("\r");
|
|
return result;
|
|
};
|
|
|
|
/**-------------------------------------------------------
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string value to init token name with
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
CEntityToken::CEntityToken(const nsString& aName) : CHTMLToken(aName) {
|
|
mOrdinalValue=eToken_entity;
|
|
#ifdef VERBOSE_DEBUG
|
|
if(!VerifyEntityTable()) {
|
|
cout<<"Entity table is invalid!" << endl;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
* Consume the rest of the entity. We've already eaten the "&".
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*------------------------------------------------------*/
|
|
PRInt32 CEntityToken::Consume(PRUnichar aChar, CScanner& aScanner) {
|
|
|
|
mTextValue=aChar;
|
|
PRInt32 result=ConsumeEntity(aChar,mTextValue,aScanner);
|
|
return result;
|
|
};
|
|
|
|
/**-------------------------------------------------------
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
const char* CEntityToken::GetClassName(void) {
|
|
return "&entity";
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
PRInt32 CEntityToken::GetTokenType(void) {
|
|
return eToken_entity;
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
* This general purpose method is used when you want to
|
|
* consume an entity &xxxx;. Keep in mind that entities
|
|
* are <i>not</i> reduced inline.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*------------------------------------------------------*/
|
|
PRInt32 CEntityToken::ConsumeEntity(PRUnichar aChar,nsString& aString,CScanner& aScanner){
|
|
|
|
PRInt32 result=kNotFound;
|
|
aScanner.Peek(aChar);
|
|
if(kLeftBrace==aChar) {
|
|
//you're consuming a script entity...
|
|
static nsAutoString terminals("}>");
|
|
result=aScanner.ReadUntil(aString,terminals,PR_FALSE);
|
|
aScanner.Peek(aChar);
|
|
if(kRightBrace==aChar) {
|
|
aString+=kRightBrace; //append rightbrace, and...
|
|
aScanner.GetChar(aChar);//yank the closing right-brace
|
|
}
|
|
}
|
|
else
|
|
{
|
|
result=aScanner.ReadWhile(aString,gIdentChars,PR_FALSE);
|
|
aScanner.Peek(aChar);
|
|
if (kSemicolon == aChar) {
|
|
// consume semicolon that stopped the scan
|
|
aScanner.GetChar(aChar);
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
|
|
/**-------------------------------------------------------
|
|
* This method converts this entity into its underlying
|
|
* unicode equivalent.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
PRInt32 CEntityToken::TranslateToUnicodeStr(nsString& aString) {
|
|
char* cp = mTextValue.ToNewCString();
|
|
PRInt32 index=FindEntityIndex(cp);
|
|
if(kNotFound!=index) {
|
|
PRUnichar ch=gStrToUnicodeTable[index].fValue;
|
|
aString=ch;
|
|
} else {
|
|
#ifdef GESS_MACHINE
|
|
index=TranslateExtendedEntity(cp,aString);
|
|
#endif
|
|
}
|
|
delete cp;
|
|
return index;
|
|
}
|
|
|
|
|
|
/**-------------------------------------------------------
|
|
* This method ensures that the entity table doesn't get
|
|
* out of sync. Make sure you call this at least once.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @return PR_TRUE if valid (ordered correctly)
|
|
*------------------------------------------------------*/
|
|
PRBool CEntityToken::VerifyEntityTable(){
|
|
PRInt32 count=sizeof(gStrToUnicodeTable)/sizeof(StrToUnicodeStruct);
|
|
PRInt32 i,j;
|
|
for(i=1;i<count-1;i++)
|
|
{
|
|
j=strcmp(gStrToUnicodeTable[i-1].fName,gStrToUnicodeTable[i].fName);
|
|
if(j>0)
|
|
return PR_FALSE;
|
|
}
|
|
return PR_TRUE;
|
|
}
|
|
|
|
|
|
/**-------------------------------------------------------
|
|
* This method is used to convert from a given string (char*)
|
|
* into a entity index (offset within entity table).
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aBuffer -- string to be converted
|
|
* @param aBuflen -- optional string length
|
|
* @return integer offset of string in table, or kNotFound
|
|
*------------------------------------------------------*/
|
|
PRInt32 CEntityToken::FindEntityIndex(const char* aBuffer,PRInt32 aBufLen) {
|
|
PRInt32 result=kNotFound;
|
|
PRInt32 cnt=sizeof(gStrToUnicodeTable)/sizeof(StrToUnicodeStruct);
|
|
PRInt32 low=0;
|
|
PRInt32 high=cnt-1;
|
|
PRInt32 middle=kNotFound;
|
|
|
|
if(kNotFound==aBufLen) {
|
|
aBufLen=strlen(aBuffer);
|
|
}
|
|
|
|
if (aBuffer && aBufLen && cnt) {
|
|
while(low<=high)
|
|
{
|
|
middle=(PRInt32)(low+high)/2;
|
|
// result=strncmp(aBuffer,gStrToUnicodeTable[middle].fName,aBufLen);
|
|
result=strcmp(aBuffer,gStrToUnicodeTable[middle].fName);
|
|
if (result==0) {
|
|
return middle;
|
|
}
|
|
if (result<0) {
|
|
high=middle-1;
|
|
}
|
|
else low=middle+1;
|
|
}
|
|
}
|
|
return kNotFound;
|
|
}
|
|
|
|
|
|
/**-------------------------------------------------------
|
|
* This method reduces all text entities into their char
|
|
* representation.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
PRInt32 CEntityToken::ReduceEntities(nsString& aString) {
|
|
PRInt32 result=0;
|
|
PRInt32 amppos=0;
|
|
PRBool done=PR_FALSE;
|
|
PRInt32 offset=0;
|
|
|
|
while(!done) {
|
|
if(kNotFound!=(amppos=aString.Find('&',offset))) {
|
|
if(!nsString::IsSpace(aString[amppos+1])) { //have we found a genuine entity?
|
|
PRInt32 endpos=aString.FindLastCharInSet(gIdentChars,amppos+1);
|
|
PRInt32 cnt;
|
|
if(kNotFound==endpos)
|
|
cnt=aString.Length()-1-amppos;
|
|
else cnt=endpos-amppos;
|
|
PRInt32 index=FindEntityIndex((const char*)&aString[amppos+1],cnt);
|
|
if(kNotFound!=index) {
|
|
aString[amppos]=gStrToUnicodeTable[index].fValue;
|
|
aString.Cut(amppos+1,cnt+(endpos!=kNotFound));
|
|
}
|
|
else offset=amppos+1;
|
|
}
|
|
}
|
|
else done=PR_TRUE;
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
* Dump contents of this token to givne output stream
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param out -- ostream to output content
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
void CEntityToken::DebugDumpSource(ostream& out) {
|
|
char* cp=mTextValue.ToNewCString();
|
|
out << "&" << *cp;
|
|
delete cp;
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
const char* CScriptToken::GetClassName(void) {
|
|
return "script";
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
PRInt32 CScriptToken::GetTokenType(void) {
|
|
return eToken_script;
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
const char* CStyleToken::GetClassName(void) {
|
|
return "style";
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
PRInt32 CStyleToken::GetTokenType(void) {
|
|
return eToken_style;
|
|
}
|
|
|
|
|
|
/**-------------------------------------------------------
|
|
* default constructor
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aName -- string value to init token name with
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
CSkippedContentToken::CSkippedContentToken(const nsString& aName) : CAttributeToken(aName) {
|
|
mTextKey = "$skipped-content";/* XXX need a better answer! */
|
|
mOrdinalValue=eToken_skippedcontent;
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
const char* CSkippedContentToken::GetClassName(void) {
|
|
return "skipped";
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
*
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
PRInt32 CSkippedContentToken::GetTokenType(void) {
|
|
return eToken_skippedcontent;
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
* Consume content until you find a sequence that matches
|
|
* this objects mTextValue.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aChar -- last char consumed from stream
|
|
* @param aScanner -- controller of underlying input source
|
|
* @return error result
|
|
*------------------------------------------------------*/
|
|
PRInt32 CSkippedContentToken::Consume(PRUnichar aChar,CScanner& aScanner) {
|
|
PRBool done=PR_FALSE;
|
|
PRInt32 result=kNoError;
|
|
nsString temp;
|
|
|
|
while((!done) && (!aScanner.Eof())) {
|
|
static nsAutoString terminals(">");
|
|
result=aScanner.ReadUntil(temp,terminals,PR_TRUE);
|
|
done=PRBool(kNotFound!=temp.RFind(mTextValue,PR_TRUE));
|
|
}
|
|
mTextValue=temp;
|
|
return result;
|
|
}
|
|
|
|
|
|
/**-------------------------------------------------------
|
|
* This method iterates the tagtable to ensure that is
|
|
* is proper sort order. This method only needs to be
|
|
* called once.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
class CTagTableVerifier {
|
|
public:
|
|
CTagTableVerifier::CTagTableVerifier(){
|
|
PRInt32 count=sizeof(gHTMLTagTable)/sizeof(HTMLTagEntry);
|
|
PRInt32 i,j;
|
|
for(i=1;i<count-1;i++)
|
|
{
|
|
j=strcmp(gHTMLTagTable[i-1].fName,gHTMLTagTable[i].fName);
|
|
if(j>0) {
|
|
#ifdef VERBOSE_DEBUG
|
|
cout << "Tag Table is out of order at " << i << "!" << endl;
|
|
#endif
|
|
return;
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
};
|
|
|
|
|
|
/*-------------------------------------------------------
|
|
*
|
|
* @update gess4/6/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
eHTMLTokenTypes DetermineTokenType(const nsString& aString){
|
|
return eToken_unknown;
|
|
}
|
|
|
|
/**-------------------------------------------------------
|
|
* This method accepts a string (and optionally, its length)
|
|
* and determines the eHTMLTag (id) value.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param aString -- string to be convered to id
|
|
* @return valid id, or user_defined.
|
|
*------------------------------------------------------*/
|
|
eHTMLTags DetermineHTMLTagType(const nsString& aString)
|
|
{
|
|
PRInt32 result=-1;
|
|
PRInt32 cnt=sizeof(gHTMLTagTable)/sizeof(HTMLTagEntry);
|
|
PRInt32 low=0;
|
|
PRInt32 high=cnt-1;
|
|
PRInt32 middle=kNotFound;
|
|
|
|
if (0 != cnt)
|
|
while(low<=high)
|
|
{
|
|
middle=(PRInt32)(low+high)/2;
|
|
result=aString.Compare(gHTMLTagTable[middle].fName, PR_TRUE);
|
|
if (result==0)
|
|
return gHTMLTagTable[middle].fTagID;
|
|
if (result<0)
|
|
high=middle-1;
|
|
else low=middle+1;
|
|
}
|
|
return eHTMLTag_userdefined;
|
|
}
|
|
|
|
|
|
/**-------------------------------------------------------
|
|
* This method iterates the attribute-table to ensure that is
|
|
* is proper sort order. This method only needs to be
|
|
* called once.
|
|
*
|
|
* @update gess 3/25/98
|
|
* @param
|
|
* @return
|
|
*------------------------------------------------------*/
|
|
class CAttributeTableVerifier {
|
|
public:
|
|
CAttributeTableVerifier::CAttributeTableVerifier(){
|
|
PRInt32 count=sizeof(gHTMLAttributeTable)/sizeof(HTMLAttrEntry);
|
|
PRInt32 i,j;
|
|
for(i=1;i<count-1;i++)
|
|
{
|
|
j=strcmp(gHTMLAttributeTable[i-1].fName,gHTMLAttributeTable[i].fName);
|
|
if(j>0) {
|
|
#ifdef VERBOSE_DEBUG
|
|
cout << "Attribute table is out of order at " << j << "!" << endl;
|
|
#endif
|
|
return;
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
};
|
|
|
|
|
|
/**-------------------------------------------------------
|
|
* These objects are here to force the validation of the
|
|
* tag and attribute tables.
|
|
*------------------------------------------------------*/
|
|
|
|
CAttributeTableVerifier gAttributeTableVerifier;
|
|
CTagTableVerifier gTableVerifier;
|
|
|