another fix for entities

This commit is contained in:
rickg@netscape.com
1999-03-25 01:27:41 +00:00
parent ccd3de0272
commit 01e7e472eb
6 changed files with 104 additions and 70 deletions

View File

@@ -465,7 +465,7 @@ nsHTMLElement gHTMLElements[] = {
{ /*tag*/ eHTMLTag_embed, { /*tag*/ eHTMLTag_embed,
/*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags,
/*autoclose starttags and endtags*/ 0,0,0, /*autoclose starttags and endtags*/ 0,0,0,
/*parent,incl,exclgroups*/ kFlow, kInline, kNone, /*parent,incl,exclgroups*/ kFlow, kNone, kNone,
/*special properties*/ 0, /*special properties*/ 0,
/*special parents,kids,skip*/ 0,&gContainsParam,eHTMLTag_unknown}, /*special parents,kids,skip*/ 0,&gContainsParam,eHTMLTag_unknown},
@@ -1095,7 +1095,7 @@ PRBool nsHTMLElement::IsContainer(eHTMLTags aChild) {
static eHTMLTags gNonContainers[]={ static eHTMLTags gNonContainers[]={
eHTMLTag_unknown, eHTMLTag_unknown,
eHTMLTag_area, eHTMLTag_base, eHTMLTag_basefont, eHTMLTag_area, eHTMLTag_base, eHTMLTag_basefont,
eHTMLTag_br, eHTMLTag_col, eHTMLTag_br, eHTMLTag_col, eHTMLTag_embed,
eHTMLTag_frame, eHTMLTag_hr, eHTMLTag_whitespace, eHTMLTag_frame, eHTMLTag_hr, eHTMLTag_whitespace,
eHTMLTag_input, eHTMLTag_link, eHTMLTag_isindex, eHTMLTag_input, eHTMLTag_link, eHTMLTag_isindex,
eHTMLTag_meta, eHTMLTag_param, eHTMLTag_plaintext, eHTMLTag_meta, eHTMLTag_param, eHTMLTag_plaintext,

View File

@@ -532,7 +532,7 @@ nsresult nsHTMLTokenizer::ConsumeEntity(PRUnichar aChar,CToken*& aToken,nsScanne
} }
else if(kHashsign==theChar) { else if(kHashsign==theChar) {
aToken = theRecycler->CreateTokenOfType(eToken_entity,eHTMLTag_entity); aToken = theRecycler->CreateTokenOfType(eToken_entity,eHTMLTag_entity);
result=aToken->Consume(0,aScanner); result=aToken->Consume(theChar,aScanner);
} }
else { else {
//oops, we're actually looking at plain text... //oops, we're actually looking at plain text...
@@ -544,7 +544,7 @@ nsresult nsHTMLTokenizer::ConsumeEntity(PRUnichar aChar,CToken*& aToken,nsScanne
char cbuf[30]; char cbuf[30];
nsString& theStr=aToken->GetStringValueXXX(); nsString& theStr=aToken->GetStringValueXXX();
theStr.ToCString(cbuf, sizeof(cbuf)-1); theStr.ToCString(cbuf, sizeof(cbuf)-1);
if(-1==NS_EntityToUnicode(cbuf)){ if((kHashsign!=theChar) && (-1==NS_EntityToUnicode(cbuf))){
//if you're here we have a bogus entity. //if you're here we have a bogus entity.
//convert it into a text token. //convert it into a text token.
nsAutoString temp("&"); nsAutoString temp("&");

View File

@@ -205,6 +205,12 @@ nsString& GetIdentChars(void) {
return gIdentChars; return gIdentChars;
} }
/*
 * Accessor for the characters accepted while scanning the body of a
 * numeric ('#') entity: the decimal digits plus the hexadecimal letters
 * in both upper and lower case.
 *
 * @return reference to a function-local static string; the same object
 *         is handed back on every call.
 */
static
nsString& GetNumericChars(void) {
static nsString theNumericChars("0123456789ABCDEFabcdef");
return theNumericChars;
}
/* /*
* Consume the identifier portion of the start tag * Consume the identifier portion of the start tag
* *
@@ -1280,31 +1286,40 @@ PRInt32 CEntityToken::GetTokenType(void) {
* @return error result * @return error result
*/ */
PRInt32 CEntityToken::ConsumeEntity(PRUnichar aChar,nsString& aString,nsScanner& aScanner){ PRInt32 CEntityToken::ConsumeEntity(PRUnichar aChar,nsString& aString,nsScanner& aScanner){
PRUnichar theChar=0;
PRInt32 result=aScanner.Peek(aChar); PRInt32 result=aScanner.Peek(theChar);
if(NS_OK==result) { if(NS_OK==result) {
if(kLeftBrace==aChar) { if(kLeftBrace==theChar) {
//you're consuming a script entity... //you're consuming a script entity...
static nsAutoString terminals("}>"); static nsAutoString terminals("}>");
result=aScanner.ReadUntil(aString,terminals,PR_FALSE,PR_FALSE); result=aScanner.ReadUntil(aString,terminals,PR_FALSE,PR_FALSE);
if(NS_OK==result) { if(NS_OK==result) {
result=aScanner.Peek(aChar); result=aScanner.Peek(theChar);
if(NS_OK==result) { if(NS_OK==result) {
if(kRightBrace==aChar) { if(kRightBrace==theChar) {
aString+=kRightBrace; //append rightbrace, and... aString+=kRightBrace; //append rightbrace, and...
result=aScanner.GetChar(aChar);//yank the closing right-brace result=aScanner.GetChar(theChar);//yank the closing right-brace
} }
} }
} }
} //if } //if
else { else {
result=aScanner.ReadWhile(aString,GetIdentChars(),PR_TRUE,PR_FALSE); if(kHashsign==aChar) {
if('X'==(toupper((char)theChar))) {
result=aScanner.GetChar(theChar);
aString+=theChar;
}
if(NS_OK==result){ if(NS_OK==result){
result=aScanner.Peek(aChar); result=aScanner.ReadWhile(aString,GetNumericChars(),PR_TRUE,PR_FALSE);
}
}
else result=aScanner.ReadWhile(aString,GetIdentChars(),PR_TRUE,PR_FALSE);
if(NS_OK==result) { if(NS_OK==result) {
if (kSemicolon == aChar) { result=aScanner.Peek(theChar);
if(NS_OK==result) {
if (kSemicolon == theChar) {
// consume semicolon that stopped the scan // consume semicolon that stopped the scan
result=aScanner.GetChar(aChar); result=aScanner.GetChar(theChar);
} }
} }
}//if }//if
@@ -1375,18 +1390,12 @@ PRInt32 CEntityToken::TranslateToUnicodeStr(nsString& aString) {
PRUnichar theChar0=mTextValue[0]; PRUnichar theChar0=mTextValue[0];
PRBool isDigit0=nsString::IsDigit(theChar0); PRBool isDigit0=nsString::IsDigit(theChar0);
char cbuf[30]; if(kHashsign==theChar0) {
mTextValue.ToCString(cbuf, sizeof(cbuf)-1);
value = NS_EntityToUnicode(cbuf);
if(-1<value) {
//we found a named entity...
aString=PRUnichar(value);
}
else {
if(isDigit0 || ('x'==theChar0) || ('X'==theChar0)) {
PRInt32 err=0; PRInt32 err=0;
value=mTextValue.ToInteger(&err,theRadix[isDigit0]);
PRUnichar theChar1=mTextValue[1];
PRBool isDigit1=nsString::IsDigit(theChar1);
value=mTextValue.ToInteger(&err,theRadix[isDigit1]);
if(0==err) { if(0==err) {
#ifdef PA_REMAP_128_TO_160_ILLEGAL_NCR #ifdef PA_REMAP_128_TO_160_ILLEGAL_NCR
/* for some illegal, but popular usage */ /* for some illegal, but popular usage */
@@ -1396,7 +1405,15 @@ PRInt32 CEntityToken::TranslateToUnicodeStr(nsString& aString) {
#endif #endif
aString.Append(PRUnichar(value)); aString.Append(PRUnichar(value));
}//if }//if
}//if }
else{
char cbuf[30];
mTextValue.ToCString(cbuf, sizeof(cbuf)-1);
value = NS_EntityToUnicode(cbuf);
if(-1<value) {
//we found a named entity...
aString=PRUnichar(value);
}
}//else }//else
}//if }//if

View File

@@ -465,7 +465,7 @@ nsHTMLElement gHTMLElements[] = {
{ /*tag*/ eHTMLTag_embed, { /*tag*/ eHTMLTag_embed,
/*rootnodes,endrootnodes*/ &gRootTags,&gRootTags, /*rootnodes,endrootnodes*/ &gRootTags,&gRootTags,
/*autoclose starttags and endtags*/ 0,0,0, /*autoclose starttags and endtags*/ 0,0,0,
/*parent,incl,exclgroups*/ kFlow, kInline, kNone, /*parent,incl,exclgroups*/ kFlow, kNone, kNone,
/*special properties*/ 0, /*special properties*/ 0,
/*special parents,kids,skip*/ 0,&gContainsParam,eHTMLTag_unknown}, /*special parents,kids,skip*/ 0,&gContainsParam,eHTMLTag_unknown},
@@ -1095,7 +1095,7 @@ PRBool nsHTMLElement::IsContainer(eHTMLTags aChild) {
static eHTMLTags gNonContainers[]={ static eHTMLTags gNonContainers[]={
eHTMLTag_unknown, eHTMLTag_unknown,
eHTMLTag_area, eHTMLTag_base, eHTMLTag_basefont, eHTMLTag_area, eHTMLTag_base, eHTMLTag_basefont,
eHTMLTag_br, eHTMLTag_col, eHTMLTag_br, eHTMLTag_col, eHTMLTag_embed,
eHTMLTag_frame, eHTMLTag_hr, eHTMLTag_whitespace, eHTMLTag_frame, eHTMLTag_hr, eHTMLTag_whitespace,
eHTMLTag_input, eHTMLTag_link, eHTMLTag_isindex, eHTMLTag_input, eHTMLTag_link, eHTMLTag_isindex,
eHTMLTag_meta, eHTMLTag_param, eHTMLTag_plaintext, eHTMLTag_meta, eHTMLTag_param, eHTMLTag_plaintext,

View File

@@ -532,7 +532,7 @@ nsresult nsHTMLTokenizer::ConsumeEntity(PRUnichar aChar,CToken*& aToken,nsScanne
} }
else if(kHashsign==theChar) { else if(kHashsign==theChar) {
aToken = theRecycler->CreateTokenOfType(eToken_entity,eHTMLTag_entity); aToken = theRecycler->CreateTokenOfType(eToken_entity,eHTMLTag_entity);
result=aToken->Consume(0,aScanner); result=aToken->Consume(theChar,aScanner);
} }
else { else {
//oops, we're actually looking at plain text... //oops, we're actually looking at plain text...
@@ -544,7 +544,7 @@ nsresult nsHTMLTokenizer::ConsumeEntity(PRUnichar aChar,CToken*& aToken,nsScanne
char cbuf[30]; char cbuf[30];
nsString& theStr=aToken->GetStringValueXXX(); nsString& theStr=aToken->GetStringValueXXX();
theStr.ToCString(cbuf, sizeof(cbuf)-1); theStr.ToCString(cbuf, sizeof(cbuf)-1);
if(-1==NS_EntityToUnicode(cbuf)){ if((kHashsign!=theChar) && (-1==NS_EntityToUnicode(cbuf))){
//if you're here we have a bogus entity. //if you're here we have a bogus entity.
//convert it into a text token. //convert it into a text token.
nsAutoString temp("&"); nsAutoString temp("&");

View File

@@ -205,6 +205,12 @@ nsString& GetIdentChars(void) {
return gIdentChars; return gIdentChars;
} }
/*
 * Accessor for the characters accepted while scanning the body of a
 * numeric ('#') entity: the decimal digits plus the hexadecimal letters
 * in both upper and lower case.
 *
 * @return reference to a function-local static string; the same object
 *         is handed back on every call.
 */
static
nsString& GetNumericChars(void) {
static nsString theNumericChars("0123456789ABCDEFabcdef");
return theNumericChars;
}
/* /*
* Consume the identifier portion of the start tag * Consume the identifier portion of the start tag
* *
@@ -1280,31 +1286,40 @@ PRInt32 CEntityToken::GetTokenType(void) {
* @return error result * @return error result
*/ */
PRInt32 CEntityToken::ConsumeEntity(PRUnichar aChar,nsString& aString,nsScanner& aScanner){ PRInt32 CEntityToken::ConsumeEntity(PRUnichar aChar,nsString& aString,nsScanner& aScanner){
PRUnichar theChar=0;
PRInt32 result=aScanner.Peek(aChar); PRInt32 result=aScanner.Peek(theChar);
if(NS_OK==result) { if(NS_OK==result) {
if(kLeftBrace==aChar) { if(kLeftBrace==theChar) {
//you're consuming a script entity... //you're consuming a script entity...
static nsAutoString terminals("}>"); static nsAutoString terminals("}>");
result=aScanner.ReadUntil(aString,terminals,PR_FALSE,PR_FALSE); result=aScanner.ReadUntil(aString,terminals,PR_FALSE,PR_FALSE);
if(NS_OK==result) { if(NS_OK==result) {
result=aScanner.Peek(aChar); result=aScanner.Peek(theChar);
if(NS_OK==result) { if(NS_OK==result) {
if(kRightBrace==aChar) { if(kRightBrace==theChar) {
aString+=kRightBrace; //append rightbrace, and... aString+=kRightBrace; //append rightbrace, and...
result=aScanner.GetChar(aChar);//yank the closing right-brace result=aScanner.GetChar(theChar);//yank the closing right-brace
} }
} }
} }
} //if } //if
else { else {
result=aScanner.ReadWhile(aString,GetIdentChars(),PR_TRUE,PR_FALSE); if(kHashsign==aChar) {
if('X'==(toupper((char)theChar))) {
result=aScanner.GetChar(theChar);
aString+=theChar;
}
if(NS_OK==result){ if(NS_OK==result){
result=aScanner.Peek(aChar); result=aScanner.ReadWhile(aString,GetNumericChars(),PR_TRUE,PR_FALSE);
}
}
else result=aScanner.ReadWhile(aString,GetIdentChars(),PR_TRUE,PR_FALSE);
if(NS_OK==result) { if(NS_OK==result) {
if (kSemicolon == aChar) { result=aScanner.Peek(theChar);
if(NS_OK==result) {
if (kSemicolon == theChar) {
// consume semicolon that stopped the scan // consume semicolon that stopped the scan
result=aScanner.GetChar(aChar); result=aScanner.GetChar(theChar);
} }
} }
}//if }//if
@@ -1375,18 +1390,12 @@ PRInt32 CEntityToken::TranslateToUnicodeStr(nsString& aString) {
PRUnichar theChar0=mTextValue[0]; PRUnichar theChar0=mTextValue[0];
PRBool isDigit0=nsString::IsDigit(theChar0); PRBool isDigit0=nsString::IsDigit(theChar0);
char cbuf[30]; if(kHashsign==theChar0) {
mTextValue.ToCString(cbuf, sizeof(cbuf)-1);
value = NS_EntityToUnicode(cbuf);
if(-1<value) {
//we found a named entity...
aString=PRUnichar(value);
}
else {
if(isDigit0 || ('x'==theChar0) || ('X'==theChar0)) {
PRInt32 err=0; PRInt32 err=0;
value=mTextValue.ToInteger(&err,theRadix[isDigit0]);
PRUnichar theChar1=mTextValue[1];
PRBool isDigit1=nsString::IsDigit(theChar1);
value=mTextValue.ToInteger(&err,theRadix[isDigit1]);
if(0==err) { if(0==err) {
#ifdef PA_REMAP_128_TO_160_ILLEGAL_NCR #ifdef PA_REMAP_128_TO_160_ILLEGAL_NCR
/* for some illegal, but popular usage */ /* for some illegal, but popular usage */
@@ -1396,7 +1405,15 @@ PRInt32 CEntityToken::TranslateToUnicodeStr(nsString& aString) {
#endif #endif
aString.Append(PRUnichar(value)); aString.Append(PRUnichar(value));
}//if }//if
}//if }
else{
char cbuf[30];
mTextValue.ToCString(cbuf, sizeof(cbuf)-1);
value = NS_EntityToUnicode(cbuf);
if(-1<value) {
//we found a named entity...
aString=PRUnichar(value);
}
}//else }//else
}//if }//if