Bug 612527 - Implement spec tweaks to U+0000 handling in the HTML5 parser. rs=jonas, a=blocking2.0-betaN.

This commit is contained in:
Henri Sivonen
2010-12-09 11:27:58 +02:00
parent 0e46554119
commit 034b529d7e
9 changed files with 66 additions and 12 deletions

View File

@@ -3424,7 +3424,7 @@ public class Tokenizer implements Locator {
} }
switch (c) { switch (c) {
case '\u0000': case '\u0000':
emitReplacementCharacter(buf, pos); emitPlaintextReplacementCharacter(buf, pos);
continue; continue;
case '\r': case '\r':
emitCarriageReturn(buf, pos); emitCarriageReturn(buf, pos);
@@ -5785,6 +5785,13 @@ public class Tokenizer implements Locator {
cstart = pos + 1; cstart = pos + 1;
} }
/**
 * Emits one replacement character for the input position <code>pos</code>
 * by handing it directly to <code>tokenHandler.characters</code>.
 *
 * The tokenizer's state loop calls this from a <code>'\u0000'</code> case.
 *
 * @param buf the buffer currently being tokenized
 * @param pos index in <code>buf</code> of the character being replaced
 * @throws SAXException propagated from the calls made here
 */
private void emitPlaintextReplacementCharacter(@NoLength char[] buf, int pos)
throws SAXException {
// Must run before the replacement is delivered; presumably this emits the
// pending run of characters that ends just before pos (confirm in flushChars).
flushChars(buf, pos);
// Deliver exactly one code unit: offset 0, length 1 of REPLACEMENT_CHARACTER.
tokenHandler.characters(REPLACEMENT_CHARACTER, 0, 1);
// Move cstart past pos; presumably cstart marks where the next pending run
// begins, so the replaced character itself is never flushed (TODO confirm).
cstart = pos + 1;
}
private void setAdditionalAndRememberAmpersandLocation(char add) { private void setAdditionalAndRememberAmpersandLocation(char add) {
additional = add; additional = add;
// [NOCPP[ // [NOCPP[

View File

@@ -1214,10 +1214,24 @@ public abstract class TreeBuilder<T> implements TokenHandler,
/** /**
* @see nu.validator.htmlparser.common.TokenHandler#zeroOriginatingReplacementCharacter() * @see nu.validator.htmlparser.common.TokenHandler#zeroOriginatingReplacementCharacter()
*/ */
public void zeroOriginatingReplacementCharacter() public void zeroOriginatingReplacementCharacter() throws SAXException {
throws SAXException { if (mode == TEXT) {
if (isInForeign() || mode == TEXT) { accumulateCharacters(REPLACEMENT_CHARACTER, 0, 1);
characters(REPLACEMENT_CHARACTER, 0, 1); return;
}
if (currentPtr >= 0) {
StackNode<T> stackNode = stack[currentPtr];
if (stackNode.ns == "http://www.w3.org/1999/xhtml") {
return;
}
if (stackNode.isHtmlIntegrationPoint()) {
return;
}
if (stackNode.ns == "http://www.w3.org/1998/Math/MathML"
&& stackNode.getGroup() == MI_MO_MN_MS_MTEXT) {
return;
}
accumulateCharacters(REPLACEMENT_CHARACTER, 0, 1);
} }
} }

View File

@@ -1717,7 +1717,7 @@ nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRInt32 pos, PRUnichar*
} }
switch(c) { switch(c) {
case '\0': { case '\0': {
emitReplacementCharacter(buf, pos); emitPlaintextReplacementCharacter(buf, pos);
continue; continue;
} }
case '\r': { case '\r': {
@@ -3300,6 +3300,14 @@ nsHtml5Tokenizer::emitReplacementCharacter(PRUnichar* buf, PRInt32 pos)
cstart = pos + 1; cstart = pos + 1;
} }
/**
 * Emits one replacement character for the input position pos by handing it
 * directly to tokenHandler->characters().
 *
 * stateLoop calls this from a '\0' case.
 *
 * @param buf the buffer currently being tokenized
 * @param pos index in buf of the character being replaced
 */
void
nsHtml5Tokenizer::emitPlaintextReplacementCharacter(PRUnichar* buf, PRInt32 pos)
{
// Must run before the replacement is delivered; presumably this emits the
// pending run of characters that ends just before pos (confirm in flushChars).
flushChars(buf, pos);
// Deliver exactly one code unit: offset 0, length 1 of REPLACEMENT_CHARACTER.
tokenHandler->characters(REPLACEMENT_CHARACTER, 0, 1);
// Move cstart past pos; presumably cstart marks where the next pending run
// begins, so the replaced character itself is never flushed (TODO confirm).
cstart = pos + 1;
}
void void
nsHtml5Tokenizer::setAdditionalAndRememberAmpersandLocation(PRUnichar add) nsHtml5Tokenizer::setAdditionalAndRememberAmpersandLocation(PRUnichar add)
{ {

View File

@@ -259,6 +259,7 @@ class nsHtml5Tokenizer
private: private:
void emitCarriageReturn(PRUnichar* buf, PRInt32 pos); void emitCarriageReturn(PRUnichar* buf, PRInt32 pos);
void emitReplacementCharacter(PRUnichar* buf, PRInt32 pos); void emitReplacementCharacter(PRUnichar* buf, PRInt32 pos);
void emitPlaintextReplacementCharacter(PRUnichar* buf, PRInt32 pos);
void setAdditionalAndRememberAmpersandLocation(PRUnichar add); void setAdditionalAndRememberAmpersandLocation(PRUnichar add);
void bogusDoctype(); void bogusDoctype();
void bogusDoctypeWithoutQuirks(); void bogusDoctypeWithoutQuirks();

View File

@@ -423,8 +423,22 @@ nsHtml5TreeBuilder::characters(const PRUnichar* buf, PRInt32 start, PRInt32 leng
void void
nsHtml5TreeBuilder::zeroOriginatingReplacementCharacter() nsHtml5TreeBuilder::zeroOriginatingReplacementCharacter()
{ {
if (isInForeign() || mode == NS_HTML5TREE_BUILDER_TEXT) { if (mode == NS_HTML5TREE_BUILDER_TEXT) {
characters(REPLACEMENT_CHARACTER, 0, 1); accumulateCharacters(REPLACEMENT_CHARACTER, 0, 1);
return;
}
if (currentPtr >= 0) {
nsHtml5StackNode* stackNode = stack[currentPtr];
if (stackNode->ns == kNameSpaceID_XHTML) {
return;
}
if (stackNode->isHtmlIntegrationPoint()) {
return;
}
if (stackNode->ns == kNameSpaceID_MathML && stackNode->getGroup() == NS_HTML5TREE_BUILDER_MI_MO_MN_MS_MTEXT) {
return;
}
accumulateCharacters(REPLACEMENT_CHARACTER, 0, 1);
} }
} }

View File

@@ -7,10 +7,6 @@
var html5Exceptions = { var html5Exceptions = {
"<!doctype html><keygen><frameset>" : true, // Bug 101019 "<!doctype html><keygen><frameset>" : true, // Bug 101019
"<select><keygen>" : true, // Bug 101019 "<select><keygen>" : true, // Bug 101019
"<plaintext>\u0000filler\u0000text\u0000" : true, // Bug 612527
"<body><svg><foreignObject>\u0000filler\u0000text" : true, // Bug 612527
"<svg>\u0000</svg><frameset>" : true, // Bug 612527
"<svg>\u0000 </svg><frameset>" : true, // Bug 612527
"<option><span><option>" : true, // Bug 612528 "<option><span><option>" : true, // Bug 612528
"<!doctype html><div><body><frameset>" : true, // Bug 614241 "<!doctype html><div><body><frameset>" : true, // Bug 614241
} }

View File

@@ -1204,3 +1204,17 @@
| <summary> | <summary>
| <article> | <article>
| "a" | "a"
#data
<!doctype html><p><a><plaintext>b
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <a>
| <plaintext>
| <a>
| "b"