From 142069e90f81b6546317ce8279fc3f9873d171cf Mon Sep 17 00:00:00 2001 From: Henri Sivonen Date: Mon, 27 Oct 2014 11:48:11 +0200 Subject: [PATCH] Bug 886390 part 1 - Implement behavior equivalent to the adjusted current node concept by changing the first node on the stack. r=wchen. --- parser/html/javasrc/TreeBuilder.java | 144 +++++++++++++++++++++------ parser/html/nsHtml5AtomList.h | 18 ++-- parser/html/nsHtml5TreeBuilder.cpp | 72 ++++++++++---- 3 files changed, 176 insertions(+), 58 deletions(-) diff --git a/parser/html/javasrc/TreeBuilder.java b/parser/html/javasrc/TreeBuilder.java index 42893e7b2063..2f4801879ca0 100644 --- a/parser/html/javasrc/TreeBuilder.java +++ b/parser/html/javasrc/TreeBuilder.java @@ -611,32 +611,104 @@ public abstract class TreeBuilder implements TokenHandler, } else { elt = createHtmlElementSetAsRoot(tokenizer.emptyAttributes()); } - StackNode node = new StackNode(ElementName.HTML, elt - // [NOCPP[ - , errorHandler == null ? null : new TaintableLocatorImpl(tokenizer) - // ]NOCPP] - ); - currentPtr++; - stack[currentPtr] = node; - if ("template" == contextName) { - pushTemplateMode(IN_TEMPLATE); - } - resetTheInsertionMode(); - formPointer = getFormPointerForContext(contextNode); - if ("title" == contextName || "textarea" == contextName) { - tokenizer.setStateAndEndTagExpectation(Tokenizer.RCDATA, contextName); - } else if ("style" == contextName || "xmp" == contextName - || "iframe" == contextName || "noembed" == contextName - || "noframes" == contextName - || (scriptingEnabled && "noscript" == contextName)) { - tokenizer.setStateAndEndTagExpectation(Tokenizer.RAWTEXT, contextName); - } else if ("plaintext" == contextName) { - tokenizer.setStateAndEndTagExpectation(Tokenizer.PLAINTEXT, contextName); - } else if ("script" == contextName) { - tokenizer.setStateAndEndTagExpectation(Tokenizer.SCRIPT_DATA, + // When the context node is not in the HTML namespace, contrary + // to the spec, the first node on the stack is not set to "html" + 
// in the HTML namespace. Instead, it is set to a node that has + // the characteristics of the appropriate "adjusted current node". + // This way, there is no need to perform "adjusted current node" + // checks during tree construction. Instead, it's sufficient to + // just look at the current node. However, this also means that it + // is not safe to treat "html" in the HTML namespace as a sentinel + // that ends stack popping. Instead, stack popping loops that are + // meant not to pop the first element on the stack need to check + // for currentPtr becoming zero. + if (contextNamespace == "http://www.w3.org/2000/svg") { + ElementName elementName = ElementName.SVG; + if ("title" == contextName || "desc" == contextName + || "foreignObject" == contextName) { + // These elements are all alike and we don't care about + // the exact name. + elementName = ElementName.FOREIGNOBJECT; + } + // This is the SVG variant of the StackNode constructor. + StackNode node = new StackNode(elementName, + elementName.camelCaseName, elt + // [NOCPP[ + , errorHandler == null ? null + : new TaintableLocatorImpl(tokenizer) + // ]NOCPP] + ); + currentPtr++; + stack[currentPtr] = node; + tokenizer.setStateAndEndTagExpectation(Tokenizer.DATA, contextName); + // The frameset-ok flag is set even though <frameset> never + // ends up being allowed as HTML frameset in the fragment case. + mode = FRAMESET_OK; + } else if (contextNamespace == "http://www.w3.org/1998/Math/MathML") { + ElementName elementName = ElementName.MATH; + if ("mi" == contextName || "mo" == contextName + || "mn" == contextName || "ms" == contextName + || "mtext" == contextName) { + // These elements are all alike and we don't care about + // the exact name. 
+ elementName = ElementName.MTEXT; + } else if ("annotation-xml" == contextName) { + elementName = ElementName.ANNOTATION_XML; + // Blink does not check the encoding attribute of the + // annotation-xml element innerHTML is being set on. + // Let's do the same at least until + // https://www.w3.org/Bugs/Public/show_bug.cgi?id=26783 + // is resolved. + } + // This is the MathML variant of the StackNode constructor. + StackNode node = new StackNode(elementName, elt, + elementName.name, false + // [NOCPP[ + , errorHandler == null ? null + : new TaintableLocatorImpl(tokenizer) + // ]NOCPP] + ); + currentPtr++; + stack[currentPtr] = node; + tokenizer.setStateAndEndTagExpectation(Tokenizer.DATA, + contextName); + // The frameset-ok flag is set even though <frameset> never + // ends up being allowed as HTML frameset in the fragment case. + mode = FRAMESET_OK; + } else { // html + StackNode node = new StackNode(ElementName.HTML, elt + // [NOCPP[ + , errorHandler == null ? null + : new TaintableLocatorImpl(tokenizer) + // ]NOCPP] + ); + currentPtr++; + stack[currentPtr] = node; + if ("template" == contextName) { + pushTemplateMode(IN_TEMPLATE); + } + resetTheInsertionMode(); + formPointer = getFormPointerForContext(contextNode); + if ("title" == contextName || "textarea" == contextName) { + tokenizer.setStateAndEndTagExpectation(Tokenizer.RCDATA, + contextName); + } else if ("style" == contextName || "xmp" == contextName + || "iframe" == contextName || "noembed" == contextName + || "noframes" == contextName + || (scriptingEnabled && "noscript" == contextName)) { + tokenizer.setStateAndEndTagExpectation(Tokenizer.RAWTEXT, + contextName); + } else if ("plaintext" == contextName) { + tokenizer.setStateAndEndTagExpectation(Tokenizer.PLAINTEXT, + contextName); + } else if ("script" == contextName) { + tokenizer.setStateAndEndTagExpectation( + Tokenizer.SCRIPT_DATA, contextName); + } else { + tokenizer.setStateAndEndTagExpectation(Tokenizer.DATA, + contextName); + } } contextName = null; 
contextNode = null; @@ -1454,7 +1526,8 @@ public abstract class TreeBuilder implements TokenHandler, case IN_CELL: case IN_BODY: // [NOCPP[ - openelementloop: for (int i = currentPtr; i >= 0; i--) { + // i > 0 to stop in time in the foreign fragment case. + openelementloop: for (int i = currentPtr; i > 0; i--) { int group = stack[i].getGroup(); switch (group) { case DD_OR_DT: @@ -3308,10 +3381,18 @@ public abstract class TreeBuilder implements TokenHandler, endtagloop: for (;;) { if (isInForeign()) { if (stack[currentPtr].name != name) { - errEndTagDidNotMatchCurrentOpenElement(name, stack[currentPtr].popName); + if (currentPtr == 0) { + errStrayEndTag(name); + } else { + errEndTagDidNotMatchCurrentOpenElement(name, stack[currentPtr].popName); + } } eltPos = currentPtr; for (;;) { + if (eltPos == 0) { + assert fragment: "We can get this close to the root of the stack in foreign content only in the fragment case."; + break endtagloop; + } if (stack[eltPos].name == name) { while (currentPtr >= eltPos) { pop(); @@ -3649,7 +3730,9 @@ public abstract class TreeBuilder implements TokenHandler, // XXX Can the 'in foreign' case happen anymore? if (isInForeign()) { errHtmlStartTagInForeignContext(name); - while (stack[currentPtr].ns != "http://www.w3.org/1999/xhtml") { + // Check for currentPtr for the fragment + // case. + while (currentPtr >= 0 && stack[currentPtr].ns != "http://www.w3.org/1999/xhtml") { pop(); } } @@ -3730,8 +3813,11 @@ public abstract class TreeBuilder implements TokenHandler, case BR: errEndTagBr(); if (isInForeign()) { + // XXX can this happen anymore? errHtmlStartTagInForeignContext(name); - while (stack[currentPtr].ns != "http://www.w3.org/1999/xhtml") { + // Check for currentPtr for the fragment + // case. 
+ while (currentPtr >= 0 && stack[currentPtr].ns != "http://www.w3.org/1999/xhtml") { pop(); } } diff --git a/parser/html/nsHtml5AtomList.h b/parser/html/nsHtml5AtomList.h index a51427d13fcf..ad78bac2224e 100644 --- a/parser/html/nsHtml5AtomList.h +++ b/parser/html/nsHtml5AtomList.h @@ -21,8 +21,16 @@ */ HTML5_ATOM(emptystring, "") -HTML5_ATOM(template_, "template") HTML5_ATOM(title, "title") +HTML5_ATOM(desc, "desc") +HTML5_ATOM(foreignObject, "foreignObject") +HTML5_ATOM(mi, "mi") +HTML5_ATOM(mo, "mo") +HTML5_ATOM(mn, "mn") +HTML5_ATOM(ms, "ms") +HTML5_ATOM(mtext, "mtext") +HTML5_ATOM(annotation_xml, "annotation-xml") +HTML5_ATOM(template_, "template") HTML5_ATOM(textarea, "textarea") HTML5_ATOM(style, "style") HTML5_ATOM(xmp, "xmp") @@ -728,10 +736,6 @@ HTML5_ATOM(hr, "hr") HTML5_ATOM(li, "li") HTML5_ATOM(ln, "ln") HTML5_ATOM(lt, "lt") -HTML5_ATOM(mi, "mi") -HTML5_ATOM(mn, "mn") -HTML5_ATOM(mo, "mo") -HTML5_ATOM(ms, "ms") HTML5_ATOM(or_, "or") HTML5_ATOM(pi, "pi") HTML5_ATOM(rb, "rb") @@ -785,7 +789,6 @@ HTML5_ATOM(csch, "csch") HTML5_ATOM(cosh, "cosh") HTML5_ATOM(coth, "coth") HTML5_ATOM(curl, "curl") -HTML5_ATOM(desc, "desc") HTML5_ATOM(diff, "diff") HTML5_ATOM(defs, "defs") HTML5_ATOM(font, "font") @@ -836,7 +839,6 @@ HTML5_ATOM(mover, "mover") HTML5_ATOM(minus, "minus") HTML5_ATOM(mroot, "mroot") HTML5_ATOM(msqrt, "msqrt") -HTML5_ATOM(mtext, "mtext") HTML5_ATOM(notin, "notin") HTML5_ATOM(piece, "piece") HTML5_ATOM(param, "param") @@ -1035,14 +1037,12 @@ HTML5_ATOM(animateMotion, "animateMotion") HTML5_ATOM(font_face_src, "font-face-src") HTML5_ATOM(font_face_uri, "font-face-uri") HTML5_ATOM(foreignobject, "foreignobject") -HTML5_ATOM(foreignObject, "foreignObject") HTML5_ATOM(fecolormatrix, "fecolormatrix") HTML5_ATOM(feColorMatrix, "feColorMatrix") HTML5_ATOM(missing_glyph, "missing-glyph") HTML5_ATOM(mmultiscripts, "mmultiscripts") HTML5_ATOM(scalarproduct, "scalarproduct") HTML5_ATOM(vectorproduct, "vectorproduct") -HTML5_ATOM(annotation_xml, 
"annotation-xml") HTML5_ATOM(definition_src, "definition-src") HTML5_ATOM(font_face_name, "font-face-name") HTML5_ATOM(fegaussianblur, "fegaussianblur") diff --git a/parser/html/nsHtml5TreeBuilder.cpp b/parser/html/nsHtml5TreeBuilder.cpp index a21af9867913..3037d1babef1 100644 --- a/parser/html/nsHtml5TreeBuilder.cpp +++ b/parser/html/nsHtml5TreeBuilder.cpp @@ -97,24 +97,48 @@ nsHtml5TreeBuilder::startTokenization(nsHtml5Tokenizer* self) } else { elt = createHtmlElementSetAsRoot(tokenizer->emptyAttributes()); } - nsHtml5StackNode* node = new nsHtml5StackNode(nsHtml5ElementName::ELT_HTML, elt); - currentPtr++; - stack[currentPtr] = node; - if (nsHtml5Atoms::template_ == contextName) { - pushTemplateMode(NS_HTML5TREE_BUILDER_IN_TEMPLATE); - } - resetTheInsertionMode(); - formPointer = getFormPointerForContext(contextNode); - if (nsHtml5Atoms::title == contextName || nsHtml5Atoms::textarea == contextName) { - tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_RCDATA, contextName); - } else if (nsHtml5Atoms::style == contextName || nsHtml5Atoms::xmp == contextName || nsHtml5Atoms::iframe == contextName || nsHtml5Atoms::noembed == contextName || nsHtml5Atoms::noframes == contextName || (scriptingEnabled && nsHtml5Atoms::noscript == contextName)) { - tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_RAWTEXT, contextName); - } else if (nsHtml5Atoms::plaintext == contextName) { - tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_PLAINTEXT, contextName); - } else if (nsHtml5Atoms::script == contextName) { - tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_SCRIPT_DATA, contextName); - } else { + if (contextNamespace == kNameSpaceID_SVG) { + nsHtml5ElementName* elementName = nsHtml5ElementName::ELT_SVG; + if (nsHtml5Atoms::title == contextName || nsHtml5Atoms::desc == contextName || nsHtml5Atoms::foreignObject == contextName) { + elementName = nsHtml5ElementName::ELT_FOREIGNOBJECT; + } + nsHtml5StackNode* node = new 
nsHtml5StackNode(elementName, elementName->camelCaseName, elt); + currentPtr++; + stack[currentPtr] = node; tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_DATA, contextName); + mode = NS_HTML5TREE_BUILDER_FRAMESET_OK; + } else if (contextNamespace == kNameSpaceID_MathML) { + nsHtml5ElementName* elementName = nsHtml5ElementName::ELT_MATH; + if (nsHtml5Atoms::mi == contextName || nsHtml5Atoms::mo == contextName || nsHtml5Atoms::mn == contextName || nsHtml5Atoms::ms == contextName || nsHtml5Atoms::mtext == contextName) { + elementName = nsHtml5ElementName::ELT_MTEXT; + } else if (nsHtml5Atoms::annotation_xml == contextName) { + elementName = nsHtml5ElementName::ELT_ANNOTATION_XML; + } + nsHtml5StackNode* node = new nsHtml5StackNode(elementName, elt, elementName->name, false); + currentPtr++; + stack[currentPtr] = node; + tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_DATA, contextName); + mode = NS_HTML5TREE_BUILDER_FRAMESET_OK; + } else { + nsHtml5StackNode* node = new nsHtml5StackNode(nsHtml5ElementName::ELT_HTML, elt); + currentPtr++; + stack[currentPtr] = node; + if (nsHtml5Atoms::template_ == contextName) { + pushTemplateMode(NS_HTML5TREE_BUILDER_IN_TEMPLATE); + } + resetTheInsertionMode(); + formPointer = getFormPointerForContext(contextNode); + if (nsHtml5Atoms::title == contextName || nsHtml5Atoms::textarea == contextName) { + tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_RCDATA, contextName); + } else if (nsHtml5Atoms::style == contextName || nsHtml5Atoms::xmp == contextName || nsHtml5Atoms::iframe == contextName || nsHtml5Atoms::noembed == contextName || nsHtml5Atoms::noframes == contextName || (scriptingEnabled && nsHtml5Atoms::noscript == contextName)) { + tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_RAWTEXT, contextName); + } else if (nsHtml5Atoms::plaintext == contextName) { + tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_PLAINTEXT, contextName); + } else if (nsHtml5Atoms::script == contextName) 
{ + tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_SCRIPT_DATA, contextName); + } else { + tokenizer->setStateAndEndTagExpectation(NS_HTML5TOKENIZER_DATA, contextName); + } } contextName = nullptr; contextNode = nullptr; @@ -2202,10 +2226,18 @@ nsHtml5TreeBuilder::endTag(nsHtml5ElementName* elementName) for (; ; ) { if (isInForeign()) { if (stack[currentPtr]->name != name) { - errEndTagDidNotMatchCurrentOpenElement(name, stack[currentPtr]->popName); + if (!currentPtr) { + errStrayEndTag(name); + } else { + errEndTagDidNotMatchCurrentOpenElement(name, stack[currentPtr]->popName); + } } eltPos = currentPtr; for (; ; ) { + if (!eltPos) { + MOZ_ASSERT(fragment, "We can get this close to the root of the stack in foreign content only in the fragment case."); + NS_HTML5_BREAK(endtagloop); + } if (stack[eltPos]->name == name) { while (currentPtr >= eltPos) { pop(); @@ -2574,7 +2606,7 @@ nsHtml5TreeBuilder::endTag(nsHtml5ElementName* elementName) errNoElementToCloseButEndTagSeen(nsHtml5Atoms::p); if (isInForeign()) { errHtmlStartTagInForeignContext(name); - while (stack[currentPtr]->ns != kNameSpaceID_XHTML) { + while (currentPtr >= 0 && stack[currentPtr]->ns != kNameSpaceID_XHTML) { pop(); } } @@ -2657,7 +2689,7 @@ nsHtml5TreeBuilder::endTag(nsHtml5ElementName* elementName) errEndTagBr(); if (isInForeign()) { errHtmlStartTagInForeignContext(name); - while (stack[currentPtr]->ns != kNameSpaceID_XHTML) { + while (currentPtr >= 0 && stack[currentPtr]->ns != kNameSpaceID_XHTML) { pop(); } }