Bug 489820 and bug 483209 - Make the HTML5 parser check buffer bounds less often and place limits on buffer growth. rs=sicking.

This commit is contained in:
Henri Sivonen
2010-02-12 09:49:06 +02:00
parent 382fdd2350
commit c7fd798070
5 changed files with 136 additions and 141 deletions

View File

@@ -191,38 +191,6 @@ nsHtml5Tokenizer::emptyAttributes()
return nsHtml5HtmlAttributes::EMPTY_ATTRIBUTES; return nsHtml5HtmlAttributes::EMPTY_ATTRIBUTES;
} }
// Discards whatever strBuf currently holds and leaves it containing exactly
// one code unit, c.
void
nsHtml5Tokenizer::clearStrBufAndAppendCurrentC(PRUnichar c)
{
  // The fill length and the first slot are set independently of each other.
  strBufLen = 1;
  strBuf[0] = c;
}
// Restarts strBuf with c as its only content. The body performs the same two
// stores as clearStrBufAndAppendCurrentC.
void
nsHtml5Tokenizer::clearStrBufAndAppendForceWrite(PRUnichar c)
{
  strBufLen = 1;
  strBuf[0] = c;
}
// Marks strBuf as empty by resetting its fill length to zero. The backing
// array itself is not touched.
void
nsHtml5Tokenizer::clearStrBufForNextState()
{
strBufLen = 0;
}
// Appends one code unit to strBuf. When the backing array is already full it
// is first replaced by a copy that is NS_HTML5TOKENIZER_BUFFER_GROW_BY slots
// larger.
void
nsHtml5Tokenizer::appendStrBuf(PRUnichar c)
{
  if (strBuf.length == strBufLen) {
    // Full: allocate the larger array, carry the old contents over, then
    // release the old storage before adopting the new one.
    jArray<PRUnichar,PRInt32> grown = jArray<PRUnichar,PRInt32>(strBuf.length + NS_HTML5TOKENIZER_BUFFER_GROW_BY);
    nsHtml5ArrayCopy::arraycopy(strBuf, grown, strBuf.length);
    strBuf.release();
    strBuf = grown;
  }
  strBuf[strBufLen] = c;
  strBufLen++;
}
nsString* nsString*
nsHtml5Tokenizer::strBufToString() nsHtml5Tokenizer::strBufToString()
{ {
@@ -243,44 +211,6 @@ nsHtml5Tokenizer::emitStrBuf()
} }
} }
// Marks longStrBuf as empty by resetting its fill length to zero. The backing
// array itself is not touched.
void
nsHtml5Tokenizer::clearLongStrBufForNextState()
{
longStrBufLen = 0;
}
// Resets the fill length of longStrBuf to zero. The body is the same as that
// of clearLongStrBufForNextState.
void
nsHtml5Tokenizer::clearLongStrBuf()
{
longStrBufLen = 0;
}
// Discards whatever longStrBuf currently holds and leaves it containing
// exactly one code unit, c.
void
nsHtml5Tokenizer::clearLongStrBufAndAppendCurrentC(PRUnichar c)
{
  // The fill length and the first slot are set independently of each other.
  longStrBufLen = 1;
  longStrBuf[0] = c;
}
// Restarts longStrBuf with c as its only content. The body performs the same
// two stores as clearLongStrBufAndAppendCurrentC.
void
nsHtml5Tokenizer::clearLongStrBufAndAppendToComment(PRUnichar c)
{
  longStrBufLen = 1;
  longStrBuf[0] = c;
}
// Appends a single code unit to longStrBuf, enlarging the backing array first
// when it is already full.
void
nsHtml5Tokenizer::appendLongStrBuf(PRUnichar c)
{
  if (longStrBufLen == longStrBuf.length) {
    // Grow by about half, plus one slot. The extra slot guarantees progress
    // when the current length is 0 or 1: there (len >> 1) is 0, so growing by
    // half alone would allocate an array no larger than the full one and the
    // store below would land past its end.
    jArray<PRUnichar,PRInt32> newBuf = jArray<PRUnichar,PRInt32>(longStrBufLen + (longStrBufLen >> 1) + 1);
    nsHtml5ArrayCopy::arraycopy(longStrBuf, newBuf, longStrBuf.length);
    longStrBuf.release();
    longStrBuf = newBuf;
  }
  longStrBuf[longStrBufLen++] = c;
}
void void
nsHtml5Tokenizer::appendSecondHyphenToBogusComment() nsHtml5Tokenizer::appendSecondHyphenToBogusComment()
{ {
@@ -294,32 +224,6 @@ nsHtml5Tokenizer::adjustDoubleHyphenAndAppendToLongStrBufAndErr(PRUnichar c)
appendLongStrBuf(c); appendLongStrBuf(c);
} }
// Appends `length` code units of `buffer`, starting at `offset`, to the end of
// longStrBuf. If the result would not fit, longStrBuf is first replaced by a
// copy sized at one and a half times the required length.
void
nsHtml5Tokenizer::appendLongStrBuf(jArray<PRUnichar,PRInt32> buffer, PRInt32 offset, PRInt32 length)
{
  PRInt32 neededLen = longStrBufLen + length;
  if (neededLen > longStrBuf.length) {
    jArray<PRUnichar,PRInt32> grown = jArray<PRUnichar,PRInt32>(neededLen + (neededLen >> 1));
    nsHtml5ArrayCopy::arraycopy(longStrBuf, grown, longStrBuf.length);
    longStrBuf.release();
    longStrBuf = grown;
  }
  // Copy the new run in directly after the existing contents.
  nsHtml5ArrayCopy::arraycopy(buffer, offset, longStrBuf, longStrBufLen, length);
  longStrBufLen = neededLen;
}
// Appends the whole of arr (indices 0 through arr.length - 1) to longStrBuf by
// delegating to the (buffer, offset, length) overload.
void
nsHtml5Tokenizer::appendLongStrBuf(jArray<PRUnichar,PRInt32> arr)
{
appendLongStrBuf(arr, 0, arr.length);
}
// Appends the filled part of strBuf (its first strBufLen code units) to
// longStrBuf. strBuf itself is left unchanged.
void
nsHtml5Tokenizer::appendStrBufToLongStrBuf()
{
appendLongStrBuf(strBuf, 0, strBufLen);
}
nsString* nsString*
nsHtml5Tokenizer::longStrBufToString() nsHtml5Tokenizer::longStrBufToString()
{ {
@@ -342,12 +246,6 @@ nsHtml5Tokenizer::flushChars(PRUnichar* buf, PRInt32 pos)
cstart = 0x7fffffff; cstart = 0x7fffffff;
} }
// Sets the `attributes` member to nsnull. Nothing is released here.
// NOTE(review): ownership of the previously referenced object is presumably
// handled elsewhere — confirm against the callers.
void
nsHtml5Tokenizer::resetAttributes()
{
attributes = nsnull;
}
void void
nsHtml5Tokenizer::strBufToElementNameString() nsHtml5Tokenizer::strBufToElementNameString()
{ {
@@ -429,6 +327,7 @@ nsHtml5Tokenizer::tokenizeBuffer(nsHtml5UTF16Buffer* buffer)
shouldSuspend = PR_FALSE; shouldSuspend = PR_FALSE;
lastCR = PR_FALSE; lastCR = PR_FALSE;
PRInt32 start = buffer->getStart(); PRInt32 start = buffer->getStart();
PRInt32 end = buffer->getEnd();
PRInt32 pos = start - 1; PRInt32 pos = start - 1;
switch(state) { switch(state) {
case NS_HTML5TOKENIZER_DATA: case NS_HTML5TOKENIZER_DATA:
@@ -456,8 +355,9 @@ nsHtml5Tokenizer::tokenizeBuffer(nsHtml5UTF16Buffer* buffer)
break; break;
} }
} }
pos = stateLoop(state, c, pos, buffer->getBuffer(), PR_FALSE, returnState, buffer->getEnd()); ensureBufferSpace(end - start);
if (pos == buffer->getEnd()) { pos = stateLoop(state, c, pos, buffer->getBuffer(), PR_FALSE, returnState, end);
if (pos == end) {
buffer->setStart(pos); buffer->setStart(pos);
} else { } else {
buffer->setStart(pos + 1); buffer->setStart(pos + 1);
@@ -465,6 +365,38 @@ nsHtml5Tokenizer::tokenizeBuffer(nsHtml5UTF16Buffer* buffer)
return lastCR; return lastCR;
} }
// Sizes longStrBuf and strBuf so that each can take addedLength more code
// units on top of what it already holds, then forwards the same request to
// the token handler.
//
// If a buffer's current fill plus addedLength would exceed
// NS_HTML5TOKENIZER_BUFFER_CLIP_THRESHOLD, its accumulated contents are
// dropped and replaced by the two code units U+2026, U+FFFD before the
// capacity is computed.
void
nsHtml5Tokenizer::ensureBufferSpace(PRInt32 addedLength)
{
  // ---- longStrBuf ----
  PRInt32 longNeeded = longStrBufLen + addedLength;
  if (NS_HTML5TOKENIZER_BUFFER_CLIP_THRESHOLD < longNeeded) {
    // Clip: keep only the two-unit marker.
    longStrBufLen = 2;
    longStrBuf[0] = 0x2026;
    longStrBuf[1] = 0xfffd;
    longNeeded = 2 + addedLength;
  }
  if (longStrBuf.length < longNeeded) {
    jArray<PRUnichar,PRInt32> grownLong = jArray<PRUnichar,PRInt32>(longNeeded);
    nsHtml5ArrayCopy::arraycopy(longStrBuf, grownLong, longStrBufLen);
    longStrBuf.release();
    longStrBuf = grownLong;
  }

  // ---- strBuf ----
  PRInt32 shortNeeded = strBufLen + addedLength;
  if (NS_HTML5TOKENIZER_BUFFER_CLIP_THRESHOLD < shortNeeded) {
    // Clip: keep only the two-unit marker.
    strBufLen = 2;
    strBuf[0] = 0x2026;
    strBuf[1] = 0xfffd;
    shortNeeded = 2 + addedLength;
  }
  if (strBuf.length < shortNeeded) {
    jArray<PRUnichar,PRInt32> grownShort = jArray<PRUnichar,PRInt32>(shortNeeded);
    nsHtml5ArrayCopy::arraycopy(strBuf, grownShort, strBufLen);
    strBuf.release();
    strBuf = grownShort;
  }

  // The token handler gets the same request so it can size its own buffer.
  tokenHandler->ensureBufferSpace(addedLength);
}
PRInt32 PRInt32
nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRInt32 pos, PRUnichar* buf, PRBool reconsume, PRInt32 returnState, PRInt32 endPos) nsHtml5Tokenizer::stateLoop(PRInt32 state, PRUnichar c, PRInt32 pos, PRUnichar* buf, PRBool reconsume, PRInt32 returnState, PRInt32 endPos)
{ {

View File

@@ -158,31 +158,83 @@ class nsHtml5Tokenizer
nsHtml5HtmlAttributes* emptyAttributes(); nsHtml5HtmlAttributes* emptyAttributes();
private: private:
void clearStrBufAndAppendCurrentC(PRUnichar c); inline void clearStrBufAndAppendCurrentC(PRUnichar c)
void clearStrBufAndAppendForceWrite(PRUnichar c); {
void clearStrBufForNextState(); strBuf[0] = c;
void appendStrBuf(PRUnichar c); strBufLen = 1;
}
// Restarts strBuf with c as its only content.
inline void clearStrBufAndAppendForceWrite(PRUnichar c)
{
  strBufLen = 1;
  strBuf[0] = c;
}
// Marks strBuf as empty by resetting its fill length to zero.
inline void clearStrBufForNextState()
{
strBufLen = 0;
}
// Stores c at index strBufLen and advances strBufLen by one.
// No capacity check is performed here.
// NOTE(review): callers presumably rely on ensureBufferSpace() having sized
// strBuf beforehand — confirm for every call site.
inline void appendStrBuf(PRUnichar c)
{
strBuf[strBufLen++] = c;
}
protected: protected:
nsString* strBufToString(); nsString* strBufToString();
private: private:
void strBufToDoctypeName(); void strBufToDoctypeName();
void emitStrBuf(); void emitStrBuf();
void clearLongStrBufForNextState(); inline void clearLongStrBufForNextState()
void clearLongStrBuf(); {
void clearLongStrBufAndAppendCurrentC(PRUnichar c); longStrBufLen = 0;
void clearLongStrBufAndAppendToComment(PRUnichar c); }
void appendLongStrBuf(PRUnichar c);
// Marks longStrBuf as empty by resetting its fill length to zero.
inline void clearLongStrBuf()
{
longStrBufLen = 0;
}
// Discards whatever longStrBuf currently holds and leaves it containing
// exactly one code unit, c.
inline void clearLongStrBufAndAppendCurrentC(PRUnichar c)
{
  longStrBufLen = 1;
  longStrBuf[0] = c;
}
// Restarts longStrBuf with c as its only content.
inline void clearLongStrBufAndAppendToComment(PRUnichar c)
{
  longStrBufLen = 1;
  longStrBuf[0] = c;
}
// Stores c at index longStrBufLen and advances longStrBufLen by one.
// No capacity check is performed here.
// NOTE(review): callers presumably rely on ensureBufferSpace() having sized
// longStrBuf beforehand — confirm for every call site.
inline void appendLongStrBuf(PRUnichar c)
{
longStrBuf[longStrBufLen++] = c;
}
void appendSecondHyphenToBogusComment(); void appendSecondHyphenToBogusComment();
void adjustDoubleHyphenAndAppendToLongStrBufAndErr(PRUnichar c); void adjustDoubleHyphenAndAppendToLongStrBufAndErr(PRUnichar c);
void appendLongStrBuf(jArray<PRUnichar,PRInt32> buffer, PRInt32 offset, PRInt32 length); inline void appendLongStrBuf(jArray<PRUnichar,PRInt32> buffer, PRInt32 offset, PRInt32 length)
void appendLongStrBuf(jArray<PRUnichar,PRInt32> arr); {
void appendStrBufToLongStrBuf(); nsHtml5ArrayCopy::arraycopy(buffer, offset, longStrBuf, longStrBufLen, length);
longStrBufLen += length;
}
// Appends the filled part of strBuf (its first strBufLen code units) to
// longStrBuf by delegating to appendLongStrBuf(buffer, offset, length).
inline void appendStrBufToLongStrBuf()
{
appendLongStrBuf(strBuf, 0, strBufLen);
}
nsString* longStrBufToString(); nsString* longStrBufToString();
void emitComment(PRInt32 provisionalHyphens, PRInt32 pos); void emitComment(PRInt32 provisionalHyphens, PRInt32 pos);
protected: protected:
void flushChars(PRUnichar* buf, PRInt32 pos); void flushChars(PRUnichar* buf, PRInt32 pos);
private: private:
void resetAttributes(); inline void resetAttributes()
{
attributes = nsnull;
}
void strBufToElementNameString(); void strBufToElementNameString();
PRInt32 emitCurrentTagToken(PRBool selfClosing, PRInt32 pos); PRInt32 emitCurrentTagToken(PRBool selfClosing, PRInt32 pos);
void attributeNameComplete(); void attributeNameComplete();
@@ -194,6 +246,7 @@ class nsHtml5Tokenizer
void start(); void start();
PRBool tokenizeBuffer(nsHtml5UTF16Buffer* buffer); PRBool tokenizeBuffer(nsHtml5UTF16Buffer* buffer);
private: private:
void ensureBufferSpace(PRInt32 addedLength);
PRInt32 stateLoop(PRInt32 state, PRUnichar c, PRInt32 pos, PRUnichar* buf, PRBool reconsume, PRInt32 returnState, PRInt32 endPos); PRInt32 stateLoop(PRInt32 state, PRUnichar c, PRInt32 pos, PRUnichar* buf, PRBool reconsume, PRInt32 returnState, PRInt32 endPos);
void initDoctypeFields(); void initDoctypeFields();
inline void adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn() inline void adjustDoubleHyphenAndAppendToLongStrBufCarriageReturn()
@@ -294,6 +347,7 @@ jArray<PRUnichar,PRInt32> nsHtml5Tokenizer::NOSCRIPT_ARR = 0;
jArray<PRUnichar,PRInt32> nsHtml5Tokenizer::NOFRAMES_ARR = 0; jArray<PRUnichar,PRInt32> nsHtml5Tokenizer::NOFRAMES_ARR = 0;
#endif #endif
#define NS_HTML5TOKENIZER_BUFFER_CLIP_THRESHOLD 8000
#define NS_HTML5TOKENIZER_DATA 0 #define NS_HTML5TOKENIZER_DATA 0
#define NS_HTML5TOKENIZER_RCDATA 1 #define NS_HTML5TOKENIZER_RCDATA 1
#define NS_HTML5TOKENIZER_SCRIPT_DATA 2 #define NS_HTML5TOKENIZER_SCRIPT_DATA 2

View File

@@ -193,6 +193,22 @@ nsHtml5TreeBuilder::comment(PRUnichar* buf, PRInt32 start, PRInt32 length)
return; return;
} }
// Makes sure charBuffer can take addedLength more code units.
//
// When the current fill plus addedLength would exceed
// NS_HTML5TREE_BUILDER_BUFFER_FLUSH_THRESHOLD, flushCharacters() is called
// first and only addedLength is then required of the buffer.
// NOTE(review): this presumes flushCharacters() leaves charBufferLen at zero —
// confirm against its definition.
void
nsHtml5TreeBuilder::ensureBufferSpace(PRInt32 addedLength)
{
  PRInt32 needed = charBufferLen + addedLength;
  if (needed > NS_HTML5TREE_BUILDER_BUFFER_FLUSH_THRESHOLD) {
    flushCharacters();
    needed = addedLength;
  }
  if (needed <= charBuffer.length) {
    // Already large enough; nothing to reallocate.
    return;
  }
  jArray<PRUnichar,PRInt32> grown = jArray<PRUnichar,PRInt32>(needed);
  nsHtml5ArrayCopy::arraycopy(charBuffer, grown, charBufferLen);
  charBuffer.release();
  charBuffer = grown;
}
void void
nsHtml5TreeBuilder::characters(const PRUnichar* buf, PRInt32 start, PRInt32 length) nsHtml5TreeBuilder::characters(const PRUnichar* buf, PRInt32 start, PRInt32 length)
{ {
@@ -3061,6 +3077,10 @@ nsHtml5TreeBuilder::clearLastListSlot()
void void
nsHtml5TreeBuilder::push(nsHtml5StackNode* node) nsHtml5TreeBuilder::push(nsHtml5StackNode* node)
{ {
if (currentPtr == NS_HTML5TREE_BUILDER_STACK_MAX_DEPTH) {
pop();
}
currentPtr++; currentPtr++;
if (currentPtr == stack.length) { if (currentPtr == stack.length) {
jArray<nsHtml5StackNode*,PRInt32> newStack = jArray<nsHtml5StackNode*,PRInt32>(stack.length + 64); jArray<nsHtml5StackNode*,PRInt32> newStack = jArray<nsHtml5StackNode*,PRInt32>(stack.length + 64);
@@ -3075,6 +3095,10 @@ nsHtml5TreeBuilder::push(nsHtml5StackNode* node)
void void
nsHtml5TreeBuilder::silentPush(nsHtml5StackNode* node) nsHtml5TreeBuilder::silentPush(nsHtml5StackNode* node)
{ {
if (currentPtr == NS_HTML5TREE_BUILDER_STACK_MAX_DEPTH) {
pop();
}
currentPtr++; currentPtr++;
if (currentPtr == stack.length) { if (currentPtr == stack.length) {
jArray<nsHtml5StackNode*,PRInt32> newStack = jArray<nsHtml5StackNode*,PRInt32>(stack.length + 64); jArray<nsHtml5StackNode*,PRInt32> newStack = jArray<nsHtml5StackNode*,PRInt32>(stack.length + 64);
@@ -3722,20 +3746,6 @@ nsHtml5TreeBuilder::appendVoidFormToCurrent(nsHtml5HtmlAttributes* attributes)
elementPopped(kNameSpaceID_XHTML, nsHtml5Atoms::form, elt); elementPopped(kNameSpaceID_XHTML, nsHtml5Atoms::form, elt);
} }
// Appends one code unit to charBuffer. If the buffer has no free slot it is
// replaced by a copy exactly one slot larger before the store.
void
nsHtml5TreeBuilder::accumulateCharacter(PRUnichar c)
{
  if (charBufferLen + 1 > charBuffer.length) {
    jArray<PRUnichar,PRInt32> grown = jArray<PRUnichar,PRInt32>(charBufferLen + 1);
    nsHtml5ArrayCopy::arraycopy(charBuffer, grown, charBufferLen);
    charBuffer.release();
    charBuffer = grown;
  }
  charBuffer[charBufferLen] = c;
  charBufferLen++;
}
void void
nsHtml5TreeBuilder::requestSuspension() nsHtml5TreeBuilder::requestSuspension()
{ {

View File

@@ -101,6 +101,7 @@ class nsHtml5TreeBuilder : public nsAHtml5TreeBuilderState
void startTokenization(nsHtml5Tokenizer* self); void startTokenization(nsHtml5Tokenizer* self);
void doctype(nsIAtom* name, nsString* publicIdentifier, nsString* systemIdentifier, PRBool forceQuirks); void doctype(nsIAtom* name, nsString* publicIdentifier, nsString* systemIdentifier, PRBool forceQuirks);
void comment(PRUnichar* buf, PRInt32 start, PRInt32 length); void comment(PRUnichar* buf, PRInt32 start, PRInt32 length);
void ensureBufferSpace(PRInt32 addedLength);
void characters(const PRUnichar* buf, PRInt32 start, PRInt32 length); void characters(const PRUnichar* buf, PRInt32 start, PRInt32 length);
void eof(); void eof();
void endTokenization(); void endTokenization();
@@ -183,7 +184,11 @@ class nsHtml5TreeBuilder : public nsAHtml5TreeBuilderState
void appendVoidFormToCurrent(nsHtml5HtmlAttributes* attributes); void appendVoidFormToCurrent(nsHtml5HtmlAttributes* attributes);
protected: protected:
void accumulateCharacters(const PRUnichar* buf, PRInt32 start, PRInt32 length); void accumulateCharacters(const PRUnichar* buf, PRInt32 start, PRInt32 length);
void accumulateCharacter(PRUnichar c); inline void accumulateCharacter(PRUnichar c)
{
charBuffer[charBufferLen++] = c;
}
void requestSuspension(); void requestSuspension();
nsIContent** createElement(PRInt32 ns, nsIAtom* name, nsHtml5HtmlAttributes* attributes); nsIContent** createElement(PRInt32 ns, nsIAtom* name, nsHtml5HtmlAttributes* attributes);
nsIContent** createElement(PRInt32 ns, nsIAtom* name, nsHtml5HtmlAttributes* attributes, nsIContent** form); nsIContent** createElement(PRInt32 ns, nsIAtom* name, nsHtml5HtmlAttributes* attributes, nsIContent** form);
@@ -244,6 +249,8 @@ class nsHtml5TreeBuilder : public nsAHtml5TreeBuilderState
jArray<const char*,PRInt32> nsHtml5TreeBuilder::QUIRKY_PUBLIC_IDS = nsnull; jArray<const char*,PRInt32> nsHtml5TreeBuilder::QUIRKY_PUBLIC_IDS = nsnull;
#endif #endif
#define NS_HTML5TREE_BUILDER_BUFFER_FLUSH_THRESHOLD 4096
#define NS_HTML5TREE_BUILDER_STACK_MAX_DEPTH 200
#define NS_HTML5TREE_BUILDER_OTHER 0 #define NS_HTML5TREE_BUILDER_OTHER 0
#define NS_HTML5TREE_BUILDER_A 1 #define NS_HTML5TREE_BUILDER_A 1
#define NS_HTML5TREE_BUILDER_BASE 2 #define NS_HTML5TREE_BUILDER_BASE 2

View File

@@ -605,16 +605,8 @@ nsHtml5TreeBuilder::elementPopped(PRInt32 aNamespace, nsIAtom* aName, nsIContent
void void
nsHtml5TreeBuilder::accumulateCharacters(const PRUnichar* aBuf, PRInt32 aStart, PRInt32 aLength) nsHtml5TreeBuilder::accumulateCharacters(const PRUnichar* aBuf, PRInt32 aStart, PRInt32 aLength)
{ {
PRInt32 newFillLen = charBufferLen + aLength;
if (newFillLen > charBuffer.length) {
PRInt32 newAllocLength = newFillLen + (newFillLen >> 1);
jArray<PRUnichar,PRInt32> newBuf(newAllocLength);
memcpy(newBuf, charBuffer, sizeof(PRUnichar) * charBufferLen);
charBuffer.release();
charBuffer = newBuf;
}
memcpy(charBuffer + charBufferLen, aBuf + aStart, sizeof(PRUnichar) * aLength); memcpy(charBuffer + charBufferLen, aBuf + aStart, sizeof(PRUnichar) * aLength);
charBufferLen = newFillLen; charBufferLen += aLength;
} }
nsIContent** nsIContent**