Bug 332173 - Problems with regexp parsing of '~' in nsIZipReader.findEntries (and other nsWildCard uses). r=jwalden, a=ss

This commit is contained in:
Nelson Bolyard
2009-07-29 14:07:45 -07:00
parent 13b00e36c4
commit a48c202343

View File

@@ -57,21 +57,28 @@
template<class T> template<class T>
static int static int
_valid_subexp(const T *expr, T stop) _valid_subexp(const T *expr, T stop1, T stop2)
{ {
register int x,y,t; register int x;
int nsc,np,tld; int nsc = 0; /* Number of special characters */
int np; /* Number of pipe characters in union */
int tld = 0; /* Number of tilde characters */
x=0;nsc=0;tld=0; for (x = 0; expr[x] && (expr[x] != stop1) && (expr[x] != stop2); ++x) {
while(expr[x] && (expr[x] != stop)) {
switch(expr[x]) { switch(expr[x]) {
case '~': case '~':
if(tld) return INVALID_SXP; if(tld) /* at most one exclusion */
else ++tld; return INVALID_SXP;
if (stop1) /* no exclusions within unions */
return INVALID_SXP;
if (!expr[x+1]) /* exclusion cannot be last character */
return INVALID_SXP;
if (!x) /* exclusion cannot be first character */
return INVALID_SXP;
++tld;
/* fall through */
case '*': case '*':
case '?': case '?':
case '^':
case '$': case '$':
++nsc; ++nsc;
break; break;
@@ -79,51 +86,46 @@ _valid_subexp(const T *expr, T stop)
++nsc; ++nsc;
if((!expr[++x]) || (expr[x] == ']')) if((!expr[++x]) || (expr[x] == ']'))
return INVALID_SXP; return INVALID_SXP;
for(;expr[x] && (expr[x] != ']');++x) for(; expr[x] && (expr[x] != ']'); ++x) {
if(expr[x] == '\\') if(expr[x] == '\\' && !expr[++x])
if(!expr[++x])
return INVALID_SXP; return INVALID_SXP;
}
if(!expr[x]) if(!expr[x])
return INVALID_SXP; return INVALID_SXP;
break; break;
case '(': case '(':
++nsc;np = 0; ++nsc;
while(1) { if (stop1) /* no nested unions */
if(expr[++x] == ')')
return INVALID_SXP; return INVALID_SXP;
for(y=x;(expr[y]) && (expr[y] != '|') && (expr[y] != ')');++y) np = -1;
if(expr[y] == '\\') do {
if(!expr[++y]) int t = ::_valid_subexp(&expr[++x], T(')'), T('|'));
return INVALID_SXP; if(t == 0 || t == INVALID_SXP)
if(!expr[y])
return INVALID_SXP;
if(expr[y] == '|')
++np;
t = ::_valid_subexp(&expr[x],expr[y]);
if(t == INVALID_SXP)
return INVALID_SXP; return INVALID_SXP;
x+=t; x+=t;
if(expr[x] == ')') { if(!expr[x])
if(!np) return INVALID_SXP;
++np;
} while (expr[x] == '|' );
if(np < 1) /* must be at least one pipe */
return INVALID_SXP; return INVALID_SXP;
break;
}
}
break; break;
case ')': case ')':
case ']': case ']':
case '|':
return INVALID_SXP; return INVALID_SXP;
case '\\': case '\\':
++nsc;
if(!expr[++x]) if(!expr[++x])
return INVALID_SXP; return INVALID_SXP;
break;
default: default:
break; break;
} }
++x;
} }
if((!stop) && (!nsc)) if((!stop1) && (!nsc)) /* must be at least one special character */
return NON_SXP; return NON_SXP;
return ((expr[x] == stop) ? x : INVALID_SXP); return ((expr[x] == stop1 || expr[x] == stop2) ? x : INVALID_SXP);
} }
@@ -131,7 +133,7 @@ template<class T>
int int
NS_WildCardValid_(const T *expr) NS_WildCardValid_(const T *expr)
{ {
int x = ::_valid_subexp(expr, T('\0')); int x = ::_valid_subexp(expr, T('\0'), T('\0'));
return (x < 0 ? x : VALID_SXP); return (x < 0 ? x : VALID_SXP);
} }
@@ -155,56 +157,125 @@ NS_WildCardValid(const PRUnichar *expr)
#define ABORTED -1 #define ABORTED -1
template<class T> template<class T>
static int _shexp_match(const T *str, const T *expr, PRBool case_insensitive); static int
_shexp_match(const T *str, const T *expr, PRBool case_insensitive, unsigned int level);
/**
* Count characters until we reach a NUL character or either of the
* two delimiter characters, stop1 or stop2. If we encounter a bracketed
* expression, look only for NUL or ']' inside it. Do not look for stop1
* or stop2 inside it. Return ABORTED if bracketed expression is unterminated.
* Handle all escaping.
* Return index in input string of first stop found, or ABORTED if not found.
* If "dest" is non-NULL, copy counted characters to it and NUL terminate.
*/
template<class T> template<class T>
static int static int
_handle_union(const T *str, const T *expr, PRBool case_insensitive) _scan_and_copy(const T *expr, T stop1, T stop2, T *dest)
{ {
T *e2 = (T *) NS_Alloc(sizeof(T)*nsCharTraits<T>::length(expr)); register int sx; /* source index */
register int t,p2,p1 = 1; register char cc;
int cp;
while(1) { for (sx = 0; (cc = expr[sx]) && cc != stop1 && cc != stop2; sx++) {
for(cp=1;expr[cp] != ')';cp++) if (cc == '\\') {
if(expr[cp] == '\\') if (!expr[++sx])
++cp; return ABORTED; /* should be impossible */
for(p2 = 0;(expr[p1] != '|') && (p1 != cp);p1++,p2++) {
if(expr[p1] == '\\')
e2[p2++] = expr[p1++];
e2[p2] = expr[p1];
} }
for (t=cp+1; ((e2[p2] = expr[t]) != 0); ++t,++p2) {} else if (cc == '[') {
if(::_shexp_match(str,e2, case_insensitive) == MATCH) { while ((cc = expr[++sx]) && cc != ']') {
NS_Free(e2); if(cc == '\\' && !expr[++sx])
return MATCH; return ABORTED;
} }
if(p1 == cp) { if (!cc)
NS_Free(e2); return ABORTED; /* should be impossible */
return NOMATCH;
} }
else ++p1;
} }
if (dest && sx) {
/* Copy all but the closing delimiter. */
memcpy(dest, expr, sx * sizeof(T));
dest[sx] = 0;
}
return cc ? sx : ABORTED; /* index of closing delimiter */
} }
/* On input, expr[0] is the opening parenthesis of a union.
 * See if any of the alternatives in the union matches as a pattern.
 * The strategy is to take each of the alternatives, in turn, and append
 * the rest of the expression (after the closing ')' that marks the end of
 * this union) to that alternative, and then see if the resultant expression
 * matches the input string.  Repeat this until some alternative matches,
 * or we have an abort.
 *
 * str              - remaining input string to match.
 * expr             - expression, with expr[0] being the '(' of the union.
 * case_insensitive - passed through unchanged to _shexp_match.
 * level            - current recursion depth; _shexp_match is called with
 *                    level + 1.
 *
 * Returns the result of the first _shexp_match call that is not NOMATCH,
 * NOMATCH if every alternative was tried without a match, or ABORTED if the
 * union is malformed (unterminated, too short, empty alternative) or the
 * scratch buffer cannot be allocated.
 */
template<class T>
static int
_handle_union(const T *str, const T *expr, PRBool case_insensitive,
              unsigned int level)
{
    int sx;              /* source index */
    int cp;              /* source index of closing parenthesis */
    int count;
    int ret = NOMATCH;
    T *e2;

    /* Find the closing parenthesis that ends this union in the expression */
    cp = ::_scan_and_copy(expr, T(')'), T('\0'), static_cast<T*>(NULL));
    if (cp == ABORTED || cp < 4) /* must be at least "(a|b" before ')' */
        return ABORTED;
    ++cp;                /* now index of char after closing parenthesis */

    /* The scratch buffer must hold length(expr) + 1 *elements* of T.  The
     * allocator is given a byte count, so scale by sizeof(T); without the
     * scaling a multi-byte T gets a buffer that is too small for the copies
     * performed below.
     */
    e2 = (T *) NS_Alloc((1 + nsCharTraits<T>::length(expr)) * sizeof(T));
    if (!e2)
        return ABORTED;

    for (sx = 1; ; ++sx) {
        /* Here, expr[sx] is one character past the preceding '(' or '|'. */
        /* Copy everything up to the next delimiter to e2 */
        count = ::_scan_and_copy(expr + sx, T(')'), T('|'), e2);
        if (count == ABORTED || !count) {
            /* unterminated or empty alternative */
            ret = ABORTED;
            break;
        }
        sx += count;
        /* Append everything after closing parenthesis to e2.  This is safe:
         * one alternative plus the tail is never longer than expr itself.
         */
        nsCharTraits<T>::copy(e2 + count, expr + cp,
                              nsCharTraits<T>::length(expr + cp) + 1);
        ret = ::_shexp_match(str, e2, case_insensitive, level + 1);
        /* Stop on MATCH/ABORTED, or once the last alternative was tried. */
        if (ret != NOMATCH || !expr[sx] || expr[sx] == ')')
            break;
    }
    NS_Free(e2);
    if (sx < 2)
        ret = ABORTED;
    return ret;
}
/* Returns 1 if val is in range from start..end, case insensitive;
 * returns 0 otherwise.
 *
 * "Case insensitive" means val matches when tolower(val) equals tolower(c)
 * for at least one c with start <= c <= end.
 *
 * tolower() is only defined for arguments representable as unsigned char,
 * so a val outside 0..255 can never match and start/end are clamped to that
 * interval.  This keeps wide or negative character values from triggering
 * undefined behaviour.  An empty range (start > end) matches nothing.
 */
static int
_is_char_in_range(int start, int end, int val)
{
    if (val < 0 || val > 255)
        return 0;
    if (start < 0)
        start = 0;
    if (end > 255)
        end = 255;

    const int target = tolower(val);
    for (int c = start; c <= end; ++c) {
        if (tolower(c) == target)
            return 1;
    }
    return 0;
}
template<class T> template<class T>
static int static int
_shexp_match(const T *str, const T *expr, PRBool case_insensitive) _shexp_match(const T *str, const T *expr, PRBool case_insensitive,
unsigned int level)
{ {
register int x,y; register int x; /* input string index */
register int y; /* expression index */
int ret,neg; int ret,neg;
ret = 0; if (level > 20) /* Don't let the stack get too deep. */
for(x=0,y=0;expr[y];++y,++x) { return ABORTED;
if((!str[x]) && (expr[y] != '(') && (expr[y] != '$') && (expr[y] != '*')) for(x = 0, y = 0; expr[y]; ++y, ++x) {
ret = ABORTED; if((!str[x]) && (expr[y] != '$') && (expr[y] != '*')) {
else { return NOMATCH;
}
switch(expr[y]) { switch(expr[y]) {
case '$': case '$':
if( (str[x]) ) if(str[x])
ret = NOMATCH; return NOMATCH;
else
--x; /* we don't want loop to increment x */ --x; /* we don't want loop to increment x */
break; break;
case '*': case '*':
@@ -212,109 +283,141 @@ _shexp_match(const T *str, const T *expr, PRBool case_insensitive)
if(!expr[y]) if(!expr[y])
return MATCH; return MATCH;
while(str[x]) { while(str[x]) {
switch(::_shexp_match(&str[x++],&expr[y], case_insensitive)) { ret = ::_shexp_match(&str[x++], &expr[y], case_insensitive,
level + 1);
switch(ret) {
case NOMATCH: case NOMATCH:
continue; continue;
case ABORTED: case ABORTED:
ret = ABORTED; return ABORTED;
break;
default: default:
return MATCH; return MATCH;
} }
break;
} }
if((expr[y] == '$') && (expr[y+1] == '\0') && (!str[x])) if((expr[y] == '$') && (expr[y+1] == '\0') && (!str[x]))
return MATCH; return MATCH;
else else
ret = ABORTED; return NOMATCH;
break; case '[': {
case '[': T start, end = 0;
int i;
neg = ((expr[++y] == '^') && (expr[y+1] != ']')); neg = ((expr[++y] == '^') && (expr[y+1] != ']'));
if (neg) if (neg)
++y; ++y;
i = y;
if ((isalnum(expr[y])) && (expr[y+1] == '-') && start = expr[i++];
(isalnum(expr[y+2])) && (expr[y+3] == ']')) if (start == '\\')
{ start = expr[i++];
int start = expr[y], end = expr[y+2]; if (isalnum(int(start)) && expr[i++] == '-') {
end = expr[i++];
/* Droolproofing for pinheads not included */ if (end == '\\')
if(neg ^ ((str[x] < start) || (str[x] > end))) { end = expr[i++];
ret = NOMATCH;
break;
} }
y+=3; if (isalnum(int(end)) && expr[i] == ']') {
/* This is a range form: a-b */
T val = str[x];
if (end < start) { /* swap them */
T tmp = end;
end = start;
start = tmp;
}
if (case_insensitive && isalpha(int(val))) {
val = ::_is_char_in_range(int(start), int(end), int(val));
if (neg == val)
return NOMATCH;
}
else if (neg != ((val < start) || (val > end))) {
return NOMATCH;
}
y = i;
} }
else { else {
int matched; /* Not range form */
int matched = 0;
for (matched=0;expr[y] != ']';y++) { for (; expr[y] != ']'; y++) {
/* match an escaped ']' character */ if (expr[y] == '\\')
if('\\' == expr[y] && ']' == expr[y+1]) { ++y;
if(']' == str[x]) if(case_insensitive)
matched |= 1; matched |= (toupper(int(str[x])) == toupper(int(expr[y])));
y++; /* move an extra char to compensate for '\\' */ else
continue;
}
matched |= (str[x] == expr[y]); matched |= (str[x] == expr[y]);
} }
if (neg ^ (!matched)) if (neg == matched)
ret = NOMATCH; return NOMATCH;
}
} }
break; break;
case '(': case '(':
return ::_handle_union(&str[x],&expr[y], case_insensitive); if (!expr[y+1])
break; return ABORTED;
return ::_handle_union(&str[x], &expr[y], case_insensitive, level + 1);
case '?': case '?':
break; break;
case ')':
case ']':
case '|':
return ABORTED;
case '\\': case '\\':
++y; ++y;
/* fall through */
default: default:
if(case_insensitive) if(case_insensitive) {
{ if(toupper(int(str[x])) != toupper(int(expr[y])))
if(toupper(str[x]) != toupper(expr[y])) return NOMATCH;
ret = NOMATCH;
} }
else else {
{
if(str[x] != expr[y]) if(str[x] != expr[y])
ret = NOMATCH; return NOMATCH;
} }
break; break;
} }
} }
if(ret) return (str[x] ? NOMATCH : MATCH);
break;
}
return (ret ? ret : (str[x] ? NOMATCH : MATCH));
} }
template<class T> template<class T>
int static int
NS_WildCardMatch_(const T *str, const T *xp, PRBool case_insensitive) ns_WildCardMatch(const T *str, const T *xp, PRBool case_insensitive)
{ {
T *expr = NS_strdup(xp); T *expr = NULL;
int x, ret = MATCH;
if (!nsCharTraits<T>::find(xp, nsCharTraits<T>::length(xp), T('~')))
return _shexp_match(str, xp, case_insensitive, 0);
expr = (T *) NS_Alloc(nsCharTraits<T>::length(xp) + 1);
if(!expr) if(!expr)
return 1; return NOMATCH;
for(int x=nsCharTraits<T>::length(expr)-1;x;--x) { x = ::_scan_and_copy(expr, T('~'), T('\0'), static_cast<T*>(NULL));
if((expr[x] == '~') && (expr[x-1] != '\\')) { if (x != ABORTED && expr[x] == '~') {
expr[x] = '\0'; expr[x++] = '\0';
if(::_shexp_match(str,&expr[++x], case_insensitive) == MATCH) ret = ::_shexp_match(str, &expr[x], case_insensitive, 0);
goto punt; switch (ret) {
break; case NOMATCH: ret = MATCH; break;
case MATCH: ret = NOMATCH; break;
default: break;
} }
} }
if(::_shexp_match(str,expr, case_insensitive) == MATCH) { if (ret == MATCH)
NS_Free(expr); ret = ::_shexp_match(str, expr, case_insensitive, 0);
return 0;
}
punt:
NS_Free(expr); NS_Free(expr);
return 1; return ret;
}
/* Validate expr with NS_WildCardValid before attempting a match.
 * Returns -1 (the same value ABORTED is #defined to in this file) when the
 * expression is reported as INVALID_SXP; for every other validation result
 * returns whatever ns_WildCardMatch(str, expr, case_insensitive) returns.
 */
template<class T>
NS_COM int
NS_WildCardMatch_(const T *str, const T *expr, PRBool case_insensitive)
{
int is_valid = NS_WildCardValid(expr);
switch(is_valid) {
case INVALID_SXP:
/* never hand a malformed expression to the matcher */
return -1;
default:
/* any other validation result falls through to the matcher */
return ns_WildCardMatch(str, expr, case_insensitive);
}
} }
NS_COM int NS_COM int