re PR libgcj/20504 (java.util.regex implementation doesn't support quoting constructs)

2005-05-16  Ziga Mahkovec  <ziga.mahkovec@klika.si>

	PR libgcj/20504
	gnu/regexp/RE.java: Add support for quoting constructs.

From-SVN: r99789
This commit is contained in:
Ziga Mahkovec 2005-05-16 21:01:52 +02:00 committed by Tom Tromey
parent bbf6f1cf02
commit 4f751533e4
2 changed files with 38 additions and 21 deletions

View File

@ -1,3 +1,8 @@
2005-05-16 Ziga Mahkovec <ziga.mahkovec@klika.si>
PR libgcj/20504
gnu/regexp/RE.java: Add support for quoting constructs.
2005-05-15 Tom Tromey <tromey@redhat.com>
* java/lang/String.java (startsWith): Fixed javadoc.

View File

@ -331,10 +331,22 @@ public class RE extends REToken {
// Buffer a token so we can create a TokenRepeated, etc.
REToken currentToken = null;
char ch;
boolean quot = false;
while (index < pLength) {
// read the next character unit (including backslash escapes)
index = getCharUnit(pattern,index,unit);
index = getCharUnit(pattern,index,unit,quot);
if (unit.bk)
if (unit.ch == 'Q') {
quot = true;
continue;
} else if (unit.ch == 'E') {
quot = false;
continue;
}
if (quot)
unit.bk = false;
// ALTERNATION OPERATOR
// \| or | (if RE_NO_BK_VBAR) or newline (if RE_NEWLINE_ALT)
@ -342,8 +354,8 @@ public class RE extends REToken {
// TODO: the '\n' literal here should be a test against REToken.newline,
// which unfortunately may be more than a single character.
if ( ( (unit.ch == '|' && (syntax.get(RESyntax.RE_NO_BK_VBAR) ^ unit.bk))
|| (syntax.get(RESyntax.RE_NEWLINE_ALT) && (unit.ch == '\n') && !unit.bk) )
if ( ( (unit.ch == '|' && (syntax.get(RESyntax.RE_NO_BK_VBAR) ^ (unit.bk || quot)))
|| (syntax.get(RESyntax.RE_NEWLINE_ALT) && (unit.ch == '\n') && !(unit.bk || quot)) )
&& !syntax.get(RESyntax.RE_LIMITED_OPS)) {
// make everything up to here be a branch. create vector if nec.
addToken(currentToken);
@ -363,7 +375,7 @@ public class RE extends REToken {
// OPEN QUESTION:
// what is proper interpretation of '{' at start of string?
else if ((unit.ch == '{') && syntax.get(RESyntax.RE_INTERVALS) && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk)) {
else if ((unit.ch == '{') && syntax.get(RESyntax.RE_INTERVALS) && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ (unit.bk || quot))) {
int newIndex = getMinMax(pattern,index,minMax,syntax);
if (newIndex > index) {
if (minMax.first > minMax.second)
@ -388,7 +400,7 @@ public class RE extends REToken {
// LIST OPERATOR:
// [...] | [^...]
else if ((unit.ch == '[') && !unit.bk) {
else if ((unit.ch == '[') && !(unit.bk || quot)) {
Vector options = new Vector();
boolean negative = false;
char lastChar = 0;
@ -490,7 +502,7 @@ public class RE extends REToken {
// SUBEXPRESSIONS
// (...) | \(...\) depending on RE_NO_BK_PARENS
else if ((unit.ch == '(') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk)) {
else if ((unit.ch == '(') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) {
boolean pure = false;
boolean comment = false;
boolean lookAhead = false;
@ -537,13 +549,13 @@ public class RE extends REToken {
int nextIndex = index;
int nested = 0;
while ( ((nextIndex = getCharUnit(pattern,endIndex,unit)) > 0)
&& !(nested == 0 && (unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk)) )
while ( ((nextIndex = getCharUnit(pattern,endIndex,unit,false)) > 0)
&& !(nested == 0 && (unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) )
if ((endIndex = nextIndex) >= pLength)
throw new REException(getLocalizedMessage("subexpr.no.end"),REException.REG_ESUBREG,nextIndex);
else if (unit.ch == '(' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk))
else if (unit.ch == '(' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))
nested++;
else if (unit.ch == ')' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk))
else if (unit.ch == ')' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))
nested--;
// endIndex is now position at a ')','\)'
@ -572,14 +584,14 @@ public class RE extends REToken {
// UNMATCHED RIGHT PAREN
// ) or \) throw exception if
// !syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD)
else if (!syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD) && ((unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk))) {
else if (!syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD) && ((unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))) {
throw new REException(getLocalizedMessage("unmatched.paren"),REException.REG_EPAREN,index);
}
// START OF LINE OPERATOR
// ^
else if ((unit.ch == '^') && !unit.bk) {
else if ((unit.ch == '^') && !(unit.bk || quot)) {
addToken(currentToken);
currentToken = null;
addToken(new RETokenStart(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null));
@ -588,7 +600,7 @@ public class RE extends REToken {
// END OF LINE OPERATOR
// $
else if ((unit.ch == '$') && !unit.bk) {
else if ((unit.ch == '$') && !(unit.bk || quot)) {
addToken(currentToken);
currentToken = null;
addToken(new RETokenEnd(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null));
@ -597,7 +609,7 @@ public class RE extends REToken {
// MATCH-ANY-CHARACTER OPERATOR (except possibly newline and null)
// .
else if ((unit.ch == '.') && !unit.bk) {
else if ((unit.ch == '.') && !(unit.bk || quot)) {
addToken(currentToken);
currentToken = new RETokenAny(subIndex,syntax.get(RESyntax.RE_DOT_NEWLINE) || ((cflags & REG_DOT_NEWLINE) > 0),syntax.get(RESyntax.RE_DOT_NOT_NULL));
}
@ -605,7 +617,7 @@ public class RE extends REToken {
// ZERO-OR-MORE REPEAT OPERATOR
// *
else if ((unit.ch == '*') && !unit.bk) {
else if ((unit.ch == '*') && !(unit.bk || quot)) {
if (currentToken == null)
throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
if (currentToken instanceof RETokenRepeated)
@ -621,7 +633,7 @@ public class RE extends REToken {
// + | \+ depending on RE_BK_PLUS_QM
// not available if RE_LIMITED_OPS is set
else if ((unit.ch == '+') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ unit.bk)) {
else if ((unit.ch == '+') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) {
if (currentToken == null)
throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
if (currentToken instanceof RETokenRepeated)
@ -638,7 +650,7 @@ public class RE extends REToken {
// not available if RE_LIMITED_OPS is set
// stingy matching if RE_STINGY_OPS is set and it follows a quantifier
else if ((unit.ch == '?') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ unit.bk)) {
else if ((unit.ch == '?') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) {
if (currentToken == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
// Check for stingy matching on RETokenRepeated
@ -805,9 +817,9 @@ public class RE extends REToken {
}
private static int getCharUnit(char[] input, int index, CharUnit unit) throws REException {
private static int getCharUnit(char[] input, int index, CharUnit unit, boolean quot) throws REException {
unit.ch = input[index++];
if (unit.bk = (unit.ch == '\\'))
if (unit.bk = (unit.ch == '\\' && (!quot || index >= input.length || input[index] == 'E')))
if (index < input.length)
unit.ch = input[index++];
else throw new REException(getLocalizedMessage("ends.with.backslash"),REException.REG_ESCAPE,index);
@ -1281,7 +1293,7 @@ public class RE extends REToken {
// Read string of digits
do {
index = getCharUnit(input,index,unit);
index = getCharUnit(input,index,unit,false);
if (Character.isDigit(unit.ch))
buf.append(unit.ch);
} while ((index != input.length) && Character.isDigit(unit.ch));
@ -1306,7 +1318,7 @@ public class RE extends REToken {
else if ((unit.ch == ',') && !unit.bk) {
buf = new StringBuffer();
// Read string of digits
while (((index = getCharUnit(input,index,unit)) != input.length) && Character.isDigit(unit.ch))
while (((index = getCharUnit(input,index,unit,false)) != input.length) && Character.isDigit(unit.ch))
buf.append(unit.ch);
if (!((unit.ch == '}') && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk)))