From 4f751533e48a432d340bc782d4ac2c1480b0f32f Mon Sep 17 00:00:00 2001 From: Ziga Mahkovec Date: Mon, 16 May 2005 21:01:52 +0200 Subject: [PATCH] re PR libgcj/20504 (java.util.regex implementation doesn't support quoting constructs) 2005-05-16 Ziga Mahkovec PR libgcj/20504 gnu/regexp/RE.java: Add support for quoting constructs. From-SVN: r99789 --- libjava/ChangeLog | 5 ++++ libjava/gnu/regexp/RE.java | 54 +++++++++++++++++++++++--------------- 2 files changed, 38 insertions(+), 21 deletions(-) diff --git a/libjava/ChangeLog b/libjava/ChangeLog index 94ab18d8c3a..1b35743c61e 100644 --- a/libjava/ChangeLog +++ b/libjava/ChangeLog @@ -1,3 +1,8 @@ +2005-05-16 Ziga Mahkovec + + PR libgcj/20504 + gnu/regexp/RE.java: Add support for quoting constructs. + 2005-05-15 Tom Tromey * java/lang/String.java (startsWith): Fixed javadoc. diff --git a/libjava/gnu/regexp/RE.java b/libjava/gnu/regexp/RE.java index a01de58f1bb..c8c8a3eb9ff 100644 --- a/libjava/gnu/regexp/RE.java +++ b/libjava/gnu/regexp/RE.java @@ -331,10 +331,22 @@ public class RE extends REToken { // Buffer a token so we can create a TokenRepeated, etc. REToken currentToken = null; char ch; + boolean quot = false; while (index < pLength) { // read the next character unit (including backslash escapes) - index = getCharUnit(pattern,index,unit); + index = getCharUnit(pattern,index,unit,quot); + + if (unit.bk) + if (unit.ch == 'Q') { + quot = true; + continue; + } else if (unit.ch == 'E') { + quot = false; + continue; + } + if (quot) + unit.bk = false; // ALTERNATION OPERATOR // \| or | (if RE_NO_BK_VBAR) or newline (if RE_NEWLINE_ALT) @@ -342,8 +354,8 @@ public class RE extends REToken { // TODO: the '\n' literal here should be a test against REToken.newline, // which unfortunately may be more than a single character. - if ( ( (unit.ch == '|' && (syntax.get(RESyntax.RE_NO_BK_VBAR) ^ unit.bk)) - || (syntax.get(RESyntax.RE_NEWLINE_ALT) && (unit.ch == '\n') && !unit.bk) ) + if ( ( (unit.ch == '|' && (syntax.get(RESyntax.RE_NO_BK_VBAR) ^ (unit.bk || quot))) + || (syntax.get(RESyntax.RE_NEWLINE_ALT) && (unit.ch == '\n') && !(unit.bk || quot)) ) && !syntax.get(RESyntax.RE_LIMITED_OPS)) { // make everything up to here be a branch. create vector if nec. addToken(currentToken); @@ -363,7 +375,7 @@ public class RE extends REToken { // OPEN QUESTION: // what is proper interpretation of '{' at start of string? - else if ((unit.ch == '{') && syntax.get(RESyntax.RE_INTERVALS) && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk)) { + else if ((unit.ch == '{') && syntax.get(RESyntax.RE_INTERVALS) && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ (unit.bk || quot))) { int newIndex = getMinMax(pattern,index,minMax,syntax); if (newIndex > index) { if (minMax.first > minMax.second) @@ -388,7 +400,7 @@ public class RE extends REToken { // LIST OPERATOR: // [...] | [^...] - else if ((unit.ch == '[') && !unit.bk) { + else if ((unit.ch == '[') && !(unit.bk || quot)) { Vector options = new Vector(); boolean negative = false; char lastChar = 0; @@ -490,7 +502,7 @@ public class RE extends REToken { // SUBEXPRESSIONS // (...) | \(...\) depending on RE_NO_BK_PARENS - else if ((unit.ch == '(') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk)) { + else if ((unit.ch == '(') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) { boolean pure = false; boolean comment = false; boolean lookAhead = false; @@ -537,13 +549,13 @@ public class RE extends REToken { int nextIndex = index; int nested = 0; - while ( ((nextIndex = getCharUnit(pattern,endIndex,unit)) > 0) - && !(nested == 0 && (unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk)) ) + while ( ((nextIndex = getCharUnit(pattern,endIndex,unit,false)) > 0) + && !(nested == 0 && (unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) ) if ((endIndex = nextIndex) >= pLength) throw new REException(getLocalizedMessage("subexpr.no.end"),REException.REG_ESUBREG,nextIndex); - else if (unit.ch == '(' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk)) + else if (unit.ch == '(' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) nested++; - else if (unit.ch == ')' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk)) + else if (unit.ch == ')' && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot))) nested--; // endIndex is now position at a ')','\)' @@ -572,14 +584,14 @@ public class RE extends REToken { // UNMATCHED RIGHT PAREN // ) or \) throw exception if // !syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD) - else if (!syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD) && ((unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ unit.bk))) { + else if (!syntax.get(RESyntax.RE_UNMATCHED_RIGHT_PAREN_ORD) && ((unit.ch == ')') && (syntax.get(RESyntax.RE_NO_BK_PARENS) ^ (unit.bk || quot)))) { throw new REException(getLocalizedMessage("unmatched.paren"),REException.REG_EPAREN,index); } // START OF LINE OPERATOR // ^ - else if ((unit.ch == '^') && !unit.bk) { + else if ((unit.ch == '^') && !(unit.bk || quot)) { addToken(currentToken); currentToken = null; addToken(new RETokenStart(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null)); @@ -588,7 +600,7 @@ public class RE extends REToken { // END OF LINE OPERATOR // $ - else if ((unit.ch == '$') && !unit.bk) { + else if ((unit.ch == '$') && !(unit.bk || quot)) { addToken(currentToken); currentToken = null; addToken(new RETokenEnd(subIndex,((cflags & REG_MULTILINE) > 0) ? syntax.getLineSeparator() : null)); @@ -597,7 +609,7 @@ public class RE extends REToken { // MATCH-ANY-CHARACTER OPERATOR (except possibly newline and null) // . - else if ((unit.ch == '.') && !unit.bk) { + else if ((unit.ch == '.') && !(unit.bk || quot)) { addToken(currentToken); currentToken = new RETokenAny(subIndex,syntax.get(RESyntax.RE_DOT_NEWLINE) || ((cflags & REG_DOT_NEWLINE) > 0),syntax.get(RESyntax.RE_DOT_NOT_NULL)); } @@ -605,7 +617,7 @@ public class RE extends REToken { // ZERO-OR-MORE REPEAT OPERATOR // * - else if ((unit.ch == '*') && !unit.bk) { + else if ((unit.ch == '*') && !(unit.bk || quot)) { if (currentToken == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index); if (currentToken instanceof RETokenRepeated) @@ -621,7 +633,7 @@ public class RE extends REToken { // + | \+ depending on RE_BK_PLUS_QM // not available if RE_LIMITED_OPS is set - else if ((unit.ch == '+') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ unit.bk)) { + else if ((unit.ch == '+') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) { if (currentToken == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index); if (currentToken instanceof RETokenRepeated) @@ -638,7 +650,7 @@ public class RE extends REToken { // not available if RE_LIMITED_OPS is set // stingy matching if RE_STINGY_OPS is set and it follows a quantifier - else if ((unit.ch == '?') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ unit.bk)) { + else if ((unit.ch == '?') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) { if (currentToken == null) throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index); // Check for stingy matching on RETokenRepeated @@ -805,9 +817,9 @@ public class RE extends REToken { } - private static int getCharUnit(char[] input, int index, CharUnit unit) throws REException { + private static int getCharUnit(char[] input, int index, CharUnit unit, boolean quot) throws REException { unit.ch = input[index++]; - if (unit.bk = (unit.ch == '\\')) + if (unit.bk = (unit.ch == '\\' && (!quot || index >= input.length || input[index] == 'E'))) if (index < input.length) unit.ch = input[index++]; else throw new REException(getLocalizedMessage("ends.with.backslash"),REException.REG_ESCAPE,index); @@ -1281,7 +1293,7 @@ public class RE extends REToken { // Read string of digits do { - index = getCharUnit(input,index,unit); + index = getCharUnit(input,index,unit,false); if (Character.isDigit(unit.ch)) buf.append(unit.ch); } while ((index != input.length) && Character.isDigit(unit.ch)); @@ -1306,7 +1318,7 @@ public class RE extends REToken { else if ((unit.ch == ',') && !unit.bk) { buf = new StringBuffer(); // Read string of digits - while (((index = getCharUnit(input,index,unit)) != input.length) && Character.isDigit(unit.ch)) + while (((index = getCharUnit(input,index,unit,false)) != input.length) && Character.isDigit(unit.ch)) buf.append(unit.ch); if (!((unit.ch == '}') && (syntax.get(RESyntax.RE_NO_BK_BRACES) ^ unit.bk)))