rollup merge of #20330: fhahn/issue-15877-model-lexer-range-2

This patch resolves the second problem mentioned in #15877: function calls to integers, e.g. `3.b()`.

It does so by checking whether the character following the first dot of a LIT_FLOAT is a letter or an underscore (either of which would start a valid identifier). This does not look like a particularly elegant solution, but it seems that a lookahead of 1 is needed to make this distinction.

Another interesting case is a range that starts with an integer constant but ends with a function call, e.g. `1..b()`. Rust treats this as a range from 1 to `b()`, but given that `1.` is a valid LIT_FLOAT, `1..b()` could also be read as a method call on a float.

cc @cmr
This commit is contained in:
Alex Crichton 2014-12-30 16:26:18 -08:00
commit fa130ae6ff
3 changed files with 77 additions and 4 deletions

View File

@ -1,7 +1,7 @@
Reference grammar.
Uses [antlr4](http://www.antlr.org/) and a custom Rust tool to compare
ASTs/token streams generated. You can use the `check-syntax` make target to
ASTs/token streams generated. You can use the `check-lexer` make target to
run all of the available tests.
To use manually:
@ -12,7 +12,7 @@ javac *.java
rustc -O verify.rs
for file in ../*/**.rs; do
echo $file;
grun RustLexer tokens -tokens < $file | ./verify $file || break
grun RustLexer tokens -tokens < $file | ./verify $file RustLexer.tokens || break
done
```

View File

@ -112,8 +112,64 @@ LIT_INTEGER
;
LIT_FLOAT
: [0-9][0-9_]* ( '.' {_input.LA(1) != '.'}?
| ('.' [0-9][0-9_]*)? ([eE] [-+]? [0-9][0-9_]*)? SUFFIX?)
: [0-9][0-9_]* ('.' {
/* dot followed by another dot is a range, no float */
_input.LA(1) != '.' &&
/* dot followed by an identifier is an integer with a function call, no float */
_input.LA(1) != '_' &&
_input.LA(1) != 'a' &&
_input.LA(1) != 'b' &&
_input.LA(1) != 'c' &&
_input.LA(1) != 'd' &&
_input.LA(1) != 'e' &&
_input.LA(1) != 'f' &&
_input.LA(1) != 'g' &&
_input.LA(1) != 'h' &&
_input.LA(1) != 'i' &&
_input.LA(1) != 'j' &&
_input.LA(1) != 'k' &&
_input.LA(1) != 'l' &&
_input.LA(1) != 'm' &&
_input.LA(1) != 'n' &&
_input.LA(1) != 'o' &&
_input.LA(1) != 'p' &&
_input.LA(1) != 'q' &&
_input.LA(1) != 'r' &&
_input.LA(1) != 's' &&
_input.LA(1) != 't' &&
_input.LA(1) != 'u' &&
_input.LA(1) != 'v' &&
_input.LA(1) != 'w' &&
_input.LA(1) != 'x' &&
_input.LA(1) != 'y' &&
_input.LA(1) != 'z' &&
_input.LA(1) != 'A' &&
_input.LA(1) != 'B' &&
_input.LA(1) != 'C' &&
_input.LA(1) != 'D' &&
_input.LA(1) != 'E' &&
_input.LA(1) != 'F' &&
_input.LA(1) != 'G' &&
_input.LA(1) != 'H' &&
_input.LA(1) != 'I' &&
_input.LA(1) != 'J' &&
_input.LA(1) != 'K' &&
_input.LA(1) != 'L' &&
_input.LA(1) != 'M' &&
_input.LA(1) != 'N' &&
_input.LA(1) != 'O' &&
_input.LA(1) != 'P' &&
_input.LA(1) != 'Q' &&
_input.LA(1) != 'R' &&
_input.LA(1) != 'S' &&
_input.LA(1) != 'T' &&
_input.LA(1) != 'U' &&
_input.LA(1) != 'V' &&
_input.LA(1) != 'W' &&
_input.LA(1) != 'X' &&
_input.LA(1) != 'Y' &&
_input.LA(1) != 'Z'
}? | ('.' [0-9][0-9_]*)? ([eE] [-+]? [0-9][0-9_]*)? SUFFIX?)
;
LIT_STR

View File

@ -11,6 +11,10 @@ if [ "${VERBOSE}" == "1" ]; then
set -x
fi
passed=0
failed=0
skipped=0
check() {
grep --silent "// ignore-lexer-test" $1;
@ -21,14 +25,27 @@ check() {
# seem to have any effect.
if $3 RustLexer tokens -tokens < $1 | $4 $1 $5; then
echo "pass: $1"
passed=`expr $passed + 1`
else
echo "fail: $1"
failed=`expr $failed + 1`
fi
else
echo "skip: $1"
skipped=`expr $skipped + 1`
fi
}
# Run the lexer check on every *.rs file found under $1, skipping paths
# that match */test/compile-fail*. The remaining script arguments are
# forwarded to check() unchanged.
for file in $(find $1 -iname '*.rs' ! -path '*/test/compile-fail*'); do
    check $file $2 $3 $4 $5
done

# Print a one-line summary of the counters and exit with status 1 if any
# file failed. The counters are passed as printf arguments rather than
# being spliced into the format string.
printf "\ntest result: "
if [ "$failed" -eq 0 ]; then
    printf "ok. %s passed; %s failed; %s skipped\n\n" "$passed" "$failed" "$skipped"
else
    printf "failed. %s passed; %s failed; %s skipped\n\n" "$passed" "$failed" "$skipped"
    exit 1
fi