f8d9fa9e80
This upgrades all of libgo other than the runtime package to the Go 1.4 release. In Go 1.4 much of the runtime was rewritten into Go. Merging that code will take more time and will not change the API, so I'm putting it off for now. There are a few runtime changes anyhow, to accomodate other packages that rely on minor modifications to the runtime support. The compiler changes slightly to add a one-bit flag to each type descriptor kind that is stored directly in an interface, which for gccgo is currently only pointer types. Another one-bit flag (gcprog) is reserved because it is used by the gc compiler, but gccgo does not currently use it. There is another error check in the compiler since I ran across it during testing. gotools/: * Makefile.am (go_cmd_go_files): Sort entries. Add generate.go. * Makefile.in: Rebuild. From-SVN: r219627
573 lines
14 KiB
Go
573 lines
14 KiB
Go
// Copyright 2011 The Go Authors. All rights reserved.
|
||
// Use of this source code is governed by a BSD-style
|
||
// license that can be found in the LICENSE file.
|
||
|
||
package syntax
|
||
|
||
import (
|
||
"bytes"
|
||
"fmt"
|
||
"testing"
|
||
"unicode"
|
||
)
|
||
|
||
type parseTest struct {
|
||
Regexp string
|
||
Dump string
|
||
}
|
||
|
||
var parseTests = []parseTest{
|
||
// Base cases
|
||
{`a`, `lit{a}`},
|
||
{`a.`, `cat{lit{a}dot{}}`},
|
||
{`a.b`, `cat{lit{a}dot{}lit{b}}`},
|
||
{`ab`, `str{ab}`},
|
||
{`a.b.c`, `cat{lit{a}dot{}lit{b}dot{}lit{c}}`},
|
||
{`abc`, `str{abc}`},
|
||
{`a|^`, `alt{lit{a}bol{}}`},
|
||
{`a|b`, `cc{0x61-0x62}`},
|
||
{`(a)`, `cap{lit{a}}`},
|
||
{`(a)|b`, `alt{cap{lit{a}}lit{b}}`},
|
||
{`a*`, `star{lit{a}}`},
|
||
{`a+`, `plus{lit{a}}`},
|
||
{`a?`, `que{lit{a}}`},
|
||
{`a{2}`, `rep{2,2 lit{a}}`},
|
||
{`a{2,3}`, `rep{2,3 lit{a}}`},
|
||
{`a{2,}`, `rep{2,-1 lit{a}}`},
|
||
{`a*?`, `nstar{lit{a}}`},
|
||
{`a+?`, `nplus{lit{a}}`},
|
||
{`a??`, `nque{lit{a}}`},
|
||
{`a{2}?`, `nrep{2,2 lit{a}}`},
|
||
{`a{2,3}?`, `nrep{2,3 lit{a}}`},
|
||
{`a{2,}?`, `nrep{2,-1 lit{a}}`},
|
||
// Malformed { } are treated as literals.
|
||
{`x{1001`, `str{x{1001}`},
|
||
{`x{9876543210`, `str{x{9876543210}`},
|
||
{`x{9876543210,`, `str{x{9876543210,}`},
|
||
{`x{2,1`, `str{x{2,1}`},
|
||
{`x{1,9876543210`, `str{x{1,9876543210}`},
|
||
{``, `emp{}`},
|
||
{`|`, `emp{}`}, // alt{emp{}emp{}} but got factored
|
||
{`|x|`, `alt{emp{}lit{x}emp{}}`},
|
||
{`.`, `dot{}`},
|
||
{`^`, `bol{}`},
|
||
{`$`, `eol{}`},
|
||
{`\|`, `lit{|}`},
|
||
{`\(`, `lit{(}`},
|
||
{`\)`, `lit{)}`},
|
||
{`\*`, `lit{*}`},
|
||
{`\+`, `lit{+}`},
|
||
{`\?`, `lit{?}`},
|
||
{`{`, `lit{{}`},
|
||
{`}`, `lit{}}`},
|
||
{`\.`, `lit{.}`},
|
||
{`\^`, `lit{^}`},
|
||
{`\$`, `lit{$}`},
|
||
{`\\`, `lit{\}`},
|
||
{`[ace]`, `cc{0x61 0x63 0x65}`},
|
||
{`[abc]`, `cc{0x61-0x63}`},
|
||
{`[a-z]`, `cc{0x61-0x7a}`},
|
||
{`[a]`, `lit{a}`},
|
||
{`\-`, `lit{-}`},
|
||
{`-`, `lit{-}`},
|
||
{`\_`, `lit{_}`},
|
||
{`abc`, `str{abc}`},
|
||
{`abc|def`, `alt{str{abc}str{def}}`},
|
||
{`abc|def|ghi`, `alt{str{abc}str{def}str{ghi}}`},
|
||
|
||
// Posix and Perl extensions
|
||
{`[[:lower:]]`, `cc{0x61-0x7a}`},
|
||
{`[a-z]`, `cc{0x61-0x7a}`},
|
||
{`[^[:lower:]]`, `cc{0x0-0x60 0x7b-0x10ffff}`},
|
||
{`[[:^lower:]]`, `cc{0x0-0x60 0x7b-0x10ffff}`},
|
||
{`(?i)[[:lower:]]`, `cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}`},
|
||
{`(?i)[a-z]`, `cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}`},
|
||
{`(?i)[^[:lower:]]`, `cc{0x0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}`},
|
||
{`(?i)[[:^lower:]]`, `cc{0x0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}`},
|
||
{`\d`, `cc{0x30-0x39}`},
|
||
{`\D`, `cc{0x0-0x2f 0x3a-0x10ffff}`},
|
||
{`\s`, `cc{0x9-0xa 0xc-0xd 0x20}`},
|
||
{`\S`, `cc{0x0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}`},
|
||
{`\w`, `cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a}`},
|
||
{`\W`, `cc{0x0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x10ffff}`},
|
||
{`(?i)\w`, `cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a 0x17f 0x212a}`},
|
||
{`(?i)\W`, `cc{0x0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}`},
|
||
{`[^\\]`, `cc{0x0-0x5b 0x5d-0x10ffff}`},
|
||
// { `\C`, `byte{}` }, // probably never
|
||
|
||
// Unicode, negatives, and a double negative.
|
||
{`\p{Braille}`, `cc{0x2800-0x28ff}`},
|
||
{`\P{Braille}`, `cc{0x0-0x27ff 0x2900-0x10ffff}`},
|
||
{`\p{^Braille}`, `cc{0x0-0x27ff 0x2900-0x10ffff}`},
|
||
{`\P{^Braille}`, `cc{0x2800-0x28ff}`},
|
||
{`\pZ`, `cc{0x20 0xa0 0x1680 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}`},
|
||
{`[\p{Braille}]`, `cc{0x2800-0x28ff}`},
|
||
{`[\P{Braille}]`, `cc{0x0-0x27ff 0x2900-0x10ffff}`},
|
||
{`[\p{^Braille}]`, `cc{0x0-0x27ff 0x2900-0x10ffff}`},
|
||
{`[\P{^Braille}]`, `cc{0x2800-0x28ff}`},
|
||
{`[\pZ]`, `cc{0x20 0xa0 0x1680 0x2000-0x200a 0x2028-0x2029 0x202f 0x205f 0x3000}`},
|
||
{`\p{Lu}`, mkCharClass(unicode.IsUpper)},
|
||
{`[\p{Lu}]`, mkCharClass(unicode.IsUpper)},
|
||
{`(?i)[\p{Lu}]`, mkCharClass(isUpperFold)},
|
||
{`\p{Any}`, `dot{}`},
|
||
{`\p{^Any}`, `cc{}`},
|
||
|
||
// Hex, octal.
|
||
{`[\012-\234]\141`, `cat{cc{0xa-0x9c}lit{a}}`},
|
||
{`[\x{41}-\x7a]\x61`, `cat{cc{0x41-0x7a}lit{a}}`},
|
||
|
||
// More interesting regular expressions.
|
||
{`a{,2}`, `str{a{,2}}`},
|
||
{`\.\^\$\\`, `str{.^$\}`},
|
||
{`[a-zABC]`, `cc{0x41-0x43 0x61-0x7a}`},
|
||
{`[^a]`, `cc{0x0-0x60 0x62-0x10ffff}`},
|
||
{`[α-ε☺]`, `cc{0x3b1-0x3b5 0x263a}`}, // utf-8
|
||
{`a*{`, `cat{star{lit{a}}lit{{}}`},
|
||
|
||
// Test precedences
|
||
{`(?:ab)*`, `star{str{ab}}`},
|
||
{`(ab)*`, `star{cap{str{ab}}}`},
|
||
{`ab|cd`, `alt{str{ab}str{cd}}`},
|
||
{`a(b|c)d`, `cat{lit{a}cap{cc{0x62-0x63}}lit{d}}`},
|
||
|
||
// Test flattening.
|
||
{`(?:a)`, `lit{a}`},
|
||
{`(?:ab)(?:cd)`, `str{abcd}`},
|
||
{`(?:a+b+)(?:c+d+)`, `cat{plus{lit{a}}plus{lit{b}}plus{lit{c}}plus{lit{d}}}`},
|
||
{`(?:a+|b+)|(?:c+|d+)`, `alt{plus{lit{a}}plus{lit{b}}plus{lit{c}}plus{lit{d}}}`},
|
||
{`(?:a|b)|(?:c|d)`, `cc{0x61-0x64}`},
|
||
{`a|.`, `dot{}`},
|
||
{`.|a`, `dot{}`},
|
||
{`(?:[abc]|A|Z|hello|world)`, `alt{cc{0x41 0x5a 0x61-0x63}str{hello}str{world}}`},
|
||
{`(?:[abc]|A|Z)`, `cc{0x41 0x5a 0x61-0x63}`},
|
||
|
||
// Test Perl quoted literals
|
||
{`\Q+|*?{[\E`, `str{+|*?{[}`},
|
||
{`\Q+\E+`, `plus{lit{+}}`},
|
||
{`\Q\\E`, `lit{\}`},
|
||
{`\Q\\\E`, `str{\\}`},
|
||
|
||
// Test Perl \A and \z
|
||
{`(?m)^`, `bol{}`},
|
||
{`(?m)$`, `eol{}`},
|
||
{`(?-m)^`, `bot{}`},
|
||
{`(?-m)$`, `eot{}`},
|
||
{`(?m)\A`, `bot{}`},
|
||
{`(?m)\z`, `eot{\z}`},
|
||
{`(?-m)\A`, `bot{}`},
|
||
{`(?-m)\z`, `eot{\z}`},
|
||
|
||
// Test named captures
|
||
{`(?P<name>a)`, `cap{name:lit{a}}`},
|
||
|
||
// Case-folded literals
|
||
{`[Aa]`, `litfold{A}`},
|
||
{`[\x{100}\x{101}]`, `litfold{Ā}`},
|
||
{`[Δδ]`, `litfold{Δ}`},
|
||
|
||
// Strings
|
||
{`abcde`, `str{abcde}`},
|
||
{`[Aa][Bb]cd`, `cat{strfold{AB}str{cd}}`},
|
||
|
||
// Factoring.
|
||
{`abc|abd|aef|bcx|bcy`, `alt{cat{lit{a}alt{cat{lit{b}cc{0x63-0x64}}str{ef}}}cat{str{bc}cc{0x78-0x79}}}`},
|
||
{`ax+y|ax+z|ay+w`, `cat{lit{a}alt{cat{plus{lit{x}}cc{0x79-0x7a}}cat{plus{lit{y}}lit{w}}}}`},
|
||
|
||
// Bug fixes.
|
||
{`(?:.)`, `dot{}`},
|
||
{`(?:x|(?:xa))`, `cat{lit{x}alt{emp{}lit{a}}}`},
|
||
{`(?:.|(?:.a))`, `cat{dot{}alt{emp{}lit{a}}}`},
|
||
{`(?:A(?:A|a))`, `cat{lit{A}litfold{A}}`},
|
||
{`(?:A|a)`, `litfold{A}`},
|
||
{`A|(?:A|a)`, `litfold{A}`},
|
||
{`(?s).`, `dot{}`},
|
||
{`(?-s).`, `dnl{}`},
|
||
{`(?:(?:^).)`, `cat{bol{}dot{}}`},
|
||
{`(?-s)(?:(?:^).)`, `cat{bol{}dnl{}}`},
|
||
|
||
// RE2 prefix_tests
|
||
{`abc|abd`, `cat{str{ab}cc{0x63-0x64}}`},
|
||
{`a(?:b)c|abd`, `cat{str{ab}cc{0x63-0x64}}`},
|
||
{`abc|abd|aef|bcx|bcy`,
|
||
`alt{cat{lit{a}alt{cat{lit{b}cc{0x63-0x64}}str{ef}}}` +
|
||
`cat{str{bc}cc{0x78-0x79}}}`},
|
||
{`abc|x|abd`, `alt{str{abc}lit{x}str{abd}}`},
|
||
{`(?i)abc|ABD`, `cat{strfold{AB}cc{0x43-0x44 0x63-0x64}}`},
|
||
{`[ab]c|[ab]d`, `cat{cc{0x61-0x62}cc{0x63-0x64}}`},
|
||
{`(?:xx|yy)c|(?:xx|yy)d`,
|
||
`cat{alt{str{xx}str{yy}}cc{0x63-0x64}}`},
|
||
{`x{2}|x{2}[0-9]`,
|
||
`cat{rep{2,2 lit{x}}alt{emp{}cc{0x30-0x39}}}`},
|
||
{`x{2}y|x{2}[0-9]y`,
|
||
`cat{rep{2,2 lit{x}}alt{lit{y}cat{cc{0x30-0x39}lit{y}}}}`},
|
||
|
||
// Valid repetitions.
|
||
{`((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}))`, ``},
|
||
{`((((((((((x{1}){2}){2}){2}){2}){2}){2}){2}){2}){2})`, ``},
|
||
}
|
||
|
||
const testFlags = MatchNL | PerlX | UnicodeGroups
|
||
|
||
func TestParseSimple(t *testing.T) {
|
||
testParseDump(t, parseTests, testFlags)
|
||
}
|
||
|
||
var foldcaseTests = []parseTest{
|
||
{`AbCdE`, `strfold{ABCDE}`},
|
||
{`[Aa]`, `litfold{A}`},
|
||
{`a`, `litfold{A}`},
|
||
|
||
// 0x17F is an old English long s (looks like an f) and folds to s.
|
||
// 0x212A is the Kelvin symbol and folds to k.
|
||
{`A[F-g]`, `cat{litfold{A}cc{0x41-0x7a 0x17f 0x212a}}`}, // [Aa][A-z...]
|
||
{`[[:upper:]]`, `cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}`},
|
||
{`[[:lower:]]`, `cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}`},
|
||
}
|
||
|
||
func TestParseFoldCase(t *testing.T) {
|
||
testParseDump(t, foldcaseTests, FoldCase)
|
||
}
|
||
|
||
var literalTests = []parseTest{
|
||
{"(|)^$.[*+?]{5,10},\\", "str{(|)^$.[*+?]{5,10},\\}"},
|
||
}
|
||
|
||
func TestParseLiteral(t *testing.T) {
|
||
testParseDump(t, literalTests, Literal)
|
||
}
|
||
|
||
var matchnlTests = []parseTest{
|
||
{`.`, `dot{}`},
|
||
{"\n", "lit{\n}"},
|
||
{`[^a]`, `cc{0x0-0x60 0x62-0x10ffff}`},
|
||
{`[a\n]`, `cc{0xa 0x61}`},
|
||
}
|
||
|
||
func TestParseMatchNL(t *testing.T) {
|
||
testParseDump(t, matchnlTests, MatchNL)
|
||
}
|
||
|
||
var nomatchnlTests = []parseTest{
|
||
{`.`, `dnl{}`},
|
||
{"\n", "lit{\n}"},
|
||
{`[^a]`, `cc{0x0-0x9 0xb-0x60 0x62-0x10ffff}`},
|
||
{`[a\n]`, `cc{0xa 0x61}`},
|
||
}
|
||
|
||
func TestParseNoMatchNL(t *testing.T) {
|
||
testParseDump(t, nomatchnlTests, 0)
|
||
}
|
||
|
||
// Test Parse -> Dump.
|
||
func testParseDump(t *testing.T, tests []parseTest, flags Flags) {
|
||
for _, tt := range tests {
|
||
re, err := Parse(tt.Regexp, flags)
|
||
if err != nil {
|
||
t.Errorf("Parse(%#q): %v", tt.Regexp, err)
|
||
continue
|
||
}
|
||
if tt.Dump == "" {
|
||
// It parsed. That's all we care about.
|
||
continue
|
||
}
|
||
d := dump(re)
|
||
if d != tt.Dump {
|
||
t.Errorf("Parse(%#q).Dump() = %#q want %#q", tt.Regexp, d, tt.Dump)
|
||
}
|
||
}
|
||
}
|
||
|
||
// dump prints a string representation of the regexp showing
|
||
// the structure explicitly.
|
||
func dump(re *Regexp) string {
|
||
var b bytes.Buffer
|
||
dumpRegexp(&b, re)
|
||
return b.String()
|
||
}
|
||
|
||
var opNames = []string{
|
||
OpNoMatch: "no",
|
||
OpEmptyMatch: "emp",
|
||
OpLiteral: "lit",
|
||
OpCharClass: "cc",
|
||
OpAnyCharNotNL: "dnl",
|
||
OpAnyChar: "dot",
|
||
OpBeginLine: "bol",
|
||
OpEndLine: "eol",
|
||
OpBeginText: "bot",
|
||
OpEndText: "eot",
|
||
OpWordBoundary: "wb",
|
||
OpNoWordBoundary: "nwb",
|
||
OpCapture: "cap",
|
||
OpStar: "star",
|
||
OpPlus: "plus",
|
||
OpQuest: "que",
|
||
OpRepeat: "rep",
|
||
OpConcat: "cat",
|
||
OpAlternate: "alt",
|
||
}
|
||
|
||
// dumpRegexp writes an encoding of the syntax tree for the regexp re to b.
|
||
// It is used during testing to distinguish between parses that might print
|
||
// the same using re's String method.
|
||
func dumpRegexp(b *bytes.Buffer, re *Regexp) {
|
||
if int(re.Op) >= len(opNames) || opNames[re.Op] == "" {
|
||
fmt.Fprintf(b, "op%d", re.Op)
|
||
} else {
|
||
switch re.Op {
|
||
default:
|
||
b.WriteString(opNames[re.Op])
|
||
case OpStar, OpPlus, OpQuest, OpRepeat:
|
||
if re.Flags&NonGreedy != 0 {
|
||
b.WriteByte('n')
|
||
}
|
||
b.WriteString(opNames[re.Op])
|
||
case OpLiteral:
|
||
if len(re.Rune) > 1 {
|
||
b.WriteString("str")
|
||
} else {
|
||
b.WriteString("lit")
|
||
}
|
||
if re.Flags&FoldCase != 0 {
|
||
for _, r := range re.Rune {
|
||
if unicode.SimpleFold(r) != r {
|
||
b.WriteString("fold")
|
||
break
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
b.WriteByte('{')
|
||
switch re.Op {
|
||
case OpEndText:
|
||
if re.Flags&WasDollar == 0 {
|
||
b.WriteString(`\z`)
|
||
}
|
||
case OpLiteral:
|
||
for _, r := range re.Rune {
|
||
b.WriteRune(r)
|
||
}
|
||
case OpConcat, OpAlternate:
|
||
for _, sub := range re.Sub {
|
||
dumpRegexp(b, sub)
|
||
}
|
||
case OpStar, OpPlus, OpQuest:
|
||
dumpRegexp(b, re.Sub[0])
|
||
case OpRepeat:
|
||
fmt.Fprintf(b, "%d,%d ", re.Min, re.Max)
|
||
dumpRegexp(b, re.Sub[0])
|
||
case OpCapture:
|
||
if re.Name != "" {
|
||
b.WriteString(re.Name)
|
||
b.WriteByte(':')
|
||
}
|
||
dumpRegexp(b, re.Sub[0])
|
||
case OpCharClass:
|
||
sep := ""
|
||
for i := 0; i < len(re.Rune); i += 2 {
|
||
b.WriteString(sep)
|
||
sep = " "
|
||
lo, hi := re.Rune[i], re.Rune[i+1]
|
||
if lo == hi {
|
||
fmt.Fprintf(b, "%#x", lo)
|
||
} else {
|
||
fmt.Fprintf(b, "%#x-%#x", lo, hi)
|
||
}
|
||
}
|
||
}
|
||
b.WriteByte('}')
|
||
}
|
||
|
||
func mkCharClass(f func(rune) bool) string {
|
||
re := &Regexp{Op: OpCharClass}
|
||
lo := rune(-1)
|
||
for i := rune(0); i <= unicode.MaxRune; i++ {
|
||
if f(i) {
|
||
if lo < 0 {
|
||
lo = i
|
||
}
|
||
} else {
|
||
if lo >= 0 {
|
||
re.Rune = append(re.Rune, lo, i-1)
|
||
lo = -1
|
||
}
|
||
}
|
||
}
|
||
if lo >= 0 {
|
||
re.Rune = append(re.Rune, lo, unicode.MaxRune)
|
||
}
|
||
return dump(re)
|
||
}
|
||
|
||
func isUpperFold(r rune) bool {
|
||
if unicode.IsUpper(r) {
|
||
return true
|
||
}
|
||
c := unicode.SimpleFold(r)
|
||
for c != r {
|
||
if unicode.IsUpper(c) {
|
||
return true
|
||
}
|
||
c = unicode.SimpleFold(c)
|
||
}
|
||
return false
|
||
}
|
||
|
||
func TestFoldConstants(t *testing.T) {
|
||
last := rune(-1)
|
||
for i := rune(0); i <= unicode.MaxRune; i++ {
|
||
if unicode.SimpleFold(i) == i {
|
||
continue
|
||
}
|
||
if last == -1 && minFold != i {
|
||
t.Errorf("minFold=%#U should be %#U", minFold, i)
|
||
}
|
||
last = i
|
||
}
|
||
if maxFold != last {
|
||
t.Errorf("maxFold=%#U should be %#U", maxFold, last)
|
||
}
|
||
}
|
||
|
||
func TestAppendRangeCollapse(t *testing.T) {
|
||
// AppendRange should collapse each of the new ranges
|
||
// into the earlier ones (it looks back two ranges), so that
|
||
// the slice never grows very large.
|
||
// Note that we are not calling cleanClass.
|
||
var r []rune
|
||
for i := rune('A'); i <= 'Z'; i++ {
|
||
r = appendRange(r, i, i)
|
||
r = appendRange(r, i+'a'-'A', i+'a'-'A')
|
||
}
|
||
if string(r) != "AZaz" {
|
||
t.Errorf("appendRange interlaced A-Z a-z = %s, want AZaz", string(r))
|
||
}
|
||
}
|
||
|
||
var invalidRegexps = []string{
|
||
`(`,
|
||
`)`,
|
||
`(a`,
|
||
`a)`,
|
||
`(a))`,
|
||
`(a|b|`,
|
||
`a|b|)`,
|
||
`(a|b|))`,
|
||
`(a|b`,
|
||
`a|b)`,
|
||
`(a|b))`,
|
||
`[a-z`,
|
||
`([a-z)`,
|
||
`[a-z)`,
|
||
`([a-z]))`,
|
||
`x{1001}`,
|
||
`x{9876543210}`,
|
||
`x{2,1}`,
|
||
`x{1,9876543210}`,
|
||
"\xff", // Invalid UTF-8
|
||
"[\xff]",
|
||
"[\\\xff]",
|
||
"\\\xff",
|
||
`(?P<name>a`,
|
||
`(?P<name>`,
|
||
`(?P<name`,
|
||
`(?P<x y>a)`,
|
||
`(?P<>a)`,
|
||
`[a-Z]`,
|
||
`(?i)[a-Z]`,
|
||
`a{100000}`,
|
||
`a{100000,}`,
|
||
"((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})",
|
||
}
|
||
|
||
var onlyPerl = []string{
|
||
`[a-b-c]`,
|
||
`\Qabc\E`,
|
||
`\Q*+?{[\E`,
|
||
`\Q\\E`,
|
||
`\Q\\\E`,
|
||
`\Q\\\\E`,
|
||
`\Q\\\\\E`,
|
||
`(?:a)`,
|
||
`(?P<name>a)`,
|
||
}
|
||
|
||
var onlyPOSIX = []string{
|
||
"a++",
|
||
"a**",
|
||
"a?*",
|
||
"a+*",
|
||
"a{1}*",
|
||
".{1}{2}.{3}",
|
||
}
|
||
|
||
func TestParseInvalidRegexps(t *testing.T) {
|
||
for _, regexp := range invalidRegexps {
|
||
if re, err := Parse(regexp, Perl); err == nil {
|
||
t.Errorf("Parse(%#q, Perl) = %s, should have failed", regexp, dump(re))
|
||
}
|
||
if re, err := Parse(regexp, POSIX); err == nil {
|
||
t.Errorf("Parse(%#q, POSIX) = %s, should have failed", regexp, dump(re))
|
||
}
|
||
}
|
||
for _, regexp := range onlyPerl {
|
||
if _, err := Parse(regexp, Perl); err != nil {
|
||
t.Errorf("Parse(%#q, Perl): %v", regexp, err)
|
||
}
|
||
if re, err := Parse(regexp, POSIX); err == nil {
|
||
t.Errorf("Parse(%#q, POSIX) = %s, should have failed", regexp, dump(re))
|
||
}
|
||
}
|
||
for _, regexp := range onlyPOSIX {
|
||
if re, err := Parse(regexp, Perl); err == nil {
|
||
t.Errorf("Parse(%#q, Perl) = %s, should have failed", regexp, dump(re))
|
||
}
|
||
if _, err := Parse(regexp, POSIX); err != nil {
|
||
t.Errorf("Parse(%#q, POSIX): %v", regexp, err)
|
||
}
|
||
}
|
||
}
|
||
|
||
func TestToStringEquivalentParse(t *testing.T) {
|
||
for _, tt := range parseTests {
|
||
re, err := Parse(tt.Regexp, testFlags)
|
||
if err != nil {
|
||
t.Errorf("Parse(%#q): %v", tt.Regexp, err)
|
||
continue
|
||
}
|
||
if tt.Dump == "" {
|
||
// It parsed. That's all we care about.
|
||
continue
|
||
}
|
||
d := dump(re)
|
||
if d != tt.Dump {
|
||
t.Errorf("Parse(%#q).Dump() = %#q want %#q", tt.Regexp, d, tt.Dump)
|
||
continue
|
||
}
|
||
|
||
s := re.String()
|
||
if s != tt.Regexp {
|
||
// If ToString didn't return the original regexp,
|
||
// it must have found one with fewer parens.
|
||
// Unfortunately we can't check the length here, because
|
||
// ToString produces "\\{" for a literal brace,
|
||
// but "{" is a shorter equivalent in some contexts.
|
||
nre, err := Parse(s, testFlags)
|
||
if err != nil {
|
||
t.Errorf("Parse(%#q.String() = %#q): %v", tt.Regexp, s, err)
|
||
continue
|
||
}
|
||
nd := dump(nre)
|
||
if d != nd {
|
||
t.Errorf("Parse(%#q) -> %#q; %#q vs %#q", tt.Regexp, s, d, nd)
|
||
}
|
||
|
||
ns := nre.String()
|
||
if s != ns {
|
||
t.Errorf("Parse(%#q) -> %#q -> %#q", tt.Regexp, s, ns)
|
||
}
|
||
}
|
||
}
|
||
}
|