gcc/libgo/go/strings/replace_test.go
Ian Lance Taylor f8d9fa9e80 libgo, compiler: Upgrade libgo to Go 1.4, except for runtime.
This upgrades all of libgo other than the runtime package to
the Go 1.4 release.  In Go 1.4 much of the runtime was
rewritten into Go.  Merging that code will take more time and
will not change the API, so I'm putting it off for now.

There are a few runtime changes anyhow, to accommodate other
packages that rely on minor modifications to the runtime
support.

The compiler changes slightly to add a one-bit flag to each
type descriptor kind that is stored directly in an interface,
which for gccgo is currently only pointer types.  Another
one-bit flag (gcprog) is reserved because it is used by the gc
compiler, but gccgo does not currently use it.

There is another error check in the compiler since I ran
across it during testing.

gotools/:
	* Makefile.am (go_cmd_go_files): Sort entries.  Add generate.go.
	* Makefile.in: Rebuild.

From-SVN: r219627
2015-01-15 00:27:56 +00:00

543 lines
13 KiB
Go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package strings_test
import (
"bytes"
"fmt"
. "strings"
"testing"
)
// htmlEscaper replaces the five HTML-significant characters with their
// entity references. Every old string is a single byte and every new string
// is longer than one byte. The "&" entry must produce "&amp;": mapping "&" to
// itself leaves ampersands unescaped and disagrees with both oldHTMLEscape
// and the expected outputs in TestReplacer.
var htmlEscaper = NewReplacer(
	"&", "&amp;",
	"<", "&lt;",
	">", "&gt;",
	`"`, "&quot;",
	"'", "&apos;",
)
// htmlUnescaper turns the five named entity references back into the
// characters they stand for. The old strings have different lengths.
var htmlUnescaper = NewReplacer(
	// Ampersand.
	"&amp;", "&",
	// Angle brackets.
	"&lt;", "<", "&gt;", ">",
	// Double and single quotes.
	"&quot;", `"`, "&apos;", "'",
)
// oldHTMLEscape is the http package's old HTML escaping function: one full
// Replace pass per special character. The passes run in the listed order, so
// "&" is rewritten first and the ampersands introduced by later entities are
// not escaped a second time.
func oldHTMLEscape(s string) string {
	passes := [...][2]string{
		{"&", "&amp;"},
		{"<", "&lt;"},
		{">", "&gt;"},
		{`"`, "&quot;"},
		{"'", "&apos;"},
	}
	for _, pass := range passes {
		s = Replace(s, pass[0], pass[1], -1)
	}
	return s
}
// capitalLetters upper-cases the letters "a" and "b". Every old string and
// every new string is exactly one byte long.
var capitalLetters = NewReplacer(
	"a", "A",
	"b", "B",
)
// TestReplacer tests the replacer implementations. It builds one table of
// (replacer, input, expected output) entries and checks every entry through
// both Replace and WriteString, including the byte count WriteString reports.
func TestReplacer(t *testing.T) {
	type replaceTest struct {
		r       *Replacer
		in, out string
	}
	var tests []replaceTest

	// oneByte converts 0xff to "\xff". A plain string(c) conversion is not
	// used because it would convert to UTF-8.
	oneByte := func(c byte) string {
		return string([]byte{c})
	}

	// inc maps "\x00"->"\x01", ..., "a"->"b", "b"->"c", ..., "\xff"->"\x00".
	var incPairs []string
	for c := 0; c < 256; c++ {
		incPairs = append(incPairs, oneByte(byte(c)), oneByte(byte(c+1)))
	}
	inc := NewReplacer(incPairs...)

	// Test cases with 1-byte old strings, 1-byte new strings.
	tests = append(tests,
		replaceTest{capitalLetters, "brad", "BrAd"},
		replaceTest{capitalLetters, Repeat("a", (32<<10)+123), Repeat("A", (32<<10)+123)},
		replaceTest{capitalLetters, "", ""},

		replaceTest{inc, "brad", "csbe"},
		replaceTest{inc, "\x00\xff", "\x01\x00"},
		replaceTest{inc, "", ""},

		replaceTest{NewReplacer("a", "1", "a", "2"), "brad", "br1d"},
	)

	// repeat maps "a"->"a", "b"->"bb", "c"->"ccc", ...
	// Bytes that sort before 'a' are mapped to a single copy of themselves.
	var repeatPairs []string
	for c := 0; c < 256; c++ {
		count := 1
		if extra := c + 1 - 'a'; extra > 1 {
			count = extra
		}
		repeatPairs = append(repeatPairs, oneByte(byte(c)), Repeat(oneByte(byte(c)), count))
	}
	repeat := NewReplacer(repeatPairs...)

	// Test cases with 1-byte old strings, variable length new strings.
	tests = append(tests,
		replaceTest{htmlEscaper, "No changes", "No changes"},
		replaceTest{htmlEscaper, "I <3 escaping & stuff", "I &lt;3 escaping &amp; stuff"},
		replaceTest{htmlEscaper, "&&&", "&amp;&amp;&amp;"},
		replaceTest{htmlEscaper, "", ""},

		replaceTest{repeat, "brad", "bbrrrrrrrrrrrrrrrrrradddd"},
		replaceTest{repeat, "abba", "abbbba"},
		replaceTest{repeat, "", ""},

		replaceTest{NewReplacer("a", "11", "a", "22"), "brad", "br11d"},
	)

	// The remaining test cases have variable length old strings.

	tests = append(tests,
		replaceTest{htmlUnescaper, "&amp;amp;", "&amp;"},
		replaceTest{htmlUnescaper, "&lt;b&gt;HTML&apos;s neat&lt;/b&gt;", "<b>HTML's neat</b>"},
		replaceTest{htmlUnescaper, "", ""},

		replaceTest{NewReplacer("a", "1", "a", "2", "xxx", "xxx"), "brad", "br1d"},

		replaceTest{NewReplacer("a", "1", "aa", "2", "aaa", "3"), "aaaa", "1111"},

		replaceTest{NewReplacer("aaa", "3", "aa", "2", "a", "1"), "aaaa", "31"},
	)

	// gen1 has multiple old strings of variable length. There is no
	// overall non-empty common prefix, but some pairwise common prefixes.
	gen1 := NewReplacer(
		"aaa", "3[aaa]",
		"aa", "2[aa]",
		"a", "1[a]",
		"i", "i",
		"longerst", "most long",
		"longer", "medium",
		"long", "short",
		"xx", "xx",
		"x", "X",
		"X", "Y",
		"Y", "Z",
	)
	tests = append(tests,
		replaceTest{gen1, "fooaaabar", "foo3[aaa]b1[a]r"},
		replaceTest{gen1, "long, longerst, longer", "short, most long, medium"},
		replaceTest{gen1, "xxxxx", "xxxxX"},
		replaceTest{gen1, "XiX", "YiY"},
		replaceTest{gen1, "", ""},
	)

	// gen2 has multiple old strings with no pairwise common prefix.
	gen2 := NewReplacer(
		"roses", "red",
		"violets", "blue",
		"sugar", "sweet",
	)
	tests = append(tests,
		replaceTest{gen2, "roses are red, violets are blue...", "red are red, blue are blue..."},
		replaceTest{gen2, "", ""},
	)

	// gen3 has multiple old strings with an overall common prefix.
	gen3 := NewReplacer(
		"abracadabra", "poof",
		"abracadabrakazam", "splat",
		"abraham", "lincoln",
		"abrasion", "scrape",
		"abraham", "isaac",
	)
	tests = append(tests,
		replaceTest{gen3, "abracadabrakazam abraham", "poofkazam lincoln"},
		replaceTest{gen3, "abrasion abracad", "scrape abracad"},
		replaceTest{gen3, "abba abram abrasive", "abba abram abrasive"},
		replaceTest{gen3, "", ""},
	)

	// foo{1,2,3,4} have multiple old strings with an overall common prefix
	// and 1- or 2- byte extensions from the common prefix.
	foo1 := NewReplacer(
		"foo1", "A",
		"foo2", "B",
		"foo3", "C",
	)
	foo2 := NewReplacer(
		"foo1", "A",
		"foo2", "B",
		"foo31", "C",
		"foo32", "D",
	)
	foo3 := NewReplacer(
		"foo11", "A",
		"foo12", "B",
		"foo31", "C",
		"foo32", "D",
	)
	foo4 := NewReplacer(
		"foo12", "B",
		"foo32", "D",
	)
	tests = append(tests,
		replaceTest{foo1, "fofoofoo12foo32oo", "fofooA2C2oo"},
		replaceTest{foo1, "", ""},

		replaceTest{foo2, "fofoofoo12foo32oo", "fofooA2Doo"},
		replaceTest{foo2, "", ""},

		replaceTest{foo3, "fofoofoo12foo32oo", "fofooBDoo"},
		replaceTest{foo3, "", ""},

		replaceTest{foo4, "fofoofoo12foo32oo", "fofooBDoo"},
		replaceTest{foo4, "", ""},
	)

	// genAll maps "\x00\x01\x02...\xfe\xff" to "[all]", amongst other things.
	allBytes := make([]byte, 256)
	for pos := range allBytes {
		allBytes[pos] = byte(pos)
	}
	allString := string(allBytes)
	genAll := NewReplacer(
		allString, "[all]",
		"\xff", "[ff]",
		"\x00", "[00]",
	)
	tests = append(tests,
		replaceTest{genAll, allString, "[all]"},
		replaceTest{genAll, "a\xff" + allString + "\x00", "a[ff][all][00]"},
		replaceTest{genAll, "", ""},
	)

	// Test cases with empty old strings.

	blankToX1 := NewReplacer("", "X")
	blankToX2 := NewReplacer("", "X", "", "")
	blankHighPriority := NewReplacer("", "X", "o", "O")
	blankLowPriority := NewReplacer("o", "O", "", "X")
	blankNoOp1 := NewReplacer("", "")
	blankNoOp2 := NewReplacer("", "", "", "A")
	blankFoo := NewReplacer("", "X", "foobar", "R", "foobaz", "Z")
	tests = append(tests,
		replaceTest{blankToX1, "foo", "XfXoXoX"},
		replaceTest{blankToX1, "", "X"},

		replaceTest{blankToX2, "foo", "XfXoXoX"},
		replaceTest{blankToX2, "", "X"},

		replaceTest{blankHighPriority, "oo", "XOXOX"},
		replaceTest{blankHighPriority, "ii", "XiXiX"},
		replaceTest{blankHighPriority, "oiio", "XOXiXiXOX"},
		replaceTest{blankHighPriority, "iooi", "XiXOXOXiX"},
		replaceTest{blankHighPriority, "", "X"},

		replaceTest{blankLowPriority, "oo", "OOX"},
		replaceTest{blankLowPriority, "ii", "XiXiX"},
		replaceTest{blankLowPriority, "oiio", "OXiXiOX"},
		replaceTest{blankLowPriority, "iooi", "XiOOXiX"},
		replaceTest{blankLowPriority, "", "X"},

		replaceTest{blankNoOp1, "foo", "foo"},
		replaceTest{blankNoOp1, "", ""},

		replaceTest{blankNoOp2, "foo", "foo"},
		replaceTest{blankNoOp2, "", ""},

		replaceTest{blankFoo, "foobarfoobaz", "XRXZX"},
		replaceTest{blankFoo, "foobar-foobaz", "XRX-XZX"},
		replaceTest{blankFoo, "", "X"},
	)

	// single string replacer

	abcMatcher := NewReplacer("abc", "[match]")

	tests = append(tests,
		replaceTest{abcMatcher, "", ""},
		replaceTest{abcMatcher, "ab", "ab"},
		replaceTest{abcMatcher, "abc", "[match]"},
		replaceTest{abcMatcher, "abcd", "[match]d"},
		replaceTest{abcMatcher, "cabcabcdabca", "c[match][match]d[match]a"},
	)

	// Issue 6659 cases (more single string replacer)

	noHello := NewReplacer("Hello", "")
	tests = append(tests,
		replaceTest{noHello, "Hello", ""},
		replaceTest{noHello, "Hellox", "x"},
		replaceTest{noHello, "xHello", "x"},
		replaceTest{noHello, "xHellox", "xx"},
	)

	// No-arg test cases.

	nop := NewReplacer()
	tests = append(tests,
		replaceTest{nop, "abc", "abc"},
		replaceTest{nop, "", ""},
	)

	// Run the test cases. The index printed with each failure is the
	// position of the entry in the table built above.
	for idx, tc := range tests {
		if replaced := tc.r.Replace(tc.in); replaced != tc.out {
			t.Errorf("%d. Replace(%q) = %q, want %q", idx, tc.in, replaced, tc.out)
		}

		buf := new(bytes.Buffer)
		n, err := tc.r.WriteString(buf, tc.in)
		// At most one WriteString failure is reported per entry: the error,
		// then the written text, then the reported byte count.
		switch written := buf.String(); {
		case err != nil:
			t.Errorf("%d. WriteString: %v", idx, err)
		case written != tc.out:
			t.Errorf("%d. WriteString(%q) wrote %q, want %q", idx, tc.in, written, tc.out)
		case n != len(tc.out):
			t.Errorf("%d. WriteString(%q) wrote correct string but reported %d bytes; want %d (%q)",
				idx, tc.in, n, len(tc.out), tc.out)
		}
	}
}
// algorithmTestCases pairs a Replacer with the %T name of the concrete
// implementation it is expected to be backed by. The table is shared by
// TestPickAlgorithm and TestWriteStringError.
var algorithmTestCases = []struct {
	r    *Replacer
	want string
}{
	{r: capitalLetters, want: "*strings.byteReplacer"},
	{r: htmlEscaper, want: "*strings.byteStringReplacer"},
	{r: NewReplacer("12", "123"), want: "*strings.singleStringReplacer"},
	{r: NewReplacer("1", "12"), want: "*strings.byteStringReplacer"},
	{r: NewReplacer("", "X"), want: "*strings.genericReplacer"},
	{r: NewReplacer("a", "1", "b", "12", "cde", "123"), want: "*strings.genericReplacer"},
}
// TestPickAlgorithm tests that NewReplacer picks the correct algorithm by
// comparing the %T name of each replacer's underlying implementation with
// the name recorded in algorithmTestCases.
func TestPickAlgorithm(t *testing.T) {
	for idx := range algorithmTestCases {
		tc := algorithmTestCases[idx]
		if got := fmt.Sprintf("%T", tc.r.Replacer()); got != tc.want {
			t.Errorf("%d. algorithm = %s, want %s", idx, got, tc.want)
		}
	}
}
// errWriter is a writer whose Write always fails.
type errWriter struct{}

// Write consumes nothing and reports the error "unwritable".
func (errWriter) Write(p []byte) (n int, err error) {
	err = fmt.Errorf("unwritable")
	return 0, err
}
// TestWriteStringError tests that WriteString returns an error
// received from the underlying io.Writer. Each replacer in
// algorithmTestCases writes to an errWriter and must report zero bytes
// together with the writer's "unwritable" error.
func TestWriteStringError(t *testing.T) {
	for idx, tc := range algorithmTestCases {
		n, err := tc.r.WriteString(errWriter{}, "abc")
		if n == 0 && err != nil && err.Error() == "unwritable" {
			continue
		}
		t.Errorf("%d. WriteStringError = %d, %v, want 0, unwritable", idx, n, err)
	}
}
// TestGenericTrieBuilding verifies the structure of the generated trie. There
// is one node per line, and the key ending with the current line is in the
// trie if it ends with a "+".
func TestGenericTrieBuilding(t *testing.T) {
testCases := []struct{ in, out string }{
{"abc;abdef;abdefgh;xx;xy;z", `-
a-
.b-
..c+
..d-
...ef+
.....gh+
x-
.x+
.y+
z+
`},
{"abracadabra;abracadabrakazam;abraham;abrasion", `-
a-
.bra-
....c-
.....adabra+
...........kazam+
....h-
.....am+
....s-
.....ion+
`},
{"aaa;aa;a;i;longerst;longer;long;xx;x;X;Y", `-
X+
Y+
a+
.a+
..a+
i+
l-
.ong+
....er+
......st+
x+
.x+
`},
{"foo;;foo;foo1", `+
f-
.oo+
...1+
`},
}
for _, tc := range testCases {
keys := Split(tc.in, ";")
args := make([]string, len(keys)*2)
for i, key := range keys {
args[i*2] = key
}
got := NewReplacer(args...).PrintTrie()
// Remove tabs from tc.out
wantbuf := make([]byte, 0, len(tc.out))
for i := 0; i < len(tc.out); i++ {
if tc.out[i] != '\t' {
wantbuf = append(wantbuf, tc.out[i])
}
}
want := string(wantbuf)
if got != want {
t.Errorf("PrintTrie(%q)\ngot\n%swant\n%s", tc.in, got, want)
}
}
}
// BenchmarkGenericNoMatch runs Replace over 100 "A"s followed by 100 "B"s,
// an input that contains none of the replacer's old strings.
func BenchmarkGenericNoMatch(b *testing.B) {
	// varying lengths forces generic
	generic := NewReplacer("a", "A", "b", "B", "12", "123")
	input := Repeat("A", 100) + Repeat("B", 100)
	for n := 0; n < b.N; n++ {
		generic.Replace(input)
	}
}
// BenchmarkGenericMatch1 runs Replace over 100 "a"s followed by 100 "b"s,
// so every byte of the input matches one of the replacer's old strings.
func BenchmarkGenericMatch1(b *testing.B) {
	generic := NewReplacer("a", "A", "b", "B", "12", "123")
	input := Repeat("a", 100) + Repeat("b", 100)
	for n := 0; n < b.N; n++ {
		generic.Replace(input)
	}
}
// BenchmarkGenericMatch2 runs htmlUnescaper over 100 copies of a short
// entity-encoded HTML snippet.
func BenchmarkGenericMatch2(b *testing.B) {
	input := Repeat("It&apos;s &lt;b&gt;HTML&lt;/b&gt;!", 100)
	for n := 0; n < b.N; n++ {
		htmlUnescaper.Replace(input)
	}
}
// benchmarkSingleString measures replacing pattern with "[match]" in text.
// The replacer is built before the timer is reset, so its construction is
// excluded from the measurement, and the length of text is reported as the
// number of bytes processed per iteration.
func benchmarkSingleString(b *testing.B, pattern, text string) {
	replacer := NewReplacer(pattern, "[match]")
	b.SetBytes(int64(len(text)))
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		replacer.Replace(text)
	}
}
// BenchmarkSingleMaxSkipping searches 10000 "a"s for a pattern of 25 "b"s,
// so no byte of the text occurs anywhere in the pattern.
func BenchmarkSingleMaxSkipping(b *testing.B) {
	pattern := Repeat("b", 25)
	text := Repeat("a", 10000)
	benchmarkSingleString(b, pattern, text)
}
// BenchmarkSingleLongSuffixFail searches 1002 "a"s for a pattern made of a
// "b" followed by 500 "a"s: the long run of "a"s matches everywhere but the
// leading "b" never does.
func BenchmarkSingleLongSuffixFail(b *testing.B) {
	pattern := "b" + Repeat("a", 500)
	text := Repeat("a", 1002)
	benchmarkSingleString(b, pattern, text)
}
// BenchmarkSingleMatch replaces "abcdef" in 1000 copies of
// "abcdefghijklmno", so the pattern occurs once in every copy.
func BenchmarkSingleMatch(b *testing.B) {
	const pattern = "abcdef"
	text := Repeat("abcdefghijklmno", 1000)
	benchmarkSingleString(b, pattern, text)
}
// BenchmarkByteByteNoMatch runs capitalLetters over 100 "A"s followed by
// 100 "B"s, an input in which nothing is replaced.
func BenchmarkByteByteNoMatch(b *testing.B) {
	input := Repeat("A", 100) + Repeat("B", 100)
	for n := 0; n < b.N; n++ {
		capitalLetters.Replace(input)
	}
}
// BenchmarkByteByteMatch runs capitalLetters over 100 "a"s followed by
// 100 "b"s, an input in which every byte is replaced.
func BenchmarkByteByteMatch(b *testing.B) {
	input := Repeat("a", 100) + Repeat("b", 100)
	for n := 0; n < b.N; n++ {
		capitalLetters.Replace(input)
	}
}
// BenchmarkByteStringMatch runs htmlEscaper over 198 plain letters wrapped
// in a leading "<" and a trailing ">".
func BenchmarkByteStringMatch(b *testing.B) {
	input := "<" + Repeat("a", 99) + Repeat("b", 99) + ">"
	for n := 0; n < b.N; n++ {
		htmlEscaper.Replace(input)
	}
}
// BenchmarkHTMLEscapeNew escapes a short sentence with htmlEscaper. It uses
// the same input as BenchmarkHTMLEscapeOld.
func BenchmarkHTMLEscapeNew(b *testing.B) {
	const input = "I <3 to escape HTML & other text too."
	for n := 0; n < b.N; n++ {
		htmlEscaper.Replace(input)
	}
}
// BenchmarkHTMLEscapeOld escapes a short sentence with oldHTMLEscape. It
// uses the same input as BenchmarkHTMLEscapeNew.
func BenchmarkHTMLEscapeOld(b *testing.B) {
	const input = "I <3 to escape HTML & other text too."
	for n := 0; n < b.N; n++ {
		oldHTMLEscape(input)
	}
}
// BenchmarkByteStringReplacerWriteString runs htmlEscaper.WriteString into
// a single bytes.Buffer that is reset after every iteration.
func BenchmarkByteStringReplacerWriteString(b *testing.B) {
	var out bytes.Buffer
	input := Repeat("I <3 to escape HTML & other text too.", 100)
	for n := 0; n < b.N; n++ {
		htmlEscaper.WriteString(&out, input)
		out.Reset()
	}
}
// BenchmarkByteReplacerWriteString runs capitalLetters.WriteString into a
// single bytes.Buffer that is reset after every iteration.
func BenchmarkByteReplacerWriteString(b *testing.B) {
	var out bytes.Buffer
	input := Repeat("abcdefghijklmnopqrstuvwxyz", 100)
	for n := 0; n < b.N; n++ {
		capitalLetters.WriteString(&out, input)
		out.Reset()
	}
}
// BenchmarkByteByteReplaces performs the "a"->"A", "b"->"B" substitution
// with two chained Replace calls, as a point of comparison for the
// byte-to-byte replacer benchmarks.
func BenchmarkByteByteReplaces(b *testing.B) {
	input := Repeat("a", 100) + Repeat("b", 100)
	for n := 0; n < b.N; n++ {
		upperA := Replace(input, "a", "A", -1)
		Replace(upperA, "b", "B", -1)
	}
}
// BenchmarkByteByteMap performs the "a"->"A", "b"->"B" substitution with
// Map and a rune-mapping function, as a point of comparison for the
// byte-to-byte replacer benchmarks.
func BenchmarkByteByteMap(b *testing.B) {
	// upperAB capitalizes 'a' and 'b' and leaves every other rune alone.
	upperAB := func(r rune) rune {
		if r == 'a' {
			return 'A'
		}
		if r == 'b' {
			return 'B'
		}
		return r
	}
	input := Repeat("a", 100) + Repeat("b", 100)
	for n := 0; n < b.N; n++ {
		Map(upperAB, input)
	}
}