gcc/libgo/go/strings/replace.go
Ian Lance Taylor f8d9fa9e80 libgo, compiler: Upgrade libgo to Go 1.4, except for runtime.
This upgrades all of libgo other than the runtime package to
the Go 1.4 release.  In Go 1.4 much of the runtime was
rewritten into Go.  Merging that code will take more time and
will not change the API, so I'm putting it off for now.

There are a few runtime changes anyhow, to accommodate other
packages that rely on minor modifications to the runtime
support.

The compiler changes slightly to add a one-bit flag to each
type descriptor kind that is stored directly in an interface,
which for gccgo is currently only pointer types.  Another
one-bit flag (gcprog) is reserved because it is used by the gc
compiler, but gccgo does not currently use it.

There is another error check in the compiler since I ran
across it during testing.

gotools/:
	* Makefile.am (go_cmd_go_files): Sort entries.  Add generate.go.
	* Makefile.in: Rebuild.

From-SVN: r219627
2015-01-15 00:27:56 +00:00

519 lines
13 KiB
Go

// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package strings
import "io"
// Replacer replaces a list of strings with replacements.
// It is safe for concurrent use by multiple goroutines.
//
// A Replacer is created with NewReplacer; its Replace and WriteString
// methods forward to the algorithm stored in r.
type Replacer struct {
// r is the replacement algorithm that NewReplacer selected for the
// old/new pairs it was given.
r replacer
}
// replacer is the interface that a replacement algorithm needs to implement.
type replacer interface {
// Replace returns a copy of s with all replacements performed.
Replace(s string) string
// WriteString writes s to w with all replacements performed. n is the
// number of bytes written and err is the error, if any, that stopped
// the writing.
WriteString(w io.Writer, s string) (n int, err error)
}
// NewReplacer builds a Replacer from a list of old, new string pairs.
// Replacements are performed in order, without overlapping matches.
// When the same old string is listed more than once, its first pair wins.
//
// NewReplacer panics if it is given an odd number of arguments.
//
// The implementation is picked from the shape of the arguments:
//   - exactly one pair whose old string is longer than one byte:
//     singleStringReplacer
//   - every old and every new string exactly one byte long: byteReplacer
//   - every old string one byte long, new strings of any length:
//     byteStringReplacer
//   - anything else: genericReplacer
func NewReplacer(oldnew ...string) *Replacer {
	if len(oldnew)%2 == 1 {
		panic("strings.NewReplacer: odd argument count")
	}

	// A lone multi-byte pattern gets the dedicated string-search replacer.
	if len(oldnew) == 2 && len(oldnew[0]) > 1 {
		single := makeSingleStringReplacer(oldnew[0], oldnew[1])
		return &Replacer{r: single}
	}

	// Any old string that is not exactly one byte long forces the generic
	// algorithm. Otherwise remember whether every new string is one byte too.
	singleByteValues := true
	for k := 0; k < len(oldnew); k += 2 {
		if len(oldnew[k]) != 1 {
			return &Replacer{r: makeGenericReplacer(oldnew)}
		}
		singleByteValues = singleByteValues && len(oldnew[k+1]) == 1
	}

	if !singleByteValues {
		// Pairs are applied from last to first so that an earlier pair
		// overwrites a later one that shares the same old byte.
		table := byteStringReplacer{}
		for k := len(oldnew) - 2; k >= 0; k -= 2 {
			table[oldnew[k][0]] = []byte(oldnew[k+1])
		}
		return &Replacer{r: &table}
	}

	// Start from the identity mapping, then apply the pairs from last to
	// first so that the first pair for a given old byte takes precedence.
	table := byteReplacer{}
	for b := range table {
		table[b] = byte(b)
	}
	for k := len(oldnew) - 2; k >= 0; k -= 2 {
		table[oldnew[k][0]] = oldnew[k+1][0]
	}
	return &Replacer{r: &table}
}
// Replace returns a copy of s with all replacements performed.
// The work is done by the replacement algorithm held in r.r.
func (r *Replacer) Replace(s string) string {
	impl := r.r
	return impl.Replace(s)
}
// WriteString writes s to w with all replacements performed.
// The results are those of the replacement algorithm held in r.r.
func (r *Replacer) WriteString(w io.Writer, s string) (n int, err error) {
	n, err = r.r.WriteString(w, s)
	return n, err
}
// trieNode is a node in a lookup trie for prioritized key/value pairs. Keys
// and values may be empty. For example, the trie containing keys "ax", "ay",
// "bcbc", "x" and "xy" could have eight nodes:
//
//	n0  -
//	n1  a-
//	n2  .x+
//	n3  .y+
//	n4  b-
//	n5  .cbc+
//	n6  x+
//	n7  .y+
//
// n0 is the root node, and its children are n1, n4 and n6; n1's children are
// n2 and n3; n4's child is n5; n6's child is n7. Nodes n0, n1 and n4 (marked
// with a trailing "-") are partial keys, and nodes n2, n3, n5, n6 and n7
// (marked with a trailing "+") are complete keys.
type trieNode struct {
// value is the value of the trie node's key/value pair. It is empty if
// this node is not a complete key.
value string
// priority is the priority (higher is more important) of the trie node's
// key/value pair; keys are not necessarily matched shortest- or longest-
// first. Priority is positive if this node is a complete key, and zero
// otherwise. In the example above, positive/zero priorities are marked
// with a trailing "+" or "-".
priority int
// A trie node may have zero, one or more child nodes:
//   - if the remaining fields are zero, there are no children.
//   - if prefix and next are non-zero, there is one child in next.
//   - if table is non-zero, it defines all the children.
//
// Prefixes are preferred over tables when there is one child, but the
// root node always uses a table for lookup efficiency.
//
// prefix is the difference in keys between this trie node and the next.
// In the example above, node n4 has prefix "cbc" and n4's next node is n5.
// Node n5 has no children and so has zero prefix, next and table fields.
prefix string
next *trieNode
// table is a lookup table indexed by the next byte in the key, after
// remapping that byte through genericReplacer.mapping to create a dense
// index. In the example above, the keys only use 'a', 'b', 'c', 'x' and
// 'y', which remap to 0, 1, 2, 3 and 4. All other bytes remap to 5, and
// genericReplacer.tableSize will be 5. Node n0's table will be
// []*trieNode{ 0:n1, 1:n4, 3:n6 }, where the 0, 1 and 3 are the remapped
// 'a', 'b' and 'x'.
table []*trieNode
}
// add inserts a key/value pair into the trie rooted at t. key is the part of
// the key that has not yet been consumed on the way down to t, val and
// priority are stored on the node where the key ends, and r supplies the byte
// mapping and table size for any lookup table that has to be created.
//
// If the node where the key ends already has a non-zero priority, the
// existing value and priority are kept and the new pair is dropped.
func (t *trieNode) add(key, val string, priority int, r *genericReplacer) {
if key == "" {
// The key ends at this node. Only claim the node if no earlier pair
// completed here, so the first pair added for a given key wins.
if t.priority == 0 {
t.value = val
t.priority = priority
}
return
}
if t.prefix != "" {
// Need to split the prefix among multiple nodes.
var n int // length of the longest common prefix
for ; n < len(t.prefix) && n < len(key); n++ {
if t.prefix[n] != key[n] {
break
}
}
if n == len(t.prefix) {
// The key starts with the whole prefix: keep t as it is and
// continue with the rest of the key below the existing child.
t.next.add(key[n:], val, priority, r)
} else if n == 0 {
// First byte differs, start a new lookup table here. Looking up
// what is currently t.prefix[0] will lead to prefixNode, and
// looking up key[0] will lead to keyNode.
var prefixNode *trieNode
if len(t.prefix) == 1 {
// The table consumes the only prefix byte, so the table entry
// can point straight at the old child.
prefixNode = t.next
} else {
// The table consumes the first prefix byte; the remaining
// bytes live on in a new intermediate node.
prefixNode = &trieNode{
prefix: t.prefix[1:],
next: t.next,
}
}
keyNode := new(trieNode)
t.table = make([]*trieNode, r.tableSize)
t.table[r.mapping[t.prefix[0]]] = prefixNode
t.table[r.mapping[key[0]]] = keyNode
// t now uses the table, so its prefix/next fields are cleared.
t.prefix = ""
t.next = nil
keyNode.add(key[1:], val, priority, r)
} else {
// Insert new node after the common section of the prefix.
next := &trieNode{
prefix: t.prefix[n:],
next: t.next,
}
t.prefix = t.prefix[:n]
t.next = next
next.add(key[n:], val, priority, r)
}
} else if t.table != nil {
// Insert into existing table.
m := r.mapping[key[0]]
if t.table[m] == nil {
t.table[m] = new(trieNode)
}
t.table[m].add(key[1:], val, priority, r)
} else {
// t has neither a prefix nor a table: store the whole remaining key
// as the prefix and hang a new node below it to hold the value.
t.prefix = key
t.next = new(trieNode)
t.next.add("", val, priority, r)
}
}
// lookup walks the trie along s and returns the value and key length of the
// highest-priority complete key that is a prefix of s. found reports whether
// any key matched. When ignoreRoot is true, a key stored on the root node
// itself (the empty key) is not considered.
func (r *genericReplacer) lookup(s string, ignoreRoot bool) (val string, keylen int, found bool) {
	root := &r.root
	best := 0     // highest priority seen so far
	consumed := 0 // number of bytes of s matched on the way to cur

	for cur := root; cur != nil; {
		// Record this node if it is a complete key that beats the best one
		// seen so far (and is not the root node being ignored).
		skipped := ignoreRoot && cur == root
		if cur.priority > best && !skipped {
			best = cur.priority
			val, keylen, found = cur.value, consumed, true
		}

		if s == "" {
			break
		}

		switch {
		case cur.table != nil:
			// Table node: dispatch on the next byte of s.
			slot := r.mapping[s[0]]
			if int(slot) == r.tableSize {
				// The byte does not occur in any key.
				return
			}
			cur = cur.table[slot]
			s = s[1:]
			consumed++
		case cur.prefix != "" && HasPrefix(s, cur.prefix):
			// Prefix node: the whole prefix has to match.
			consumed += len(cur.prefix)
			s = s[len(cur.prefix):]
			cur = cur.next
		default:
			// No child can continue the match.
			return
		}
	}
	return
}
// genericReplacer is the fully generic algorithm.
// It's used as a fallback when nothing faster can be used.
type genericReplacer struct {
// root is the root node of the key trie. makeGenericReplacer always
// gives it a lookup table.
root trieNode
// tableSize is the size of a trie node's lookup table. It is the number
// of unique key bytes.
tableSize int
// mapping maps from key bytes to a dense index for trieNode.table.
// Bytes that occur in no key map to tableSize.
mapping [256]byte
}
// makeGenericReplacer builds a genericReplacer for the old, new pairs in
// oldnew. It derives the dense byte mapping and the table size from the bytes
// used by the old strings, then inserts every pair into the trie with a
// priority that decreases from the first pair to the last.
func makeGenericReplacer(oldnew []string) *genericReplacer {
	gr := &genericReplacer{}

	// Flag every byte that occurs in some old string.
	for p := 0; p < len(oldnew); p += 2 {
		old := oldnew[p]
		for q := 0; q < len(old); q++ {
			gr.mapping[old[q]] = 1
		}
	}

	// Each flagged byte contributes one slot to the lookup tables.
	for _, flag := range gr.mapping {
		gr.tableSize += int(flag)
	}

	// Turn the flags into dense indices: flagged bytes are numbered in
	// ascending byte order, all other bytes map to tableSize. The range
	// clause iterates over a copy of the array, so flag still holds the
	// original 0/1 marker while gr.mapping is being rewritten.
	var dense byte
	for b, flag := range gr.mapping {
		if flag != 0 {
			gr.mapping[b] = dense
			dense++
			continue
		}
		gr.mapping[b] = byte(gr.tableSize)
	}

	// Ensure root node uses a lookup table (for performance).
	gr.root.table = make([]*trieNode, gr.tableSize)

	// Earlier pairs receive higher priorities than later ones.
	for p := 0; p < len(oldnew); p += 2 {
		gr.root.add(oldnew[p], oldnew[p+1], len(oldnew)-p, gr)
	}
	return gr
}
// appendSliceWriter is a byte slice that grows by appending whatever is
// written to it.
type appendSliceWriter []byte

// Write appends p to the slice, which lets appendSliceWriter satisfy
// io.Writer. It always returns len(p) and a nil error.
func (w *appendSliceWriter) Write(p []byte) (int, error) {
	grown := append(*w, p...)
	*w = grown
	return len(p), nil
}

// WriteString appends s to the slice directly, without
// string->[]byte->string allocations. It always returns len(s) and a nil
// error.
func (w *appendSliceWriter) WriteString(s string) (int, error) {
	grown := append(*w, s...)
	*w = grown
	return len(s), nil
}
// stringWriterIface is satisfied by writers that have their own WriteString
// method. getStringWriter uses it to detect such writers.
type stringWriterIface interface {
WriteString(string) (int, error)
}
// stringWriter wraps an io.Writer and gives it a WriteString method.
type stringWriter struct {
	w io.Writer
}

// WriteString converts s to a byte slice and hands it to the wrapped
// writer's Write method, returning that call's results unchanged.
func (w stringWriter) WriteString(s string) (int, error) {
	data := []byte(s)
	return w.w.Write(data)
}
// getStringWriter returns w itself when it already has a WriteString method,
// and otherwise wraps it in a stringWriter that forwards to w.Write.
func getStringWriter(w io.Writer) stringWriterIface {
	if native, ok := w.(stringWriterIface); ok {
		return native
	}
	return stringWriter{w}
}
// Replace returns a copy of s with all replacements performed. The output is
// accumulated in an appendSliceWriter preallocated with a capacity of len(s).
func (r *genericReplacer) Replace(s string) string {
	out := make(appendSliceWriter, 0, len(s))
	// The results are discarded on purpose: appendSliceWriter's write
	// methods always return a nil error.
	_, _ = r.WriteString(&out, s)
	return string(out)
}
// WriteString writes s to w with all replacements performed. It returns the
// number of bytes written and the first error reported by the writer.
//
// The input is scanned from left to right. At each position the trie is asked
// for the best key starting there; on a match the pending unmatched text and
// the replacement value are written and the scan resumes after the key.
func (r *genericReplacer) WriteString(w io.Writer, s string) (n int, err error) {
sw := getStringWriter(w)
// last is the start of the input that has not been written yet; wn holds
// the byte count of the most recent write.
var last, wn int
// prevMatchEmpty records that the previous iteration matched the empty
// key, so that the same empty match is not taken twice in a row.
var prevMatchEmpty bool
// The loop also runs for i == len(s) so that an empty key can match at the
// very end of the input.
for i := 0; i <= len(s); {
// Fast path: s[i] is not a prefix of any pattern.
// This shortcut is only taken when the root node has priority zero, i.e.
// when the root itself is not a complete key.
if i != len(s) && r.root.priority == 0 {
index := int(r.mapping[s[i]])
if index == r.tableSize || r.root.table[index] == nil {
i++
continue
}
}
// Ignore the empty match iff the previous loop found the empty match.
val, keylen, match := r.lookup(s[i:], prevMatchEmpty)
prevMatchEmpty = match && keylen == 0
if match {
// Flush the unmatched text before the key, then the replacement.
wn, err = sw.WriteString(s[last:i])
n += wn
if err != nil {
return
}
wn, err = sw.WriteString(val)
n += wn
if err != nil {
return
}
// Skip over the matched key. For an empty key i does not move; the next
// iteration then ignores the root's empty match and can advance.
i += keylen
last = i
continue
}
i++
}
// Write whatever follows the last match.
if last != len(s) {
wn, err = sw.WriteString(s[last:])
n += wn
}
return
}
// singleStringReplacer is the implementation that's used when there is only
// one string to replace (and that string has more than one byte).
type singleStringReplacer struct {
// finder locates occurrences of the pattern. Replace and WriteString call
// its next method to get the offset of the next match (-1 when there is
// none) and read its pattern field for the length of the matched text.
finder *stringFinder
// value is the new string that replaces that pattern when it's found.
value string
}
// makeSingleStringReplacer returns a singleStringReplacer that replaces
// pattern with value, using a stringFinder built for pattern.
func makeSingleStringReplacer(pattern string, value string) *singleStringReplacer {
	r := new(singleStringReplacer)
	r.finder = makeStringFinder(pattern)
	r.value = value
	return r
}
// Replace returns a copy of s in which every occurrence of the pattern found
// by r.finder is replaced by r.value. When the pattern does not occur, s
// itself is returned and nothing is allocated.
func (r *singleStringReplacer) Replace(s string) string {
	var out []byte
	pos := 0       // start of the part of s not yet copied to out
	found := false // whether at least one occurrence was replaced

	for off := r.finder.next(s[pos:]); off != -1; off = r.finder.next(s[pos:]) {
		found = true
		// Copy the text before the occurrence, then the replacement.
		out = append(out, s[pos:pos+off]...)
		out = append(out, r.value...)
		// Resume the search right after the occurrence.
		pos += off + len(r.finder.pattern)
	}

	if found {
		out = append(out, s[pos:]...)
		return string(out)
	}
	return s
}
// WriteString writes s to w with every occurrence of the pattern replaced by
// r.value. It returns the number of bytes written and the first error
// reported by the writer. The text after the last occurrence is always
// passed to the writer, even when it is empty.
func (r *singleStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
	sw := getStringWriter(w)
	pos := 0  // start of the part of s not yet written
	var k int // byte count of the most recent write

	for off := r.finder.next(s[pos:]); off != -1; off = r.finder.next(s[pos:]) {
		// Text before the occurrence.
		k, err = sw.WriteString(s[pos : pos+off])
		n += k
		if err != nil {
			return n, err
		}
		// The replacement itself.
		k, err = sw.WriteString(r.value)
		n += k
		if err != nil {
			return n, err
		}
		pos += off + len(r.finder.pattern)
	}

	k, err = sw.WriteString(s[pos:])
	n += k
	return n, err
}
// byteReplacer is the implementation that's used when all the "old"
// and "new" values are single ASCII bytes.
// The array contains replacement bytes indexed by old byte; a byte that is
// not replaced maps to itself.
type byteReplacer [256]byte

// Replace returns s with every byte translated through the table. When no
// byte changes, s itself is returned and nothing is allocated.
func (r *byteReplacer) Replace(s string) string {
	var out []byte // allocated on the first byte that actually changes
	for i := 0; i < len(s); i++ {
		orig := s[i]
		repl := r[orig]
		if repl == orig {
			continue
		}
		if out == nil {
			out = []byte(s)
		}
		out[i] = repl
	}
	if out != nil {
		return string(out)
	}
	return s
}

// WriteString translates s through the table and writes the result to w in
// chunks of at most 32 KiB. It returns the number of bytes written and the
// first error reported by w.
func (r *byteReplacer) WriteString(w io.Writer, s string) (n int, err error) {
	// TODO(bradfitz): use io.WriteString with slices of s, avoiding allocation.
	const maxChunk = 32 << 10
	chunk := maxChunk
	if len(s) < maxChunk {
		chunk = len(s)
	}
	buf := make([]byte, chunk)

	for rest := s; len(rest) > 0; {
		// Stage the next chunk in buf and translate it in place.
		k := copy(buf, rest)
		rest = rest[k:]
		for j := range buf[:k] {
			buf[j] = r[buf[j]]
		}

		written, werr := w.Write(buf[:k])
		n += written
		if werr != nil {
			return n, werr
		}
	}
	return n, nil
}
// byteStringReplacer is the implementation that's used when all the
// "old" values are single ASCII bytes but the "new" values vary in size.
// The array contains replacement byte slices indexed by old byte.
// A nil []byte means that the old byte should not be replaced.
type byteStringReplacer [256][]byte

// Replace returns s with every byte that has a non-nil table entry replaced
// by that entry. When no byte of s has an entry, s itself is returned.
func (r *byteStringReplacer) Replace(s string) string {
	// First pass: compute the exact size of the result and find out whether
	// anything needs to be replaced at all.
	size := len(s)
	changed := false
	for i := 0; i < len(s); i++ {
		if repl := r[s[i]]; repl != nil {
			changed = true
			// One input byte is swapped for len(repl) output bytes.
			size += len(repl) - 1
		}
	}
	if !changed {
		return s
	}

	// Second pass: assemble the result in a buffer of the computed size.
	out := make([]byte, 0, size)
	for i := 0; i < len(s); i++ {
		c := s[i]
		if repl := r[c]; repl != nil {
			out = append(out, repl...)
		} else {
			out = append(out, c)
		}
	}
	return string(out)
}
// WriteString writes s to w with every byte that has a non-nil table entry
// replaced by that entry. Runs of unreplaced bytes are written in one call
// each. It returns the number of bytes written and the first error reported
// by the writer.
func (r *byteStringReplacer) WriteString(w io.Writer, s string) (n int, err error) {
	sw := getStringWriter(w)
	start := 0 // beginning of the pending run of unreplaced bytes
	var k int  // byte count of the most recent write

	for i := 0; i < len(s); i++ {
		repl := r[s[i]]
		if repl == nil {
			continue
		}
		// Flush the unreplaced run that precedes this byte, if any.
		if start != i {
			k, err = sw.WriteString(s[start:i])
			n += k
			if err != nil {
				return n, err
			}
		}
		start = i + 1
		k, err = w.Write(repl)
		n += k
		if err != nil {
			return n, err
		}
	}

	// Flush whatever follows the last replaced byte.
	if start != len(s) {
		k, err = sw.WriteString(s[start:])
		n += k
	}
	return n, err
}