gcc/libgo/go/strconv/quote.go
Ian Lance Taylor f8d9fa9e80 libgo, compiler: Upgrade libgo to Go 1.4, except for runtime.
This upgrades all of libgo other than the runtime package to
the Go 1.4 release.  In Go 1.4 much of the runtime was
rewritten into Go.  Merging that code will take more time and
will not change the API, so I'm putting it off for now.

There are a few runtime changes anyhow, to accomodate other
packages that rely on minor modifications to the runtime
support.

The compiler changes slightly to add a one-bit flag to each
type descriptor kind that is stored directly in an interface,
which for gccgo is currently only pointer types.  Another
one-bit flag (gcprog) is reserved because it is used by the gc
compiler, but gccgo does not currently use it.

There is another error check in the compiler since I ran
across it during testing.

gotools/:
	* Makefile.am (go_cmd_go_files): Sort entries.  Add generate.go.
	* Makefile.in: Rebuild.

From-SVN: r219627
2015-01-15 00:27:56 +00:00

456 lines
11 KiB
Go

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:generate go run makeisprint.go -output isprint.go
package strconv
import (
"unicode/utf8"
)
const lowerhex = "0123456789abcdef"
func quoteWith(s string, quote byte, ASCIIonly bool) string {
var runeTmp [utf8.UTFMax]byte
buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
buf = append(buf, quote)
for width := 0; len(s) > 0; s = s[width:] {
r := rune(s[0])
width = 1
if r >= utf8.RuneSelf {
r, width = utf8.DecodeRuneInString(s)
}
if width == 1 && r == utf8.RuneError {
buf = append(buf, `\x`...)
buf = append(buf, lowerhex[s[0]>>4])
buf = append(buf, lowerhex[s[0]&0xF])
continue
}
if r == rune(quote) || r == '\\' { // always backslashed
buf = append(buf, '\\')
buf = append(buf, byte(r))
continue
}
if ASCIIonly {
if r < utf8.RuneSelf && IsPrint(r) {
buf = append(buf, byte(r))
continue
}
} else if IsPrint(r) {
n := utf8.EncodeRune(runeTmp[:], r)
buf = append(buf, runeTmp[:n]...)
continue
}
switch r {
case '\a':
buf = append(buf, `\a`...)
case '\b':
buf = append(buf, `\b`...)
case '\f':
buf = append(buf, `\f`...)
case '\n':
buf = append(buf, `\n`...)
case '\r':
buf = append(buf, `\r`...)
case '\t':
buf = append(buf, `\t`...)
case '\v':
buf = append(buf, `\v`...)
default:
switch {
case r < ' ':
buf = append(buf, `\x`...)
buf = append(buf, lowerhex[s[0]>>4])
buf = append(buf, lowerhex[s[0]&0xF])
case r > utf8.MaxRune:
r = 0xFFFD
fallthrough
case r < 0x10000:
buf = append(buf, `\u`...)
for s := 12; s >= 0; s -= 4 {
buf = append(buf, lowerhex[r>>uint(s)&0xF])
}
default:
buf = append(buf, `\U`...)
for s := 28; s >= 0; s -= 4 {
buf = append(buf, lowerhex[r>>uint(s)&0xF])
}
}
}
}
buf = append(buf, quote)
return string(buf)
}
// Quote returns a double-quoted Go string literal representing s. The
// returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
// control characters and non-printable characters as defined by
// IsPrint.
func Quote(s string) string {
return quoteWith(s, '"', false)
}
// AppendQuote appends a double-quoted Go string literal representing s,
// as generated by Quote, to dst and returns the extended buffer.
func AppendQuote(dst []byte, s string) []byte {
return append(dst, Quote(s)...)
}
// QuoteToASCII returns a double-quoted Go string literal representing s.
// The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
// non-ASCII characters and non-printable characters as defined by IsPrint.
func QuoteToASCII(s string) string {
return quoteWith(s, '"', true)
}
// AppendQuoteToASCII appends a double-quoted Go string literal representing s,
// as generated by QuoteToASCII, to dst and returns the extended buffer.
func AppendQuoteToASCII(dst []byte, s string) []byte {
return append(dst, QuoteToASCII(s)...)
}
// QuoteRune returns a single-quoted Go character literal representing the
// rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100)
// for control characters and non-printable characters as defined by IsPrint.
func QuoteRune(r rune) string {
// TODO: avoid the allocation here.
return quoteWith(string(r), '\'', false)
}
// AppendQuoteRune appends a single-quoted Go character literal representing the rune,
// as generated by QuoteRune, to dst and returns the extended buffer.
func AppendQuoteRune(dst []byte, r rune) []byte {
return append(dst, QuoteRune(r)...)
}
// QuoteRuneToASCII returns a single-quoted Go character literal representing
// the rune. The returned string uses Go escape sequences (\t, \n, \xFF,
// \u0100) for non-ASCII characters and non-printable characters as defined
// by IsPrint.
func QuoteRuneToASCII(r rune) string {
// TODO: avoid the allocation here.
return quoteWith(string(r), '\'', true)
}
// AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune,
// as generated by QuoteRuneToASCII, to dst and returns the extended buffer.
func AppendQuoteRuneToASCII(dst []byte, r rune) []byte {
return append(dst, QuoteRuneToASCII(r)...)
}
// CanBackquote reports whether the string s can be represented
// unchanged as a single-line backquoted string without control
// characters other than tab.
func CanBackquote(s string) bool {
for len(s) > 0 {
r, wid := utf8.DecodeRuneInString(s)
s = s[wid:]
if wid > 1 {
if r == '\ufeff' {
return false // BOMs are invisible and should not be quoted.
}
continue // All other multibyte runes are correctly encoded and assumed printable.
}
if r == utf8.RuneError {
return false
}
if (r < ' ' && r != '\t') || r == '`' || r == '\u007F' {
return false
}
}
return true
}
func unhex(b byte) (v rune, ok bool) {
c := rune(b)
switch {
case '0' <= c && c <= '9':
return c - '0', true
case 'a' <= c && c <= 'f':
return c - 'a' + 10, true
case 'A' <= c && c <= 'F':
return c - 'A' + 10, true
}
return
}
// UnquoteChar decodes the first character or byte in the escaped string
// or character literal represented by the string s.
// It returns four values:
//
// 1) value, the decoded Unicode code point or byte value;
// 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
// 3) tail, the remainder of the string after the character; and
// 4) an error that will be nil if the character is syntactically valid.
//
// The second argument, quote, specifies the type of literal being parsed
// and therefore which escaped quote character is permitted.
// If set to a single quote, it permits the sequence \' and disallows unescaped '.
// If set to a double quote, it permits \" and disallows unescaped ".
// If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.
func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) {
// easy cases
switch c := s[0]; {
case c == quote && (quote == '\'' || quote == '"'):
err = ErrSyntax
return
case c >= utf8.RuneSelf:
r, size := utf8.DecodeRuneInString(s)
return r, true, s[size:], nil
case c != '\\':
return rune(s[0]), false, s[1:], nil
}
// hard case: c is backslash
if len(s) <= 1 {
err = ErrSyntax
return
}
c := s[1]
s = s[2:]
switch c {
case 'a':
value = '\a'
case 'b':
value = '\b'
case 'f':
value = '\f'
case 'n':
value = '\n'
case 'r':
value = '\r'
case 't':
value = '\t'
case 'v':
value = '\v'
case 'x', 'u', 'U':
n := 0
switch c {
case 'x':
n = 2
case 'u':
n = 4
case 'U':
n = 8
}
var v rune
if len(s) < n {
err = ErrSyntax
return
}
for j := 0; j < n; j++ {
x, ok := unhex(s[j])
if !ok {
err = ErrSyntax
return
}
v = v<<4 | x
}
s = s[n:]
if c == 'x' {
// single-byte string, possibly not UTF-8
value = v
break
}
if v > utf8.MaxRune {
err = ErrSyntax
return
}
value = v
multibyte = true
case '0', '1', '2', '3', '4', '5', '6', '7':
v := rune(c) - '0'
if len(s) < 2 {
err = ErrSyntax
return
}
for j := 0; j < 2; j++ { // one digit already; two more
x := rune(s[j]) - '0'
if x < 0 || x > 7 {
err = ErrSyntax
return
}
v = (v << 3) | x
}
s = s[2:]
if v > 255 {
err = ErrSyntax
return
}
value = v
case '\\':
value = '\\'
case '\'', '"':
if c != quote {
err = ErrSyntax
return
}
value = rune(c)
default:
err = ErrSyntax
return
}
tail = s
return
}
// Unquote interprets s as a single-quoted, double-quoted,
// or backquoted Go string literal, returning the string value
// that s quotes. (If s is single-quoted, it would be a Go
// character literal; Unquote returns the corresponding
// one-character string.)
func Unquote(s string) (t string, err error) {
n := len(s)
if n < 2 {
return "", ErrSyntax
}
quote := s[0]
if quote != s[n-1] {
return "", ErrSyntax
}
s = s[1 : n-1]
if quote == '`' {
if contains(s, '`') {
return "", ErrSyntax
}
return s, nil
}
if quote != '"' && quote != '\'' {
return "", ErrSyntax
}
if contains(s, '\n') {
return "", ErrSyntax
}
// Is it trivial? Avoid allocation.
if !contains(s, '\\') && !contains(s, quote) {
switch quote {
case '"':
return s, nil
case '\'':
r, size := utf8.DecodeRuneInString(s)
if size == len(s) && (r != utf8.RuneError || size != 1) {
return s, nil
}
}
}
var runeTmp [utf8.UTFMax]byte
buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
for len(s) > 0 {
c, multibyte, ss, err := UnquoteChar(s, quote)
if err != nil {
return "", err
}
s = ss
if c < utf8.RuneSelf || !multibyte {
buf = append(buf, byte(c))
} else {
n := utf8.EncodeRune(runeTmp[:], c)
buf = append(buf, runeTmp[:n]...)
}
if quote == '\'' && len(s) != 0 {
// single-quoted must be single character
return "", ErrSyntax
}
}
return string(buf), nil
}
// contains reports whether the string contains the byte c.
func contains(s string, c byte) bool {
for i := 0; i < len(s); i++ {
if s[i] == c {
return true
}
}
return false
}
// bsearch16 returns the smallest i such that a[i] >= x.
// If there is no such i, bsearch16 returns len(a).
func bsearch16(a []uint16, x uint16) int {
i, j := 0, len(a)
for i < j {
h := i + (j-i)/2
if a[h] < x {
i = h + 1
} else {
j = h
}
}
return i
}
// bsearch32 returns the smallest i such that a[i] >= x.
// If there is no such i, bsearch32 returns len(a).
func bsearch32(a []uint32, x uint32) int {
i, j := 0, len(a)
for i < j {
h := i + (j-i)/2
if a[h] < x {
i = h + 1
} else {
j = h
}
}
return i
}
// TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests
// to give the same answer. It allows this package not to depend on unicode,
// and therefore not pull in all the Unicode tables. If the linker were better
// at tossing unused tables, we could get rid of this implementation.
// That would be nice.
// IsPrint reports whether the rune is defined as printable by Go, with
// the same definition as unicode.IsPrint: letters, numbers, punctuation,
// symbols and ASCII space.
func IsPrint(r rune) bool {
// Fast check for Latin-1
if r <= 0xFF {
if 0x20 <= r && r <= 0x7E {
// All the ASCII is printable from space through DEL-1.
return true
}
if 0xA1 <= r && r <= 0xFF {
// Similarly for ¡ through ÿ...
return r != 0xAD // ...except for the bizarre soft hyphen.
}
return false
}
// Same algorithm, either on uint16 or uint32 value.
// First, find first i such that isPrint[i] >= x.
// This is the index of either the start or end of a pair that might span x.
// The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]).
// If we find x in a range, make sure x is not in isNotPrint list.
if 0 <= r && r < 1<<16 {
rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16
i := bsearch16(isPrint, rr)
if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
return false
}
j := bsearch16(isNotPrint, rr)
return j >= len(isNotPrint) || isNotPrint[j] != rr
}
rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32
i := bsearch32(isPrint, rr)
if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
return false
}
if r >= 0x20000 {
return true
}
r -= 0x10000
j := bsearch16(isNotPrint, uint16(r))
return j >= len(isNotPrint) || isNotPrint[j] != uint16(r)
}