libgo: Update Go library to master revision 15489/921e53d4863c.

From-SVN: r195560
Ian Lance Taylor 2013-01-29 20:52:43 +00:00
parent 91bfca5909
commit d6f2922e91
290 changed files with 61242 additions and 49576 deletions


@ -1,4 +1,4 @@
6fdc1974457c
921e53d4863c
The first line of this file holds the Mercurial revision number of the
last merge done from the master library sources.


@ -227,8 +227,8 @@ toolexeclibgoexp_DATA = \
$(exp_inotify_gox) \
exp/norm.gox \
exp/proxy.gox \
exp/ssa.gox \
exp/terminal.gox \
exp/types.gox \
exp/utf8string.gox
toolexeclibgoexphtmldir = $(toolexeclibgoexpdir)/html
@ -256,7 +256,8 @@ toolexeclibgogo_DATA = \
go/parser.gox \
go/printer.gox \
go/scanner.gox \
go/token.gox
go/token.gox \
go/types.gox
toolexeclibgohashdir = $(toolexeclibgodir)/hash
@ -682,7 +683,7 @@ go_net_fd_os_file = go/net/fd_linux.go
go_net_newpollserver_file = go/net/newpollserver_unix.go
else # !LIBGO_IS_LINUX && !LIBGO_IS_RTEMS
if LIBGO_IS_NETBSD
go_net_fd_os_file = go/net/fd_netbsd.go
go_net_fd_os_file = go/net/fd_bsd.go
go_net_newpollserver_file = go/net/newpollserver_unix.go
else # !LIBGO_IS_NETBSD && !LIBGO_IS_LINUX && !LIBGO_IS_RTEMS
# By default use select with pipes. Most systems should have
@ -753,9 +754,16 @@ go_net_interface_file = go/net/interface_stub.go
endif
endif
if LIBGO_IS_LINUX
go_net_cloexec_file = go/net/sock_cloexec.go
else
go_net_cloexec_file = go/net/sys_cloexec.go
endif
go_net_files = \
go/net/cgo_unix.go \
$(go_net_cgo_file) \
$(go_net_cloexec_file) \
go/net/dial.go \
go/net/dnsclient.go \
go/net/dnsclient_unix.go \
@ -856,6 +864,12 @@ endif
endif
endif
if LIBGO_IS_LINUX
go_os_pipe_file = go/os/pipe_linux.go
else
go_os_pipe_file = go/os/pipe_bsd.go
endif
go_os_files = \
$(go_os_dir_file) \
go/os/dir.go \
@ -872,6 +886,7 @@ go_os_files = \
go/os/getwd.go \
go/os/path.go \
go/os/path_unix.go \
$(go_os_pipe_file) \
go/os/proc.go \
$(go_os_stat_file) \
go/os/str.go \
@ -1026,6 +1041,7 @@ go_compress_bzip2_files = \
go_compress_flate_files = \
go/compress/flate/copy.go \
go/compress/flate/deflate.go \
go/compress/flate/fixedhuff.go \
go/compress/flate/huffman_bit_writer.go \
go/compress/flate/huffman_code.go \
go/compress/flate/inflate.go \
@ -1222,8 +1238,10 @@ go_exp_inotify_files = \
go_exp_locale_collate_files = \
go/exp/locale/collate/colelem.go \
go/exp/locale/collate/collate.go \
go/exp/locale/collate/colltab.go \
go/exp/locale/collate/contract.go \
go/exp/locale/collate/export.go \
go/exp/locale/collate/sort.go \
go/exp/locale/collate/table.go \
go/exp/locale/collate/tables.go \
go/exp/locale/collate/trie.go
@ -1248,23 +1266,18 @@ go_exp_proxy_files = \
go/exp/proxy/per_host.go \
go/exp/proxy/proxy.go \
go/exp/proxy/socks5.go
go_exp_ssa_files = \
go/exp/ssa/blockopt.go \
go/exp/ssa/doc.go \
go/exp/ssa/func.go \
go/exp/ssa/sanity.go \
go/exp/ssa/ssa.go \
go/exp/ssa/literal.go \
go/exp/ssa/print.go \
go/exp/ssa/util.go
go_exp_terminal_files = \
go/exp/terminal/terminal.go \
go/exp/terminal/util.go
go_exp_types_files = \
go/exp/types/builtins.go \
go/exp/types/check.go \
go/exp/types/const.go \
go/exp/types/conversions.go \
go/exp/types/errors.go \
go/exp/types/exportdata.go \
go/exp/types/expr.go \
go/exp/types/gcimporter.go \
go/exp/types/operand.go \
go/exp/types/predicates.go \
go/exp/types/stmt.go \
go/exp/types/types.go \
go/exp/types/universe.go
go_exp_utf8string_files = \
go/exp/utf8string/string.go
@ -1305,6 +1318,24 @@ go_go_token_files = \
go/go/token/position.go \
go/go/token/serialize.go \
go/go/token/token.go
go_go_types_files = \
go/go/types/api.go \
go/go/types/builtins.go \
go/go/types/check.go \
go/go/types/const.go \
go/go/types/conversions.go \
go/go/types/errors.go \
go/go/types/exportdata.go \
go/go/types/expr.go \
go/go/types/gcimporter.go \
go/go/types/objects.go \
go/go/types/operand.go \
go/go/types/predicates.go \
go/go/types/resolve.go \
go/go/types/scope.go \
go/go/types/stmt.go \
go/go/types/types.go \
go/go/types/universe.go
go_hash_adler32_files = \
go/hash/adler32/adler32.go
@ -1848,8 +1879,8 @@ libgo_go_objs = \
exp/locale/collate/build.lo \
exp/norm.lo \
exp/proxy.lo \
exp/ssa.lo \
exp/terminal.lo \
exp/types.lo \
exp/utf8string.lo \
html/template.lo \
go/ast.lo \
@ -1860,6 +1891,7 @@ libgo_go_objs = \
go/printer.lo \
go/scanner.lo \
go/token.lo \
go/types.lo \
hash/adler32.lo \
hash/crc32.lo \
hash/crc64.lo \
@ -2751,6 +2783,15 @@ exp/proxy/check: $(CHECK_DEPS)
@$(CHECK)
.PHONY: exp/proxy/check
@go_include@ exp/ssa.lo.dep
exp/ssa.lo.dep: $(go_exp_ssa_files)
$(BUILDDEPS)
exp/ssa.lo: $(go_exp_ssa_files)
$(BUILDPACKAGE)
exp/ssa/check: $(CHECK_DEPS)
@$(CHECK)
.PHONY: exp/ssa/check
@go_include@ exp/terminal.lo.dep
exp/terminal.lo.dep: $(go_exp_terminal_files)
$(BUILDDEPS)
@ -2760,15 +2801,6 @@ exp/terminal/check: $(CHECK_DEPS)
@$(CHECK)
.PHONY: exp/terminal/check
@go_include@ exp/types.lo.dep
exp/types.lo.dep: $(go_exp_types_files)
$(BUILDDEPS)
exp/types.lo: $(go_exp_types_files)
$(BUILDPACKAGE)
exp/types/check: $(CHECK_DEPS)
@$(CHECK)
.PHONY: exp/types/check
@go_include@ exp/utf8string.lo.dep
exp/utf8string.lo.dep: $(go_exp_utf8string_files)
$(BUILDDEPS)
@ -2877,6 +2909,15 @@ go/token/check: $(CHECK_DEPS)
@$(CHECK)
.PHONY: go/token/check
@go_include@ go/types.lo.dep
go/types.lo.dep: $(go_go_types_files)
$(BUILDDEPS)
go/types.lo: $(go_go_types_files)
$(BUILDPACKAGE)
go/types/check: $(CHECK_DEPS)
@$(CHECK)
.PHONY: go/types/check
@go_include@ hash/adler32.lo.dep
hash/adler32.lo.dep: $(go_hash_adler32_files)
$(BUILDDEPS)
@ -3507,9 +3548,9 @@ exp/norm.gox: exp/norm.lo
$(BUILDGOX)
exp/proxy.gox: exp/proxy.lo
$(BUILDGOX)
exp/terminal.gox: exp/terminal.lo
exp/ssa.gox: exp/ssa.lo
$(BUILDGOX)
exp/types.gox: exp/types.lo
exp/terminal.gox: exp/terminal.lo
$(BUILDGOX)
exp/utf8string.gox: exp/utf8string.lo
$(BUILDGOX)
@ -3533,6 +3574,8 @@ go/scanner.gox: go/scanner.lo
$(BUILDGOX)
go/token.gox: go/token.lo
$(BUILDGOX)
go/types.gox: go/types.lo
$(BUILDGOX)
hash/adler32.gox: hash/adler32.lo
$(BUILDGOX)
@ -3734,7 +3777,6 @@ TEST_PACKAGES = \
exp/norm/check \
exp/proxy/check \
exp/terminal/check \
exp/types/check \
exp/utf8string/check \
html/template/check \
go/ast/check \
@ -3745,7 +3787,7 @@ TEST_PACKAGES = \
go/printer/check \
go/scanner/check \
go/token/check \
$(go_types_check_omitted_since_it_calls_6g) \
go/types/check \
hash/adler32/check \
hash/crc32/check \
hash/crc64/check \


@ -156,14 +156,14 @@ am__DEPENDENCIES_2 = bufio.lo bytes.lo bytes/index.lo crypto.lo \
encoding/json.lo encoding/pem.lo encoding/xml.lo \
exp/cookiejar.lo exp/ebnf.lo exp/html.lo exp/html/atom.lo \
exp/locale/collate.lo exp/locale/collate/build.lo exp/norm.lo \
exp/proxy.lo exp/terminal.lo exp/types.lo exp/utf8string.lo \
exp/proxy.lo exp/ssa.lo exp/terminal.lo exp/utf8string.lo \
html/template.lo go/ast.lo go/build.lo go/doc.lo go/format.lo \
go/parser.lo go/printer.lo go/scanner.lo go/token.lo \
hash/adler32.lo hash/crc32.lo hash/crc64.lo hash/fnv.lo \
net/http/cgi.lo net/http/fcgi.lo net/http/httptest.lo \
net/http/httputil.lo net/http/pprof.lo image/color.lo \
image/draw.lo image/gif.lo image/jpeg.lo image/png.lo \
index/suffixarray.lo io/ioutil.lo log/syslog.lo \
go/types.lo hash/adler32.lo hash/crc32.lo hash/crc64.lo \
hash/fnv.lo net/http/cgi.lo net/http/fcgi.lo \
net/http/httptest.lo net/http/httputil.lo net/http/pprof.lo \
image/color.lo image/draw.lo image/gif.lo image/jpeg.lo \
image/png.lo index/suffixarray.lo io/ioutil.lo log/syslog.lo \
log/syslog/syslog_c.lo math/big.lo math/cmplx.lo math/rand.lo \
mime/multipart.lo net/http.lo net/mail.lo net/rpc.lo \
net/smtp.lo net/textproto.lo net/url.lo old/netchan.lo \
@ -617,8 +617,8 @@ toolexeclibgoexp_DATA = \
$(exp_inotify_gox) \
exp/norm.gox \
exp/proxy.gox \
exp/ssa.gox \
exp/terminal.gox \
exp/types.gox \
exp/utf8string.gox
toolexeclibgoexphtmldir = $(toolexeclibgoexpdir)/html
@ -642,7 +642,8 @@ toolexeclibgogo_DATA = \
go/parser.gox \
go/printer.gox \
go/scanner.gox \
go/token.gox
go/token.gox \
go/types.gox
toolexeclibgohashdir = $(toolexeclibgodir)/hash
toolexeclibgohash_DATA = \
@ -973,7 +974,7 @@ go_mime_files = \
# By default use select with pipes. Most systems should have
# something better.
@LIBGO_IS_LINUX_FALSE@@LIBGO_IS_NETBSD_FALSE@@LIBGO_IS_RTEMS_FALSE@go_net_fd_os_file = go/net/fd_select.go
@LIBGO_IS_LINUX_FALSE@@LIBGO_IS_NETBSD_TRUE@@LIBGO_IS_RTEMS_FALSE@go_net_fd_os_file = go/net/fd_netbsd.go
@LIBGO_IS_LINUX_FALSE@@LIBGO_IS_NETBSD_TRUE@@LIBGO_IS_RTEMS_FALSE@go_net_fd_os_file = go/net/fd_bsd.go
@LIBGO_IS_LINUX_TRUE@@LIBGO_IS_RTEMS_FALSE@go_net_fd_os_file = go/net/fd_linux.go
@LIBGO_IS_RTEMS_TRUE@go_net_fd_os_file = go/net/fd_select.go
@LIBGO_IS_LINUX_FALSE@@LIBGO_IS_NETBSD_FALSE@@LIBGO_IS_RTEMS_FALSE@go_net_newpollserver_file = go/net/newpollserver_unix.go
@ -1010,9 +1011,12 @@ go_mime_files = \
@LIBGO_IS_LINUX_FALSE@@LIBGO_IS_NETBSD_FALSE@go_net_interface_file = go/net/interface_stub.go
@LIBGO_IS_LINUX_FALSE@@LIBGO_IS_NETBSD_TRUE@go_net_interface_file = go/net/interface_netbsd.go
@LIBGO_IS_LINUX_TRUE@go_net_interface_file = go/net/interface_linux.go
@LIBGO_IS_LINUX_FALSE@go_net_cloexec_file = go/net/sys_cloexec.go
@LIBGO_IS_LINUX_TRUE@go_net_cloexec_file = go/net/sock_cloexec.go
go_net_files = \
go/net/cgo_unix.go \
$(go_net_cgo_file) \
$(go_net_cloexec_file) \
go/net/dial.go \
go/net/dnsclient.go \
go/net/dnsclient_unix.go \
@ -1068,6 +1072,8 @@ go_net_files = \
@LIBGO_IS_LINUX_FALSE@@LIBGO_IS_OPENBSD_TRUE@@LIBGO_IS_SOLARIS_FALSE@go_os_stat_file = go/os/stat_atim.go
@LIBGO_IS_LINUX_TRUE@@LIBGO_IS_SOLARIS_FALSE@go_os_stat_file = go/os/stat_atim.go
@LIBGO_IS_SOLARIS_TRUE@go_os_stat_file = go/os/stat_solaris.go
@LIBGO_IS_LINUX_FALSE@go_os_pipe_file = go/os/pipe_bsd.go
@LIBGO_IS_LINUX_TRUE@go_os_pipe_file = go/os/pipe_linux.go
go_os_files = \
$(go_os_dir_file) \
go/os/dir.go \
@ -1084,6 +1090,7 @@ go_os_files = \
go/os/getwd.go \
go/os/path.go \
go/os/path_unix.go \
$(go_os_pipe_file) \
go/os/proc.go \
$(go_os_stat_file) \
go/os/str.go \
@ -1208,6 +1215,7 @@ go_compress_bzip2_files = \
go_compress_flate_files = \
go/compress/flate/copy.go \
go/compress/flate/deflate.go \
go/compress/flate/fixedhuff.go \
go/compress/flate/huffman_bit_writer.go \
go/compress/flate/huffman_code.go \
go/compress/flate/inflate.go \
@ -1439,8 +1447,10 @@ go_exp_inotify_files = \
go_exp_locale_collate_files = \
go/exp/locale/collate/colelem.go \
go/exp/locale/collate/collate.go \
go/exp/locale/collate/colltab.go \
go/exp/locale/collate/contract.go \
go/exp/locale/collate/export.go \
go/exp/locale/collate/sort.go \
go/exp/locale/collate/table.go \
go/exp/locale/collate/tables.go \
go/exp/locale/collate/trie.go
@ -1469,25 +1479,20 @@ go_exp_proxy_files = \
go/exp/proxy/proxy.go \
go/exp/proxy/socks5.go
go_exp_ssa_files = \
go/exp/ssa/blockopt.go \
go/exp/ssa/doc.go \
go/exp/ssa/func.go \
go/exp/ssa/sanity.go \
go/exp/ssa/ssa.go \
go/exp/ssa/literal.go \
go/exp/ssa/print.go \
go/exp/ssa/util.go
go_exp_terminal_files = \
go/exp/terminal/terminal.go \
go/exp/terminal/util.go
go_exp_types_files = \
go/exp/types/builtins.go \
go/exp/types/check.go \
go/exp/types/const.go \
go/exp/types/conversions.go \
go/exp/types/errors.go \
go/exp/types/exportdata.go \
go/exp/types/expr.go \
go/exp/types/gcimporter.go \
go/exp/types/operand.go \
go/exp/types/predicates.go \
go/exp/types/stmt.go \
go/exp/types/types.go \
go/exp/types/universe.go
go_exp_utf8string_files = \
go/exp/utf8string/string.go
@ -1536,6 +1541,25 @@ go_go_token_files = \
go/go/token/serialize.go \
go/go/token/token.go
go_go_types_files = \
go/go/types/api.go \
go/go/types/builtins.go \
go/go/types/check.go \
go/go/types/const.go \
go/go/types/conversions.go \
go/go/types/errors.go \
go/go/types/exportdata.go \
go/go/types/expr.go \
go/go/types/gcimporter.go \
go/go/types/objects.go \
go/go/types/operand.go \
go/go/types/predicates.go \
go/go/types/resolve.go \
go/go/types/scope.go \
go/go/types/stmt.go \
go/go/types/types.go \
go/go/types/universe.go
go_hash_adler32_files = \
go/hash/adler32/adler32.go
@ -1976,8 +2000,8 @@ libgo_go_objs = \
exp/locale/collate/build.lo \
exp/norm.lo \
exp/proxy.lo \
exp/ssa.lo \
exp/terminal.lo \
exp/types.lo \
exp/utf8string.lo \
html/template.lo \
go/ast.lo \
@ -1988,6 +2012,7 @@ libgo_go_objs = \
go/printer.lo \
go/scanner.lo \
go/token.lo \
go/types.lo \
hash/adler32.lo \
hash/crc32.lo \
hash/crc64.lo \
@ -2233,7 +2258,6 @@ TEST_PACKAGES = \
exp/norm/check \
exp/proxy/check \
exp/terminal/check \
exp/types/check \
exp/utf8string/check \
html/template/check \
go/ast/check \
@ -2244,7 +2268,7 @@ TEST_PACKAGES = \
go/printer/check \
go/scanner/check \
go/token/check \
$(go_types_check_omitted_since_it_calls_6g) \
go/types/check \
hash/adler32/check \
hash/crc32/check \
hash/crc64/check \
@ -5201,6 +5225,15 @@ exp/proxy/check: $(CHECK_DEPS)
@$(CHECK)
.PHONY: exp/proxy/check
@go_include@ exp/ssa.lo.dep
exp/ssa.lo.dep: $(go_exp_ssa_files)
$(BUILDDEPS)
exp/ssa.lo: $(go_exp_ssa_files)
$(BUILDPACKAGE)
exp/ssa/check: $(CHECK_DEPS)
@$(CHECK)
.PHONY: exp/ssa/check
@go_include@ exp/terminal.lo.dep
exp/terminal.lo.dep: $(go_exp_terminal_files)
$(BUILDDEPS)
@ -5210,15 +5243,6 @@ exp/terminal/check: $(CHECK_DEPS)
@$(CHECK)
.PHONY: exp/terminal/check
@go_include@ exp/types.lo.dep
exp/types.lo.dep: $(go_exp_types_files)
$(BUILDDEPS)
exp/types.lo: $(go_exp_types_files)
$(BUILDPACKAGE)
exp/types/check: $(CHECK_DEPS)
@$(CHECK)
.PHONY: exp/types/check
@go_include@ exp/utf8string.lo.dep
exp/utf8string.lo.dep: $(go_exp_utf8string_files)
$(BUILDDEPS)
@ -5327,6 +5351,15 @@ go/token/check: $(CHECK_DEPS)
@$(CHECK)
.PHONY: go/token/check
@go_include@ go/types.lo.dep
go/types.lo.dep: $(go_go_types_files)
$(BUILDDEPS)
go/types.lo: $(go_go_types_files)
$(BUILDPACKAGE)
go/types/check: $(CHECK_DEPS)
@$(CHECK)
.PHONY: go/types/check
@go_include@ hash/adler32.lo.dep
hash/adler32.lo.dep: $(go_hash_adler32_files)
$(BUILDDEPS)
@ -5949,9 +5982,9 @@ exp/norm.gox: exp/norm.lo
$(BUILDGOX)
exp/proxy.gox: exp/proxy.lo
$(BUILDGOX)
exp/terminal.gox: exp/terminal.lo
exp/ssa.gox: exp/ssa.lo
$(BUILDGOX)
exp/types.gox: exp/types.lo
exp/terminal.gox: exp/terminal.lo
$(BUILDGOX)
exp/utf8string.gox: exp/utf8string.lo
$(BUILDGOX)
@ -5975,6 +6008,8 @@ go/scanner.gox: go/scanner.lo
$(BUILDGOX)
go/token.gox: go/token.lo
$(BUILDGOX)
go/types.gox: go/types.lo
$(BUILDGOX)
hash/adler32.gox: hash/adler32.lo
$(BUILDGOX)


@ -3,6 +3,9 @@
/* Define if building universal (internal helper macro) */
#undef AC_APPLE_UNIVERSAL_BUILD
/* Define to 1 if you have the `accept4' function. */
#undef HAVE_ACCEPT4
/* Define to 1 if you have the `acosl' function. */
#undef HAVE_ACOSL
@ -153,6 +156,9 @@
/* Define to 1 if you have the `openat' function. */
#undef HAVE_OPENAT
/* Define to 1 if you have the `pipe2' function. */
#undef HAVE_PIPE2
/* Define to 1 if you have the `renameat' function. */
#undef HAVE_RENAMEAT
@ -283,6 +289,9 @@
/* Define to 1 if you have the <ustat.h> header file and it works. */
#undef HAVE_USTAT_H
/* Define to 1 if you have the `utimensat' function. */
#undef HAVE_UTIMENSAT
/* Define to 1 if you have the <utime.h> header file. */
#undef HAVE_UTIME_H

libgo/configure

@ -14635,7 +14635,7 @@ else
fi
for ac_func in epoll_create1 faccessat fallocate fchmodat fchownat futimesat inotify_add_watch inotify_init inotify_init1 inotify_rm_watch mkdirat mknodat openat renameat sync_file_range splice tee unlinkat unshare
for ac_func in accept4 epoll_create1 faccessat fallocate fchmodat fchownat futimesat inotify_add_watch inotify_init inotify_init1 inotify_rm_watch mkdirat mknodat openat pipe2 renameat sync_file_range splice tee unlinkat unshare utimensat
do :
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"


@ -491,7 +491,7 @@ AC_CHECK_FUNCS(strerror_r strsignal wait4 mincore setenv dl_iterate_phdr)
AM_CONDITIONAL(HAVE_STRERROR_R, test "$ac_cv_func_strerror_r" = yes)
AM_CONDITIONAL(HAVE_WAIT4, test "$ac_cv_func_wait4" = yes)
AC_CHECK_FUNCS(epoll_create1 faccessat fallocate fchmodat fchownat futimesat inotify_add_watch inotify_init inotify_init1 inotify_rm_watch mkdirat mknodat openat renameat sync_file_range splice tee unlinkat unshare)
AC_CHECK_FUNCS(accept4 epoll_create1 faccessat fallocate fchmodat fchownat futimesat inotify_add_watch inotify_init inotify_init1 inotify_rm_watch mkdirat mknodat openat pipe2 renameat sync_file_range splice tee unlinkat unshare utimensat)
AC_TYPE_OFF_T
AC_CHECK_TYPES([loff_t])


@ -18,8 +18,7 @@ import (
func TestOver65kFiles(t *testing.T) {
if testing.Short() {
t.Logf("slow test; skipping")
return
t.Skip("slow test; skipping")
}
buf := new(bytes.Buffer)
w := NewWriter(buf)
@ -108,8 +107,7 @@ func TestFileHeaderRoundTrip64(t *testing.T) {
func TestZip64(t *testing.T) {
if testing.Short() {
t.Logf("slow test; skipping")
return
t.Skip("slow test; skipping")
}
// write 2^32 bytes plus "END\n" to a zip file
buf := new(bytes.Buffer)


@ -76,13 +76,13 @@ func (b *Reader) fill() {
}
// Read new data.
n, e := b.rd.Read(b.buf[b.w:])
n, err := b.rd.Read(b.buf[b.w:])
if n < 0 {
panic(errNegativeRead)
}
b.w += n
if e != nil {
b.err = e
if err != nil {
b.err = err
}
}
@ -379,8 +379,8 @@ func (b *Reader) ReadBytes(delim byte) (line []byte, err error) {
// ReadString returns err != nil if and only if the returned data does not end in
// delim.
func (b *Reader) ReadString(delim byte) (line string, err error) {
bytes, e := b.ReadBytes(delim)
return string(bytes), e
bytes, err := b.ReadBytes(delim)
return string(bytes), err
}
// WriteTo implements io.WriterTo.
@ -461,17 +461,17 @@ func (b *Writer) Flush() error {
if b.n == 0 {
return nil
}
n, e := b.wr.Write(b.buf[0:b.n])
if n < b.n && e == nil {
e = io.ErrShortWrite
n, err := b.wr.Write(b.buf[0:b.n])
if n < b.n && err == nil {
err = io.ErrShortWrite
}
if e != nil {
if err != nil {
if n > 0 && n < b.n {
copy(b.buf[0:b.n-n], b.buf[n:b.n])
}
b.n -= n
b.err = e
return e
b.err = err
return err
}
b.n = 0
return nil


@ -28,9 +28,9 @@ func newRot13Reader(r io.Reader) *rot13Reader {
}
func (r13 *rot13Reader) Read(p []byte) (int, error) {
n, e := r13.r.Read(p)
if e != nil {
return n, e
n, err := r13.r.Read(p)
if err != nil {
return n, err
}
for i := 0; i < n; i++ {
c := p[i] | 0x20 // lowercase byte
@ -48,15 +48,15 @@ func readBytes(buf *Reader) string {
var b [1000]byte
nb := 0
for {
c, e := buf.ReadByte()
if e == io.EOF {
c, err := buf.ReadByte()
if err == io.EOF {
break
}
if e == nil {
if err == nil {
b[nb] = c
nb++
} else if e != iotest.ErrTimeout {
panic("Data: " + e.Error())
} else if err != iotest.ErrTimeout {
panic("Data: " + err.Error())
}
}
return string(b[0:nb])
@ -93,12 +93,12 @@ var readMakers = []readMaker{
func readLines(b *Reader) string {
s := ""
for {
s1, e := b.ReadString('\n')
if e == io.EOF {
s1, err := b.ReadString('\n')
if err == io.EOF {
break
}
if e != nil && e != iotest.ErrTimeout {
panic("GetLines: " + e.Error())
if err != nil && err != iotest.ErrTimeout {
panic("GetLines: " + err.Error())
}
s += s1
}
@ -110,9 +110,9 @@ func reads(buf *Reader, m int) string {
var b [1000]byte
nb := 0
for {
n, e := buf.Read(b[nb : nb+m])
n, err := buf.Read(b[nb : nb+m])
nb += n
if e == io.EOF {
if err == io.EOF {
break
}
}
@ -748,7 +748,7 @@ func testReadLineNewlines(t *testing.T, input string, expect []readLineResult) {
b := NewReaderSize(strings.NewReader(input), minReadBufferSize)
for i, e := range expect {
line, isPrefix, err := b.ReadLine()
if bytes.Compare(line, e.line) != 0 {
if !bytes.Equal(line, e.line) {
t.Errorf("%q call %d, line == %q, want %q", input, i, line, e.line)
return
}


@ -367,7 +367,7 @@ func (b *Buffer) ReadBytes(delim byte) (line []byte, err error) {
return
}
// readSlice is like readBytes but returns a reference to internal buffer data.
// readSlice is like ReadBytes but returns a reference to internal buffer data.
func (b *Buffer) readSlice(delim byte) (line []byte, err error) {
i := IndexByte(b.buf[b.off:], delim)
end := b.off + i + 1
@ -377,6 +377,7 @@ func (b *Buffer) readSlice(delim byte) (line []byte, err error) {
}
line = b.buf[b.off:end]
b.off = end
b.lastRead = opRead
return line, err
}


@ -260,7 +260,7 @@ func TestWriteTo(t *testing.T) {
func TestRuneIO(t *testing.T) {
const NRune = 1000
// Built a test array while we write the data
// Built a test slice while we write the data
b := make([]byte, utf8.UTFMax*NRune)
var buf Buffer
n := 0
@ -453,3 +453,25 @@ func TestReadEmptyAtEOF(t *testing.T) {
t.Errorf("wrong count; got %d want 0", n)
}
}
func TestUnreadByte(t *testing.T) {
b := new(Buffer)
b.WriteString("abcdefghijklmnopqrstuvwxyz")
_, err := b.ReadBytes('m')
if err != nil {
t.Fatalf("ReadBytes: %v", err)
}
err = b.UnreadByte()
if err != nil {
t.Fatalf("UnreadByte: %v", err)
}
c, err := b.ReadByte()
if err != nil {
t.Fatalf("ReadByte: %v", err)
}
if c != 'm' {
t.Errorf("ReadByte = %q; want %q", c, 'm')
}
}


@ -11,8 +11,8 @@ import (
"unicode/utf8"
)
// Compare returns an integer comparing the two byte arrays lexicographically.
// The result will be 0 if a==b, -1 if a < b, and +1 if a > b
// Compare returns an integer comparing two byte slices lexicographically.
// The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
// A nil argument is equivalent to an empty slice.
func Compare(a, b []byte) int {
m := len(a)
@ -53,8 +53,8 @@ func equalPortable(a, b []byte) bool {
return true
}
// explode splits s into an array of UTF-8 sequences, one per Unicode character (still arrays of bytes),
// up to a maximum of n byte arrays. Invalid UTF-8 sequences are chopped into individual bytes.
// explode splits s into a slice of UTF-8 sequences, one per Unicode character (still slices of bytes),
// up to a maximum of n byte slices. Invalid UTF-8 sequences are chopped into individual bytes.
func explode(s []byte, n int) [][]byte {
if n <= 0 {
n = len(s)
@ -226,7 +226,7 @@ func LastIndexAny(s []byte, chars string) int {
}
// Generic split: splits after each instance of sep,
// including sepSave bytes of sep in the subarrays.
// including sepSave bytes of sep in the subslices.
func genSplit(s, sep []byte, sepSave, n int) [][]byte {
if n == 0 {
return nil
@ -287,15 +287,15 @@ func SplitAfter(s, sep []byte) [][]byte {
return genSplit(s, sep, len(sep), -1)
}
// Fields splits the array s around each instance of one or more consecutive white space
// characters, returning a slice of subarrays of s or an empty list if s contains only white space.
// Fields splits the slice s around each instance of one or more consecutive white space
// characters, returning a slice of subslices of s or an empty list if s contains only white space.
func Fields(s []byte) [][]byte {
return FieldsFunc(s, unicode.IsSpace)
}
// FieldsFunc interprets s as a sequence of UTF-8-encoded Unicode code points.
// It splits the array s at each run of code points c satisfying f(c) and
// returns a slice of subarrays of s. If no code points in s satisfy f(c), an
// It splits the slice s at each run of code points c satisfying f(c) and
// returns a slice of subslices of s. If no code points in s satisfy f(c), an
// empty slice is returned.
func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
n := 0
@ -333,46 +333,46 @@ func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
return a[0:na]
}
// Join concatenates the elements of a to create a new byte array. The separator
// sep is placed between elements in the resulting array.
func Join(a [][]byte, sep []byte) []byte {
if len(a) == 0 {
// Join concatenates the elements of s to create a new byte slice. The separator
// sep is placed between elements in the resulting slice.
func Join(s [][]byte, sep []byte) []byte {
if len(s) == 0 {
return []byte{}
}
if len(a) == 1 {
if len(s) == 1 {
// Just return a copy.
return append([]byte(nil), a[0]...)
return append([]byte(nil), s[0]...)
}
n := len(sep) * (len(a) - 1)
for i := 0; i < len(a); i++ {
n += len(a[i])
n := len(sep) * (len(s) - 1)
for _, v := range s {
n += len(v)
}
b := make([]byte, n)
bp := copy(b, a[0])
for _, s := range a[1:] {
bp := copy(b, s[0])
for _, v := range s[1:] {
bp += copy(b[bp:], sep)
bp += copy(b[bp:], s)
bp += copy(b[bp:], v)
}
return b
}
// HasPrefix tests whether the byte array s begins with prefix.
// HasPrefix tests whether the byte slice s begins with prefix.
func HasPrefix(s, prefix []byte) bool {
return len(s) >= len(prefix) && Equal(s[0:len(prefix)], prefix)
}
// HasSuffix tests whether the byte array s ends with suffix.
// HasSuffix tests whether the byte slice s ends with suffix.
func HasSuffix(s, suffix []byte) bool {
return len(s) >= len(suffix) && Equal(s[len(s)-len(suffix):], suffix)
}
// Map returns a copy of the byte array s with all its characters modified
// Map returns a copy of the byte slice s with all its characters modified
// according to the mapping function. If mapping returns a negative value, the character is
// dropped from the string with no replacement. The characters in s and the
// output are interpreted as UTF-8-encoded Unicode code points.
func Map(mapping func(r rune) rune, s []byte) []byte {
// In the worst case, the array can grow when mapped, making
// In the worst case, the slice can grow when mapped, making
// things unpleasant. But it's so rare we barge in assuming it's
// fine. It could also shrink but that falls out naturally.
maxbytes := len(s) // length of b
@ -413,28 +413,28 @@ func Repeat(b []byte, count int) []byte {
return nb
}
// ToUpper returns a copy of the byte array s with all Unicode letters mapped to their upper case.
// ToUpper returns a copy of the byte slice s with all Unicode letters mapped to their upper case.
func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) }
// ToLower returns a copy of the byte array s with all Unicode letters mapped to their lower case.
// ToLower returns a copy of the byte slice s with all Unicode letters mapped to their lower case.
func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) }
// ToTitle returns a copy of the byte array s with all Unicode letters mapped to their title case.
// ToTitle returns a copy of the byte slice s with all Unicode letters mapped to their title case.
func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) }
// ToUpperSpecial returns a copy of the byte array s with all Unicode letters mapped to their
// ToUpperSpecial returns a copy of the byte slice s with all Unicode letters mapped to their
// upper case, giving priority to the special casing rules.
func ToUpperSpecial(_case unicode.SpecialCase, s []byte) []byte {
return Map(func(r rune) rune { return _case.ToUpper(r) }, s)
}
// ToLowerSpecial returns a copy of the byte array s with all Unicode letters mapped to their
// ToLowerSpecial returns a copy of the byte slice s with all Unicode letters mapped to their
// lower case, giving priority to the special casing rules.
func ToLowerSpecial(_case unicode.SpecialCase, s []byte) []byte {
return Map(func(r rune) rune { return _case.ToLower(r) }, s)
}
// ToTitleSpecial returns a copy of the byte array s with all Unicode letters mapped to their
// ToTitleSpecial returns a copy of the byte slice s with all Unicode letters mapped to their
// title case, giving priority to the special casing rules.
func ToTitleSpecial(_case unicode.SpecialCase, s []byte) []byte {
return Map(func(r rune) rune { return _case.ToTitle(r) }, s)


@ -25,16 +25,16 @@ func eq(a, b []string) bool {
return true
}
func arrayOfString(a [][]byte) []string {
result := make([]string, len(a))
for j := 0; j < len(a); j++ {
result[j] = string(a[j])
func sliceOfString(s [][]byte) []string {
result := make([]string, len(s))
for i, v := range s {
result[i] = string(v)
}
return result
}
// For ease of reading, the test cases use strings that are converted to byte
// arrays before invoking the functions.
// slices before invoking the functions.
var abcd = "abcd"
var faces = "☺☻☹"
@ -435,7 +435,7 @@ var explodetests = []ExplodeTest{
func TestExplode(t *testing.T) {
for _, tt := range explodetests {
a := SplitN([]byte(tt.s), nil, tt.n)
result := arrayOfString(a)
result := sliceOfString(a)
if !eq(result, tt.a) {
t.Errorf(`Explode("%s", %d) = %v; want %v`, tt.s, tt.n, result, tt.a)
continue
@ -473,7 +473,7 @@ var splittests = []SplitTest{
func TestSplit(t *testing.T) {
for _, tt := range splittests {
a := SplitN([]byte(tt.s), []byte(tt.sep), tt.n)
result := arrayOfString(a)
result := sliceOfString(a)
if !eq(result, tt.a) {
t.Errorf(`Split(%q, %q, %d) = %v; want %v`, tt.s, tt.sep, tt.n, result, tt.a)
continue
@ -519,7 +519,7 @@ var splitaftertests = []SplitTest{
func TestSplitAfter(t *testing.T) {
for _, tt := range splitaftertests {
a := SplitAfterN([]byte(tt.s), []byte(tt.sep), tt.n)
result := arrayOfString(a)
result := sliceOfString(a)
if !eq(result, tt.a) {
t.Errorf(`Split(%q, %q, %d) = %v; want %v`, tt.s, tt.sep, tt.n, result, tt.a)
continue
@ -559,7 +559,7 @@ var fieldstests = []FieldsTest{
func TestFields(t *testing.T) {
for _, tt := range fieldstests {
a := Fields([]byte(tt.s))
result := arrayOfString(a)
result := sliceOfString(a)
if !eq(result, tt.a) {
t.Errorf("Fields(%q) = %v; want %v", tt.s, a, tt.a)
continue
@ -570,7 +570,7 @@ func TestFields(t *testing.T) {
func TestFieldsFunc(t *testing.T) {
for _, tt := range fieldstests {
a := FieldsFunc([]byte(tt.s), unicode.IsSpace)
result := arrayOfString(a)
result := sliceOfString(a)
if !eq(result, tt.a) {
t.Errorf("FieldsFunc(%q, unicode.IsSpace) = %v; want %v", tt.s, a, tt.a)
continue
@ -585,15 +585,15 @@ func TestFieldsFunc(t *testing.T) {
}
for _, tt := range fieldsFuncTests {
a := FieldsFunc([]byte(tt.s), pred)
result := arrayOfString(a)
result := sliceOfString(a)
if !eq(result, tt.a) {
t.Errorf("FieldsFunc(%q) = %v, want %v", tt.s, a, tt.a)
}
}
}
// Test case for any function which accepts and returns a byte array.
// For ease of creation, we write the byte arrays as strings.
// Test case for any function which accepts and returns a byte slice.
// For ease of creation, we write the byte slices as strings.
type StringTest struct {
in, out string
}


@ -10,6 +10,7 @@ import (
"fmt"
"io"
"os"
"sort"
)
func ExampleBuffer() {
@ -27,3 +28,41 @@ func ExampleBuffer_reader() {
io.Copy(os.Stdout, dec)
// Output: Gophers rule!
}
func ExampleCompare() {
// Interpret Compare's result by comparing it to zero.
var a, b []byte
if bytes.Compare(a, b) < 0 {
// a less b
}
if bytes.Compare(a, b) <= 0 {
// a less or equal b
}
if bytes.Compare(a, b) > 0 {
// a greater b
}
if bytes.Compare(a, b) >= 0 {
// a greater or equal b
}
// Prefer Equal to Compare for equality comparisons.
if bytes.Equal(a, b) {
// a equal b
}
if !bytes.Equal(a, b) {
// a not equal b
}
}
func ExampleCompare_search() {
// Binary search to find a matching byte slice.
var needle []byte
var haystack [][]byte // Assume sorted
i := sort.Search(len(haystack), func(i int) bool {
// Return haystack[i] >= needle.
return bytes.Compare(haystack[i], needle) >= 0
})
if i < len(haystack) && bytes.Equal(haystack[i], needle) {
// Found it!
}
}


@ -124,8 +124,7 @@ func (r *sparseReader) Read(b []byte) (n int, err error) {
func TestVeryLongSparseChunk(t *testing.T) {
if testing.Short() {
t.Logf("skipping sparse chunk during short test")
return
t.Skip("skipping sparse chunk during short test")
}
w, err := NewWriter(ioutil.Discard, 1)
if err != nil {


@ -0,0 +1,74 @@
package flate
// autogenerated by gen.go, DO NOT EDIT
var fixedHuffmanDecoder = huffmanDecoder{
7,
[huffmanNumChunks]uint32{
0x1007, 0x0508, 0x0108, 0x1188, 0x1107, 0x0708, 0x0308, 0x0c09,
0x1087, 0x0608, 0x0208, 0x0a09, 0x0008, 0x0808, 0x0408, 0x0e09,
0x1047, 0x0588, 0x0188, 0x0909, 0x1147, 0x0788, 0x0388, 0x0d09,
0x10c7, 0x0688, 0x0288, 0x0b09, 0x0088, 0x0888, 0x0488, 0x0f09,
0x1027, 0x0548, 0x0148, 0x11c8, 0x1127, 0x0748, 0x0348, 0x0c89,
0x10a7, 0x0648, 0x0248, 0x0a89, 0x0048, 0x0848, 0x0448, 0x0e89,
0x1067, 0x05c8, 0x01c8, 0x0989, 0x1167, 0x07c8, 0x03c8, 0x0d89,
0x10e7, 0x06c8, 0x02c8, 0x0b89, 0x00c8, 0x08c8, 0x04c8, 0x0f89,
0x1017, 0x0528, 0x0128, 0x11a8, 0x1117, 0x0728, 0x0328, 0x0c49,
0x1097, 0x0628, 0x0228, 0x0a49, 0x0028, 0x0828, 0x0428, 0x0e49,
0x1057, 0x05a8, 0x01a8, 0x0949, 0x1157, 0x07a8, 0x03a8, 0x0d49,
0x10d7, 0x06a8, 0x02a8, 0x0b49, 0x00a8, 0x08a8, 0x04a8, 0x0f49,
0x1037, 0x0568, 0x0168, 0x11e8, 0x1137, 0x0768, 0x0368, 0x0cc9,
0x10b7, 0x0668, 0x0268, 0x0ac9, 0x0068, 0x0868, 0x0468, 0x0ec9,
0x1077, 0x05e8, 0x01e8, 0x09c9, 0x1177, 0x07e8, 0x03e8, 0x0dc9,
0x10f7, 0x06e8, 0x02e8, 0x0bc9, 0x00e8, 0x08e8, 0x04e8, 0x0fc9,
0x1007, 0x0518, 0x0118, 0x1198, 0x1107, 0x0718, 0x0318, 0x0c29,
0x1087, 0x0618, 0x0218, 0x0a29, 0x0018, 0x0818, 0x0418, 0x0e29,
0x1047, 0x0598, 0x0198, 0x0929, 0x1147, 0x0798, 0x0398, 0x0d29,
0x10c7, 0x0698, 0x0298, 0x0b29, 0x0098, 0x0898, 0x0498, 0x0f29,
0x1027, 0x0558, 0x0158, 0x11d8, 0x1127, 0x0758, 0x0358, 0x0ca9,
0x10a7, 0x0658, 0x0258, 0x0aa9, 0x0058, 0x0858, 0x0458, 0x0ea9,
0x1067, 0x05d8, 0x01d8, 0x09a9, 0x1167, 0x07d8, 0x03d8, 0x0da9,
0x10e7, 0x06d8, 0x02d8, 0x0ba9, 0x00d8, 0x08d8, 0x04d8, 0x0fa9,
0x1017, 0x0538, 0x0138, 0x11b8, 0x1117, 0x0738, 0x0338, 0x0c69,
0x1097, 0x0638, 0x0238, 0x0a69, 0x0038, 0x0838, 0x0438, 0x0e69,
0x1057, 0x05b8, 0x01b8, 0x0969, 0x1157, 0x07b8, 0x03b8, 0x0d69,
0x10d7, 0x06b8, 0x02b8, 0x0b69, 0x00b8, 0x08b8, 0x04b8, 0x0f69,
0x1037, 0x0578, 0x0178, 0x11f8, 0x1137, 0x0778, 0x0378, 0x0ce9,
0x10b7, 0x0678, 0x0278, 0x0ae9, 0x0078, 0x0878, 0x0478, 0x0ee9,
0x1077, 0x05f8, 0x01f8, 0x09e9, 0x1177, 0x07f8, 0x03f8, 0x0de9,
0x10f7, 0x06f8, 0x02f8, 0x0be9, 0x00f8, 0x08f8, 0x04f8, 0x0fe9,
0x1007, 0x0508, 0x0108, 0x1188, 0x1107, 0x0708, 0x0308, 0x0c19,
0x1087, 0x0608, 0x0208, 0x0a19, 0x0008, 0x0808, 0x0408, 0x0e19,
0x1047, 0x0588, 0x0188, 0x0919, 0x1147, 0x0788, 0x0388, 0x0d19,
0x10c7, 0x0688, 0x0288, 0x0b19, 0x0088, 0x0888, 0x0488, 0x0f19,
0x1027, 0x0548, 0x0148, 0x11c8, 0x1127, 0x0748, 0x0348, 0x0c99,
0x10a7, 0x0648, 0x0248, 0x0a99, 0x0048, 0x0848, 0x0448, 0x0e99,
0x1067, 0x05c8, 0x01c8, 0x0999, 0x1167, 0x07c8, 0x03c8, 0x0d99,
0x10e7, 0x06c8, 0x02c8, 0x0b99, 0x00c8, 0x08c8, 0x04c8, 0x0f99,
0x1017, 0x0528, 0x0128, 0x11a8, 0x1117, 0x0728, 0x0328, 0x0c59,
0x1097, 0x0628, 0x0228, 0x0a59, 0x0028, 0x0828, 0x0428, 0x0e59,
0x1057, 0x05a8, 0x01a8, 0x0959, 0x1157, 0x07a8, 0x03a8, 0x0d59,
0x10d7, 0x06a8, 0x02a8, 0x0b59, 0x00a8, 0x08a8, 0x04a8, 0x0f59,
0x1037, 0x0568, 0x0168, 0x11e8, 0x1137, 0x0768, 0x0368, 0x0cd9,
0x10b7, 0x0668, 0x0268, 0x0ad9, 0x0068, 0x0868, 0x0468, 0x0ed9,
0x1077, 0x05e8, 0x01e8, 0x09d9, 0x1177, 0x07e8, 0x03e8, 0x0dd9,
0x10f7, 0x06e8, 0x02e8, 0x0bd9, 0x00e8, 0x08e8, 0x04e8, 0x0fd9,
0x1007, 0x0518, 0x0118, 0x1198, 0x1107, 0x0718, 0x0318, 0x0c39,
0x1087, 0x0618, 0x0218, 0x0a39, 0x0018, 0x0818, 0x0418, 0x0e39,
0x1047, 0x0598, 0x0198, 0x0939, 0x1147, 0x0798, 0x0398, 0x0d39,
0x10c7, 0x0698, 0x0298, 0x0b39, 0x0098, 0x0898, 0x0498, 0x0f39,
0x1027, 0x0558, 0x0158, 0x11d8, 0x1127, 0x0758, 0x0358, 0x0cb9,
0x10a7, 0x0658, 0x0258, 0x0ab9, 0x0058, 0x0858, 0x0458, 0x0eb9,
0x1067, 0x05d8, 0x01d8, 0x09b9, 0x1167, 0x07d8, 0x03d8, 0x0db9,
0x10e7, 0x06d8, 0x02d8, 0x0bb9, 0x00d8, 0x08d8, 0x04d8, 0x0fb9,
0x1017, 0x0538, 0x0138, 0x11b8, 0x1117, 0x0738, 0x0338, 0x0c79,
0x1097, 0x0638, 0x0238, 0x0a79, 0x0038, 0x0838, 0x0438, 0x0e79,
0x1057, 0x05b8, 0x01b8, 0x0979, 0x1157, 0x07b8, 0x03b8, 0x0d79,
0x10d7, 0x06b8, 0x02b8, 0x0b79, 0x00b8, 0x08b8, 0x04b8, 0x0f79,
0x1037, 0x0578, 0x0178, 0x11f8, 0x1137, 0x0778, 0x0378, 0x0cf9,
0x10b7, 0x0678, 0x0278, 0x0af9, 0x0078, 0x0878, 0x0478, 0x0ef9,
0x1077, 0x05f8, 0x01f8, 0x09f9, 0x1177, 0x07f8, 0x03f8, 0x0df9,
0x10f7, 0x06f8, 0x02f8, 0x0bf9, 0x00f8, 0x08f8, 0x04f8, 0x0ff9,
},
nil, 0,
}


@ -10,122 +10,9 @@ package flate
import (
"bytes"
"reflect"
"testing"
)
// The Huffman code lengths used by the fixed-format Huffman blocks.
var fixedHuffmanBits = [...]int{
// 0-143 length 8
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
// 144-255 length 9
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
// 256-279 length 7
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7,
// 280-287 length 8
8, 8, 8, 8, 8, 8, 8, 8,
}
type InitDecoderTest struct {
in []int
out huffmanDecoder
ok bool
}
var initDecoderTests = []*InitDecoderTest{
// Example from Connell 1973,
{
[]int{3, 5, 2, 4, 3, 5, 5, 4, 4, 3, 4, 5},
huffmanDecoder{
2, 5,
[maxCodeLen + 1]int{2: 0, 4, 13, 31},
[maxCodeLen + 1]int{2: 0, 1, 6, 20},
// Paper used different code assignment:
// 2, 9, 4, 0, 10, 8, 3, 7, 1, 5, 11, 6
// Reordered here so that codes of same length
// are assigned to increasing numbers.
[]int{2, 0, 4, 9, 3, 7, 8, 10, 1, 5, 6, 11},
},
true,
},
// Example from RFC 1951 section 3.2.2
{
[]int{2, 1, 3, 3},
huffmanDecoder{
1, 3,
[maxCodeLen + 1]int{1: 0, 2, 7},
[maxCodeLen + 1]int{1: 0, 1, 4},
[]int{1, 0, 2, 3},
},
true,
},
// Second example from RFC 1951 section 3.2.2
{
[]int{3, 3, 3, 3, 3, 2, 4, 4},
huffmanDecoder{
2, 4,
[maxCodeLen + 1]int{2: 0, 6, 15},
[maxCodeLen + 1]int{2: 0, 1, 8},
[]int{5, 0, 1, 2, 3, 4, 6, 7},
},
true,
},
// Static Huffman codes (RFC 1951 section 3.2.6)
{
fixedHuffmanBits[0:],
fixedHuffmanDecoder,
true,
},
// Illegal input.
{
[]int{},
huffmanDecoder{},
false,
},
// Illegal input.
{
[]int{0, 0, 0, 0, 0, 0, 0},
huffmanDecoder{},
false,
},
}
func TestInitDecoder(t *testing.T) {
for i, tt := range initDecoderTests {
var h huffmanDecoder
if h.init(tt.in) != tt.ok {
t.Errorf("test %d: init = %v", i, !tt.ok)
continue
}
if !reflect.DeepEqual(&h, &tt.out) {
t.Errorf("test %d:\nhave %v\nwant %v", i, h, tt.out)
}
}
}
func TestUncompressedSource(t *testing.T) {
decoder := NewReader(bytes.NewBuffer([]byte{0x01, 0x01, 0x00, 0xfe, 0xff, 0x11}))
output := make([]byte, 1)


@ -0,0 +1,165 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
// This program generates fixedhuff.go
// Invoke as
//
// go run gen.go |gofmt >fixedhuff.go
package main
import (
"fmt"
)
const maxCodeLen = 16
// Note: the definition of the huffmanDecoder struct is copied from
// inflate.go, as it is private to the implementation.
// chunk & 15 is number of bits
// chunk >> 4 is value, including table link
const (
huffmanChunkBits = 9
huffmanNumChunks = 1 << huffmanChunkBits
huffmanCountMask = 15
huffmanValueShift = 4
)
type huffmanDecoder struct {
min int // the minimum code length
chunks [huffmanNumChunks]uint32 // chunks as described above
links [][]uint32 // overflow links
linkMask uint32 // mask the width of the link table
}
// Initialize Huffman decoding tables from array of code lengths.
func (h *huffmanDecoder) init(bits []int) bool {
// Count number of codes of each length,
// compute min and max length.
var count [maxCodeLen]int
var min, max int
for _, n := range bits {
if n == 0 {
continue
}
if min == 0 || n < min {
min = n
}
if n > max {
max = n
}
count[n]++
}
if max == 0 {
return false
}
h.min = min
var linkBits uint
var numLinks int
if max > huffmanChunkBits {
linkBits = uint(max) - huffmanChunkBits
numLinks = 1 << linkBits
h.linkMask = uint32(numLinks - 1)
}
code := 0
var nextcode [maxCodeLen]int
for i := min; i <= max; i++ {
if i == huffmanChunkBits+1 {
// create link tables
link := code >> 1
h.links = make([][]uint32, huffmanNumChunks-link)
for j := uint(link); j < huffmanNumChunks; j++ {
reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8
reverse >>= uint(16 - huffmanChunkBits)
off := j - uint(link)
h.chunks[reverse] = uint32(off<<huffmanValueShift + uint(i))
h.links[off] = make([]uint32, 1<<linkBits)
}
}
n := count[i]
nextcode[i] = code
code += n
code <<= 1
}
for i, n := range bits {
if n == 0 {
continue
}
code := nextcode[n]
nextcode[n]++
chunk := uint32(i<<huffmanValueShift | n)
reverse := int(reverseByte[code>>8]) | int(reverseByte[code&0xff])<<8
reverse >>= uint(16 - n)
if n <= huffmanChunkBits {
for off := reverse; off < huffmanNumChunks; off += 1 << uint(n) {
h.chunks[off] = chunk
}
} else {
linktab := h.links[h.chunks[reverse&(huffmanNumChunks-1)]>>huffmanValueShift]
reverse >>= huffmanChunkBits
for off := reverse; off < numLinks; off += 1 << uint(n-huffmanChunkBits) {
linktab[off] = chunk
}
}
}
return true
}
func main() {
var h huffmanDecoder
var bits [288]int
initReverseByte()
for i := 0; i < 144; i++ {
bits[i] = 8
}
for i := 144; i < 256; i++ {
bits[i] = 9
}
for i := 256; i < 280; i++ {
bits[i] = 7
}
for i := 280; i < 288; i++ {
bits[i] = 8
}
h.init(bits[:])
fmt.Println("package flate")
fmt.Println()
fmt.Println("// autogenerated by gen.go, DO NOT EDIT")
fmt.Println()
fmt.Println("var fixedHuffmanDecoder = huffmanDecoder{")
fmt.Printf("\t%d,\n", h.min)
fmt.Println("\t[huffmanNumChunks]uint32{")
for i := 0; i < huffmanNumChunks; i++ {
if i&7 == 0 {
fmt.Printf("\t\t")
} else {
fmt.Printf(" ")
}
fmt.Printf("0x%04x,", h.chunks[i])
if i&7 == 7 {
fmt.Println()
}
}
fmt.Println("\t},")
fmt.Println("\tnil, 0,")
fmt.Println("}")
}
var reverseByte [256]byte
func initReverseByte() {
for x := 0; x < 256; x++ {
var result byte
for i := uint(0); i < 8; i++ {
result |= byte(((x >> i) & 1) << (7 - i))
}
reverseByte[x] = result
}
}


@ -54,32 +54,46 @@ func (e *WriteError) Error() string {
return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
}
// Huffman decoder is based on
// J. Brian Connell, ``A Huffman-Shannon-Fano Code,''
// Proceedings of the IEEE, 61(7) (July 1973), pp 1046-1047.
// Note that much of the implementation of huffmanDecoder is also copied
// into gen.go (in package main) for the purpose of precomputing the
// fixed huffman tables so they can be included statically.
// The data structure for decoding Huffman tables is based on that of
// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits).
// For codes smaller than the table width, there are multiple entries
// (each combination of trailing bits has the same value). For codes
// larger than the table width, the table contains a link to an overflow
// table. The width of each entry in the link table is the maximum code
// size minus the chunk width.
// Note that you can do a lookup in the table even without all bits
// filled. Since the extra bits are zero, and the DEFLATE Huffman codes
// have the property that shorter codes come before longer ones, the
// bit length estimate in the result is a lower bound on the actual
// number of bits.
// chunk & 15 is number of bits
// chunk >> 4 is value, including table link
const (
huffmanChunkBits = 9
huffmanNumChunks = 1 << huffmanChunkBits
huffmanCountMask = 15
huffmanValueShift = 4
)
type huffmanDecoder struct {
// min, max code length
min, max int
// limit[i] = largest code word of length i
// Given code v of length n,
// need more bits if v > limit[n].
limit [maxCodeLen + 1]int
// base[i] = smallest code word of length i - seq number
base [maxCodeLen + 1]int
// codes[seq number] = output code.
// Given code v of length n, value is
// codes[v - base[n]].
codes []int
min int // the minimum code length
chunks [huffmanNumChunks]uint32 // chunks as described above
links [][]uint32 // overflow links
linkMask uint32 // mask the width of the link table
}
// Initialize Huffman decoding tables from array of code lengths.
func (h *huffmanDecoder) init(bits []int) bool {
// Count number of codes of each length,
// compute min and max length.
var count [maxCodeLen + 1]int
var count [maxCodeLen]int
var min, max int
for _, n := range bits {
if n == 0 {
@ -98,93 +112,58 @@ func (h *huffmanDecoder) init(bits []int) bool {
}
h.min = min
h.max = max
// For each code range, compute
// nextcode (first code of that length),
// limit (last code of that length), and
// base (offset from first code to sequence number).
var linkBits uint
var numLinks int
if max > huffmanChunkBits {
linkBits = uint(max) - huffmanChunkBits
numLinks = 1 << linkBits
h.linkMask = uint32(numLinks - 1)
}
code := 0
seq := 0
var nextcode [maxCodeLen]int
for i := min; i <= max; i++ {
if i == huffmanChunkBits+1 {
// create link tables
link := code >> 1
h.links = make([][]uint32, huffmanNumChunks-link)
for j := uint(link); j < huffmanNumChunks; j++ {
reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8
reverse >>= uint(16 - huffmanChunkBits)
off := j - uint(link)
h.chunks[reverse] = uint32(off<<huffmanValueShift + uint(i))
h.links[off] = make([]uint32, 1<<linkBits)
}
}
n := count[i]
nextcode[i] = code
h.base[i] = code - seq
code += n
seq += n
h.limit[i] = code - 1
code <<= 1
}
// Make array mapping sequence numbers to codes.
if len(h.codes) < len(bits) {
h.codes = make([]int, len(bits))
}
for i, n := range bits {
if n == 0 {
continue
}
code := nextcode[n]
nextcode[n]++
seq := code - h.base[n]
h.codes[seq] = i
chunk := uint32(i<<huffmanValueShift | n)
reverse := int(reverseByte[code>>8]) | int(reverseByte[code&0xff])<<8
reverse >>= uint(16 - n)
if n <= huffmanChunkBits {
for off := reverse; off < huffmanNumChunks; off += 1 << uint(n) {
h.chunks[off] = chunk
}
} else {
linktab := h.links[h.chunks[reverse&(huffmanNumChunks-1)]>>huffmanValueShift]
reverse >>= huffmanChunkBits
for off := reverse; off < numLinks; off += 1 << uint(n-huffmanChunkBits) {
linktab[off] = chunk
}
}
}
return true
}
// Hard-coded Huffman tables for DEFLATE algorithm.
// See RFC 1951, section 3.2.6.
var fixedHuffmanDecoder = huffmanDecoder{
7, 9,
[maxCodeLen + 1]int{7: 23, 199, 511},
[maxCodeLen + 1]int{7: 0, 24, 224},
[]int{
// length 7: 256-279
256, 257, 258, 259, 260, 261, 262,
263, 264, 265, 266, 267, 268, 269,
270, 271, 272, 273, 274, 275, 276,
277, 278, 279,
// length 8: 0-143
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
92, 93, 94, 95, 96, 97, 98, 99, 100,
101, 102, 103, 104, 105, 106, 107, 108,
109, 110, 111, 112, 113, 114, 115, 116,
117, 118, 119, 120, 121, 122, 123, 124,
125, 126, 127, 128, 129, 130, 131, 132,
133, 134, 135, 136, 137, 138, 139, 140,
141, 142, 143,
// length 8: 280-287
280, 281, 282, 283, 284, 285, 286, 287,
// length 9: 144-255
144, 145, 146, 147, 148, 149, 150, 151,
152, 153, 154, 155, 156, 157, 158, 159,
160, 161, 162, 163, 164, 165, 166, 167,
168, 169, 170, 171, 172, 173, 174, 175,
176, 177, 178, 179, 180, 181, 182, 183,
184, 185, 186, 187, 188, 189, 190, 191,
192, 193, 194, 195, 196, 197, 198, 199,
200, 201, 202, 203, 204, 205, 206, 207,
208, 209, 210, 211, 212, 213, 214, 215,
216, 217, 218, 219, 220, 221, 222, 223,
224, 225, 226, 227, 228, 229, 230, 231,
232, 233, 234, 235, 236, 237, 238, 239,
240, 241, 242, 243, 244, 245, 246, 247,
248, 249, 250, 251, 252, 253, 254, 255,
},
}
// The actual read interface needed by NewReader.
// If the passed in io.Reader does not also have ReadByte,
// the NewReader will introduce its own buffering.
@ -644,23 +623,23 @@ func (f *decompressor) moreBits() error {
// Read the next Huffman-encoded symbol from f according to h.
func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
for n := uint(h.min); n <= uint(h.max); n++ {
lim := h.limit[n]
if lim == -1 {
continue
}
n := uint(h.min)
for {
for f.nb < n {
if err := f.moreBits(); err != nil {
return 0, err
}
}
v := int(f.b & uint32(1<<n-1))
v <<= 16 - n
v = int(reverseByte[v>>8]) | int(reverseByte[v&0xFF])<<8 // reverse bits
if v <= lim {
chunk := h.chunks[f.b&(huffmanNumChunks-1)]
n = uint(chunk & huffmanCountMask)
if n > huffmanChunkBits {
chunk = h.links[chunk>>huffmanValueShift][(f.b>>huffmanChunkBits)&h.linkMask]
n = uint(chunk & huffmanCountMask)
}
if n <= f.nb {
f.b >>= n
f.nb -= n
return h.codes[v-h.base[n]], nil
return int(chunk >> huffmanValueShift), nil
}
}
return 0, CorruptInputError(f.roffset)


@ -13,7 +13,7 @@ import (
// A writer is a buffered, flushable writer.
type writer interface {
WriteByte(byte) error
io.ByteWriter
Flush() error
}


@ -108,6 +108,8 @@ func (l *List) insertValue(v interface{}, at *Element) *Element {
func (l *List) remove(e *Element) *Element {
e.prev.next = e.next
e.next.prev = e.prev
e.next = nil // avoid memory leaks
e.prev = nil // avoid memory leaks
e.list = nil
l.len--
return e
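Why the two nil assignments matter: a removed element that still points at its old neighbors keeps them reachable. A hedged demo sketch (hypothetical, not part of the commit):

package main

import "container/list"

// drain removes every element but retains the first removed element.
// Without the nil assignments in remove, that element would still point
// at its former neighbor, which points at the next one, and so on, so a
// single retained element would pin every removed element for the
// garbage collector. With next/prev cleared, only it stays reachable.
func drain(l *list.List) *list.Element {
    first := l.Front()
    for l.Len() > 0 {
        l.Remove(l.Front())
    }
    return first
}

func main() {
    l := list.New()
    for i := 0; i < 1000; i++ {
        l.PushBack(make([]byte, 1024))
    }
    _ = drain(l)
}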


@ -28,13 +28,10 @@ func (r StreamReader) Read(dst []byte) (n int, err error) {
type StreamWriter struct {
S Stream
W io.Writer
Err error
Err error // unused
}
func (w StreamWriter) Write(src []byte) (n int, err error) {
if w.Err != nil {
return 0, w.Err
}
c := make([]byte, len(src))
w.S.XORKeyStream(c, src)
n, err = w.W.Write(c)
@ -42,7 +39,6 @@ func (w StreamWriter) Write(src []byte) (n int, err error) {
if err == nil { // should never happen
err = io.ErrShortWrite
}
w.Err = err
}
return
}
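With the Err field no longer consulted, failures surface only through Write's return value. A minimal usage sketch of cipher.StreamWriter (AES-CTR with a throwaway zero key and IV, demo only):

package main

import (
    "crypto/aes"
    "crypto/cipher"
    "io/ioutil"
)

func main() {
    block, err := aes.NewCipher(make([]byte, 16)) // zero key, demo only
    if err != nil {
        panic(err)
    }
    stream := cipher.NewCTR(block, make([]byte, aes.BlockSize)) // zero IV, demo only
    w := cipher.StreamWriter{S: stream, W: ioutil.Discard}
    // Check the error returned by Write; StreamWriter no longer records it in Err.
    if _, err := w.Write([]byte("hello")); err != nil {
        panic(err)
    }
}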


@ -1503,3 +1503,21 @@ func TestSubstitutionTableKnownAnswerDecrypt(t *testing.T) {
}
}
}
func ExampleNewTripleDESCipher() {
// NewTripleDESCipher can also be used when EDE2 is required by
// duplicating the first 8 bytes of the 16-byte key.
ede2Key := []byte("example key 1234")
var tripleDESKey []byte
tripleDESKey = append(tripleDESKey, ede2Key[:16]...)
tripleDESKey = append(tripleDESKey, ede2Key[:8]...)
_, err := NewTripleDESCipher(tripleDESKey)
if err != nil {
panic(err)
}
// See crypto/cipher for how to use a cipher.Block for encryption and
// decryption.
}


@ -10,6 +10,21 @@ import (
"math/big"
)
// smallPrimes is a list of small, prime numbers that allows us to rapidly
// exclude some fraction of composite candidates when searching for a random
// prime. This list is truncated at the point where smallPrimesProduct exceeds
// a uint64. It does not include two because we ensure that the candidates are
// odd by construction.
var smallPrimes = []uint8{
3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53,
}
// smallPrimesProduct is the product of the values in smallPrimes and allows us
// to reduce a candidate prime by this number and then determine whether it's
// coprime to all the elements of smallPrimes without further big.Int
// operations.
var smallPrimesProduct = new(big.Int).SetUint64(16294579238595022365)
// Prime returns a number, p, of the given size, such that p is prime
// with high probability.
func Prime(rand io.Reader, bits int) (p *big.Int, err error) {
@ -25,6 +40,8 @@ func Prime(rand io.Reader, bits int) (p *big.Int, err error) {
bytes := make([]byte, (bits+7)/8)
p = new(big.Int)
bigMod := new(big.Int)
for {
_, err = io.ReadFull(rand, bytes)
if err != nil {
@ -33,13 +50,51 @@ func Prime(rand io.Reader, bits int) (p *big.Int, err error) {
// Clear bits in the first byte to make sure the candidate has a size <= bits.
bytes[0] &= uint8(int(1<<b) - 1)
// Don't let the value be too small, i.e., set the most significant bit.
bytes[0] |= 1 << (b - 1)
// Don't let the value be too small, i.e., set the most significant two bits.
// Setting the top two bits, rather than just the top bit,
// means that when two of these values are multiplied together,
// the result isn't ever one bit short.
if b >= 2 {
bytes[0] |= 3 << (b - 2)
} else {
// Here b==1, because b cannot be zero.
bytes[0] |= 1
if len(bytes) > 1 {
bytes[1] |= 0x80
}
}
// Make the value odd since an even number this large certainly isn't prime.
bytes[len(bytes)-1] |= 1
p.SetBytes(bytes)
if p.ProbablyPrime(20) {
// Calculate the value mod the product of smallPrimes. If it's
// a multiple of any of these primes we add two until it isn't.
// The probability of overflowing is minimal and can be ignored
// because we still perform Miller-Rabin tests on the result.
bigMod.Mod(p, smallPrimesProduct)
mod := bigMod.Uint64()
NextDelta:
for delta := uint64(0); delta < 1<<20; delta += 2 {
m := mod + delta
for _, prime := range smallPrimes {
if m%uint64(prime) == 0 {
continue NextDelta
}
}
if delta > 0 {
bigMod.SetUint64(delta)
p.Add(p, bigMod)
}
break
}
// There is a tiny possibility that, by adding delta, we caused
// the number to be one bit too long. Thus we check BitLen
// here.
if p.ProbablyPrime(20) && p.BitLen() == bits {
return
}
}
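Two details here are easy to miss. Setting the top two bits makes every candidate at least 3·2^(b-2), so a product of two such primes is at least 9·2^(2b-4) > 2^(2b-1) and never falls one bit short of the requested size. And the sieve costs one big.Int reduction per candidate; a standalone sketch of that trick (hypothetical, not part of the commit):

package main

import (
    "fmt"
    "math/big"
)

// The same constants as the patch: the primes 3..53, whose product still
// fits in a uint64.
var smallPrimes = []uint8{3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53}
var smallPrimesProduct = new(big.Int).SetUint64(16294579238595022365)

// hasSmallFactor reduces p once with big.Int arithmetic, then tests
// divisibility by each small prime using cheap uint64 operations.
func hasSmallFactor(p *big.Int) bool {
    m := new(big.Int).Mod(p, smallPrimesProduct).Uint64()
    for _, prime := range smallPrimes {
        if m%uint64(prime) == 0 {
            return true
        }
    }
    return false
}

func main() {
    cand := new(big.Int).Lsh(big.NewInt(1), 100)
    cand.Add(cand, big.NewInt(21))    // 2^100 + 21 is divisible by 11
    fmt.Println(hasSmallFactor(cand)) // true
}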


@ -57,7 +57,7 @@ func TestDecryptPKCS1v15(t *testing.T) {
t.Errorf("#%d error decrypting", i)
}
want := []byte(test.out)
if bytes.Compare(out, want) != 0 {
if !bytes.Equal(out, want) {
t.Errorf("#%d got:%#v want:%#v", i, out, want)
}
}
@ -90,7 +90,7 @@ func TestEncryptPKCS1v15(t *testing.T) {
return false
}
if bytes.Compare(plaintext, in) != 0 {
if !bytes.Equal(plaintext, in) {
t.Errorf("output mismatch: %#v %#v", plaintext, in)
return false
}
@ -132,7 +132,7 @@ func TestEncryptPKCS1v15SessionKey(t *testing.T) {
t.Errorf("#%d error decrypting", i)
}
want := []byte(test.out)
if bytes.Compare(key, want) != 0 {
if !bytes.Equal(key, want) {
t.Errorf("#%d got:%#v want:%#v", i, key, want)
}
}
@ -176,7 +176,7 @@ func TestSignPKCS1v15(t *testing.T) {
}
expected, _ := hex.DecodeString(test.out)
if bytes.Compare(s, expected) != 0 {
if !bytes.Equal(s, expected) {
t.Errorf("#%d got: %x want: %x", i, s, expected)
}
}


@ -175,6 +175,11 @@ NextSetOfPrimes:
pminus1.Sub(prime, bigOne)
totient.Mul(totient, pminus1)
}
if n.BitLen() != bits {
// This should never happen because crypto/rand should
// set the top two bits in each prime.
continue NextSetOfPrimes
}
g := new(big.Int)
priv.D = new(big.Int)


@ -21,6 +21,9 @@ func TestKeyGeneration(t *testing.T) {
if err != nil {
t.Errorf("failed to generate key")
}
if bits := priv.N.BitLen(); bits != size {
t.Errorf("key too short (%d vs %d)", bits, size)
}
testKeyBasics(t, priv)
}
@ -176,7 +179,7 @@ func TestEncryptOAEP(t *testing.T) {
if err != nil {
t.Errorf("#%d,%d error: %s", i, j, err)
}
if bytes.Compare(out, message.out) != 0 {
if !bytes.Equal(out, message.out) {
t.Errorf("#%d,%d bad result: %x (want %x)", i, j, out, message.out)
}
}
@ -200,7 +203,7 @@ func TestDecryptOAEP(t *testing.T) {
out, err := DecryptOAEP(sha1, nil, private, message.out, nil)
if err != nil {
t.Errorf("#%d,%d error: %s", i, j, err)
} else if bytes.Compare(out, message.in) != 0 {
} else if !bytes.Equal(out, message.in) {
t.Errorf("#%d,%d bad result: %#v (want %#v)", i, j, out, message.in)
}
@ -208,7 +211,7 @@ func TestDecryptOAEP(t *testing.T) {
out, err = DecryptOAEP(sha1, random, private, message.out, nil)
if err != nil {
t.Errorf("#%d,%d (blind) error: %s", i, j, err)
} else if bytes.Compare(out, message.in) != 0 {
} else if !bytes.Equal(out, message.in) {
t.Errorf("#%d,%d (blind) bad result: %#v (want %#v)", i, j, out, message.in)
}
}


@ -184,6 +184,12 @@ type Config struct {
// is nil, TLS uses a list of suites supported by the implementation.
CipherSuites []uint16
// PreferServerCipherSuites controls whether the server selects the
// client's most preferred ciphersuite, or the server's most preferred
// ciphersuite. If true then the server's preference, as expressed in
// the order of elements in CipherSuites, is used.
PreferServerCipherSuites bool
// SessionTicketsDisabled may be set to true to disable session ticket
// (resumption) support.
SessionTicketsDisabled bool
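A brief usage sketch of the new field (hypothetical certificate paths; the cipher suite constants exist in this package): with PreferServerCipherSuites set, the order of CipherSuites expresses the server's preference and overrides the client's ordering.

package main

import (
    "crypto/tls"
    "log"
)

func main() {
    cert, err := tls.LoadX509KeyPair("server.crt", "server.key") // hypothetical paths
    if err != nil {
        log.Fatal(err)
    }
    config := &tls.Config{
        Certificates:             []tls.Certificate{cert},
        PreferServerCipherSuites: true, // the server's order below wins
        CipherSuites: []uint16{
            tls.TLS_ECDHE_RSA_WITH_RC4_128_SHA,
            tls.TLS_RSA_WITH_AES_128_CBC_SHA,
        },
    }
    ln, err := tls.Listen("tcp", "localhost:8443", config)
    if err != nil {
        log.Fatal(err)
    }
    ln.Close()
}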


@ -180,8 +180,17 @@ Curves:
return true, nil
}
for _, id := range hs.clientHello.cipherSuites {
if hs.suite = c.tryCipherSuite(id, hs.ellipticOk); hs.suite != nil {
var preferenceList, supportedList []uint16
if c.config.PreferServerCipherSuites {
preferenceList = c.config.cipherSuites()
supportedList = hs.clientHello.cipherSuites
} else {
preferenceList = hs.clientHello.cipherSuites
supportedList = c.config.cipherSuites()
}
for _, id := range preferenceList {
if hs.suite = c.tryCipherSuite(id, supportedList, hs.ellipticOk); hs.suite != nil {
break
}
}
@ -222,7 +231,7 @@ func (hs *serverHandshakeState) checkForResumption() bool {
}
// Check that we also support the ciphersuite from the session.
hs.suite = c.tryCipherSuite(hs.sessionState.cipherSuite, hs.ellipticOk)
hs.suite = c.tryCipherSuite(hs.sessionState.cipherSuite, c.config.cipherSuites(), hs.ellipticOk)
if hs.suite == nil {
return false
}
@ -568,8 +577,8 @@ func (hs *serverHandshakeState) processCertsFromClient(certificates [][]byte) (*
// tryCipherSuite returns a cipherSuite with the given id if that cipher suite
// is acceptable to use.
func (c *Conn) tryCipherSuite(id uint16, ellipticOk bool) *cipherSuite {
for _, supported := range c.config.cipherSuites() {
func (c *Conn) tryCipherSuite(id uint16, supportedCipherSuites []uint16, ellipticOk bool) *cipherSuite {
for _, supported := range supportedCipherSuites {
if id == supported {
var candidate *cipherSuite

View File

@ -125,6 +125,50 @@ func TestClose(t *testing.T) {
}
}
func testHandshake(clientConfig, serverConfig *Config) (state ConnectionState, err error) {
c, s := net.Pipe()
go func() {
cli := Client(c, clientConfig)
cli.Handshake()
c.Close()
}()
server := Server(s, serverConfig)
err = server.Handshake()
if err == nil {
state = server.ConnectionState()
}
s.Close()
return
}
func TestCipherSuitePreference(t *testing.T) {
serverConfig := &Config{
CipherSuites: []uint16{TLS_RSA_WITH_RC4_128_SHA, TLS_RSA_WITH_AES_128_CBC_SHA, TLS_ECDHE_RSA_WITH_RC4_128_SHA},
Certificates: testConfig.Certificates,
}
clientConfig := &Config{
CipherSuites: []uint16{TLS_RSA_WITH_AES_128_CBC_SHA, TLS_RSA_WITH_RC4_128_SHA},
InsecureSkipVerify: true,
}
state, err := testHandshake(clientConfig, serverConfig)
if err != nil {
t.Fatalf("handshake failed: %s", err)
}
if state.CipherSuite != TLS_RSA_WITH_AES_128_CBC_SHA {
// By default the server should use the client's preference.
t.Fatalf("Client's preference was not used, got %x", state.CipherSuite)
}
serverConfig.PreferServerCipherSuites = true
state, err = testHandshake(clientConfig, serverConfig)
if err != nil {
t.Fatalf("handshake failed: %s", err)
}
if state.CipherSuite != TLS_RSA_WITH_RC4_128_SHA {
t.Fatalf("Server's preference was not used, got %x", state.CipherSuite)
}
}
func testServerScript(t *testing.T, name string, serverScript [][]byte, config *Config, peers []*x509.Certificate) {
c, s := net.Pipe()
srv := Server(s, config)

View File

@ -70,11 +70,12 @@ func initSystemRoots() {
var data C.CFDataRef = nil
err := C.FetchPEMRoots(&data)
if err != -1 {
defer C.CFRelease(C.CFTypeRef(data))
buf := C.GoBytes(unsafe.Pointer(C.CFDataGetBytePtr(data)), C.int(C.CFDataGetLength(data)))
roots.AppendCertsFromPEM(buf)
if err == -1 {
return
}
defer C.CFRelease(C.CFTypeRef(data))
buf := C.GoBytes(unsafe.Pointer(C.CFDataGetBytePtr(data)), C.int(C.CFDataGetLength(data)))
roots.AppendCertsFromPEM(buf)
systemRoots = roots
}

View File

@ -23,9 +23,11 @@ func initSystemRoots() {
data, err := ioutil.ReadFile(file)
if err == nil {
roots.AppendCertsFromPEM(data)
break
systemRoots = roots
return
}
}
systemRoots = roots
// All of the files failed to load. systemRoots will be nil which will
// trigger a specific error at verification time.
}

View File

@ -11,5 +11,4 @@ func (c *Certificate) systemVerify(opts *VerifyOptions) (chains [][]*Certificate
}
func initSystemRoots() {
systemRoots = NewCertPool()
}

View File

@ -27,9 +27,11 @@ func initSystemRoots() {
data, err := ioutil.ReadFile(file)
if err == nil {
roots.AppendCertsFromPEM(data)
break
systemRoots = roots
return
}
}
systemRoots = roots
// All of the files failed to load. systemRoots will be nil which will
// trigger a specific error at verification time.
}

View File

@ -226,5 +226,4 @@ func (c *Certificate) systemVerify(opts *VerifyOptions) (chains [][]*Certificate
}
func initSystemRoots() {
systemRoots = NewCertPool()
}

View File

@ -82,6 +82,14 @@ func (e UnknownAuthorityError) Error() string {
return "x509: certificate signed by unknown authority"
}
// SystemRootsError results when we fail to load the system root certificates.
type SystemRootsError struct {
}
func (e SystemRootsError) Error() string {
return "x509: failed to load system roots and no roots provided"
}
// VerifyOptions contains parameters for Certificate.Verify. It's a structure
// because other PKIX verification APIs have ended up needing many options.
type VerifyOptions struct {
@ -170,6 +178,9 @@ func (c *Certificate) Verify(opts VerifyOptions) (chains [][]*Certificate, err e
if opts.Roots == nil {
opts.Roots = systemRootsPool()
if opts.Roots == nil {
return nil, SystemRootsError{}
}
}
err = c.isValid(leafCertificate, nil, &opts)

View File

@ -15,19 +15,31 @@ import (
)
type verifyTest struct {
leaf string
intermediates []string
roots []string
currentTime int64
dnsName string
systemSkip bool
keyUsages []ExtKeyUsage
leaf string
intermediates []string
roots []string
currentTime int64
dnsName string
systemSkip bool
keyUsages []ExtKeyUsage
testSystemRootsError bool
errorCallback func(*testing.T, int, error) bool
expectedChains [][]string
}
var verifyTests = []verifyTest{
{
leaf: googleLeaf,
intermediates: []string{thawteIntermediate},
currentTime: 1302726541,
dnsName: "www.google.com",
testSystemRootsError: true,
// Without any roots specified we should get a system roots
// error.
errorCallback: expectSystemRootsError,
},
{
leaf: googleLeaf,
intermediates: []string{thawteIntermediate},
@ -180,6 +192,14 @@ func expectAuthorityUnknown(t *testing.T, i int, err error) (ok bool) {
return true
}
func expectSystemRootsError(t *testing.T, i int, err error) bool {
if _, ok := err.(SystemRootsError); !ok {
t.Errorf("#%d: error was not SystemRootsError: %s", i, err)
return false
}
return true
}
func certificateFromPEM(pemBytes string) (*Certificate, error) {
block, _ := pem.Decode([]byte(pemBytes))
if block == nil {
@ -193,6 +213,9 @@ func testVerify(t *testing.T, useSystemRoots bool) {
if useSystemRoots && test.systemSkip {
continue
}
if runtime.GOOS == "windows" && test.testSystemRootsError {
continue
}
opts := VerifyOptions{
Intermediates: NewCertPool(),
@ -226,8 +249,19 @@ func testVerify(t *testing.T, useSystemRoots bool) {
return
}
var oldSystemRoots *CertPool
if test.testSystemRootsError {
oldSystemRoots = systemRootsPool()
systemRoots = nil
opts.Roots = nil
}
chains, err := leaf.Verify(opts)
if test.testSystemRootsError {
systemRoots = oldSystemRoots
}
if test.errorCallback == nil && err != nil {
t.Errorf("#%d: unexpected error: %s", i, err)
}
@ -275,8 +309,7 @@ func TestGoVerify(t *testing.T) {
func TestSystemVerify(t *testing.T) {
if runtime.GOOS != "windows" {
t.Logf("skipping verify test using system APIs on %q", runtime.GOOS)
return
t.Skipf("skipping verify test using system APIs on %q", runtime.GOOS)
}
testVerify(t, true)

View File

@ -369,19 +369,11 @@ func (db *DB) exec(query string, args []interface{}) (res Result, err error) {
}
defer sti.Close()
dargs, err := driverArgs(sti, args)
if err != nil {
return nil, err
}
resi, err := sti.Exec(dargs)
if err != nil {
return nil, err
}
return result{resi}, nil
return resultFromStatement(sti, args...)
}
// Query executes a query that returns rows, typically a SELECT.
// The args are for any placeholder parameters in the query.
func (db *DB) Query(query string, args ...interface{}) (*Rows, error) {
stmt, err := db.Prepare(query)
if err != nil {
@ -608,16 +600,7 @@ func (tx *Tx) Exec(query string, args ...interface{}) (Result, error) {
}
defer sti.Close()
dargs, err := driverArgs(sti, args)
if err != nil {
return nil, err
}
resi, err := sti.Exec(dargs)
if err != nil {
return nil, err
}
return result{resi}, nil
return resultFromStatement(sti, args...)
}
// Query executes a query that returns rows, typically a SELECT.
@ -682,6 +665,10 @@ func (s *Stmt) Exec(args ...interface{}) (Result, error) {
}
defer releaseConn(nil)
return resultFromStatement(si, args...)
}
func resultFromStatement(si driver.Stmt, args ...interface{}) (Result, error) {
// -1 means the driver doesn't know how to count the number of
// placeholders, so we won't sanity check input here and instead let the
// driver deal with errors.
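For context, a hypothetical driver statement showing the -1 convention mentioned above (a sketch only; real drivers implement much more):
package main

import "database/sql/driver"

type stmt struct{}

func (stmt) Close() error  { return nil }
func (stmt) NumInput() int { return -1 } // driver cannot count placeholders

func (stmt) Exec(args []driver.Value) (driver.Result, error) {
	// With NumInput == -1, database/sql passes args through unchecked and
	// it is this method's job to reject a wrong argument count.
	return driver.RowsAffected(0), nil
}

func (stmt) Query(args []driver.Value) (driver.Rows, error) {
	return nil, driver.ErrSkip
}

var _ driver.Stmt = stmt{} // compile-time interface check

func main() {}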

View File

@ -52,6 +52,14 @@ func dotest() bool {
return true
}
func endtest() {
if pclineTempDir != "" {
os.RemoveAll(pclineTempDir)
pclineTempDir = ""
pclinetestBinary = ""
}
}
func getTable(t *testing.T) *Table {
f, tab := crack(os.Args[0], t)
f.Close()
@ -95,6 +103,7 @@ func TestLineFromAline(t *testing.T) {
if !dotest() {
return
}
defer endtest()
tab := getTable(t)
@ -129,7 +138,7 @@ func TestLineFromAline(t *testing.T) {
if !ok {
t.Errorf("file %s starts on line %d", path, line)
} else if line != ll+1 {
t.Errorf("expected next line of file %s to be %d, got %d", path, ll+1, line)
t.Fatalf("expected next line of file %s to be %d, got %d", path, ll+1, line)
}
lastline[path] = line
}
@ -142,6 +151,7 @@ func TestLineAline(t *testing.T) {
if !dotest() {
return
}
defer endtest()
tab := getTable(t)
@ -183,7 +193,7 @@ func TestPCLine(t *testing.T) {
if !dotest() {
return
}
defer os.RemoveAll(pclineTempDir)
defer endtest()
f, tab := crack(pclinetestBinary, t)
text := f.Section(".text")

View File

@ -13,6 +13,7 @@ package gosym
// and the Go format is the runtime source, specifically ../../runtime/symtab.c.
import (
"bytes"
"encoding/binary"
"fmt"
"strconv"
@ -104,11 +105,18 @@ type sym struct {
name []byte
}
var littleEndianSymtab = []byte{0xFE, 0xFF, 0xFF, 0xFF, 0x00, 0x00}
func walksymtab(data []byte, fn func(sym) error) error {
var order binary.ByteOrder = binary.BigEndian
if bytes.HasPrefix(data, littleEndianSymtab) {
data = data[6:]
order = binary.LittleEndian
}
var s sym
p := data
for len(p) >= 6 {
s.value = binary.BigEndian.Uint32(p[0:4])
s.value = order.Uint32(p[0:4])
typ := p[4]
if typ&0x80 == 0 {
return &DecodingError{len(data) - len(p) + 4, "bad symbol type", typ}
@ -139,7 +147,7 @@ func walksymtab(data []byte, fn func(sym) error) error {
}
s.name = p[0:i]
i += nnul
s.gotype = binary.BigEndian.Uint32(p[i : i+4])
s.gotype = order.Uint32(p[i : i+4])
p = p[i+4:]
fn(s)
}

View File

@ -124,7 +124,7 @@ func TestBitString(t *testing.T) {
t.Errorf("#%d: Incorrect error result (did fail? %v, expected: %v)", i, err == nil, test.ok)
}
if err == nil {
if test.bitLength != ret.BitLength || bytes.Compare(ret.Bytes, test.out) != 0 {
if test.bitLength != ret.BitLength || !bytes.Equal(ret.Bytes, test.out) {
t.Errorf("#%d: Bad result: %v (expected %v %v)", i, ret, test.out, test.bitLength)
}
}
@ -166,7 +166,7 @@ func TestBitStringRightAlign(t *testing.T) {
for i, test := range bitStringRightAlignTests {
bs := BitString{test.in, test.inlen}
out := bs.RightAlign()
if bytes.Compare(out, test.out) != 0 {
if !bytes.Equal(out, test.out) {
t.Errorf("#%d got: %x want: %x", i, out, test.out)
}
}
@ -477,7 +477,7 @@ func TestRawStructs(t *testing.T) {
if s.A != 0x50 {
t.Errorf("bad value for A: got %d want %d", s.A, 0x50)
}
if bytes.Compare([]byte(s.Raw), input) != 0 {
if !bytes.Equal([]byte(s.Raw), input) {
t.Errorf("bad value for Raw: got %x want %x", s.Raw, input)
}
}

View File

@ -132,7 +132,7 @@ func TestMarshal(t *testing.T) {
t.Errorf("#%d failed: %s", i, err)
}
out, _ := hex.DecodeString(test.out)
if bytes.Compare(out, data) != 0 {
if !bytes.Equal(out, data) {
t.Errorf("#%d got: %x want %x\n\t%q\n\t%q", i, data, out, data, out)
}

View File

@ -67,11 +67,13 @@ point values may be received into any floating point variable. However,
the destination variable must be able to represent the value or the decode
operation will fail.
Structs, arrays and slices are also supported. Strings and arrays of bytes are
supported with a special, efficient representation (see below). When a slice is
decoded, if the existing slice has capacity the slice will be extended in place;
if not, a new array is allocated. Regardless, the length of the resulting slice
reports the number of elements decoded.
Structs, arrays and slices are also supported. Structs encode and
decode only exported fields. Strings and arrays of bytes are supported
with a special, efficient representation (see below). When a slice
is decoded, if the existing slice has capacity the slice will be
extended in place; if not, a new array is allocated. Regardless,
the length of the resulting slice reports the number of elements
decoded.
Functions and channels cannot be sent in a gob. Attempting
to encode a value that contains one will fail.
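A small illustration of the two behaviors just described (not part of this patch; error checks elided for brevity):
package main

import (
	"bytes"
	"encoding/gob"
	"fmt"
)

type point struct {
	X, Y int
	note string // unexported: neither encoded nor decoded
}

func main() {
	var buf bytes.Buffer
	gob.NewEncoder(&buf).Encode(point{X: 1, Y: 2, note: "lost"})

	var p point
	gob.NewDecoder(&buf).Decode(&p)
	fmt.Println(p) // {1 2 }

	// Slices reuse existing capacity: the decode below extends s in place.
	gob.NewEncoder(&buf).Encode([]int{1, 2, 3})
	s := make([]int, 0, 8)
	gob.NewDecoder(&buf).Decode(&s)
	fmt.Println(len(s), cap(s)) // 3 8
}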

View File

@ -137,8 +137,8 @@ func (enc *Encoder) sendType(w io.Writer, state *encoderState, origt reflect.Typ
ut := userType(origt)
if ut.isGobEncoder {
// The rules are different: regardless of the underlying type's representation,
// we need to tell the other side that this exact type is a GobEncoder.
return enc.sendActualType(w, state, ut, ut.user)
// we need to tell the other side that the base type is a GobEncoder.
return enc.sendActualType(w, state, ut, ut.base)
}
// It's a concrete value, so drill down to the base type.

View File

@ -142,6 +142,18 @@ type GobTest5 struct {
V *ValueGobber
}
type GobTest6 struct {
X int // guarantee we have something in common with GobTest*
V ValueGobber
W *ValueGobber
}
type GobTest7 struct {
X int // guarantee we have something in common with GobTest*
V *ValueGobber
W ValueGobber
}
type GobTestIgnoreEncoder struct {
X int // guarantee we have something in common with GobTest*
}
@ -360,6 +372,61 @@ func TestGobEncoderValueEncoder(t *testing.T) {
}
}
// Test that we can use a value then a pointer type of a GobEncoder
// in the same encoded value. Bug 4647.
func TestGobEncoderValueThenPointer(t *testing.T) {
v := ValueGobber("forty-two")
w := ValueGobber("six-by-nine")
// this was a bug: encoding a GobEncoder by value before a GobEncoder
// pointer would cause duplicate type definitions to be sent.
b := new(bytes.Buffer)
enc := NewEncoder(b)
if err := enc.Encode(GobTest6{42, v, &w}); err != nil {
t.Fatal("encode error:", err)
}
dec := NewDecoder(b)
x := new(GobTest6)
if err := dec.Decode(x); err != nil {
t.Fatal("decode error:", err)
}
if got, want := x.V, v; got != want {
t.Errorf("v = %q, want %q", got, want)
}
if got, want := x.W, w; got == nil {
t.Errorf("w = nil, want %q", want)
} else if *got != want {
t.Errorf("w = %q, want %q", *got, want)
}
}
// Test that we can use a pointer then a value type of a GobEncoder
// in the same encoded value.
func TestGobEncoderPointerThenValue(t *testing.T) {
v := ValueGobber("forty-two")
w := ValueGobber("six-by-nine")
b := new(bytes.Buffer)
enc := NewEncoder(b)
if err := enc.Encode(GobTest7{42, &v, w}); err != nil {
t.Fatal("encode error:", err)
}
dec := NewDecoder(b)
x := new(GobTest7)
if err := dec.Decode(x); err != nil {
t.Fatal("decode error:", err)
}
if got, want := x.V, v; got == nil {
t.Errorf("v = nil, want %q", want)
} else if *got != want {
t.Errorf("v = %q, want %q", got, want)
}
if got, want := x.W, w; got != want {
t.Errorf("w = %q, want %q", got, want)
}
}
func TestGobEncoderFieldTypeError(t *testing.T) {
// GobEncoder to non-decoder: error
b := new(bytes.Buffer)

View File

@ -65,7 +65,7 @@ func TestDecodeString(t *testing.T) {
t.Errorf("#%d: unexpected err value: %s", i, err)
continue
}
if bytes.Compare(dst, test.dec) != 0 {
if !bytes.Equal(dst, test.dec) {
t.Errorf("#%d: got: %#v want: #%v", i, dst, test.dec)
}
}

View File

@ -52,6 +52,25 @@ import (
// an UnmarshalTypeError describing the earliest such error.
//
func Unmarshal(data []byte, v interface{}) error {
// skip heavy processing for primitive values
var first byte
var i int
for i, first = range data {
if !isSpace(rune(first)) {
break
}
}
if first != '{' && first != '[' {
rv := reflect.ValueOf(v)
if rv.Kind() != reflect.Ptr || rv.IsNil() {
return &InvalidUnmarshalError{reflect.TypeOf(v)}
}
var d decodeState
d.literalStore(data[i:], rv.Elem(), false)
return d.savedError
}
d := new(decodeState).init(data)
// Quick check for well-formedness.
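Illustrative of the fast path added above (a sketch, not part of the patch):
package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// Scalar JSON values, with surrounding whitespace, now bypass the
	// full scanner and are handed straight to the literal decoder.
	var n int
	if err := json.Unmarshal([]byte("\t 42 \n"), &n); err != nil {
		panic(err)
	}
	var ok bool
	if err := json.Unmarshal([]byte(" true "), &ok); err != nil {
		panic(err)
	}
	fmt.Println(n, ok) // 42 true
}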
@ -87,6 +106,7 @@ func (e *UnmarshalTypeError) Error() string {
// An UnmarshalFieldError describes a JSON object key that
// led to an unexported (and therefore unwritable) struct field.
// (No longer used; kept for compatibility.)
type UnmarshalFieldError struct {
Key string
Type reflect.Type
@ -328,15 +348,19 @@ func (d *decodeState) array(v reflect.Value) {
// Check type of target.
switch v.Kind() {
case reflect.Interface:
if v.NumMethod() == 0 {
// Decoding into nil interface? Switch to non-reflect code.
v.Set(reflect.ValueOf(d.arrayInterface()))
return
}
// Otherwise it's invalid.
fallthrough
default:
d.saveError(&UnmarshalTypeError{"array", v.Type()})
d.off--
d.next()
return
case reflect.Interface:
// Decoding into nil interface? Switch to non-reflect code.
v.Set(reflect.ValueOf(d.arrayInterface()))
return
case reflect.Array:
case reflect.Slice:
break
@ -422,7 +446,7 @@ func (d *decodeState) object(v reflect.Value) {
v = pv
// Decoding into nil interface? Switch to non-reflect code.
if v.Kind() == reflect.Interface {
if v.Kind() == reflect.Interface && v.NumMethod() == 0 {
v.Set(reflect.ValueOf(d.objectInterface()))
return
}
@ -430,9 +454,9 @@ func (d *decodeState) object(v reflect.Value) {
// Check type of target: struct or map[string]T
switch v.Kind() {
case reflect.Map:
// map must have string type
// map must have string kind
t := v.Type()
if t.Key() != reflect.TypeOf("") {
if t.Key().Kind() != reflect.String {
d.saveError(&UnmarshalTypeError{"object", v.Type()})
break
}
@ -440,11 +464,9 @@ func (d *decodeState) object(v reflect.Value) {
v.Set(reflect.MakeMap(t))
}
case reflect.Struct:
default:
d.saveError(&UnmarshalTypeError{"object", v.Type()})
}
if !v.IsValid() {
d.off--
d.next() // skip over { } in input
return
@ -509,15 +531,6 @@ func (d *decodeState) object(v reflect.Value) {
}
subv = subv.Field(i)
}
} else {
// To give a good error, a quick scan for unexported fields in top level.
st := v.Type()
for i := 0; i < st.NumField(); i++ {
f := st.Field(i)
if f.PkgPath != "" && strings.EqualFold(f.Name, key) {
d.saveError(&UnmarshalFieldError{key, st, f})
}
}
}
}
@ -536,10 +549,12 @@ func (d *decodeState) object(v reflect.Value) {
} else {
d.value(subv)
}
// Write value back to map;
// if using struct, subv points into struct already.
if v.Kind() == reflect.Map {
v.SetMapIndex(reflect.ValueOf(key), subv)
kv := reflect.ValueOf(key).Convert(v.Type().Key())
v.SetMapIndex(kv, subv)
}
// Next token must be , or }.
@ -625,7 +640,11 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
case reflect.Bool:
v.SetBool(value)
case reflect.Interface:
v.Set(reflect.ValueOf(value))
if v.NumMethod() == 0 {
v.Set(reflect.ValueOf(value))
} else {
d.saveError(&UnmarshalTypeError{"bool", v.Type()})
}
}
case '"': // string
@ -655,7 +674,11 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
case reflect.String:
v.SetString(string(s))
case reflect.Interface:
v.Set(reflect.ValueOf(string(s)))
if v.NumMethod() == 0 {
v.Set(reflect.ValueOf(string(s)))
} else {
d.saveError(&UnmarshalTypeError{"string", v.Type()})
}
}
default: // number
@ -684,6 +707,10 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
d.saveError(err)
break
}
if v.NumMethod() != 0 {
d.saveError(&UnmarshalTypeError{"number", v.Type()})
break
}
v.Set(reflect.ValueOf(n))
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:

View File

@ -199,12 +199,19 @@ var unmarshalTests = []unmarshalTest{
{in: `"invalid: \uD834x\uDD1E"`, ptr: new(string), out: "invalid: \uFFFDx\uFFFD"},
{in: "null", ptr: new(interface{}), out: nil},
{in: `{"X": [1,2,3], "Y": 4}`, ptr: new(T), out: T{Y: 4}, err: &UnmarshalTypeError{"array", reflect.TypeOf("")}},
{in: `{"x": 1}`, ptr: new(tx), out: tx{}, err: &UnmarshalFieldError{"x", txType, txType.Field(0)}},
{in: `{"x": 1}`, ptr: new(tx), out: tx{}},
{in: `{"F1":1,"F2":2,"F3":3}`, ptr: new(V), out: V{F1: float64(1), F2: int32(2), F3: Number("3")}},
{in: `{"F1":1,"F2":2,"F3":3}`, ptr: new(V), out: V{F1: Number("1"), F2: int32(2), F3: Number("3")}, useNumber: true},
{in: `{"k1":1,"k2":"s","k3":[1,2.0,3e-3],"k4":{"kk1":"s","kk2":2}}`, ptr: new(interface{}), out: ifaceNumAsFloat64},
{in: `{"k1":1,"k2":"s","k3":[1,2.0,3e-3],"k4":{"kk1":"s","kk2":2}}`, ptr: new(interface{}), out: ifaceNumAsNumber, useNumber: true},
// raw values with whitespace
{in: "\n true ", ptr: new(bool), out: true},
{in: "\t 1 ", ptr: new(int), out: 1},
{in: "\r 1.2 ", ptr: new(float64), out: 1.2},
{in: "\t -5 \n", ptr: new(int16), out: int16(-5)},
{in: "\t \"a\\u1234\" \n", ptr: new(string), out: "a\u1234"},
// Z has a "-" tag.
{in: `{"Y": 1, "Z": 2}`, ptr: new(T), out: T{Y: 1}},
@ -217,6 +224,16 @@ var unmarshalTests = []unmarshalTest{
{in: `[1, 2, 3+]`, err: &SyntaxError{"invalid character '+' after array element", 9}},
{in: `{"X":12x}`, err: &SyntaxError{"invalid character 'x' after object key:value pair", 8}, useNumber: true},
// raw value errors
{in: "\x01 42", err: &SyntaxError{"invalid character '\\x01' looking for beginning of value", 1}},
{in: " 42 \x01", err: &SyntaxError{"invalid character '\\x01' after top-level value", 5}},
{in: "\x01 true", err: &SyntaxError{"invalid character '\\x01' looking for beginning of value", 1}},
{in: " false \x01", err: &SyntaxError{"invalid character '\\x01' after top-level value", 8}},
{in: "\x01 1.2", err: &SyntaxError{"invalid character '\\x01' looking for beginning of value", 1}},
{in: " 3.4 \x01", err: &SyntaxError{"invalid character '\\x01' after top-level value", 6}},
{in: "\x01 \"string\"", err: &SyntaxError{"invalid character '\\x01' looking for beginning of value", 1}},
{in: " \"string\" \x01", err: &SyntaxError{"invalid character '\\x01' after top-level value", 11}},
// array tests
{in: `[1, 2, 3]`, ptr: new([3]int), out: [3]int{1, 2, 3}},
{in: `[1, 2, 3]`, ptr: new([1]int), out: [1]int{1}},
@ -422,7 +439,7 @@ func TestUnmarshalMarshal(t *testing.T) {
if err != nil {
t.Fatalf("Marshal: %v", err)
}
if bytes.Compare(jsonBig, b) != 0 {
if !bytes.Equal(jsonBig, b) {
t.Errorf("Marshal jsonBig")
diff(t, b, jsonBig)
return
@ -474,7 +491,7 @@ func TestLargeByteSlice(t *testing.T) {
if err := Unmarshal(b, &s1); err != nil {
t.Fatalf("Unmarshal: %v", err)
}
if bytes.Compare(s0, s1) != 0 {
if !bytes.Equal(s0, s1) {
t.Errorf("Marshal large byte slice")
diff(t, s0, s1)
}
@ -1000,3 +1017,72 @@ func TestUnmarshalNulls(t *testing.T) {
t.Errorf("Unmarshal of null values affected primitives")
}
}
func TestStringKind(t *testing.T) {
type stringKind string
type aMap map[stringKind]int
var m1, m2 map[stringKind]int
m1 = map[stringKind]int{
"foo": 42,
}
data, err := Marshal(m1)
if err != nil {
t.Errorf("Unexpected error marshalling: %v", err)
}
err = Unmarshal(data, &m2)
if err != nil {
t.Errorf("Unexpected error unmarshalling: %v", err)
}
if !reflect.DeepEqual(m1, m2) {
t.Error("Items should be equal after encoding and then decoding")
}
}
var decodeTypeErrorTests = []struct {
dest interface{}
src string
}{
{new(string), `{"user": "name"}`}, // issue 4628.
{new(error), `{}`}, // issue 4222
{new(error), `[]`},
{new(error), `""`},
{new(error), `123`},
{new(error), `true`},
}
func TestUnmarshalTypeError(t *testing.T) {
for _, item := range decodeTypeErrorTests {
err := Unmarshal([]byte(item.src), item.dest)
if _, ok := err.(*UnmarshalTypeError); !ok {
t.Errorf("expected type error for Unmarshal(%q, type %T): got %v instead",
item.src, item.dest, err)
}
}
}
// Test handling of unexported fields that should be ignored.
// Issue 4660
type unexportedFields struct {
Name string
m map[string]interface{} `json:"-"`
m2 map[string]interface{} `json:"abcd"`
}
func TestUnmarshalUnexported(t *testing.T) {
input := `{"Name": "Bob", "m": {"x": 123}, "m2": {"y": 456}, "abcd": {"z": 789}}`
want := &unexportedFields{Name: "Bob"}
out := &unexportedFields{}
err := Unmarshal([]byte(input), out)
if err != nil {
t.Errorf("got error %v, expected nil", err)
}
if !reflect.DeepEqual(out, want) {
t.Errorf("got %q, want %q", out, want)
}
}

View File

@ -75,8 +75,9 @@ import (
// Field int `json:",omitempty"`
//
// The "string" option signals that a field is stored as JSON inside a
// JSON-encoded string. This extra level of encoding is sometimes
// used when communicating with JavaScript programs:
// JSON-encoded string. It applies only to fields of string, floating point,
// or integer types. This extra level of encoding is sometimes used when
// communicating with JavaScript programs:
//
// Int64String int64 `json:",string"`
//
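An illustrative round trip with the option (a sketch, not from this patch):
package main

import (
	"encoding/json"
	"fmt"
)

type Msg struct {
	ID int64 `json:"id,string"` // stored as a JSON string on the wire
}

func main() {
	b, _ := json.Marshal(Msg{ID: 42})
	fmt.Println(string(b)) // {"id":"42"}

	var m Msg
	_ = json.Unmarshal([]byte(`{"id":"7"}`), &m)
	fmt.Println(m.ID) // 7
}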
@ -437,7 +438,7 @@ func isValidTag(s string) bool {
}
for _, c := range s {
switch {
case strings.ContainsRune("!#$%&()*+-./:<=>?@[]^_{|}~", c):
case strings.ContainsRune("!#$%&()*+-./:<=>?@[]^_{|}~ ", c):
// Backslash and quote chars are reserved, but
// otherwise any punctuation chars are allowed
// in a tag name.
@ -617,13 +618,20 @@ func typeFields(t reflect.Type) []field {
index := make([]int, len(f.index)+1)
copy(index, f.index)
index[len(f.index)] = i
ft := sf.Type
if ft.Name() == "" && ft.Kind() == reflect.Ptr {
// Follow pointer.
ft = ft.Elem()
}
// Record found field and index sequence.
if name != "" || !sf.Anonymous {
if name != "" || !sf.Anonymous || ft.Kind() != reflect.Struct {
tagged := name != ""
if name == "" {
name = sf.Name
}
fields = append(fields, field{name, tagged, index, sf.Type,
fields = append(fields, field{name, tagged, index, ft,
opts.Contains("omitempty"), opts.Contains("string")})
if count[f.typ] > 1 {
// If there were multiple instances, add a second,
@ -636,11 +644,6 @@ func typeFields(t reflect.Type) []field {
}
// Record new anonymous struct to explore in next round.
ft := sf.Type
if ft.Name() == "" {
// Must be pointer.
ft = ft.Elem()
}
nextCount[ft]++
if nextCount[ft] == 1 {
next = append(next, field{name: ft.Name(), index: index, typ: ft})

View File

@ -186,3 +186,23 @@ func TestMarshalerEscaping(t *testing.T) {
t.Errorf("got %q, want %q", got, want)
}
}
type IntType int
type MyStruct struct {
IntType
}
func TestAnonymousNonstruct(t *testing.T) {
var i IntType = 11
a := MyStruct{i}
const want = `{"IntType":11}`
b, err := Marshal(a)
if err != nil {
t.Fatalf("Marshal: %v", err)
}
if got := string(b); got != want {
t.Errorf("got %q, want %q", got, want)
}
}

View File

@ -92,7 +92,7 @@ func TestCompactBig(t *testing.T) {
t.Fatalf("Compact: %v", err)
}
b := buf.Bytes()
if bytes.Compare(b, jsonBig) != 0 {
if !bytes.Equal(b, jsonBig) {
t.Error("Compact(jsonBig) != jsonBig")
diff(t, b, jsonBig)
return
@ -118,7 +118,7 @@ func TestIndentBig(t *testing.T) {
t.Fatalf("Indent2: %v", err)
}
b1 := buf1.Bytes()
if bytes.Compare(b1, b) != 0 {
if !bytes.Equal(b1, b) {
t.Error("Indent(Indent(jsonBig)) != Indent(jsonBig)")
diff(t, b1, b)
return
@ -130,7 +130,7 @@ func TestIndentBig(t *testing.T) {
t.Fatalf("Compact: %v", err)
}
b1 = buf1.Bytes()
if bytes.Compare(b1, jsonBig) != 0 {
if !bytes.Equal(b1, jsonBig) {
t.Error("Compact(Indent(jsonBig)) != jsonBig")
diff(t, b1, jsonBig)
return

View File

@ -60,6 +60,14 @@ type badCodeTag struct {
Z string `json:" !\"#&'()*+,."`
}
type spaceTag struct {
Q string `json:"With space"`
}
type unicodeTag struct {
W string `json:"Ελλάδα"`
}
var structTagObjectKeyTests = []struct {
raw interface{}
value string
@ -78,6 +86,8 @@ var structTagObjectKeyTests = []struct {
{badCodeTag{"Reliable Man"}, "Reliable Man", "Z"},
{percentSlashTag{"brut"}, "brut", "text/html%"},
{punctuationTag{"Union Rags"}, "Union Rags", "!#$%&()*+-./:<=>?@[]^_{|}~"},
{spaceTag{"Perreddu"}, "Perreddu", "With space"},
{unicodeTag{"Loukanikos"}, "Loukanikos", "Ελλάδα"},
}
func TestStructTagObjectKey(t *testing.T) {

View File

@ -241,7 +241,7 @@ func (p *printer) marshalSimple(typ reflect.Type, val reflect.Value) error {
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
p.WriteString(strconv.FormatUint(val.Uint(), 10))
case reflect.Float32, reflect.Float64:
p.WriteString(strconv.FormatFloat(val.Float(), 'g', -1, 64))
p.WriteString(strconv.FormatFloat(val.Float(), 'g', -1, val.Type().Bits()))
case reflect.String:
// TODO: Add EscapeString.
Escape(p, []byte(val.String()))
@ -273,19 +273,32 @@ func (p *printer) marshalStruct(tinfo *typeInfo, val reflect.Value) error {
s := parentStack{printer: p}
for i := range tinfo.fields {
finfo := &tinfo.fields[i]
if finfo.flags&(fAttr|fAny) != 0 {
if finfo.flags&(fAttr) != 0 {
continue
}
vf := finfo.value(val)
switch finfo.flags & fMode {
case fCharData:
var scratch [64]byte
switch vf.Kind() {
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
Escape(p, strconv.AppendInt(scratch[:0], vf.Int(), 10))
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
Escape(p, strconv.AppendUint(scratch[:0], vf.Uint(), 10))
case reflect.Float32, reflect.Float64:
Escape(p, strconv.AppendFloat(scratch[:0], vf.Float(), 'g', -1, vf.Type().Bits()))
case reflect.Bool:
Escape(p, strconv.AppendBool(scratch[:0], vf.Bool()))
case reflect.String:
Escape(p, []byte(vf.String()))
case reflect.Slice:
if elem, ok := vf.Interface().([]byte); ok {
Escape(p, elem)
}
case reflect.Struct:
if vf.Type() == timeType {
Escape(p, []byte(vf.Interface().(time.Time).Format(time.RFC3339Nano)))
}
}
continue
@ -340,7 +353,7 @@ func (p *printer) marshalStruct(tinfo *typeInfo, val reflect.Value) error {
continue
}
case fElement:
case fElement, fElement | fAny:
s.trim(finfo.parents)
if len(finfo.parents) > len(s.stack) {
if vf.Kind() != reflect.Ptr && vf.Kind() != reflect.Interface || !vf.IsNil() {

View File

@ -59,6 +59,36 @@ type Book struct {
Title string `xml:",chardata"`
}
type Event struct {
XMLName struct{} `xml:"event"`
Year int `xml:",chardata"`
}
type Movie struct {
XMLName struct{} `xml:"movie"`
Length uint `xml:",chardata"`
}
type Pi struct {
XMLName struct{} `xml:"pi"`
Approximation float32 `xml:",chardata"`
}
type Universe struct {
XMLName struct{} `xml:"universe"`
Visible float64 `xml:",chardata"`
}
type Particle struct {
XMLName struct{} `xml:"particle"`
HasMass bool `xml:",chardata"`
}
type Departure struct {
XMLName struct{} `xml:"departure"`
When time.Time `xml:",chardata"`
}
type SecretAgent struct {
XMLName struct{} `xml:"agent"`
Handle string `xml:"handle,attr"`
@ -188,6 +218,18 @@ type AnyTest struct {
AnyField AnyHolder `xml:",any"`
}
type AnyOmitTest struct {
XMLName struct{} `xml:"a"`
Nested string `xml:"nested>value"`
AnyField *AnyHolder `xml:",any,omitempty"`
}
type AnySliceTest struct {
XMLName struct{} `xml:"a"`
Nested string `xml:"nested>value"`
AnyField []AnyHolder `xml:",any"`
}
type AnyHolder struct {
XMLName Name
XML string `xml:",innerxml"`
@ -333,6 +375,12 @@ var marshalTests = []struct {
{Value: &Domain{Name: []byte("google.com&friends")}, ExpectXML: `<domain>google.com&amp;friends</domain>`},
{Value: &Domain{Name: []byte("google.com"), Comment: []byte(" &friends ")}, ExpectXML: `<domain>google.com<!-- &friends --></domain>`},
{Value: &Book{Title: "Pride & Prejudice"}, ExpectXML: `<book>Pride &amp; Prejudice</book>`},
{Value: &Event{Year: -3114}, ExpectXML: `<event>-3114</event>`},
{Value: &Movie{Length: 13440}, ExpectXML: `<movie>13440</movie>`},
{Value: &Pi{Approximation: 3.14159265}, ExpectXML: `<pi>3.1415927</pi>`},
{Value: &Universe{Visible: 9.3e13}, ExpectXML: `<universe>9.3e+13</universe>`},
{Value: &Particle{HasMass: true}, ExpectXML: `<particle>true</particle>`},
{Value: &Departure{When: ParseTime("2013-01-09T00:15:00-09:00")}, ExpectXML: `<departure>2013-01-09T00:15:00-09:00</departure>`},
{Value: atomValue, ExpectXML: atomXml},
{
Value: &Ship{
@ -652,12 +700,43 @@ var marshalTests = []struct {
XML: "<sub>unknown</sub>",
},
},
UnmarshalOnly: true,
},
{
Value: &AnyTest{Nested: "known", AnyField: AnyHolder{XML: "<unknown/>"}},
ExpectXML: `<a><nested><value>known</value></nested></a>`,
MarshalOnly: true,
Value: &AnyTest{Nested: "known",
AnyField: AnyHolder{
XML: "<unknown/>",
XMLName: Name{Local: "AnyField"},
},
},
ExpectXML: `<a><nested><value>known</value></nested><AnyField><unknown/></AnyField></a>`,
},
{
ExpectXML: `<a><nested><value>b</value></nested></a>`,
Value: &AnyOmitTest{
Nested: "b",
},
},
{
ExpectXML: `<a><nested><value>b</value></nested><c><d>e</d></c><g xmlns="f"><h>i</h></g></a>`,
Value: &AnySliceTest{
Nested: "b",
AnyField: []AnyHolder{
{
XMLName: Name{Local: "c"},
XML: "<d>e</d>",
},
{
XMLName: Name{Space: "f", Local: "g"},
XML: "<h>i</h>",
},
},
},
},
{
ExpectXML: `<a><nested><value>b</value></nested></a>`,
Value: &AnySliceTest{
Nested: "b",
},
},
// Test recursive types.
@ -690,15 +769,17 @@ var marshalTests = []struct {
// Test escaping.
{
ExpectXML: `<a><nested><value>dquote: &#34;; squote: &#39;; ampersand: &amp;; less: &lt;; greater: &gt;;</value></nested></a>`,
ExpectXML: `<a><nested><value>dquote: &#34;; squote: &#39;; ampersand: &amp;; less: &lt;; greater: &gt;;</value></nested><empty></empty></a>`,
Value: &AnyTest{
Nested: `dquote: "; squote: '; ampersand: &; less: <; greater: >;`,
Nested: `dquote: "; squote: '; ampersand: &; less: <; greater: >;`,
AnyField: AnyHolder{XMLName: Name{Local: "empty"}},
},
},
{
ExpectXML: `<a><nested><value>newline: &#xA;; cr: &#xD;; tab: &#x9;;</value></nested></a>`,
ExpectXML: `<a><nested><value>newline: &#xA;; cr: &#xD;; tab: &#x9;;</value></nested><AnyField></AnyField></a>`,
Value: &AnyTest{
Nested: "newline: \n; cr: \r; tab: \t;",
Nested: "newline: \n; cr: \r; tab: \t;",
AnyField: AnyHolder{XMLName: Name{Local: "AnyField"}},
},
},
{

View File

@ -279,7 +279,7 @@ func (p *Decoder) unmarshal(val reflect.Value, start *StartElement) error {
saveComment = finfo.value(sv)
}
case fAny:
case fAny, fAny | fElement:
if !saveAny.IsValid() {
saveAny = finfo.value(sv)
}
@ -374,68 +374,58 @@ Loop:
}
func copyValue(dst reflect.Value, src []byte) (err error) {
// Helper functions for integer and unsigned integer conversions
var itmp int64
getInt64 := func() bool {
itmp, err = strconv.ParseInt(string(src), 10, 64)
// TODO: should check sizes
return err == nil
}
var utmp uint64
getUint64 := func() bool {
utmp, err = strconv.ParseUint(string(src), 10, 64)
// TODO: check for overflow?
return err == nil
}
var ftmp float64
getFloat64 := func() bool {
ftmp, err = strconv.ParseFloat(string(src), 64)
// TODO: check for overflow?
return err == nil
if dst.Kind() == reflect.Ptr {
if dst.IsNil() {
dst.Set(reflect.New(dst.Type().Elem()))
}
dst = dst.Elem()
}
// Save accumulated data.
switch t := dst; t.Kind() {
switch dst.Kind() {
case reflect.Invalid:
// Probably a comment.
// Probably a comment.
default:
return errors.New("cannot happen: unknown type " + t.Type().String())
return errors.New("cannot happen: unknown type " + dst.Type().String())
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
if !getInt64() {
itmp, err := strconv.ParseInt(string(src), 10, dst.Type().Bits())
if err != nil {
return err
}
t.SetInt(itmp)
dst.SetInt(itmp)
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
if !getUint64() {
utmp, err := strconv.ParseUint(string(src), 10, dst.Type().Bits())
if err != nil {
return err
}
t.SetUint(utmp)
dst.SetUint(utmp)
case reflect.Float32, reflect.Float64:
if !getFloat64() {
ftmp, err := strconv.ParseFloat(string(src), dst.Type().Bits())
if err != nil {
return err
}
t.SetFloat(ftmp)
dst.SetFloat(ftmp)
case reflect.Bool:
value, err := strconv.ParseBool(strings.TrimSpace(string(src)))
if err != nil {
return err
}
t.SetBool(value)
dst.SetBool(value)
case reflect.String:
t.SetString(string(src))
dst.SetString(string(src))
case reflect.Slice:
if len(src) == 0 {
// non-nil to flag presence
src = []byte{}
}
t.SetBytes(src)
dst.SetBytes(src)
case reflect.Struct:
if t.Type() == timeType {
if dst.Type() == timeType {
tv, err := time.Parse(time.RFC3339, string(src))
if err != nil {
return err
}
t.Set(reflect.ValueOf(tv))
dst.Set(reflect.ValueOf(tv))
}
}
return nil

View File

@ -355,3 +355,47 @@ func TestUnmarshalWithoutNameType(t *testing.T) {
t.Fatalf("have %v\nwant %v", x.Attr, OK)
}
}
func TestUnmarshalAttr(t *testing.T) {
type ParamVal struct {
Int int `xml:"int,attr"`
}
type ParamPtr struct {
Int *int `xml:"int,attr"`
}
type ParamStringPtr struct {
Int *string `xml:"int,attr"`
}
x := []byte(`<Param int="1" />`)
p1 := &ParamPtr{}
if err := Unmarshal(x, p1); err != nil {
t.Fatalf("Unmarshal: %s", err)
}
if p1.Int == nil {
t.Fatalf("Unmarshal failed in to *int field")
} else if *p1.Int != 1 {
t.Fatalf("Unmarshal with %s failed:\nhave %#v,\n want %#v", x, p1.Int, 1)
}
p2 := &ParamVal{}
if err := Unmarshal(x, p2); err != nil {
t.Fatalf("Unmarshal: %s", err)
}
if p2.Int != 1 {
t.Fatalf("Unmarshal with %s failed:\nhave %#v,\n want %#v", x, p2.Int, 1)
}
p3 := &ParamStringPtr{}
if err := Unmarshal(x, p3); err != nil {
t.Fatalf("Unmarshal: %s", err)
}
if p3.Int == nil {
t.Fatalf("Unmarshal failed in to *string field")
} else if *p3.Int != "1" {
t.Fatalf("Unmarshal with %s failed:\nhave %#v,\n want %#v", x, p3.Int, 1)
}
}

View File

@ -154,6 +154,9 @@ func structFieldInfo(typ reflect.Type, f *reflect.StructField) (*fieldInfo, erro
// This will also catch multiple modes in a single field.
valid = false
}
if finfo.flags&fMode == fAny {
finfo.flags |= fElement
}
if finfo.flags&fOmitEmpty != 0 && finfo.flags&(fElement|fAttr) == 0 {
valid = false
}

View File

@ -5,14 +5,13 @@
package main
import (
"errors"
"exp/types"
"flag"
"fmt"
"go/ast"
"go/parser"
"go/scanner"
"go/token"
"go/types"
"io/ioutil"
"os"
"path/filepath"
@ -92,8 +91,7 @@ func parse(fset *token.FileSet, filename string, src []byte) *ast.File {
return file
}
func parseStdin(fset *token.FileSet) (files map[string]*ast.File) {
files = make(map[string]*ast.File)
func parseStdin(fset *token.FileSet) (files []*ast.File) {
src, err := ioutil.ReadAll(os.Stdin)
if err != nil {
report(err)
@ -101,13 +99,12 @@ func parseStdin(fset *token.FileSet) (files map[string]*ast.File) {
}
const filename = "<standard input>"
if file := parse(fset, filename, src); file != nil {
files[filename] = file
files = []*ast.File{file}
}
return
}
func parseFiles(fset *token.FileSet, filenames []string) (files map[string]*ast.File) {
files = make(map[string]*ast.File)
func parseFiles(fset *token.FileSet, filenames []string) (files []*ast.File) {
for _, filename := range filenames {
src, err := ioutil.ReadFile(filename)
if err != nil {
@ -115,11 +112,7 @@ func parseFiles(fset *token.FileSet, filenames []string) (files map[string]*ast.
continue
}
if file := parse(fset, filename, src); file != nil {
if files[filename] != nil {
report(errors.New(fmt.Sprintf("%q: duplicate file", filename)))
continue
}
files[filename] = file
files = append(files, file)
}
}
return
@ -169,15 +162,10 @@ func processFiles(filenames []string, allFiles bool) {
processPackage(fset, parseFiles(fset, filenames[0:i]))
}
func processPackage(fset *token.FileSet, files map[string]*ast.File) {
// make a package (resolve all identifiers)
pkg, err := ast.NewPackage(fset, files, types.GcImport, types.Universe)
func processPackage(fset *token.FileSet, files []*ast.File) {
_, err := types.Check(fset, files)
if err != nil {
report(err)
return
}
if err = types.Check(fset, pkg, nil, nil); err != nil {
report(err)
}
}

View File

@ -51,17 +51,20 @@ var tests = []string{
"exp/gotype/testdata/test1.go",
// directories
// Note: packages that don't typecheck yet are commented out
// Note: Packages that don't typecheck yet are commented out.
// Unless there is a comment next to the commented out packages,
// the package doesn't typecheck due to errors in the shift
// expression checker.
"archive/tar",
"archive/zip",
"bufio",
"bytes",
"compress/bzip2",
// "compress/bzip2",
"compress/flate",
"compress/gzip",
"compress/lzw",
// "compress/lzw",
"compress/zlib",
"container/heap",
@ -77,7 +80,7 @@ var tests = []string{
"crypto/elliptic",
"crypto/hmac",
"crypto/md5",
"crypto/rand",
// "crypto/rand",
"crypto/rc4",
// "crypto/rsa", // intermittent failure: /home/gri/go2/src/pkg/crypto/rsa/pkcs1v15.go:21:27: undeclared name: io
"crypto/sha1",
@ -91,14 +94,14 @@ var tests = []string{
"database/sql",
"database/sql/driver",
"debug/dwarf",
// "debug/dwarf",
"debug/elf",
"debug/gosym",
"debug/macho",
"debug/pe",
"encoding/ascii85",
"encoding/asn1",
// "encoding/asn1",
"encoding/base32",
"encoding/base64",
"encoding/binary",
@ -114,7 +117,6 @@ var tests = []string{
"flag",
"fmt",
"exp/types",
"exp/gotype",
"go/ast",
@ -124,7 +126,8 @@ var tests = []string{
"go/parser",
"go/printer",
"go/scanner",
"go/token",
// "go/token",
"go/types",
"hash/adler32",
"hash/crc32",
@ -135,7 +138,7 @@ var tests = []string{
"image/color",
"image/draw",
"image/gif",
"image/jpeg",
// "image/jpeg",
"image/png",
"index/suffixarray",
@ -146,15 +149,15 @@ var tests = []string{
"log",
"log/syslog",
"math",
"math/big",
// "math",
//"math/big",
"math/cmplx",
"math/rand",
"mime",
"mime/multipart",
// "net", // c:\go\root\src\pkg\net\interface_windows.go:54:13: invalid operation: division by zero
// "net",
"net/http",
"net/http/cgi",
"net/http/fcgi",
@ -165,41 +168,41 @@ var tests = []string{
"net/rpc",
"net/rpc/jsonrpc",
"net/smtp",
"net/textproto",
// "net/textproto",
"net/url",
"path",
"path/filepath",
// "reflect", // unsafe.Sizeof must return size > 0 for pointer types
"reflect",
"regexp",
"regexp/syntax",
"runtime",
// "runtime",
"runtime/cgo",
"runtime/debug",
"runtime/pprof",
"sort",
// "strconv", // bug in switch case duplicate detection
// "strconv",
"strings",
"sync",
"sync/atomic",
// "syscall", c:\go\root\src\pkg\syscall\syscall_windows.go:35:16: cannot convert EINVAL (constant 536870951) to error
// "syscall",
"testing",
"testing/iotest",
"testing/quick",
"text/scanner",
// "text/scanner",
"text/tabwriter",
"text/template",
"text/template/parse",
// "time", // local const decls without initialization expressions
"time",
"unicode",
"unicode/utf16",
"unicode/utf8",

View File

@ -4,7 +4,6 @@
/*
Package html implements an HTML5-compliant tokenizer and parser.
INCOMPLETE.
Tokenization is done by creating a Tokenizer for an io.Reader r. It is the
caller's responsibility to ensure that r provides UTF-8 encoded HTML.
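A minimal tokenizer loop as described (a sketch; the import path matches this tree's exp/html, and upstream naming may differ):
package main

import (
	"fmt"
	"strings"

	"exp/html" // path as shipped in this tree
)

func main() {
	z := html.NewTokenizer(strings.NewReader("<p>Hello, <b>world</b></p>"))
	for {
		if z.Next() == html.ErrorToken {
			// io.EOF is the normal way out of the loop.
			return
		}
		t := z.Token()
		fmt.Printf("%v: %q\n", t.Type, t.Data)
	}
}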

View File

@ -382,15 +382,9 @@ func BenchmarkParser(b *testing.B) {
}
b.SetBytes(int64(len(buf)))
runtime.GC()
var ms runtime.MemStats
runtime.ReadMemStats(&ms)
mallocs := ms.Mallocs
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
Parse(bytes.NewBuffer(buf))
}
b.StopTimer()
runtime.ReadMemStats(&ms)
mallocs = ms.Mallocs - mallocs
b.Logf("%d iterations, %d mallocs per iteration\n", b.N, int(mallocs)/b.N)
}

View File

@ -14,7 +14,7 @@ import (
type writer interface {
io.Writer
WriteByte(byte) error
io.ByteWriter
WriteString(string) (int, error)
}

View File

@ -634,9 +634,7 @@ func benchmarkTokenizer(b *testing.B, level int) {
}
b.SetBytes(int64(len(buf)))
runtime.GC()
var ms runtime.MemStats
runtime.ReadMemStats(&ms)
mallocs := ms.Mallocs
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
z := NewTokenizer(bytes.NewBuffer(buf))
@ -674,10 +672,6 @@ func benchmarkTokenizer(b *testing.B, level int) {
}
}
}
b.StopTimer()
runtime.ReadMemStats(&ms)
mallocs = ms.Mallocs - mallocs
b.Logf("%d iterations, %d mallocs per iteration\n", b.N, int(mallocs)/b.N)
}
func BenchmarkRawLevelTokenizer(b *testing.B) { benchmarkTokenizer(b, rawLevel) }

View File

@ -98,24 +98,24 @@ func (b *Builder) Tailoring(locale string) *Tailoring {
// a value for each colelem that is a variable. (See the reference above.)
func (b *Builder) Add(runes []rune, colelems [][]int, variables []int) error {
str := string(runes)
elems := make([][]int, len(colelems))
elems := make([]rawCE, len(colelems))
for i, ce := range colelems {
elems[i] = append(elems[i], ce...)
if len(ce) == 0 {
elems[i] = append(elems[i], []int{0, 0, 0, 0}...)
break
}
elems[i] = makeRawCE(ce, 0)
if len(ce) == 1 {
elems[i] = append(elems[i], defaultSecondary)
elems[i].w[1] = defaultSecondary
}
if len(ce) <= 2 {
elems[i] = append(elems[i], defaultTertiary)
elems[i].w[2] = defaultTertiary
}
if len(ce) <= 3 {
elems[i] = append(elems[i], ce[0])
elems[i].w[3] = ce[0]
}
}
for i, ce := range elems {
p := ce.w[0]
isvar := false
for _, j := range variables {
if i == j {
@ -123,18 +123,18 @@ func (b *Builder) Add(runes []rune, colelems [][]int, variables []int) error {
}
}
if isvar {
if ce[0] >= b.minNonVar && b.minNonVar > 0 {
return fmt.Errorf("primary value %X of variable is larger than the smallest non-variable %X", ce[0], b.minNonVar)
if p >= b.minNonVar && b.minNonVar > 0 {
return fmt.Errorf("primary value %X of variable is larger than the smallest non-variable %X", p, b.minNonVar)
}
if ce[0] > b.varTop {
b.varTop = ce[0]
if p > b.varTop {
b.varTop = p
}
} else if ce[0] > 1 { // 1 is a special primary value reserved for FFFE
if ce[0] <= b.varTop {
return fmt.Errorf("primary value %X of non-variable is smaller than the highest variable %X", ce[0], b.varTop)
} else if p > 1 { // 1 is a special primary value reserved for FFFE
if p <= b.varTop {
return fmt.Errorf("primary value %X of non-variable is smaller than the highest variable %X", p, b.varTop)
}
if b.minNonVar == 0 || ce[0] < b.minNonVar {
b.minNonVar = ce[0]
if b.minNonVar == 0 || p < b.minNonVar {
b.minNonVar = p
}
}
}
@ -142,16 +142,42 @@ func (b *Builder) Add(runes []rune, colelems [][]int, variables []int) error {
if err != nil {
return err
}
cccs := []uint8{}
nfd := norm.NFD.String(str)
for i := range nfd {
cccs = append(cccs, norm.NFD.PropertiesString(nfd[i:]).CCC())
}
if len(cccs) < len(elems) {
if len(cccs) > 2 {
return fmt.Errorf("number of decomposed characters should be greater or equal to the number of collation elements for len(colelems) > 3 (%d < %d)", len(cccs), len(elems))
}
p := len(elems) - 1
for ; p > 0 && elems[p].w[0] == 0; p-- {
elems[p].ccc = cccs[len(cccs)-1]
}
for ; p >= 0; p-- {
elems[p].ccc = cccs[0]
}
} else {
for i := range elems {
elems[i].ccc = cccs[i]
}
}
// doNorm in collate.go assumes that the following conditions hold.
if len(elems) > 1 && len(cccs) > 1 && cccs[0] != 0 && cccs[0] != cccs[len(cccs)-1] {
return fmt.Errorf("incompatible CCC values for expansion %X (%d)", runes, cccs)
}
b.root.newEntry(str, elems)
return nil
}
func (t *Tailoring) setAnchor(anchor string) error {
anchor = norm.NFD.String(anchor)
anchor = norm.NFC.String(anchor)
a := t.index.find(anchor)
if a == nil {
a = t.index.newEntry(anchor, nil)
a.implicit = true
a.modified = true
for _, r := range []rune(anchor) {
e := t.index.find(string(r))
e.lock = true
@ -221,7 +247,7 @@ func (t *Tailoring) Insert(level collate.Level, str, extend string) error {
if t.anchor == nil {
return fmt.Errorf("%s:Insert: no anchor point set for tailoring of %s", t.id, str)
}
str = norm.NFD.String(str)
str = norm.NFC.String(str)
e := t.index.find(str)
if e == nil {
e = t.index.newEntry(str, nil)
@ -262,12 +288,13 @@ func (t *Tailoring) Insert(level collate.Level, str, extend string) error {
}
e.extend = norm.NFD.String(extend)
e.exclude = false
e.modified = true
e.elems = nil
t.anchor = e
return nil
}
func (o *ordering) getWeight(e *entry) [][]int {
func (o *ordering) getWeight(e *entry) []rawCE {
if len(e.elems) == 0 && e.logical == noAnchor {
if e.implicit {
for _, r := range e.runes {
@ -279,11 +306,10 @@ func (o *ordering) getWeight(e *entry) [][]int {
for ; a.elems == nil && !a.implicit; a = a.next {
count[a.level]++
}
e.elems = append([][]int(nil), make([]int, len(a.elems[0])))
copy(e.elems[0], a.elems[0])
e.elems = []rawCE{makeRawCE(a.elems[0].w, a.elems[0].ccc)}
for i := collate.Primary; i < collate.Quaternary; i++ {
if count[i] != 0 {
e.elems[0][i] -= count[i]
e.elems[0].w[i] -= count[i]
break
}
}
@ -315,11 +341,11 @@ func (o *ordering) verifyWeights(a, b *entry, level collate.Level) error {
return nil
}
for i := collate.Primary; i < level; i++ {
if a.elems[0][i] < b.elems[0][i] {
if a.elems[0].w[i] < b.elems[0].w[i] {
return nil
}
}
if a.elems[0][level] >= b.elems[0][level] {
if a.elems[0].w[level] >= b.elems[0].w[level] {
err := fmt.Errorf("%s:overflow: collation elements of %q (%X) overflows those of %q (%X) at level %d (%X >= %X)", o.id, a.str, a.runes, b.str, b.runes, level, a.elems, b.elems)
log.Println(err)
// TODO: return the error instead, or better, fix the conflicting entry by making room.
@ -339,6 +365,54 @@ func (b *Builder) errorID(locale string, e error) {
}
}
// patchNorm ensures that NFC and NFD counterparts are consistent.
func (o *ordering) patchNorm() {
// Insert the NFD counterparts, if necessary.
for _, e := range o.ordered {
nfd := norm.NFD.String(e.str)
if nfd != e.str {
if e0 := o.find(nfd); e0 != nil && !e0.modified {
e0.elems = e.elems
} else if e.modified && !equalCEArrays(o.genColElems(nfd), e.elems) {
e := o.newEntry(nfd, e.elems)
e.modified = true
}
}
}
// Update unchanged composed forms if one of their parts changed.
for _, e := range o.ordered {
nfd := norm.NFD.String(e.str)
if e.modified || nfd == e.str {
continue
}
if e0 := o.find(nfd); e0 != nil {
e.elems = e0.elems
} else {
e.elems = o.genColElems(nfd)
if norm.NFD.LastBoundary([]byte(nfd)) == 0 {
r := []rune(nfd)
head := string(r[0])
tail := ""
for i := 1; i < len(r); i++ {
s := norm.NFC.String(head + string(r[i]))
if e0 := o.find(s); e0 != nil && e0.modified {
head = s
} else {
tail += string(r[i])
}
}
e.elems = append(o.genColElems(head), o.genColElems(tail)...)
}
}
}
// Exclude entries for which the individual runes generate the same collation elements.
for _, e := range o.ordered {
if len(e.runes) > 1 && equalCEArrays(o.genColElems(e.str), e.elems) {
e.exclude = true
}
}
}
func (b *Builder) buildOrdering(o *ordering) {
for _, e := range o.ordered {
o.getWeight(e)
@ -346,6 +420,7 @@ func (b *Builder) buildOrdering(o *ordering) {
for _, e := range o.ordered {
o.addExtension(e)
}
o.patchNorm()
o.sort()
simplify(o)
b.processExpansions(o) // requires simplify
@ -392,11 +467,11 @@ func (b *Builder) Build() (*collate.Collator, error) {
if err != nil {
return nil, err
}
c := collate.Init(t)
if c == nil {
table := collate.Init(t)
if table == nil {
panic("generated table of incompatible type")
}
return c, nil
return collate.NewFromTable(table), nil
}
// Build builds a Collator for Tailoring t.
@ -436,20 +511,20 @@ func (b *Builder) Print(w io.Writer) (n int, err error) {
// reproducibleFromNFKD checks whether the given expansion could be generated
// from an NFKD expansion.
func reproducibleFromNFKD(e *entry, exp, nfkd [][]int) bool {
func reproducibleFromNFKD(e *entry, exp, nfkd []rawCE) bool {
// Length must be equal.
if len(exp) != len(nfkd) {
return false
}
for i, ce := range exp {
// Primary and secondary values should be equal.
if ce[0] != nfkd[i][0] || ce[1] != nfkd[i][1] {
if ce.w[0] != nfkd[i].w[0] || ce.w[1] != nfkd[i].w[1] {
return false
}
// Tertiary values should be equal to maxTertiary for third element onwards.
// TODO: there seem to be a lot of cases in CLDR (e.g. ㏭ in zh.xml) that can
// simply be dropped. Try this out by dropping the following code.
if i >= 2 && ce[2] != maxTertiary {
if i >= 2 && ce.w[2] != maxTertiary {
return false
}
if _, err := makeCE(ce); err != nil {
@ -469,22 +544,12 @@ func simplify(o *ordering) {
keep[e.runes[0]] = true
}
}
// Remove entries for which the runes normalize (using NFD) to identical values.
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
s := e.str
nfd := norm.NFD.String(s)
if len(e.runes) > 1 || keep[e.runes[0]] || nfd == s {
continue
}
if equalCEArrays(o.genColElems(nfd), e.elems) {
e.remove()
}
}
// Tag entries for which the runes NFKD decompose to identical values.
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
s := e.str
nfkd := norm.NFKD.String(s)
if e.decompose || len(e.runes) > 1 || len(e.elems) == 1 || keep[e.runes[0]] || nfkd == s {
nfd := norm.NFD.String(s)
if e.decompose || len(e.runes) > 1 || len(e.elems) == 1 || keep[e.runes[0]] || nfkd == nfd {
continue
}
if reproducibleFromNFKD(e, e.elems, o.genColElems(nfkd)) {
@ -589,18 +654,18 @@ func (b *Builder) processContractions(o *ordering) {
// Bucket sort entries in index order.
es := make([]*entry, len(l))
for _, e := range l {
var o, sn int
var p, sn int
if len(e.runes) > 1 {
str := []byte(string(e.runes[1:]))
o, sn = t.contractTries.lookup(handle, str)
p, sn = t.contractTries.lookup(handle, str)
if sn != len(str) {
log.Fatalf("processContractions: unexpected length for '%X'; len=%d; want %d", e.runes, sn, len(str))
log.Fatalf("%s: processContractions: unexpected length for '%X'; len=%d; want %d", o.id, e.runes, sn, len(str))
}
}
if es[o] != nil {
log.Fatalf("Multiple contractions for position %d for rune %U", o, e.runes[0])
if es[p] != nil {
log.Fatalf("%s: multiple contractions for position %d for rune %U", o.id, p, e.runes[0])
}
es[o] = e
es[p] = e
}
// Create collation elements for contractions.
elems := []uint32{}

View File

@ -7,48 +7,64 @@ package build
import "testing"
// cjk returns an implicit collation element for a CJK rune.
func cjk(r rune) [][]int {
func cjk(r rune) []rawCE {
// A CJK character C is represented in the DUCET as
// [.AAAA.0020.0002.C][.BBBB.0000.0000.C]
// Where AAAA is the most significant 15 bits plus a base value.
// Any base value will work for the test, so we pick the common value of FB40.
const base = 0xFB40
return [][]int{
{base + int(r>>15), defaultSecondary, defaultTertiary, int(r)},
{int(r&0x7FFF) | 0x8000, 0, 0, int(r)},
return []rawCE{
{w: []int{base + int(r>>15), defaultSecondary, defaultTertiary, int(r)}},
{w: []int{int(r&0x7FFF) | 0x8000, 0, 0, int(r)}},
}
}
func pCE(p int) [][]int {
return [][]int{{p, defaultSecondary, defaultTertiary, 0}}
func pCE(p int) []rawCE {
return mkCE([]int{p, defaultSecondary, defaultTertiary, 0}, 0)
}
func pqCE(p, q int) [][]int {
return [][]int{{p, defaultSecondary, defaultTertiary, q}}
func pqCE(p, q int) []rawCE {
return mkCE([]int{p, defaultSecondary, defaultTertiary, q}, 0)
}
func ptCE(p, t int) [][]int {
return [][]int{{p, defaultSecondary, t, 0}}
func ptCE(p, t int) []rawCE {
return mkCE([]int{p, defaultSecondary, t, 0}, 0)
}
func sCE(s int) [][]int {
return [][]int{{0, s, defaultTertiary, 0}}
func ptcCE(p, t int, ccc uint8) []rawCE {
return mkCE([]int{p, defaultSecondary, t, 0}, ccc)
}
func stCE(s, t int) [][]int {
return [][]int{{0, s, t, 0}}
func sCE(s int) []rawCE {
return mkCE([]int{0, s, defaultTertiary, 0}, 0)
}
func stCE(s, t int) []rawCE {
return mkCE([]int{0, s, t, 0}, 0)
}
func scCE(s int, ccc uint8) []rawCE {
return mkCE([]int{0, s, defaultTertiary, 0}, ccc)
}
func mkCE(w []int, ccc uint8) []rawCE {
return []rawCE{rawCE{w, ccc}}
}
// ducetElem is used to define test data that is used to generate a table.
type ducetElem struct {
str string
ces [][]int
ces []rawCE
}
func newBuilder(t *testing.T, ducet []ducetElem) *Builder {
b := NewBuilder()
for _, e := range ducet {
if err := b.Add([]rune(e.str), e.ces, nil); err != nil {
ces := [][]int{}
for _, ce := range e.ces {
ces = append(ces, ce.w)
}
if err := b.Add([]rune(e.str), ces, nil); err != nil {
t.Errorf(err.Error())
}
}
@ -58,7 +74,7 @@ func newBuilder(t *testing.T, ducet []ducetElem) *Builder {
}
type convertTest struct {
in, out [][]int
in, out []rawCE
err bool
}
@ -73,7 +89,10 @@ var convLargeTests = []convertTest{
func TestConvertLarge(t *testing.T) {
for i, tt := range convLargeTests {
e := &entry{elems: tt.in}
e := new(entry)
for _, ce := range tt.in {
e.elems = append(e.elems, makeRawCE(ce.w, ce.ccc))
}
elems, err := convertLargeWeights(e.elems)
if tt.err {
if err == nil {
@ -173,16 +192,18 @@ func TestSimplify(t *testing.T) {
}
var expandTest = []ducetElem{
{"\u00C0", append(ptCE(100, 8), sCE(30)...)},
{"\u00C8", append(ptCE(105, 8), sCE(30)...)},
{"\u00C9", append(ptCE(105, 8), sCE(30)...)}, // identical expansion
{"\u0300", append(scCE(29, 230), scCE(30, 230)...)},
{"\u00C0", append(ptCE(100, 8), scCE(30, 230)...)},
{"\u00C8", append(ptCE(105, 8), scCE(30, 230)...)},
{"\u00C9", append(ptCE(105, 8), scCE(30, 230)...)}, // identical expansion
{"\u05F2", append(ptCE(200, 4), ptCE(200, 4)[0], ptCE(200, 4)[0])},
{"\u01FF", append(ptCE(200, 4), ptcCE(201, 4, 0)[0], scCE(30, 230)[0])},
}
func TestExpand(t *testing.T) {
const (
totalExpansions = 3
totalElements = 2 + 2 + 3 + totalExpansions
totalExpansions = 5
totalElements = 2 + 2 + 2 + 3 + 3 + totalExpansions
)
b := newBuilder(t, expandTest)
o := &b.root

View File

@ -16,6 +16,17 @@ const (
maxTertiary = 0x1F
)
type rawCE struct {
w []int
ccc uint8
}
func makeRawCE(w []int, ccc uint8) rawCE {
ce := rawCE{w: make([]int, 4), ccc: ccc}
copy(ce.w, w)
return ce
}
// A collation element is represented as an uint32.
// In the typical case, a rune maps to a single collation element. If a rune
// can be the start of a contraction or expands into multiple collation elements,
@ -29,29 +40,36 @@ const (
// 01pppppp pppppppp ppppppp0 ssssssss
// - p* is primary collation value
// - s* is the secondary collation value
// or
// 00pppppp pppppppp ppppppps sssttttt, where
// - p* is primary collation value
// - s* offset of secondary from default value.
// - t* is the tertiary collation value
// 100ttttt cccccccc pppppppp pppppppp
// - t* is the tertiary collation value
// - c* is the canonical combining class
// - p* is the primary collation value
// Collation elements with a secondary value are of the form
// 10000000 0000ssss ssssssss tttttttt, where
// - 16 BMP implicit -> weight
// - 8 bit s
// - default tertiary
// 1010cccc ccccssss ssssssss tttttttt, where
// - c* is the canonical combining class
// - s* is the secondary collation value
// - t* is the tertiary collation value
const (
maxPrimaryBits = 21
maxPrimaryCompactBits = 16
maxSecondaryBits = 12
maxSecondaryCompactBits = 8
maxCCCBits = 8
maxSecondaryDiffBits = 4
maxTertiaryBits = 8
maxTertiaryCompactBits = 5
isSecondary = 0x80000000
isPrimary = 0x40000000
isPrimary = 0x40000000
isPrimaryCCC = 0x80000000
isSecondary = 0xA0000000
)
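
For a concrete reading of the new primary-with-CCC form, here is a hand-worked encoding; the input values match a ceTests entry further below in this diff, so this illustrates existing behavior rather than adding code:

	// rawCE{w: []int{0x123, defaultSecondary, 8}, ccc: 0xFF} packs as:
	//   tertiary  8     << 24 -> 0x08000000
	//   ccc       0xFF  << 16 -> 0x00FF0000
	//   primary   0x123       -> 0x00000123
	//   | isPrimaryCCC        -> 0x80000000
	// giving the element 0x88FF0123.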
func makeCE(weights []int) (uint32, error) {
func makeCE(rce rawCE) (uint32, error) {
weights := rce.w
if w := weights[0]; w >= 1<<maxPrimaryBits || w < 0 {
return 0, fmt.Errorf("makeCE: primary weight out of bounds: %x >= %x", w, 1<<maxPrimaryBits)
}
@ -63,14 +81,25 @@ func makeCE(weights []int) (uint32, error) {
}
ce := uint32(0)
if weights[0] != 0 {
if weights[2] == defaultTertiary {
if rce.ccc != 0 {
if weights[0] >= 1<<maxPrimaryCompactBits {
return 0, fmt.Errorf("makeCE: primary weight with non-zero CCC out of bounds: %x >= %x", weights[0], 1<<maxPrimaryCompactBits)
}
if weights[1] != defaultSecondary {
return 0, fmt.Errorf("makeCE: cannot combine non-default secondary value (%x) with non-zero CCC (%x)", weights[1], rce.ccc)
}
ce = uint32(weights[2] << (maxPrimaryCompactBits + maxCCCBits))
ce |= uint32(rce.ccc) << maxPrimaryCompactBits
ce |= uint32(weights[0])
ce |= isPrimaryCCC
} else if weights[2] == defaultTertiary {
if weights[1] >= 1<<maxSecondaryCompactBits {
return 0, fmt.Errorf("makeCE: secondary weight with non-zero primary out of bounds: %x >= %x", weights[1], 1<<maxSecondaryCompactBits)
}
ce = uint32(weights[0]<<(maxSecondaryCompactBits+1) + weights[1])
ce |= isPrimary
} else {
d := weights[1] - defaultSecondary + 4
d := weights[1] - defaultSecondary + maxSecondaryDiffBits
if d >= 1<<maxSecondaryDiffBits || d < 0 {
return 0, fmt.Errorf("makeCE: secondary weight diff out of bounds: %x < 0 || %x > %x", d, d, 1<<maxSecondaryDiffBits)
}
@ -82,6 +111,7 @@ func makeCE(weights []int) (uint32, error) {
}
} else {
ce = uint32(weights[1]<<maxTertiaryBits + weights[2])
ce += uint32(rce.ccc) << (maxSecondaryBits + maxTertiaryBits)
ce |= isSecondary
}
return ce, nil
@ -207,7 +237,7 @@ func implicitPrimary(r rune) int {
// We will rewrite these characters to a single CE.
// We assume the CJK values start at 0x8000.
// See http://unicode.org/reports/tr10/#Implicit_Weights
func convertLargeWeights(elems [][]int) (res [][]int, err error) {
func convertLargeWeights(elems []rawCE) (res []rawCE, err error) {
const (
cjkPrimaryStart = 0xFB40
rarePrimaryStart = 0xFB80
@ -219,7 +249,7 @@ func convertLargeWeights(elems [][]int) (res [][]int, err error) {
shiftBits = 15
)
for i := 0; i < len(elems); i++ {
ce := elems[i]
ce := elems[i].w
p := ce[0]
if p < cjkPrimaryStart {
continue
@ -233,10 +263,10 @@ func convertLargeWeights(elems [][]int) (res [][]int, err error) {
if i+1 >= len(elems) {
return elems, fmt.Errorf("second part of double primary weight missing: %v", elems)
}
if elems[i+1][0]&lowBitsFlag == 0 {
if elems[i+1].w[0]&lowBitsFlag == 0 {
return elems, fmt.Errorf("malformed second part of double primary weight: %v", elems)
}
np := ((p & highBitsMask) << shiftBits) + elems[i+1][0]&lowBitsMask
np := ((p & highBitsMask) << shiftBits) + elems[i+1].w[0]&lowBitsMask
switch {
case p < rarePrimaryStart:
np += commonUnifiedOffset
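
As a worked illustration of this rewrite (the mask values highBitsMask = 0x3F and lowBitsMask = 0x7FFF are assumed here from the upstream sources; only shiftBits = 15 is visible in this hunk): for U+4E00 the implicit double weights are hi = 0xFB40 + (0x4E00 >> 15) = 0xFB40 and lo = 0x8000 | (0x4E00 & 0x7FFF) = 0xCE00, so np = ((0xFB40 & highBitsMask) << shiftBits) + (0xCE00 & lowBitsMask) = 0x4E00, plus commonUnifiedOffset, collapsing the two elements into one.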
@ -257,26 +287,25 @@ func convertLargeWeights(elems [][]int) (res [][]int, err error) {
// nextWeight computes the first possible collation weights following elems
// for the given level.
func nextWeight(level collate.Level, elems [][]int) [][]int {
func nextWeight(level collate.Level, elems []rawCE) []rawCE {
if level == collate.Identity {
next := make([][]int, len(elems))
next := make([]rawCE, len(elems))
copy(next, elems)
return next
}
next := [][]int{make([]int, len(elems[0]))}
copy(next[0], elems[0])
next[0][level]++
next := []rawCE{makeRawCE(elems[0].w, elems[0].ccc)}
next[0].w[level]++
if level < collate.Secondary {
next[0][collate.Secondary] = defaultSecondary
next[0].w[collate.Secondary] = defaultSecondary
}
if level < collate.Tertiary {
next[0][collate.Tertiary] = defaultTertiary
next[0].w[collate.Tertiary] = defaultTertiary
}
// Filter entries that cannot influence ordering.
for _, ce := range elems[1:] {
skip := true
for i := collate.Primary; i < level; i++ {
skip = skip && ce[i] == 0
skip = skip && ce.w[i] == 0
}
if !skip {
next = append(next, ce)
@ -285,18 +314,18 @@ func nextWeight(level collate.Level, elems [][]int) [][]int {
return next
}
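
A minimal sketch of what nextWeight produces (weights chosen arbitrarily): incrementing at the primary level resets the lower levels to their defaults.

	in := []rawCE{{w: []int{100, 20, 5, 0}}}
	out := nextWeight(collate.Primary, in)
	// out[0].w is now [101, 0x20, 0x02, 0], i.e.
	// [101, defaultSecondary, defaultTertiary, 0].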
func nextVal(elems [][]int, i int, level collate.Level) (index, value int) {
for ; i < len(elems) && elems[i][level] == 0; i++ {
func nextVal(elems []rawCE, i int, level collate.Level) (index, value int) {
for ; i < len(elems) && elems[i].w[level] == 0; i++ {
}
if i < len(elems) {
return i, elems[i][level]
return i, elems[i].w[level]
}
return i, 0
}
// compareWeights returns -1 if a < b, 1 if a > b, or 0 otherwise.
// It also returns the collation level at which the difference is found.
func compareWeights(a, b [][]int) (result int, level collate.Level) {
func compareWeights(a, b []rawCE) (result int, level collate.Level) {
for level := collate.Primary; level < collate.Identity; level++ {
var va, vb int
for ia, ib := 0, 0; ia < len(a) || ib < len(b); ia, ib = ia+1, ib+1 {
@ -314,19 +343,16 @@ func compareWeights(a, b [][]int) (result int, level collate.Level) {
return 0, collate.Identity
}
func equalCE(a, b []int) bool {
if len(a) != len(b) {
return false
}
func equalCE(a, b rawCE) bool {
for i := 0; i < 3; i++ {
if b[i] != a[i] {
if b.w[i] != a.w[i] {
return false
}
}
return true
}
func equalCEArrays(a, b [][]int) bool {
func equalCEArrays(a, b []rawCE) bool {
if len(a) != len(b) {
return false
}

View File

@ -16,7 +16,7 @@ type ceTest struct {
}
func normalCE(in []int) (ce uint32, err error) {
return makeCE(in)
return makeCE(rawCE{w: in[:3], ccc: uint8(in[3])})
}
func expandCE(in []int) (ce uint32, err error) {
@ -32,17 +32,20 @@ func decompCE(in []int) (ce uint32, err error) {
}
var ceTests = []ceTest{
{normalCE, []int{0, 0, 0}, 0x80000000},
{normalCE, []int{0, 0x28, 3}, 0x80002803},
{normalCE, []int{100, defaultSecondary, 3}, 0x0000C883},
{normalCE, []int{0, 0, 0, 0}, 0xA0000000},
{normalCE, []int{0, 0x28, 3, 0}, 0xA0002803},
{normalCE, []int{0, 0x28, 3, 0xFF}, 0xAFF02803},
{normalCE, []int{100, defaultSecondary, 3, 0}, 0x0000C883},
// non-ignorable primary with non-default secondary
{normalCE, []int{100, 0x28, defaultTertiary}, 0x4000C828},
{normalCE, []int{100, defaultSecondary + 8, 3}, 0x0000C983},
{normalCE, []int{100, 0, 3}, 0xFFFF}, // non-ignorable primary with non-supported secondary
{normalCE, []int{100, 1, 3}, 0xFFFF},
{normalCE, []int{1 << maxPrimaryBits, defaultSecondary, 0}, 0xFFFF},
{normalCE, []int{0, 1 << maxSecondaryBits, 0}, 0xFFFF},
{normalCE, []int{100, defaultSecondary, 1 << maxTertiaryBits}, 0xFFFF},
{normalCE, []int{100, 0x28, defaultTertiary, 0}, 0x4000C828},
{normalCE, []int{100, defaultSecondary + 8, 3, 0}, 0x0000C983},
{normalCE, []int{100, 0, 3, 0}, 0xFFFF}, // non-ignorable primary with non-supported secondary
{normalCE, []int{100, 1, 3, 0}, 0xFFFF},
{normalCE, []int{1 << maxPrimaryBits, defaultSecondary, 0, 0}, 0xFFFF},
{normalCE, []int{0, 1 << maxSecondaryBits, 0, 0}, 0xFFFF},
{normalCE, []int{100, defaultSecondary, 1 << maxTertiaryBits, 0}, 0xFFFF},
{normalCE, []int{0x123, defaultSecondary, 8, 0xFF}, 0x88FF0123},
{normalCE, []int{0x123, defaultSecondary + 1, 8, 0xFF}, 0xFFFF},
{contractCE, []int{0, 0, 0}, 0xC0000000},
{contractCE, []int{1, 1, 1}, 0xC0010011},
@ -85,6 +88,14 @@ func TestColElem(t *testing.T) {
}
}
func mkRawCES(in [][]int) []rawCE {
out := []rawCE{}
for _, w := range in {
out = append(out, rawCE{w: w})
}
return out
}
type weightsTest struct {
a, b [][]int
level collate.Level
@ -119,8 +130,8 @@ var extra = [][]int{{200, 32, 8, 0}, {0, 32, 8, 0}, {0, 0, 8, 0}, {0, 0, 0, 0}}
func TestNextWeight(t *testing.T) {
for i, tt := range nextWeightTests {
test := func(l collate.Level, tt weightsTest, a, gold [][]int) {
res := nextWeight(tt.level, a)
if !equalCEArrays(gold, res) {
res := nextWeight(tt.level, mkRawCES(a))
if !equalCEArrays(mkRawCES(gold), res) {
t.Errorf("%d:%d: expected weights %d; found %d", i, l, gold, res)
}
}
@ -189,7 +200,7 @@ var compareTests = []weightsTest{
func TestCompareWeights(t *testing.T) {
for i, tt := range compareTests {
test := func(tt weightsTest, a, b [][]int) {
res, level := compareWeights(a, b)
res, level := compareWeights(mkRawCES(a), mkRawCES(b))
if res != tt.result {
t.Errorf("%d: expected comparisson result %d; found %d", i, tt.result, res)
}

View File

@ -6,6 +6,7 @@ package build
import (
"exp/locale/collate"
"exp/norm"
"fmt"
"log"
"sort"
@ -28,7 +29,7 @@ const (
type entry struct {
str string // same as string(runes)
runes []rune
elems [][]int // the collation elements
elems []rawCE // the collation elements
extend string // weights of extend to be appended to elems
before bool // weights relative to next instead of previous.
lock bool // entry is used in extension and can no longer be moved.
@ -41,6 +42,7 @@ type entry struct {
decompose bool // can use NFKD decomposition to generate elems
exclude bool // do not include in table
implicit bool // derived, is not included in the list
modified bool // entry was modified in tailoring
logical logicalAnchor
expansionIndex int // used to store index into expansion table
@ -162,10 +164,10 @@ func (e *entry) encode() (ce uint32, err error) {
}
switch {
case e.decompose:
t1 := e.elems[0][2]
t1 := e.elems[0].w[2]
t2 := 0
if len(e.elems) > 1 {
t2 = e.elems[1][2]
t2 = e.elems[1].w[2]
}
ce, err = makeDecompose(t1, t2)
case e.contractionStarter():
@ -231,7 +233,7 @@ func (o *ordering) insert(e *entry) {
// newEntry creates a new entry for the given info and inserts it into
// the index.
func (o *ordering) newEntry(s string, ces [][]int) *entry {
func (o *ordering) newEntry(s string, ces []rawCE) *entry {
e := &entry{
runes: []rune(s),
elems: ces,
@ -249,14 +251,29 @@ func (o *ordering) find(str string) *entry {
if e == nil {
r := []rune(str)
if len(r) == 1 {
e = o.newEntry(string(r[0]), [][]int{
{
implicitPrimary(r[0]),
defaultSecondary,
defaultTertiary,
int(r[0]),
},
})
const (
firstHangul = 0xAC00
lastHangul = 0xD7A3
)
if r[0] >= firstHangul && r[0] <= lastHangul {
ce := []rawCE{}
nfd := norm.NFD.String(str)
for _, r := range nfd {
ce = append(ce, o.find(string(r)).elems...)
}
e = o.newEntry(nfd, ce)
} else {
e = o.newEntry(string(r[0]), []rawCE{
{w: []int{
implicitPrimary(r[0]),
defaultSecondary,
defaultTertiary,
int(r[0]),
},
},
})
e.modified = true
}
e.exclude = true // do not index implicits
}
}
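
To make the Hangul branch above concrete: U+AC00 (the syllable GA) has the NFD form U+1100 U+1161, so its entry is created under that decomposed string and its elements are the concatenation of the elements found for the two Jamo. This is a description of the code above, not an additional change.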
@ -275,7 +292,7 @@ func makeRootOrdering() ordering {
}
insert := func(typ logicalAnchor, s string, ce []int) {
e := &entry{
elems: [][]int{ce},
elems: []rawCE{{w: ce}},
str: s,
exclude: true,
logical: typ,
@ -362,10 +379,14 @@ func (o *ordering) sort() {
// genColElems generates a collation element array from the runes in str. This
// assumes that all collation elements have already been added to the Builder.
func (o *ordering) genColElems(str string) [][]int {
elems := [][]int{}
func (o *ordering) genColElems(str string) []rawCE {
elems := []rawCE{}
for _, r := range []rune(str) {
elems = append(elems, o.find(string(r)).elems...)
for _, ce := range o.find(string(r)).elems {
if ce.w[0] != 0 || ce.w[1] != 0 || ce.w[2] != 0 {
elems = append(elems, ce)
}
}
}
return elems
}

View File

@ -20,7 +20,7 @@ type entryTest struct {
// entries plus a leading and trailing anchor.
func makeList(n int) []*entry {
es := make([]*entry, n+2)
weights := [][]int{{100, 20, 5, 0}}
weights := []rawCE{{w: []int{100, 20, 5, 0}}}
for i := range es {
runes := []rune{rune(i)}
es[i] = &entry{
@ -176,8 +176,8 @@ type entryLessTest struct {
}
var (
w1 = [][]int{{100, 20, 5, 5}}
w2 = [][]int{{101, 20, 5, 5}}
w1 = []rawCE{{w: []int{100, 20, 5, 5}}}
w2 = []rawCE{{w: []int{101, 20, 5, 5}}}
)
var entryLessTests = []entryLessTest{

View File

@ -69,30 +69,14 @@ func (t *table) fprint(w io.Writer, name string) (n, size int, err error) {
}
size += sz
}
p := func(f string, a ...interface{}) {
nn, e := fmt.Fprintf(w, f, a...)
update(nn, 0, e)
}
// Write main table.
size += int(reflect.TypeOf(*t).Size())
p("var %sTable = table{\n", name)
update(t.index.printStruct(w, t.root, name))
p(",\n")
p("%sExpandElem[:],\n", name)
update(t.contractTries.printStruct(w, name))
p(",\n")
p("%sContractElem[:],\n", name)
p("%d,\n", t.maxContractLen)
p("0x%X,\n", t.variableTop)
p("}\n\n")
// Write arrays needed for the structure.
update(printColElems(w, t.expandElem, name+"ExpandElem"))
update(printColElems(w, t.contractElem, name+"ContractElem"))
update(t.index.printArrays(w, name))
update(t.contractTries.printArray(w, name))
p("// Total size of %sTable is %d bytes\n", name, size)
nn, e := fmt.Fprintf(w, "// Total size of %sTable is %d bytes\n", name, size)
update(nn, 0, e)
return
}

View File

@ -8,27 +8,43 @@ import (
"unicode"
)
// Level identifies the collation comparison level.
// The primary level corresponds to the basic sorting of text.
// The secondary level corresponds to accents and related linguistic elements.
// The tertiary level corresponds to casing and related concepts.
// The quaternary level is derived from the other levels by the
// various algorithms for handling variable elements.
type Level int
const (
Primary Level = iota
Secondary
Tertiary
Quaternary
Identity
)
const (
defaultSecondary = 0x20
defaultTertiary = 0x2
maxTertiary = 0x1F
maxQuaternary = 0x1FFFFF // 21 bits.
MaxQuaternary = 0x1FFFFF // 21 bits.
)
// colElem is a representation of a collation element.
// In the typical case, a rune maps to a single collation element. If a rune
// can be the start of a contraction or expands into multiple collation elements,
// then the colElem that is associated with a rune will have a special form to represent
// such m to n mappings. Such special colElems have a value >= 0x80000000.
type colElem uint32
// Elem is a representation of a collation element. This API provides ways to encode
// and decode Elems. Implementations of collation tables may use values greater
// than or equal to PrivateUse for their own purposes. However, these should never be
// returned by AppendNext.
type Elem uint32
const (
maxCE colElem = 0x80FFFFFF
minContract = 0xC0000000
maxContract = 0xDFFFFFFF
minExpand = 0xE0000000
maxExpand = 0xEFFFFFFF
minDecomp = 0xF0000000
maxCE Elem = 0xAFFFFFFF
PrivateUse = minContract
minContract = 0xC0000000
maxContract = 0xDFFFFFFF
minExpand = 0xE0000000
maxExpand = 0xEFFFFFFF
minDecomp = 0xF0000000
)
type ceType int
@ -40,7 +56,7 @@ const (
ceDecompose // rune expands using NFKC decomposition
)
func (ce colElem) ctype() ceType {
func (ce Elem) ctype() ceType {
if ce <= maxCE {
return ceNormal
}
@ -62,69 +78,115 @@ func (ce colElem) ctype() ceType {
// 01pppppp pppppppp ppppppp0 ssssssss
// - p* is primary collation value
// - s* is the secondary collation value
// or
// 00pppppp pppppppp ppppppps sssttttt, where
// - p* is primary collation value
// - s* offset of secondary from default value.
// - t* is the tertiary collation value
// 100ttttt cccccccc pppppppp pppppppp
// - t* is the tertiary collation value
// - c* is the canonical combining class
// - p* is the primary collation value
// Collation elements with a secondary value are of the form
// 10000000 0000ssss ssssssss tttttttt, where
// - 16 BMP implicit -> weight
// - 8 bit s
// - default tertiary
// 1010cccc ccccssss ssssssss tttttttt, where
// - c* is the canonical combining class
// - s* is the secondary collation value
// - t* is the tertiary collation value
// 11qqqqqq qqqqqqqq qqqqqqq0 00000000
// - q* quaternary value
const (
ceTypeMask = 0xC0000000
ceTypeMaskExt = 0xE0000000
ceType1 = 0x40000000
ceType2 = 0x00000000
ceType3 = 0x80000000
ceType3or4 = 0x80000000
ceType4 = 0xA0000000
ceTypeQ = 0xC0000000
ceIgnore = ceType3
ceIgnore = ceType4
firstNonPrimary = 0x80000000
lastSpecialPrimary = 0xA0000000
secondaryMask = 0x80000000
hasTertiaryMask = 0x40000000
primaryValueMask = 0x3FFFFE00
primaryShift = 9
compactPrimaryBits = 16
compactSecondaryShift = 5
minCompactSecondary = defaultSecondary - 4
)
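
Tying the bit layouts above to these masks, a hand-decoded example (the element value appears in the encoding tests earlier in this diff):

	// Elem 0x4000C828 has type bits 01, so it is a compact primary form:
	//   primary   = (0x4000C828 & primaryValueMask) >> primaryShift = 0xC800 >> 9 = 100
	//   secondary = uint8(0x4000C828) = 0x28
	//   tertiary  = defaultTertiary (implicit in this form)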
func makeImplicitCE(primary int) colElem {
return ceType1 | colElem(primary<<primaryShift) | defaultSecondary
func makeImplicitCE(primary int) Elem {
return ceType1 | Elem(primary<<primaryShift) | defaultSecondary
}
func makeQuaternary(primary int) colElem {
return ceTypeQ | colElem(primary<<primaryShift)
// MakeElem returns an Elem for the given values. It will return an error
// if the given combination of values is invalid.
func MakeElem(primary, secondary, tertiary int, ccc uint8) (Elem, error) {
// TODO: implement
return 0, nil
}
func (ce colElem) primary() int {
// MakeQuaternary returns an Elem with the given quaternary value.
func MakeQuaternary(v int) Elem {
return ceTypeQ | Elem(v<<primaryShift)
}
// Mask sets weights for any level smaller than l to 0.
// The resulting Elem can be used to test for equality with
// other Elems to which the same mask has been applied.
func (ce Elem) Mask(l Level) uint32 {
return 0
}
// CCC returns the canonical combining class associated with the underlying character,
// if applicable, or 0 otherwise.
func (ce Elem) CCC() uint8 {
if ce&ceType3or4 != 0 {
if ce&ceType4 == ceType3or4 {
return uint8(ce >> 16)
}
return uint8(ce >> 20)
}
return 0
}
// Primary returns the primary collation weight for ce.
func (ce Elem) Primary() int {
if ce >= firstNonPrimary {
return 0
if ce > lastSpecialPrimary {
return 0
}
return int(uint16(ce))
}
return int(ce&primaryValueMask) >> primaryShift
}
func (ce colElem) secondary() int {
// Secondary returns the secondary collation weight for ce.
func (ce Elem) Secondary() int {
switch ce & ceTypeMask {
case ceType1:
return int(uint8(ce))
case ceType2:
return minCompactSecondary + int((ce>>compactSecondaryShift)&0xF)
case ceType3:
return int(uint16(ce >> 8))
case ceType3or4:
if ce < ceType4 {
return defaultSecondary
}
return int(ce>>8) & 0xFFF
case ceTypeQ:
return 0
}
panic("should not reach here")
}
func (ce colElem) tertiary() uint8 {
// Tertiary returns the tertiary collation weight for ce.
func (ce Elem) Tertiary() uint8 {
if ce&hasTertiaryMask == 0 {
if ce&ceType3 == 0 {
if ce&ceType3or4 == 0 {
return uint8(ce & 0x1F)
}
return uint8(ce)
if ce&ceType4 == ceType4 {
return uint8(ce)
}
return uint8(ce>>24) & 0x1F // type 2
} else if ce&ceTypeMask == ceType1 {
return defaultTertiary
}
@ -132,27 +194,47 @@ func (ce colElem) tertiary() uint8 {
return 0
}
func (ce colElem) updateTertiary(t uint8) colElem {
func (ce Elem) updateTertiary(t uint8) Elem {
if ce&ceTypeMask == ceType1 {
// convert to type 4
nce := ce & primaryValueMask
nce |= colElem(uint8(ce)-minCompactSecondary) << compactSecondaryShift
nce |= Elem(uint8(ce)-minCompactSecondary) << compactSecondaryShift
ce = nce
} else if ce&ceTypeMaskExt == ceType3or4 {
ce &= ^Elem(maxTertiary << 24)
return ce | (Elem(t) << 24)
} else {
ce &= ^colElem(maxTertiary)
// type 2 or 4
ce &= ^Elem(maxTertiary)
}
return ce | colElem(t)
return ce | Elem(t)
}
// quaternary returns the quaternary value if explicitly specified,
// 0 if ce == ceIgnore, or maxQuaternary otherwise.
// Quaternary returns the quaternary value if explicitly specified,
// 0 if ce == ceIgnore, or MaxQuaternary otherwise.
// Quaternary values are used only for shifted variants.
func (ce colElem) quaternary() int {
func (ce Elem) Quaternary() int {
if ce&ceTypeMask == ceTypeQ {
return int(ce&primaryValueMask) >> primaryShift
} else if ce == ceIgnore {
return 0
}
return maxQuaternary
return MaxQuaternary
}
// Weight returns the collation weight for the given level.
func (ce Elem) Weight(l Level) int {
switch l {
case Primary:
return ce.Primary()
case Secondary:
return ce.Secondary()
case Tertiary:
return int(ce.Tertiary())
case Quaternary:
return ce.Quaternary()
}
return 0 // return 0 (ignore) for undefined levels.
}
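
A short usage sketch of the accessors above; the element value is taken from the encoding tests earlier in this diff (a secondary form carrying a CCC):

	ce := Elem(0xAFF02803)
	_ = ce.Weight(Primary)   // 0
	_ = ce.Weight(Secondary) // 0x28
	_ = ce.Weight(Tertiary)  // 3
	_ = ce.CCC()             // 0xFF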
// For contractions, collation elements are of the form
@ -167,7 +249,7 @@ const (
maxContractOffsetBits = 13
)
func splitContractIndex(ce colElem) (index, n, offset int) {
func splitContractIndex(ce Elem) (index, n, offset int) {
n = int(ce & (1<<maxNBits - 1))
ce >>= maxNBits
index = int(ce & (1<<maxTrieIndexBits - 1))
@ -176,23 +258,23 @@ func splitContractIndex(ce colElem) (index, n, offset int) {
return
}
// For expansions, colElems are of the form 11100000 00000000 bbbbbbbb bbbbbbbb,
// For expansions, Elems are of the form 11100000 00000000 bbbbbbbb bbbbbbbb,
// where b* is the index into the expansion sequence table.
const maxExpandIndexBits = 16
func splitExpandIndex(ce colElem) (index int) {
func splitExpandIndex(ce Elem) (index int) {
return int(uint16(ce))
}
// Some runes can be expanded using NFKD decomposition. Instead of storing the full
// sequence of collation elements, we decompose the rune and lookup the collation
// elements for each rune in the decomposition and modify the tertiary weights.
// The colElem, in this case, is of the form 11110000 00000000 wwwwwwww vvvvvvvv, where
// The Elem, in this case, is of the form 11110000 00000000 wwwwwwww vvvvvvvv, where
// - v* is the replacement tertiary weight for the first rune,
// - w* is the replacement tertiary weight for the second rune,
// Tertiary weights of subsequent runes should be replaced with maxTertiary.
// See http://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details.
func splitDecompose(ce colElem) (t1, t2 uint8) {
func splitDecompose(ce Elem) (t1, t2 uint8) {
return uint8(ce), uint8(ce >> 8)
}
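
For example, a decomposition element of the form 0xF0000000 + t2<<8 + t1 splits back into its two replacement tertiary weights:

	t1, t2 := splitDecompose(Elem(0xF0000A05))
	// t1 == 0x05 (first rune), t2 == 0x0A (second rune)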

View File

@ -10,12 +10,12 @@ import (
)
type ceTest struct {
f func(inout []int) (colElem, ceType)
f func(inout []int) (Elem, ceType)
arg []int
}
// The make* funcs are simplified versions of the functions in build/colelem.go
func makeCE(weights []int) colElem {
func makeCE(weights []int) Elem {
const (
maxPrimaryBits = 21
maxSecondaryBits = 12
@ -23,72 +23,81 @@ func makeCE(weights []int) colElem {
maxSecondaryDiffBits = 4
maxTertiaryBits = 8
maxTertiaryCompactBits = 5
isSecondary = 0x80000000
isPrimary = 0x40000000
isPrimaryCCC = 0x80000000
isSecondary = 0xA0000000
)
var ce colElem
var ce Elem
ccc := weights[3]
if weights[0] != 0 {
if weights[2] == defaultTertiary {
ce = colElem(weights[0]<<(maxSecondaryCompactBits+1) + weights[1])
if ccc != 0 {
ce = Elem(weights[2] << 24)
ce |= Elem(ccc) << 16
ce |= Elem(weights[0])
ce |= isPrimaryCCC
} else if weights[2] == defaultTertiary {
ce = Elem(weights[0]<<(maxSecondaryCompactBits+1) + weights[1])
ce |= isPrimary
} else {
d := weights[1] - defaultSecondary + 4
ce = colElem(weights[0]<<maxSecondaryDiffBits + d)
ce = ce<<maxTertiaryCompactBits + colElem(weights[2])
ce = Elem(weights[0]<<maxSecondaryDiffBits + d)
ce = ce<<maxTertiaryCompactBits + Elem(weights[2])
}
} else {
ce = colElem(weights[1]<<maxTertiaryBits + weights[2])
ce = Elem(weights[1]<<maxTertiaryBits + weights[2])
ce += Elem(ccc) << 20
ce |= isSecondary
}
return ce
}
func makeContractIndex(index, n, offset int) colElem {
func makeContractIndex(index, n, offset int) Elem {
const (
contractID = 0xC0000000
maxNBits = 4
maxTrieIndexBits = 12
maxContractOffsetBits = 13
)
ce := colElem(contractID)
ce += colElem(offset << (maxNBits + maxTrieIndexBits))
ce += colElem(index << maxNBits)
ce += colElem(n)
ce := Elem(contractID)
ce += Elem(offset << (maxNBits + maxTrieIndexBits))
ce += Elem(index << maxNBits)
ce += Elem(n)
return ce
}
func makeExpandIndex(index int) colElem {
func makeExpandIndex(index int) Elem {
const expandID = 0xE0000000
return expandID + colElem(index)
return expandID + Elem(index)
}
func makeDecompose(t1, t2 int) colElem {
func makeDecompose(t1, t2 int) Elem {
const decompID = 0xF0000000
return colElem(t2<<8+t1) + decompID
return Elem(t2<<8+t1) + decompID
}
func normalCE(inout []int) (ce colElem, t ceType) {
w := makeCE(inout)
inout[0] = w.primary()
inout[1] = w.secondary()
inout[2] = int(w.tertiary())
func normalCE(inout []int) (ce Elem, t ceType) {
ce = makeCE(inout)
inout[0] = ce.Primary()
inout[1] = ce.Secondary()
inout[2] = int(ce.Tertiary())
inout[3] = int(ce.CCC())
return ce, ceNormal
}
func expandCE(inout []int) (ce colElem, t ceType) {
func expandCE(inout []int) (ce Elem, t ceType) {
ce = makeExpandIndex(inout[0])
inout[0] = splitExpandIndex(ce)
return ce, ceExpansionIndex
}
func contractCE(inout []int) (ce colElem, t ceType) {
func contractCE(inout []int) (ce Elem, t ceType) {
ce = makeContractIndex(inout[0], inout[1], inout[2])
i, n, o := splitContractIndex(ce)
inout[0], inout[1], inout[2] = i, n, o
return ce, ceContractionIndex
}
func decompCE(inout []int) (ce colElem, t ceType) {
func decompCE(inout []int) (ce Elem, t ceType) {
ce = makeDecompose(inout[0], inout[1])
t1, t2 := splitDecompose(ce)
inout[0], inout[1] = int(t1), int(t2)
@ -102,9 +111,13 @@ const (
)
var ceTests = []ceTest{
{normalCE, []int{0, 0, 0}},
{normalCE, []int{0, 30, 3}},
{normalCE, []int{100, defaultSecondary, 3}},
{normalCE, []int{0, 0, 0, 0}},
{normalCE, []int{0, 30, 3, 0}},
{normalCE, []int{0, 30, 3, 0xFF}},
{normalCE, []int{100, defaultSecondary, defaultTertiary, 0}},
{normalCE, []int{100, defaultSecondary, defaultTertiary, 0xFF}},
{normalCE, []int{100, defaultSecondary, 3, 0}},
{normalCE, []int{0x123, defaultSecondary, 8, 0xFF}},
{contractCE, []int{0, 0, 0}},
{contractCE, []int{1, 1, 1}},
@ -127,11 +140,11 @@ func TestColElem(t *testing.T) {
copy(inout, tt.arg)
ce, typ := tt.f(inout)
if ce.ctype() != typ {
t.Errorf("%d: type is %d; want %d", i, ce.ctype(), typ)
t.Errorf("%d: type is %d; want %d (ColElem: %X)", i, ce.ctype(), typ, ce)
}
for j, a := range tt.arg {
if inout[j] != a {
t.Errorf("%d: argument %d is %X; want %X", i, j, inout[j], a)
t.Errorf("%d: argument %d is %X; want %X (ColElem: %X)", i, j, inout[j], a, ce)
}
}
}
@ -170,13 +183,14 @@ func TestImplicit(t *testing.T) {
func TestUpdateTertiary(t *testing.T) {
tests := []struct {
in, out colElem
in, out Elem
t uint8
}{
{0x4000FE20, 0x0000FE8A, 0x0A},
{0x4000FE21, 0x0000FEAA, 0x0A},
{0x0000FE8B, 0x0000FE83, 0x03},
{0x8000CC02, 0x8000CC1B, 0x1B},
{0x82FF0188, 0x9BFF0188, 0x1B},
{0xAFF0CC02, 0xAFF0CC1B, 0x1B},
}
for i, tt := range tests {
if out := tt.in.updateTertiary(tt.t); out != tt.out {
@ -184,3 +198,77 @@ func TestUpdateTertiary(t *testing.T) {
}
}
}
func TestDoNorm(t *testing.T) {
const div = -1 // The insertion point of the next block.
tests := []struct {
in, out []int
}{
{in: []int{4, div, 3},
out: []int{3, 4},
},
{in: []int{4, div, 3, 3, 3},
out: []int{3, 3, 3, 4},
},
{in: []int{0, 4, div, 3},
out: []int{0, 3, 4},
},
{in: []int{0, 0, 4, 5, div, 3, 3},
out: []int{0, 0, 3, 3, 4, 5},
},
{in: []int{0, 0, 1, 4, 5, div, 3, 3},
out: []int{0, 0, 1, 3, 3, 4, 5},
},
{in: []int{0, 0, 1, 4, 5, div, 4, 4},
out: []int{0, 0, 1, 4, 4, 4, 5},
},
}
for j, tt := range tests {
i := iter{}
var w, p, s int
for k, cc := range tt.in {
if cc == 0 {
s = 0
}
if cc == div {
w = 100
p = k
i.pStarter = s
continue
}
i.ce = append(i.ce, makeCE([]int{w, 20, 2, cc}))
}
i.prevCCC = i.ce[p-1].CCC()
i.doNorm(p, i.ce[p].CCC())
if len(i.ce) != len(tt.out) {
t.Errorf("%d: length was %d; want %d", j, len(i.ce), len(tt.out))
}
prevCCC := uint8(0)
for k, ce := range i.ce {
if int(ce.CCC()) != tt.out[k] {
t.Errorf("%d:%d: unexpected CCC. Was %d; want %d", j, k, ce.CCC(), tt.out[k])
}
if k > 0 && ce.CCC() == prevCCC && i.ce[k-1].Primary() > ce.Primary() {
t.Errorf("%d:%d: normalization crossed across CCC boundary.", j, k)
}
}
}
// test cutoff of large sequence of combining characters.
result := []uint8{8, 8, 8, 5, 5}
for o := -2; o <= 2; o++ {
i := iter{pStarter: 2, prevCCC: 8}
n := maxCombiningCharacters + 1 + o
for j := 1; j < n+i.pStarter; j++ {
i.ce = append(i.ce, makeCE([]int{100, 20, 2, 8}))
}
p := len(i.ce)
i.ce = append(i.ce, makeCE([]int{0, 20, 2, 5}))
i.doNorm(p, 5)
if i.prevCCC != result[o+2] {
t.Errorf("%d: i.prevCCC was %d; want %d", n, i.prevCCC, result[o+2])
}
if result[o+2] == 5 && i.pStarter != p {
t.Errorf("%d: i.pStarter was %d; want %d", n, i.pStarter, p)
}
}
}

View File

@ -12,22 +12,6 @@ import (
"exp/norm"
)
// Level identifies the collation comparison level.
// The primary level corresponds to the basic sorting of text.
// The secondary level corresponds to accents and related linguistic elements.
// The tertiary level corresponds to casing and related concepts.
// The quaternary level is derived from the other levels by the
// various algorithms for handling variable elements.
type Level int
const (
Primary Level = iota
Secondary
Tertiary
Quaternary
Identity
)
// AlternateHandling identifies the various ways in which variables are handled.
// A rune with a primary weight lower than the variable top is considered a
// variable.
@ -55,6 +39,12 @@ const (
// Collator provides functionality for comparing strings for a given
// collation order.
type Collator struct {
// TODO: hide most of these options. Low-level options are set through the locale
// identifier (as defined by LDML) while high-level options are set through SetOptions.
// Using high-level options allows us to be more flexible (such as not ignoring
// Thai vowels for IgnoreDiacriticals) and more user-friendly (such as allowing
// diacritical marks to be ignored but not case without having to fiddle with levels).
// Strength sets the maximum level to use in comparison.
Strength Level
@ -80,13 +70,39 @@ type Collator struct {
// at a primary level with its numeric value. For example, "A-21" < "A-123".
Numeric bool
// The largest primary value that is considered to be variable.
variableTop uint32
f norm.Form
t *table
t Weigher
sorter sorter
_iter [2]iter
}
// An Option is used to change the behavior of Collator. They override the
// settings passed through the locale identifier.
type Option int
const (
Numeric Option = 1 << iota // Sort numbers numerically ("2" < "12").
IgnoreCase // Case-insensitive search.
IgnoreDiacritics // Ignore diacritical marks. ("o" == "ö").
IgnoreWidth // Ignore full versus normal width.
UpperFirst // Sort upper case before lower case.
LowerFirst // Sort lower case before upper case.
Force // Force ordering if strings are equivalent but not equal.
Loose = IgnoreDiacritics | IgnoreWidth | IgnoreCase
)
// SetOptions accepts Options or-ed together. All previous calls to SetOptions are ignored.
func (c *Collator) SetOptions(o Option) {
// TODO: implement
}
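
A usage sketch of the intended API (note that SetOptions is still a stub in this commit, so the call currently has no effect):

	c := New("en")
	c.SetOptions(IgnoreCase | IgnoreWidth) // case- and width-insensitive ordering
	// or, for search-style comparisons that also ignore diacritics:
	c.SetOptions(Loose)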
func (c *Collator) iter(i int) *iter {
// TODO: evaluate performance for making the second iterator optional.
return &c._iter[i]
@ -101,18 +117,20 @@ func Locales() []string {
// New returns a new Collator initialized for the given locale.
func New(loc string) *Collator {
// TODO: handle locale selection according to spec.
t := &mainTable
var t tableIndex
if loc != "" {
if idx, ok := locales[loc]; ok {
t = mainTable.indexedTable(idx)
t = idx
} else {
t = locales["root"]
}
}
return newCollator(t)
return NewFromTable(Init(t))
}
func newCollator(t *table) *Collator {
func NewFromTable(t Weigher) *Collator {
c := &Collator{
Strength: Quaternary,
Strength: Tertiary,
f: norm.NFD,
t: t,
}
@ -121,12 +139,6 @@ func newCollator(t *table) *Collator {
return c
}
// SetVariableTop sets all runes with primary strength less than the primary
// strength of r to be variable and thus affected by alternate handling.
func (c *Collator) SetVariableTop(r rune) {
// TODO: implement
}
// Buffer holds keys generated by Key and KeyString.
type Buffer struct {
buf [4096]byte
@ -149,8 +161,8 @@ func (b *Buffer) Reset() {
func (c *Collator) Compare(a, b []byte) int {
// TODO: skip identical prefixes once we have a fast way to detect if a rune is
// part of a contraction. This would lead to roughly a 10% speedup for the colcmp regtest.
c.iter(0).setInput(c, a)
c.iter(1).setInput(c, b)
c.iter(0).setInput(a)
c.iter(1).setInput(b)
if res := c.compare(); res != 0 {
return res
}
@ -165,8 +177,8 @@ func (c *Collator) Compare(a, b []byte) int {
func (c *Collator) CompareString(a, b string) int {
// TODO: skip identical prefixes once we have a fast way to detect if a rune is
// part of a contraction. This would lead to roughly a 10% speedup for the colcmp regtest.
c.iter(0).setInputString(c, a)
c.iter(1).setInputString(c, b)
c.iter(0).setInputString(a)
c.iter(1).setInputString(b)
if res := c.compare(); res != 0 {
return res
}
@ -234,11 +246,6 @@ func (c *Collator) compare() int {
return 0
}
func (c *Collator) Prefix(s, prefix []byte) int {
// iterate over s, track bytes consumed.
return 0
}
// Key returns the collation key for str.
// Passing the buffer buf may avoid memory allocations.
// The returned slice will point to an allocation in Buffer and will remain
@ -259,114 +266,184 @@ func (c *Collator) KeyFromString(buf *Buffer, str string) []byte {
return c.key(buf, c.getColElemsString(str))
}
func (c *Collator) key(buf *Buffer, w []colElem) []byte {
processWeights(c.Alternate, c.t.variableTop, w)
func (c *Collator) key(buf *Buffer, w []Elem) []byte {
processWeights(c.Alternate, c.variableTop, w)
kn := len(buf.key)
c.keyFromElems(buf, w)
return buf.key[kn:]
}
func (c *Collator) getColElems(str []byte) []colElem {
func (c *Collator) getColElems(str []byte) []Elem {
i := c.iter(0)
i.setInput(c, str)
for !i.done() {
i.next()
i.setInput(str)
for i.next() {
}
return i.ce
}
func (c *Collator) getColElemsString(str string) []colElem {
func (c *Collator) getColElemsString(str string) []Elem {
i := c.iter(0)
i.setInputString(c, str)
for !i.done() {
i.next()
i.setInputString(str)
for i.next() {
}
return i.ce
}
type iter struct {
src norm.Iter
norm [1024]byte
buf []byte
p int
minBufSize int
bytes []byte
str string
wa [512]colElem
ce []colElem
wa [512]Elem
ce []Elem
pce int
nce int // nce <= len(nce)
t *table
_done, eof bool
prevCCC uint8
pStarter int
t Weigher
}
func (i *iter) init(c *Collator) {
i.t = c.t
i.minBufSize = c.t.maxContractLen
i.ce = i.wa[:0]
i.buf = i.norm[:0]
}
func (i *iter) reset() {
i.ce = i.ce[:0]
i.buf = i.buf[:0]
i.p = 0
i.eof = i.src.Done()
i._done = i.eof
i.nce = 0
i.prevCCC = 0
i.pStarter = 0
}
func (i *iter) setInput(c *Collator, s []byte) *iter {
i.src.SetInput(c.f, s)
func (i *iter) setInput(s []byte) *iter {
i.bytes = s
i.str = ""
i.reset()
return i
}
func (i *iter) setInputString(c *Collator, s string) *iter {
i.src.SetInputString(c.f, s)
func (i *iter) setInputString(s string) *iter {
i.str = s
i.bytes = nil
i.reset()
return i
}
func (i *iter) done() bool {
return i._done
return len(i.str) == 0 && len(i.bytes) == 0
}
func (i *iter) next() {
if !i.eof && len(i.buf)-i.p < i.minBufSize {
// replenish buffer
n := copy(i.buf, i.buf[i.p:])
n += i.src.Next(i.buf[n:cap(i.buf)])
i.buf = i.buf[:n]
i.p = 0
i.eof = i.src.Done()
func (i *iter) tail(n int) {
if i.bytes == nil {
i.str = i.str[n:]
} else {
i.bytes = i.bytes[n:]
}
if i.p == len(i.buf) {
i._done = true
}
func (i *iter) appendNext() int {
var sz int
if i.bytes == nil {
i.ce, sz = i.t.AppendNextString(i.ce, i.str)
} else {
i.ce, sz = i.t.AppendNext(i.ce, i.bytes)
}
return sz
}
// next appends Elems to the internal array until it adds an element with CCC=0.
// In the majority of cases, an Elem with a primary value > 0 will have
// a CCC of 0. The CCC values of collation elements are also used to detect if the
// input string was not normalized and to adjust the result accordingly.
func (i *iter) next() bool {
for !i.done() {
p0 := len(i.ce)
sz := i.appendNext()
i.tail(sz)
last := len(i.ce) - 1
if ccc := i.ce[last].CCC(); ccc == 0 {
i.nce = len(i.ce)
i.pStarter = last
i.prevCCC = 0
return true
} else if p0 < last && i.ce[p0].CCC() == 0 {
// set i.nce to cover only the part of i.ce for which ccc == 0 and
// use the rest in the next call to next.
for p0++; p0 < last && i.ce[p0].CCC() == 0; p0++ {
}
i.nce = p0
i.pStarter = p0 - 1
i.prevCCC = ccc
return true
} else if ccc < i.prevCCC {
i.doNorm(p0, ccc) // should be rare for most common cases
} else {
i.prevCCC = ccc
}
}
if len(i.ce) != i.nce {
i.nce = len(i.ce)
return true
}
return false
}
// nextPlain is the same as next, but does not "normalize" the collation
// elements.
// TODO: remove this function. Using this instead of next does not seem
// to improve performance in any significant way. We retain it for now
// for evaluation purposes.
func (i *iter) nextPlain() bool {
if i.done() {
return false
}
sz := i.appendNext()
i.tail(sz)
i.nce = len(i.ce)
return true
}
const maxCombiningCharacters = 30
// doNorm reorders the collation elements in i.ce.
// It assumes that blocks of collation elements added with appendNext
// either start and end with the same CCC or start with CCC == 0.
// This allows for a single insertion point for the entire block.
// The correctness of this assumption is verified in builder.go.
func (i *iter) doNorm(p int, ccc uint8) {
if p-i.pStarter > maxCombiningCharacters {
i.prevCCC = i.ce[len(i.ce)-1].CCC()
i.pStarter = len(i.ce) - 1
return
}
sz := 0
i.ce, sz = i.t.appendNext(i.ce, i.buf[i.p:])
i.p += sz
n := len(i.ce)
k := p
for p--; p > i.pStarter && ccc < i.ce[p-1].CCC(); p-- {
}
i.ce = append(i.ce, i.ce[p:k]...)
copy(i.ce[p:], i.ce[k:])
i.ce = i.ce[:n]
}
func (i *iter) nextPrimary() int {
for {
for ; i.pce < len(i.ce); i.pce++ {
if v := i.ce[i.pce].primary(); v != 0 {
for ; i.pce < i.nce; i.pce++ {
if v := i.ce[i.pce].Primary(); v != 0 {
i.pce++
return v
}
}
if i.done() {
if !i.next() {
return 0
}
i.next()
}
panic("should not reach here")
}
func (i *iter) nextSecondary() int {
for ; i.pce < len(i.ce); i.pce++ {
if v := i.ce[i.pce].secondary(); v != 0 {
if v := i.ce[i.pce].Secondary(); v != 0 {
i.pce++
return v
}
@ -376,7 +453,7 @@ func (i *iter) nextSecondary() int {
func (i *iter) prevSecondary() int {
for ; i.pce < len(i.ce); i.pce++ {
if v := i.ce[len(i.ce)-i.pce-1].secondary(); v != 0 {
if v := i.ce[len(i.ce)-i.pce-1].Secondary(); v != 0 {
i.pce++
return v
}
@ -386,7 +463,7 @@ func (i *iter) prevSecondary() int {
func (i *iter) nextTertiary() int {
for ; i.pce < len(i.ce); i.pce++ {
if v := i.ce[i.pce].tertiary(); v != 0 {
if v := i.ce[i.pce].Tertiary(); v != 0 {
i.pce++
return int(v)
}
@ -396,7 +473,7 @@ func (i *iter) nextTertiary() int {
func (i *iter) nextQuaternary() int {
for ; i.pce < len(i.ce); i.pce++ {
if v := i.ce[i.pce].quaternary(); v != 0 {
if v := i.ce[i.pce].Quaternary(); v != 0 {
i.pce++
return v
}
@ -416,9 +493,9 @@ func appendPrimary(key []byte, p int) []byte {
// keyFromElems converts the weights ws to a compact sequence of bytes.
// The result will be appended to the byte buffer in buf.
func (c *Collator) keyFromElems(buf *Buffer, ws []colElem) {
func (c *Collator) keyFromElems(buf *Buffer, ws []Elem) {
for _, v := range ws {
if w := v.primary(); w > 0 {
if w := v.Primary(); w > 0 {
buf.key = appendPrimary(buf.key, w)
}
}
@ -427,13 +504,13 @@ func (c *Collator) keyFromElems(buf *Buffer, ws []colElem) {
// TODO: we can use one 0 if we can guarantee that all non-zero weights are > 0xFF.
if !c.Backwards {
for _, v := range ws {
if w := v.secondary(); w > 0 {
if w := v.Secondary(); w > 0 {
buf.key = append(buf.key, uint8(w>>8), uint8(w))
}
}
} else {
for i := len(ws) - 1; i >= 0; i-- {
if w := ws[i].secondary(); w > 0 {
if w := ws[i].Secondary(); w > 0 {
buf.key = append(buf.key, uint8(w>>8), uint8(w))
}
}
@ -444,12 +521,12 @@ func (c *Collator) keyFromElems(buf *Buffer, ws []colElem) {
if Tertiary <= c.Strength || c.CaseLevel {
buf.key = append(buf.key, 0, 0)
for _, v := range ws {
if w := v.tertiary(); w > 0 {
if w := v.Tertiary(); w > 0 {
buf.key = append(buf.key, uint8(w))
}
}
// Derive the quaternary weights from the options and other levels.
// Note that we represent maxQuaternary as 0xFF. The first byte of the
// Note that we represent MaxQuaternary as 0xFF. The first byte of the
// representation of a primary weight is always smaller than 0xFF,
// so using this single byte value will compare correctly.
if Quaternary <= c.Strength && c.Alternate >= AltShifted {
@ -457,7 +534,7 @@ func (c *Collator) keyFromElems(buf *Buffer, ws []colElem) {
lastNonFFFF := len(buf.key)
buf.key = append(buf.key, 0)
for _, v := range ws {
if w := v.quaternary(); w == maxQuaternary {
if w := v.Quaternary(); w == MaxQuaternary {
buf.key = append(buf.key, 0xFF)
} else if w > 0 {
buf.key = appendPrimary(buf.key, w)
@ -468,7 +545,7 @@ func (c *Collator) keyFromElems(buf *Buffer, ws []colElem) {
} else {
buf.key = append(buf.key, 0)
for _, v := range ws {
if w := v.quaternary(); w == maxQuaternary {
if w := v.Quaternary(); w == MaxQuaternary {
buf.key = append(buf.key, 0xFF)
} else if w > 0 {
buf.key = appendPrimary(buf.key, w)
@ -479,14 +556,14 @@ func (c *Collator) keyFromElems(buf *Buffer, ws []colElem) {
}
}
func processWeights(vw AlternateHandling, top uint32, wa []colElem) {
func processWeights(vw AlternateHandling, top uint32, wa []Elem) {
ignore := false
vtop := int(top)
switch vw {
case AltShifted, AltShiftTrimmed:
for i := range wa {
if p := wa[i].primary(); p <= vtop && p != 0 {
wa[i] = makeQuaternary(p)
if p := wa[i].Primary(); p <= vtop && p != 0 {
wa[i] = MakeQuaternary(p)
ignore = true
} else if p == 0 {
if ignore {
@ -498,7 +575,7 @@ func processWeights(vw AlternateHandling, top uint32, wa []colElem) {
}
case AltBlanked:
for i := range wa {
if p := wa[i].primary(); p <= vtop && (ignore || p != 0) {
if p := wa[i].Primary(); p <= vtop && (ignore || p != 0) {
wa[i] = ceIgnore
ignore = true
} else {

View File

@ -0,0 +1,28 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package collate
// A Weigher can be used as a source for Collator and Searcher.
type Weigher interface {
// Start finds the start of the segment that includes position p.
Start(p int, b []byte) int
// StartString finds the start of the segment that includes position p.
StartString(p int, s string) int
// AppendNext appends Elems to buf corresponding to the longest match
// of a single character or contraction from the start of s.
// It returns the new buf and the number of bytes consumed.
AppendNext(buf []Elem, s []byte) (ce []Elem, n int)
// AppendNextString appends Elems to buf corresponding to the longest match
// of a single character or contraction from the start of s.
// It returns the new buf and the number of bytes consumed.
AppendNextString(buf []Elem, s string) (ce []Elem, n int)
// Domain returns a slice of all single characters and contractions for which
// collation elements are defined in this table.
Domain() []string
}
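
For illustration, a minimal sketch of driving a Weigher directly (allElems is a hypothetical helper, not part of this commit); this mirrors what the Collator's iterator does internally:

	func allElems(w Weigher, s string) []Elem {
		var ce []Elem
		for len(s) > 0 {
			var n int
			ce, n = w.AppendNextString(ce, s)
			if n == 0 { // defensive: stop on malformed or incomplete input
				break
			}
			s = s[n:]
		}
		return ce
	}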

View File

@ -27,8 +27,21 @@ type ctScanner struct {
done bool
}
type ctScannerString struct {
states contractTrieSet
s string
n int
index int
pindex int
done bool
}
func (t contractTrieSet) scanner(index, n int, b []byte) ctScanner {
return ctScanner{states: t[index:], s: b, n: n}
return ctScanner{s: b, states: t[index:], n: n}
}
func (t contractTrieSet) scannerString(index, n int, str string) ctScannerString {
return ctScannerString{s: str, states: t[index:], n: n}
}
// result returns the offset i and bytes consumed p so far. If no suffix
@ -37,6 +50,10 @@ func (s *ctScanner) result() (i, p int) {
return s.index, s.pindex
}
func (s *ctScannerString) result() (i, p int) {
return s.index, s.pindex
}
const (
final = 0
noIndex = 0xFF
@ -84,3 +101,45 @@ func (s *ctScanner) scan(p int) int {
}
return pr
}
// scan is a verbatim copy of ctScanner.scan.
func (s *ctScannerString) scan(p int) int {
pr := p // the p at the rune start
str := s.s
states, n := s.states, s.n
for i := 0; i < n && p < len(str); {
e := states[i]
c := str[p]
// TODO: a significant number of contractions are of a form that
// cannot match discontiguous UTF-8 in a normalized string. We could let
// a negative value of e.n mean that we can set s.done = true and avoid
// the need for additional matches.
if c >= e.l {
if e.l == c {
p++
if e.i != noIndex {
s.index = int(e.i)
s.pindex = p
}
if e.n != final {
i, states, n = 0, states[int(e.h)+n:], int(e.n)
if p >= len(str) || utf8.RuneStart(str[p]) {
s.states, s.n, pr = states, n, p
}
} else {
s.done = true
return p
}
continue
} else if e.n == final && c <= e.h {
p++
s.done = true
s.index = int(c-e.l) + int(e.i)
s.pindex = p
return p
}
}
i++
}
return pr
}

View File

@ -4,9 +4,8 @@
package collate
// Init is used by type Builder in exp/locale/collate/build/
// to create Collator instances. It is for internal use only.
func Init(data interface{}) *Collator {
// Init is for internal use only.
func Init(data interface{}) Weigher {
init, ok := data.(tableInitializer)
if !ok {
return nil
@ -14,15 +13,15 @@ func Init(data interface{}) *Collator {
t := &table{}
loff, voff := init.FirstBlockOffsets()
t.index.index = init.TrieIndex()
t.index.index0 = t.index.index[blockSize*loff:]
t.index.index0 = t.index.index[blockSize*int(loff):]
t.index.values = init.TrieValues()
t.index.values0 = t.index.values[blockSize*voff:]
t.index.values0 = t.index.values[blockSize*int(voff):]
t.expandElem = init.ExpandElems()
t.contractTries = init.ContractTries()
t.contractElem = init.ContractElems()
t.maxContractLen = init.MaxContractLen()
t.variableTop = init.VariableTop()
return newCollator(t)
return t
}
type tableInitializer interface {

View File

@ -25,43 +25,43 @@ func W(ce ...int) Weights {
if len(ce) > 3 {
w.Quaternary = ce[3]
} else if w.Tertiary != 0 {
w.Quaternary = maxQuaternary
w.Quaternary = MaxQuaternary
}
return w
}
func (w Weights) String() string {
return fmt.Sprintf("[%d.%d.%d.%d]", w.Primary, w.Secondary, w.Tertiary, w.Quaternary)
return fmt.Sprintf("[%X.%X.%X.%X]", w.Primary, w.Secondary, w.Tertiary, w.Quaternary)
}
type Table struct {
t *table
t Weigher
}
func GetTable(c *Collator) *Table {
return &Table{c.t}
}
func convertToWeights(ws []colElem) []Weights {
func convertToWeights(ws []Elem) []Weights {
out := make([]Weights, len(ws))
for i, w := range ws {
out[i] = Weights{int(w.primary()), int(w.secondary()), int(w.tertiary()), int(w.quaternary())}
out[i] = Weights{int(w.Primary()), int(w.Secondary()), int(w.Tertiary()), int(w.Quaternary())}
}
return out
}
func convertFromWeights(ws []Weights) []colElem {
out := make([]colElem, len(ws))
func convertFromWeights(ws []Weights) []Elem {
out := make([]Elem, len(ws))
for i, w := range ws {
out[i] = makeCE([]int{w.Primary, w.Secondary, w.Tertiary})
out[i] = makeCE([]int{w.Primary, w.Secondary, w.Tertiary, 0})
if out[i] == ceIgnore && w.Quaternary > 0 {
out[i] = makeQuaternary(w.Quaternary)
out[i] = MakeQuaternary(w.Quaternary)
}
}
return out
}
func (t *Table) AppendNext(s []byte) ([]Weights, int) {
w, n := t.t.appendNext(nil, s)
w, n := t.t.AppendNext(nil, s)
return convertToWeights(w), n
}
@ -69,7 +69,7 @@ func SetTop(c *Collator, top int) {
if c.t == nil {
c.t = &table{}
}
c.t.variableTop = uint32(top)
c.variableTop = uint32(top)
}
func GetColElems(c *Collator, str []byte) []Weights {

View File

@ -674,7 +674,7 @@ func testCollator(c *collate.Collator) {
for _, str := range testInput.values() {
k0 := c0.KeyFromString(&buf, str)
k := c.KeyFromString(&buf, str)
if bytes.Compare(k0, k) != 0 {
if !bytes.Equal(k0, k) {
failOnError(fmt.Errorf("test:%U: keys differ (%x vs %x)", []rune(str), k0, k))
}
buf.Reset()

View File

@ -0,0 +1,90 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package collate
import (
"bytes"
"sort"
)
const (
maxSortBuffer = 40960
maxSortEntries = 4096
)
type swapper interface {
Swap(i, j int)
}
type sorter struct {
buf *Buffer
keys [][]byte
src swapper
}
func (s *sorter) init(n int) {
if s.buf == nil {
s.buf = &Buffer{}
s.buf.init()
}
if cap(s.keys) < n {
s.keys = make([][]byte, n)
}
s.keys = s.keys[0:n]
}
func (s *sorter) clean() {
if len(s.buf.key) > maxSortBuffer {
s.buf.key = s.buf.buf[:0]
}
if len(s.keys) > maxSortEntries {
s.keys = nil
}
}
func (s *sorter) sort(src swapper) {
s.src = src
sort.Sort(s)
}
func (s sorter) Len() int {
return len(s.keys)
}
func (s sorter) Less(i, j int) bool {
return bytes.Compare(s.keys[i], s.keys[j]) == -1
}
func (s sorter) Swap(i, j int) {
s.keys[i], s.keys[j] = s.keys[j], s.keys[i]
s.src.Swap(i, j)
}
// A Lister can be sorted by Collator's Sort method.
type Lister interface {
Len() int
Swap(i, j int)
// Bytes returns the bytes of the text at index i.
Bytes(i int) []byte
}
// Sort uses sort.Sort to sort the strings represented by x using the rules of c.
func (c *Collator) Sort(x Lister) {
n := x.Len()
c.sorter.init(n)
for i := 0; i < n; i++ {
c.sorter.keys[i] = c.Key(c.sorter.buf, x.Bytes(i))
}
c.sorter.sort(x)
}
// Strings sorts x using the rules of c.
func (c *Collator) Strings(x []string) {
c.sorter.init(len(x))
for i, s := range x {
c.sorter.keys[i] = c.KeyFromString(c.sorter.buf, s)
}
c.sorter.sort(sort.StringSlice(x))
}

View File

@ -0,0 +1,52 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package collate_test
import (
"exp/locale/collate"
"fmt"
"testing"
)
func ExampleCollator_Strings() {
c := collate.New("root")
strings := []string{
"ad",
"äb",
"ac",
}
c.Strings(strings)
fmt.Println(strings)
// Output: [äb ac ad]
}
type sorter []string
func (s sorter) Len() int {
return len(s)
}
func (s sorter) Swap(i, j int) {
s[j], s[i] = s[i], s[j]
}
func (s sorter) Bytes(i int) []byte {
return []byte(s[i])
}
func TestSort(t *testing.T) {
c := collate.New("en")
strings := []string{
"bcd",
"abc",
"ddd",
}
c.Sort(sorter(strings))
res := fmt.Sprint(strings)
want := "[abc bcd ddd]"
if res != want {
t.Errorf("found %s; want %s", res, want)
}
}

View File

@ -37,18 +37,96 @@ func (t *table) indexedTable(idx tableIndex) *table {
return &nt
}
func (t *table) AppendNext(w []Elem, b []byte) (res []Elem, n int) {
return t.appendNext(w, source{bytes: b})
}
func (t *table) AppendNextString(w []Elem, s string) (res []Elem, n int) {
return t.appendNext(w, source{str: s})
}
func (t *table) Start(p int, b []byte) int {
// TODO: implement
panic("not implemented")
}
func (t *table) StartString(p int, s string) int {
// TODO: implement
panic("not implemented")
}
func (t *table) Domain() []string {
// TODO: implement
panic("not implemented")
}
type source struct {
str string
bytes []byte
}
func (src *source) lookup(t *table) (ce Elem, sz int) {
if src.bytes == nil {
return t.index.lookupString(src.str)
}
return t.index.lookup(src.bytes)
}
func (src *source) tail(sz int) {
if src.bytes == nil {
src.str = src.str[sz:]
} else {
src.bytes = src.bytes[sz:]
}
}
func (src *source) nfd(buf []byte, end int) []byte {
if src.bytes == nil {
return norm.NFD.AppendString(buf[:0], src.str[:end])
}
return norm.NFD.Append(buf[:0], src.bytes[:end]...)
}
func (src *source) rune() (r rune, sz int) {
if src.bytes == nil {
return utf8.DecodeRuneInString(src.str)
}
return utf8.DecodeRune(src.bytes)
}
func (src *source) properties(f norm.Form) norm.Properties {
if src.bytes == nil {
return f.PropertiesString(src.str)
}
return f.Properties(src.bytes)
}
// appendNext appends the weights corresponding to the next rune or
// contraction in s. If a contraction is matched to a discontinuous
// sequence of runes, the weights for the interstitial runes are
// appended as well. It returns a new slice that includes the appended
// weights and the number of bytes consumed from s.
func (t *table) appendNext(w []colElem, s []byte) ([]colElem, int) {
v, sz := t.index.lookup(s)
ce := colElem(v)
func (t *table) appendNext(w []Elem, src source) (res []Elem, n int) {
ce, sz := src.lookup(t)
tp := ce.ctype()
if tp == ceNormal {
if ce == 0 {
r, _ := utf8.DecodeRune(s)
r, _ := src.rune()
const (
hangulSize = 3
firstHangul = 0xAC00
lastHangul = 0xD7A3
)
if r >= firstHangul && r <= lastHangul {
// TODO: performance can be considerably improved here.
n = sz
var buf [16]byte // Used for decomposing Hangul.
for b := src.nfd(buf[:0], hangulSize); len(b) > 0; b = b[sz:] {
ce, sz = t.index.lookup(b)
w = append(w, ce)
}
return w, n
}
ce = makeImplicitCE(implicitPrimary(r))
}
w = append(w, ce)
@ -56,15 +134,20 @@ func (t *table) appendNext(w []colElem, s []byte) ([]colElem, int) {
w = t.appendExpansion(w, ce)
} else if tp == ceContractionIndex {
n := 0
w, n = t.matchContraction(w, ce, s[sz:])
src.tail(sz)
if src.bytes == nil {
w, n = t.matchContractionString(w, ce, src.str)
} else {
w, n = t.matchContraction(w, ce, src.bytes)
}
sz += n
} else if tp == ceDecompose {
// Decompose using NFCK and replace tertiary weights.
// Decompose using NFKD and replace tertiary weights.
t1, t2 := splitDecompose(ce)
i := len(w)
nfkd := norm.NFKD.Properties(s).Decomposition()
nfkd := src.properties(norm.NFKD).Decomposition()
for p := 0; len(nfkd) > 0; nfkd = nfkd[p:] {
w, p = t.appendNext(w, nfkd)
w, p = t.appendNext(w, source{bytes: nfkd})
}
w[i] = w[i].updateTertiary(t1)
if i++; i < len(w) {
@ -77,17 +160,17 @@ func (t *table) appendNext(w []colElem, s []byte) ([]colElem, int) {
return w, sz
}
func (t *table) appendExpansion(w []colElem, ce colElem) []colElem {
func (t *table) appendExpansion(w []Elem, ce Elem) []Elem {
i := splitExpandIndex(ce)
n := int(t.expandElem[i])
i++
for _, ce := range t.expandElem[i : i+n] {
w = append(w, colElem(ce))
w = append(w, Elem(ce))
}
return w
}
func (t *table) matchContraction(w []colElem, ce colElem, suffix []byte) ([]colElem, int) {
func (t *table) matchContraction(w []Elem, ce Elem, suffix []byte) ([]Elem, int) {
index, n, offset := splitContractIndex(ce)
scan := t.contractTries.scanner(index, n, suffix)
@ -99,16 +182,17 @@ func (t *table) matchContraction(w []colElem, ce colElem, suffix []byte) ([]colE
// By now we should have filtered most cases.
p0 := p
bufn := 0
rune := norm.NFC.Properties(suffix[p:])
rune := norm.NFD.Properties(suffix[p:])
p += rune.Size()
if prevCC := rune.TrailCCC(); prevCC != 0 {
if rune.LeadCCC() != 0 {
prevCC := rune.TrailCCC()
// A gap may only occur in the last normalization segment.
// This also ensures that len(scan.s) < norm.MaxSegmentSize.
if end := norm.NFC.FirstBoundary(suffix[p:]); end != -1 {
if end := norm.NFD.FirstBoundary(suffix[p:]); end != -1 {
scan.s = suffix[:p+end]
}
for p < len(suffix) && !scan.done && suffix[p] >= utf8.RuneSelf {
rune = norm.NFC.Properties(suffix[p:])
rune = norm.NFD.Properties(suffix[p:])
if ccc := rune.LeadCCC(); ccc == 0 || prevCC >= ccc {
break
}
@ -128,7 +212,7 @@ func (t *table) matchContraction(w []colElem, ce colElem, suffix []byte) ([]colE
}
// Append weights for the matched contraction, which may be an expansion.
i, n := scan.result()
ce = colElem(t.contractElem[i+offset])
ce = Elem(t.contractElem[i+offset])
if ce.ctype() == ceNormal {
w = append(w, ce)
} else {
@ -136,7 +220,98 @@ func (t *table) matchContraction(w []colElem, ce colElem, suffix []byte) ([]colE
}
// Append weights for the runes in the segment not part of the contraction.
for b, p := buf[:bufp], 0; len(b) > 0; b = b[p:] {
w, p = t.appendNext(w, b)
w, p = t.appendNext(w, source{bytes: b})
}
return w, n
}
// TODO: unify the two implementations. This is best done after first simplifying
// the algorithm taking into account the inclusion of both NFC and NFD forms
// in the table.
func (t *table) matchContractionString(w []Elem, ce Elem, suffix string) ([]Elem, int) {
index, n, offset := splitContractIndex(ce)
scan := t.contractTries.scannerString(index, n, suffix)
buf := [norm.MaxSegmentSize]byte{}
bufp := 0
p := scan.scan(0)
if !scan.done && p < len(suffix) && suffix[p] >= utf8.RuneSelf {
// By now we should have filtered most cases.
p0 := p
bufn := 0
rune := norm.NFD.PropertiesString(suffix[p:])
p += rune.Size()
if rune.LeadCCC() != 0 {
prevCC := rune.TrailCCC()
// A gap may only occur in the last normalization segment.
// This also ensures that len(scan.s) < norm.MaxSegmentSize.
if end := norm.NFD.FirstBoundaryInString(suffix[p:]); end != -1 {
scan.s = suffix[:p+end]
}
for p < len(suffix) && !scan.done && suffix[p] >= utf8.RuneSelf {
rune = norm.NFD.PropertiesString(suffix[p:])
if ccc := rune.LeadCCC(); ccc == 0 || prevCC >= ccc {
break
}
prevCC = rune.TrailCCC()
if pp := scan.scan(p); pp != p {
// Copy the interstitial runes for later processing.
bufn += copy(buf[bufn:], suffix[p0:p])
if scan.pindex == pp {
bufp = bufn
}
p, p0 = pp, pp
} else {
p += rune.Size()
}
}
}
}
// Append weights for the matched contraction, which may be an expansion.
i, n := scan.result()
ce = Elem(t.contractElem[i+offset])
if ce.ctype() == ceNormal {
w = append(w, ce)
} else {
w = t.appendExpansion(w, ce)
}
// Append weights for the runes in the segment not part of the contraction.
for b, p := buf[:bufp], 0; len(b) > 0; b = b[p:] {
w, p = t.appendNext(w, source{bytes: b})
}
return w, n
}
// TODO: this should stay after the rest of this file is moved to colltab
func (t tableIndex) TrieIndex() []uint16 {
return mainLookup[:]
}
func (t tableIndex) TrieValues() []uint32 {
return mainValues[:]
}
func (t tableIndex) FirstBlockOffsets() (lookup, value uint16) {
return uint16(t.lookupOffset), uint16(t.valuesOffset)
}
func (t tableIndex) ExpandElems() []uint32 {
return mainExpandElem[:]
}
func (t tableIndex) ContractTries() []struct{ l, h, n, i uint8 } {
return mainCTEntries[:]
}
func (t tableIndex) ContractElems() []uint32 {
return mainContractElem[:]
}
func (t tableIndex) MaxContractLen() int {
return 18
}
func (t tableIndex) VariableTop() uint32 {
return 0x30E
}

File diff suppressed because it is too large

View File

@ -31,18 +31,79 @@ const (
te = 0xFE // 1111 1110
)
func (t *trie) lookupValue(n uint16, b byte) colElem {
return colElem(t.values[int(n)<<6+int(b)])
func (t *trie) lookupValue(n uint16, b byte) Elem {
return Elem(t.values[int(n)<<6+int(b)])
}
// lookup returns the trie value for the first UTF-8 encoding in s and
// the width in bytes of this encoding. The size will be 0 if s does not
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
func (t *trie) lookup(s []byte) (v colElem, sz int) {
func (t *trie) lookup(s []byte) (v Elem, sz int) {
c0 := s[0]
switch {
case c0 < tx:
return colElem(t.values0[c0]), 1
return Elem(t.values0[c0]), 1
case c0 < t2:
return 0, 1
case c0 < t3:
if len(s) < 2 {
return 0, 0
}
i := t.index0[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
return t.lookupValue(i, c1), 2
case c0 < t4:
if len(s) < 3 {
return 0, 0
}
i := t.index0[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := int(i)<<6 + int(c1)
i = t.index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
return t.lookupValue(i, c2), 3
case c0 < t5:
if len(s) < 4 {
return 0, 0
}
i := t.index0[c0]
c1 := s[1]
if c1 < tx || t2 <= c1 {
return 0, 1
}
o := int(i)<<6 + int(c1)
i = t.index[o]
c2 := s[2]
if c2 < tx || t2 <= c2 {
return 0, 2
}
o = int(i)<<6 + int(c2)
i = t.index[o]
c3 := s[3]
if c3 < tx || t2 <= c3 {
return 0, 3
}
return t.lookupValue(i, c3), 4
}
// Illegal rune
return 0, 1
}
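// Editor's sketch, not part of the patch: the lead-byte classification that
// the switch in lookup encodes. utf8SeqLen is a hypothetical helper; tx..t5
// are the threshold constants declared at the top of this file.
func utf8SeqLen(c0 byte) int {
	switch {
	case c0 < tx: // 0xxxxxxx: one-byte (ASCII) encoding
		return 1
	case c0 < t2: // 10xxxxxx: continuation byte, illegal as a lead byte
		return 1
	case c0 < t3: // 110xxxxx: two-byte encoding
		return 2
	case c0 < t4: // 1110xxxx: three-byte encoding
		return 3
	case c0 < t5: // 11110xxx: four-byte encoding
		return 4
	}
	return 1 // illegal lead byte; lookup also advances by 1 here
}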
// The body of lookupString is a verbatim copy of that of lookup.
func (t *trie) lookupString(s string) (v Elem, sz int) {
c0 := s[0]
switch {
case c0 < tx:
return Elem(t.values0[c0]), 1
case c0 < t2:
return 0, 1
case c0 < t3:

View File

@ -28,24 +28,20 @@ type reorderBuffer struct {
nbyte uint8 // Number of bytes.
f formInfo
src input
nsrc int
srcBytes inputBytes
srcString inputString
tmpBytes inputBytes
src input
nsrc int
tmpBytes input
}
func (rb *reorderBuffer) init(f Form, src []byte) {
rb.f = *formTable[f]
rb.srcBytes = inputBytes(src)
rb.src = &rb.srcBytes
rb.src.setBytes(src)
rb.nsrc = len(src)
}
func (rb *reorderBuffer) initString(f Form, src string) {
rb.f = *formTable[f]
rb.srcString = inputString(src)
rb.src = &rb.srcString
rb.src.setString(src)
rb.nsrc = len(src)
}
@ -121,9 +117,9 @@ func (rb *reorderBuffer) insert(src input, i int, info Properties) bool {
// in dcomp. dcomp must be a sequence of decomposed UTF-8-encoded runes.
func (rb *reorderBuffer) insertDecomposed(dcomp []byte) bool {
saveNrune, saveNbyte := rb.nrune, rb.nbyte
rb.tmpBytes = inputBytes(dcomp)
rb.tmpBytes.setBytes(dcomp)
for i := 0; i < len(dcomp); {
info := rb.f.info(&rb.tmpBytes, i)
info := rb.f.info(rb.tmpBytes, i)
pos := rb.nbyte
if !rb.insertOrdered(info) {
rb.nrune, rb.nbyte = saveNrune, saveNbyte

View File

@ -81,7 +81,7 @@ func flushF(rb *reorderBuffer) []byte {
}
func flushCopyF(rb *reorderBuffer) []byte {
out := make([]byte, MaxSegmentSize)
out := make([]byte, maxByteBufferSize)
n := rb.flushCopy(out)
return out[:n]
}

View File

@ -0,0 +1,81 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package norm_test
import (
"bytes"
"exp/norm"
"fmt"
"unicode/utf8"
)
// EqualSimple uses a norm.Iter to compare two non-normalized
// strings for equivalence.
func EqualSimple(a, b string) bool {
var ia, ib norm.Iter
ia.InitString(norm.NFKD, a)
ib.InitString(norm.NFKD, b)
for !ia.Done() && !ib.Done() {
if !bytes.Equal(ia.Next(), ib.Next()) {
return false
}
}
return ia.Done() && ib.Done()
}
// FindPrefix finds the longest common prefix of ASCII characters
// of a and b.
func FindPrefix(a, b string) int {
i := 0
for ; i < len(a) && i < len(b) && a[i] < utf8.RuneSelf && a[i] == b[i]; i++ {
}
return i
}
// EqualOpt is like EqualSimple, but optimizes the special
// case for ASCII characters.
func EqualOpt(a, b string) bool {
n := FindPrefix(a, b)
a, b = a[n:], b[n:]
var ia, ib norm.Iter
ia.InitString(norm.NFKD, a)
ib.InitString(norm.NFKD, b)
for !ia.Done() && !ib.Done() {
if !bytes.Equal(ia.Next(), ib.Next()) {
return false
}
if n := int64(FindPrefix(a[ia.Pos():], b[ib.Pos():])); n != 0 {
ia.Seek(n, 1)
ib.Seek(n, 1)
}
}
return ia.Done() && ib.Done()
}
var compareTests = []struct{ a, b string }{
{"aaa", "aaa"},
{"aaa", "aab"},
{"a\u0300a", "\u00E0a"},
{"a\u0300\u0320b", "a\u0320\u0300b"},
{"\u1E0A\u0323", "\x44\u0323\u0307"},
// A character that decomposes into multiple segments
// spans several iterations.
{"\u3304", "\u30A4\u30CB\u30F3\u30AF\u3099"},
}
func ExampleIter() {
for i, t := range compareTests {
r0 := EqualSimple(t.a, t.b)
r1 := EqualOpt(t.a, t.b)
fmt.Printf("%d: %v %v\n", i, r0, r1)
}
// Output:
// 0: true true
// 1: false false
// 2: true true
// 3: true true
// 4: true true
// 5: true true
}

View File

@ -50,6 +50,7 @@ type formInfo struct {
form Form
composing, compatibility bool // form type
info lookupFunc
nextMain iterFunc
}
var formTable []*formInfo
@ -67,7 +68,9 @@ func init() {
} else {
f.info = lookupInfoNFC
}
f.nextMain = nextDecomposed
if Form(i) == NFC || Form(i) == NFKC {
f.nextMain = nextComposed
f.composing = true
}
}
@ -117,6 +120,10 @@ func (p Properties) isInert() bool {
return p.flags&0xf == 0 && p.ccc == 0
}
func (p Properties) multiSegment() bool {
return p.index >= firstMulti && p.index < endMulti
}
// Decomposition returns the decomposition for the underlying rune
// or nil if there is none.
func (p Properties) Decomposition() []byte {

View File

@ -6,91 +6,100 @@ package norm
import "unicode/utf8"
type input interface {
skipASCII(p, max int) int
skipNonStarter(p int) int
appendSlice(buf []byte, s, e int) []byte
copySlice(buf []byte, s, e int)
charinfoNFC(p int) (uint16, int)
charinfoNFKC(p int) (uint16, int)
hangul(p int) rune
type input struct {
str string
bytes []byte
}
type inputString string
func inputBytes(str []byte) input {
return input{bytes: str}
}
func (s inputString) skipASCII(p, max int) int {
for ; p < max && s[p] < utf8.RuneSelf; p++ {
func inputString(str string) input {
return input{str: str}
}
func (in *input) setBytes(str []byte) {
in.str = ""
in.bytes = str
}
func (in *input) setString(str string) {
in.str = str
in.bytes = nil
}
func (in *input) _byte(p int) byte {
if in.bytes == nil {
return in.str[p]
}
return in.bytes[p]
}
func (in *input) skipASCII(p, max int) int {
if in.bytes == nil {
for ; p < max && in.str[p] < utf8.RuneSelf; p++ {
}
} else {
for ; p < max && in.bytes[p] < utf8.RuneSelf; p++ {
}
}
return p
}
func (s inputString) skipNonStarter(p int) int {
for ; p < len(s) && !utf8.RuneStart(s[p]); p++ {
func (in *input) skipNonStarter(p int) int {
if in.bytes == nil {
for ; p < len(in.str) && !utf8.RuneStart(in.str[p]); p++ {
}
} else {
for ; p < len(in.bytes) && !utf8.RuneStart(in.bytes[p]); p++ {
}
}
return p
}
func (s inputString) appendSlice(buf []byte, b, e int) []byte {
func (in *input) appendSlice(buf []byte, b, e int) []byte {
if in.bytes != nil {
return append(buf, in.bytes[b:e]...)
}
for i := b; i < e; i++ {
buf = append(buf, s[i])
buf = append(buf, in.str[i])
}
return buf
}
func (s inputString) copySlice(buf []byte, b, e int) {
copy(buf, s[b:e])
}
func (s inputString) charinfoNFC(p int) (uint16, int) {
return nfcTrie.lookupString(string(s[p:]))
}
func (s inputString) charinfoNFKC(p int) (uint16, int) {
return nfkcTrie.lookupString(string(s[p:]))
}
func (s inputString) hangul(p int) rune {
if !isHangulString(string(s[p:])) {
return 0
func (in *input) copySlice(buf []byte, b, e int) int {
if in.bytes == nil {
return copy(buf, in.str[b:e])
}
rune, _ := utf8.DecodeRuneInString(string(s[p:]))
return rune
return copy(buf, in.bytes[b:e])
}
type inputBytes []byte
func (s inputBytes) skipASCII(p, max int) int {
for ; p < max && s[p] < utf8.RuneSelf; p++ {
func (in *input) charinfoNFC(p int) (uint16, int) {
if in.bytes == nil {
return nfcTrie.lookupString(in.str[p:])
}
return p
return nfcTrie.lookup(in.bytes[p:])
}
func (s inputBytes) skipNonStarter(p int) int {
for ; p < len(s) && !utf8.RuneStart(s[p]); p++ {
func (in *input) charinfoNFKC(p int) (uint16, int) {
if in.bytes == nil {
return nfkcTrie.lookupString(in.str[p:])
}
return p
return nfkcTrie.lookup(in.bytes[p:])
}
func (s inputBytes) appendSlice(buf []byte, b, e int) []byte {
return append(buf, s[b:e]...)
}
func (s inputBytes) copySlice(buf []byte, b, e int) {
copy(buf, s[b:e])
}
func (s inputBytes) charinfoNFC(p int) (uint16, int) {
return nfcTrie.lookup(s[p:])
}
func (s inputBytes) charinfoNFKC(p int) (uint16, int) {
return nfkcTrie.lookup(s[p:])
}
func (s inputBytes) hangul(p int) rune {
if !isHangul(s[p:]) {
return 0
func (in *input) hangul(p int) (r rune) {
if in.bytes == nil {
if !isHangulString(in.str[p:]) {
return 0
}
r, _ = utf8.DecodeRuneInString(in.str[p:])
} else {
if !isHangul(in.bytes[p:]) {
return 0
}
r, _ = utf8.DecodeRune(in.bytes[p:])
}
rune, _ := utf8.DecodeRune(s[p:])
return rune
return r
}
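// Editor's note, not part of the patch: input replaces the former
// inputString/inputBytes interface pair with a single struct in which exactly
// one of str and bytes is set; a nil check on bytes selects the backing
// store, avoiding interface dispatch on every call. A hypothetical accessor
// in the same style:
func (in *input) length() int {
	if in.bytes == nil {
		return len(in.str)
	}
	return len(in.bytes)
}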

View File

@ -4,53 +4,96 @@
package norm
import (
"fmt"
"unicode/utf8"
)
const MaxSegmentSize = maxByteBufferSize
// An Iter iterates over a string or byte slice, while normalizing it
// to a given Form.
type Iter struct {
rb reorderBuffer
info Properties // first character saved from previous iteration
next iterFunc // implementation of next depends on form
rb reorderBuffer
buf [maxByteBufferSize]byte
info Properties // first character saved from previous iteration
next iterFunc // implementation of next depends on form
asciiF iterFunc
p int // current position in input source
outStart int // start of current segment in output buffer
inStart int // start of current segment in input source
maxp int // position in output buffer after which not to start a new segment
maxseg int // for tracking an excess of combining characters
tccc uint8
done bool
p int // current position in input source
multiSeg []byte // remainder of multi-segment decomposition
}
type iterFunc func(*Iter, []byte) int
type iterFunc func(*Iter) []byte
// SetInput initializes i to iterate over src after normalizing it to Form f.
func (i *Iter) SetInput(f Form, src []byte) {
// Init initializes i to iterate over src after normalizing it to Form f.
func (i *Iter) Init(f Form, src []byte) {
i.p = 0
if len(src) == 0 {
i.setDone()
i.rb.nsrc = 0
return
}
i.multiSeg = nil
i.rb.init(f, src)
if i.rb.f.composing {
i.next = nextComposed
} else {
i.next = nextDecomposed
}
i.p = 0
if i.done = len(src) == 0; !i.done {
i.info = i.rb.f.info(i.rb.src, i.p)
}
i.next = i.rb.f.nextMain
i.asciiF = nextASCIIBytes
i.info = i.rb.f.info(i.rb.src, i.p)
}
// SetInputString initializes i to iterate over src after normalizing it to Form f.
func (i *Iter) SetInputString(f Form, src string) {
i.rb.initString(f, src)
if i.rb.f.composing {
i.next = nextComposed
} else {
i.next = nextDecomposed
}
// InitString initializes i to iterate over src after normalizing it to Form f.
func (i *Iter) InitString(f Form, src string) {
i.p = 0
if i.done = len(src) == 0; !i.done {
i.info = i.rb.f.info(i.rb.src, i.p)
if len(src) == 0 {
i.setDone()
i.rb.nsrc = 0
return
}
i.multiSeg = nil
i.rb.initString(f, src)
i.next = i.rb.f.nextMain
i.asciiF = nextASCIIString
i.info = i.rb.f.info(i.rb.src, i.p)
}
// Seek sets the segment to be returned by the next call to Next to start
// at position p. It is the responsibility of the caller to set p to the
// start of a UTF-8 rune.
func (i *Iter) Seek(offset int64, whence int) (int64, error) {
var abs int64
switch whence {
case 0:
abs = offset
case 1:
abs = int64(i.p) + offset
case 2:
abs = int64(i.rb.nsrc) + offset
default:
return 0, fmt.Errorf("norm: invalid whence")
}
if abs < 0 {
return 0, fmt.Errorf("norm: negative position")
}
if int(abs) >= i.rb.nsrc {
i.setDone()
return int64(i.p), nil
}
i.p = int(abs)
i.multiSeg = nil
i.next = i.rb.f.nextMain
i.info = i.rb.f.info(i.rb.src, i.p)
return abs, nil
}
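// Editor's sketch, not part of the patch: typical use of Seek to resume
// iteration at a known byte offset; seekDemo is hypothetical. The whence
// values 0, 1 and 2 follow the io.Seeker convention, as the switch above
// shows.
func seekDemo(s string, skip int64) []byte {
	var it Iter
	it.InitString(NFC, s)
	it.Seek(skip, 0) // absolute offset; caller must pick a rune boundary
	var out []byte
	for !it.Done() {
		out = append(out, it.Next()...)
	}
	return out
}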
// returnSlice returns a slice of the underlying input type as a byte slice.
// If the underlying is of type []byte, it will simply return a slice.
// If the underlying is of type string, it will copy the slice to the buffer
// and return that.
func (i *Iter) returnSlice(a, b int) []byte {
if i.rb.src.bytes == nil {
return i.buf[:copy(i.buf[:], i.rb.src.str[a:b])]
}
return i.rb.src.bytes[a:b]
}
// Pos returns the byte position at which the next call to Next will commence processing.
@ -58,140 +101,232 @@ func (i *Iter) Pos() int {
return i.p
}
func (i *Iter) setDone() {
i.next = nextDone
i.p = i.rb.nsrc
}
// Done returns true if there is no more input to process.
func (i *Iter) Done() bool {
return i.done
return i.p >= i.rb.nsrc
}
// Next writes f(i.input[i.Pos():n]...) to buffer buf, where n is the
// largest boundary of i.input such that the result fits in buf.
// It returns the number of bytes written to buf.
// len(buf) should be at least MaxSegmentSize.
// Done must be false before calling Next.
func (i *Iter) Next(buf []byte) int {
return i.next(i, buf)
// Next returns f(i.input[i.Pos():n]), where n is a boundary of i.input.
// For any input a and b for which f(a) == f(b), subsequent calls
// to Next will return the same segments.
// Modifying runes are grouped together with the preceding starter, if such a starter exists.
// Although not guaranteed, n will typically be the smallest possible n.
func (i *Iter) Next() []byte {
return i.next(i)
}
func (i *Iter) initNext(outn, inStart int) {
i.outStart = 0
i.inStart = inStart
i.maxp = outn - MaxSegmentSize
i.maxseg = MaxSegmentSize
}
// setStart resets the start of the new segment to the given position.
// It returns true if there is not enough room for the new segment.
func (i *Iter) setStart(outp, inp int) bool {
if outp > i.maxp {
return true
func nextASCIIBytes(i *Iter) []byte {
p := i.p + 1
if p >= i.rb.nsrc {
i.setDone()
return i.rb.src.bytes[i.p:p]
}
i.outStart = outp
i.inStart = inp
i.maxseg = outp + MaxSegmentSize
return false
if i.rb.src.bytes[p] < utf8.RuneSelf {
p0 := i.p
i.p = p
return i.rb.src.bytes[p0:p]
}
i.info = i.rb.f.info(i.rb.src, i.p)
i.next = i.rb.f.nextMain
return i.next(i)
}
func min(a, b int) int {
if a < b {
return a
func nextASCIIString(i *Iter) []byte {
p := i.p + 1
if p >= i.rb.nsrc {
i.buf[0] = i.rb.src.str[i.p]
i.setDone()
return i.buf[:1]
}
return b
if i.rb.src.str[p] < utf8.RuneSelf {
i.buf[0] = i.rb.src.str[i.p]
i.p = p
return i.buf[:1]
}
i.info = i.rb.f.info(i.rb.src, i.p)
i.next = i.rb.f.nextMain
return i.next(i)
}
func nextHangul(i *Iter) []byte {
if r := i.rb.src.hangul(i.p); r != 0 {
i.p += hangulUTF8Size
if i.p >= i.rb.nsrc {
i.setDone()
}
return i.buf[:decomposeHangul(i.buf[:], r)]
}
i.info = i.rb.f.info(i.rb.src, i.p)
i.next = i.rb.f.nextMain
return i.next(i)
}
func nextDone(i *Iter) []byte {
return nil
}
// nextMulti is used for iterating over multi-segment decompositions
// for decomposing normal forms.
func nextMulti(i *Iter) []byte {
j := 0
d := i.multiSeg
// skip first rune
for j = 1; j < len(d) && !utf8.RuneStart(d[j]); j++ {
}
for j < len(d) {
info := i.rb.f.info(input{bytes: d}, j)
if info.ccc == 0 {
i.multiSeg = d[j:]
return d[:j]
}
j += int(info.size)
}
// treat last segment as normal decomposition
i.next = i.rb.f.nextMain
return i.next(i)
}
// nextMultiNorm is used for iterating over multi-segment decompositions
// for composing normal forms.
func nextMultiNorm(i *Iter) []byte {
j := 0
d := i.multiSeg
// skip first rune
for j = 1; j < len(d) && !utf8.RuneStart(d[j]); j++ {
}
for j < len(d) {
info := i.rb.f.info(input{bytes: d}, j)
if info.ccc == 0 {
i.multiSeg = d[j:]
return d[:j]
}
j += int(info.size)
}
i.multiSeg = nil
i.next = nextComposed
i.p++ // restore old value of i.p. See nextComposed.
if i.p >= i.rb.nsrc {
i.setDone()
}
return d
}
// nextDecomposed is the implementation of Next for forms NFD and NFKD.
func nextDecomposed(i *Iter, out []byte) int {
var outp int
i.initNext(len(out), i.p)
doFast:
inCopyStart, outCopyStart := i.p, outp // invariant xCopyStart <= i.xStart
func nextDecomposed(i *Iter) (next []byte) {
startp, outp := i.p, 0
inCopyStart, outCopyStart := i.p, 0
for {
if sz := int(i.info.size); sz <= 1 {
// ASCII or illegal byte. Either way, advance by 1.
i.p++
p := i.p
i.p++ // ASCII or illegal byte. Either way, advance by 1.
if i.p >= i.rb.nsrc {
i.setDone()
return i.returnSlice(p, i.p)
} else if i.rb.src._byte(i.p) < utf8.RuneSelf {
i.next = i.asciiF
return i.returnSlice(p, i.p)
}
outp++
max := min(i.rb.nsrc, len(out)-outp+i.p)
if np := i.rb.src.skipASCII(i.p, max); np > i.p {
outp += np - i.p
i.p = np
if i.p >= i.rb.nsrc {
break
}
// ASCII may combine with consecutive runes.
if i.setStart(outp-1, i.p-1) {
i.p--
outp--
i.info.size = 1
break
}
}
} else if d := i.info.Decomposition(); d != nil {
i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.p)
// Note: If leading CCC != 0, then len(d) == 2 and last is also non-zero.
// Case 1: there is a leftover to copy. In this case the decomposition
// must begin with a modifier and should always be appended.
// Case 2: no leftover. Simply return d if followed by a ccc == 0 value.
p := outp + len(d)
if p > i.maxseg && i.setStart(outp, i.p) {
return outp
if outp > 0 {
i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
if p > len(i.buf) {
return i.buf[:outp]
}
} else if i.info.multiSegment() {
// outp must be 0 as multi-segment decompositions always
// start a new segment.
if i.multiSeg == nil {
i.multiSeg = d
i.next = nextMulti
return nextMulti(i)
}
// We are in the last segment. Treat as normal decomposition.
d = i.multiSeg
i.multiSeg = nil
p = len(d)
}
copy(out[outp:], d)
prevCC := i.info.tccc
if i.p += sz; i.p >= i.rb.nsrc {
i.setDone()
i.info = Properties{} // Force BoundaryBefore to succeed.
} else {
i.info = i.rb.f.info(i.rb.src, i.p)
}
if i.info.BoundaryBefore() {
if outp > 0 {
copy(i.buf[outp:], d)
return i.buf[:p]
}
return d
}
copy(i.buf[outp:], d)
outp = p
i.p += sz
inCopyStart, outCopyStart = i.p, outp
} else if r := i.rb.src.hangul(i.p); r != 0 {
i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.p)
for {
outp += decomposeHangul(out[outp:], r)
i.p += hangulUTF8Size
if r = i.rb.src.hangul(i.p); r == 0 {
break
}
if i.setStart(outp, i.p) {
return outp
}
if i.info.ccc < prevCC {
goto doNorm
}
inCopyStart, outCopyStart = i.p, outp
continue
} else if r := i.rb.src.hangul(i.p); r != 0 {
i.next = nextHangul
i.p += hangulUTF8Size
if i.p >= i.rb.nsrc {
i.setDone()
}
return i.buf[:decomposeHangul(i.buf[:], r)]
} else {
p := outp + sz
if p > i.maxseg && i.setStart(outp, i.p) {
if p > len(i.buf) {
break
}
outp = p
i.p += sz
}
if i.p >= i.rb.nsrc {
i.setDone()
break
}
prevCC := i.info.tccc
i.info = i.rb.f.info(i.rb.src, i.p)
if cc := i.info.ccc; cc == 0 {
if i.setStart(outp, i.p) {
break
}
} else if cc < prevCC {
if i.info.BoundaryBefore() {
break
} else if i.info.ccc < prevCC {
goto doNorm
}
}
if inCopyStart != i.p {
i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.p)
if outCopyStart == 0 {
return i.returnSlice(inCopyStart, i.p)
} else if inCopyStart < i.p {
i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
}
i.done = i.p >= i.rb.nsrc
return outp
return i.buf[:outp]
doNorm:
// Insert what we have decomposed so far in the reorderBuffer.
// As we will only reorder, there will always be enough room.
i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.p)
if !i.rb.insertDecomposed(out[i.outStart:outp]) {
i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
if !i.rb.insertDecomposed(i.buf[0:outp]) {
// Start over to prevent decompositions from crossing segment boundaries.
// This is a rare occurrence.
i.p = i.inStart
i.p = startp
i.info = i.rb.f.info(i.rb.src, i.p)
}
outp = i.outStart
for {
if !i.rb.insert(i.rb.src, i.p, i.info) {
break
}
if i.p += int(i.info.size); i.p >= i.rb.nsrc {
outp += i.rb.flushCopy(out[outp:])
i.done = true
return outp
i.setDone()
break
}
i.info = i.rb.f.info(i.rb.src, i.p)
if i.info.ccc == 0 {
@ -199,27 +334,19 @@ doNorm:
}
}
// new segment or too many combining characters: exit normalization
if outp += i.rb.flushCopy(out[outp:]); i.setStart(outp, i.p) {
return outp
}
goto doFast
return i.buf[:i.rb.flushCopy(i.buf[:])]
}
// nextComposed is the implementation of Next for forms NFC and NFKC.
func nextComposed(i *Iter, out []byte) int {
var outp int
i.initNext(len(out), i.p)
doFast:
inCopyStart, outCopyStart := i.p, outp // invariant xCopyStart <= i.xStart
func nextComposed(i *Iter) []byte {
outp, startp := 0, i.p
var prevCC uint8
for {
if !i.info.isYesC() {
goto doNorm
}
if cc := i.info.ccc; cc == 0 {
if i.setStart(outp, i.p) {
break
}
if cc := i.info.ccc; cc == 0 && outp > 0 {
break
} else if cc < prevCC {
goto doNorm
}
@ -229,49 +356,33 @@ doFast:
sz = 1 // illegal rune: copy byte-by-byte
}
p := outp + sz
if p > i.maxseg && i.setStart(outp, i.p) {
if p > len(i.buf) {
break
}
outp = p
i.p += sz
max := min(i.rb.nsrc, len(out)-outp+i.p)
if np := i.rb.src.skipASCII(i.p, max); np > i.p {
outp += np - i.p
i.p = np
if i.p >= i.rb.nsrc {
break
}
// ASCII may combine with consecutive runes.
if i.setStart(outp-1, i.p-1) {
i.p--
outp--
i.info = Properties{size: 1}
break
}
}
if i.p >= i.rb.nsrc {
i.setDone()
break
} else if i.rb.src._byte(i.p) < utf8.RuneSelf {
i.next = i.asciiF
break
}
i.info = i.rb.f.info(i.rb.src, i.p)
}
if inCopyStart != i.p {
i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.p)
}
i.done = i.p >= i.rb.nsrc
return outp
return i.returnSlice(startp, i.p)
doNorm:
i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.inStart)
outp, i.p = i.outStart, i.inStart
multi := false
i.p = startp
i.info = i.rb.f.info(i.rb.src, i.p)
for {
if !i.rb.insert(i.rb.src, i.p, i.info) {
break
}
multi = multi || i.info.multiSegment()
if i.p += int(i.info.size); i.p >= i.rb.nsrc {
i.rb.compose()
outp += i.rb.flushCopy(out[outp:])
i.done = true
return outp
i.setDone()
break
}
i.info = i.rb.f.info(i.rb.src, i.p)
if i.info.BoundaryBefore() {
@ -279,8 +390,12 @@ doNorm:
}
}
i.rb.compose()
if outp += i.rb.flushCopy(out[outp:]); i.setStart(outp, i.p) {
return outp
seg := i.buf[:i.rb.flushCopy(i.buf[:])]
if multi {
i.p-- // fake not being done yet
i.multiSeg = seg
i.next = nextMultiNorm
return nextMultiNorm(i)
}
goto doFast
return seg
}

View File

@ -9,21 +9,12 @@ import (
"testing"
)
var iterBufSizes = []int{
MaxSegmentSize,
1.5 * MaxSegmentSize,
2 * MaxSegmentSize,
3 * MaxSegmentSize,
100 * MaxSegmentSize,
}
func doIterNorm(f Form, buf []byte, s string) []byte {
func doIterNorm(f Form, s string) []byte {
acc := []byte{}
i := Iter{}
i.SetInputString(f, s)
i.InitString(f, s)
for !i.Done() {
n := i.Next(buf)
acc = append(acc, buf[:n]...)
acc = append(acc, i.Next()...)
}
return acc
}
@ -35,30 +26,28 @@ func runIterTests(t *testing.T, name string, f Form, tests []AppendTest, norm bo
if norm {
gold = string(f.AppendString(nil, test.out))
}
for _, sz := range iterBufSizes {
buf := make([]byte, sz)
out := string(doIterNorm(f, buf, in))
if len(out) != len(gold) {
const msg = "%s:%d:%d: length is %d; want %d"
t.Errorf(msg, name, i, sz, len(out), len(gold))
}
if out != gold {
// Find first rune that differs and show context.
ir := []rune(out)
ig := []rune(gold)
for j := 0; j < len(ir) && j < len(ig); j++ {
if ir[j] == ig[j] {
continue
}
if j -= 3; j < 0 {
j = 0
}
for e := j + 7; j < e && j < len(ir) && j < len(ig); j++ {
const msg = "%s:%d:%d: runeAt(%d) = %U; want %U"
t.Errorf(msg, name, i, sz, j, ir[j], ig[j])
}
break
out := string(doIterNorm(f, in))
if len(out) != len(gold) {
const msg = "%s:%d: length is %d; want %d"
t.Errorf(msg, name, i, len(out), len(gold))
}
if out != gold {
// Find first rune that differs and show context.
ir := []rune(out)
ig := []rune(gold)
t.Errorf("\n%X != \n%X", ir, ig)
for j := 0; j < len(ir) && j < len(ig); j++ {
if ir[j] == ig[j] {
continue
}
if j -= 3; j < 0 {
j = 0
}
for e := j + 7; j < e && j < len(ir) && j < len(ig); j++ {
const msg = "%s:%d: runeAt(%d) = %U; want %U"
t.Errorf(msg, name, i, j, ir[j], ig[j])
}
break
}
}
}
@ -68,42 +57,44 @@ func rep(r rune, n int) string {
return strings.Repeat(string(r), n)
}
const segSize = maxByteBufferSize
var iterTests = []AppendTest{
{"", ascii, ascii},
{"", txt_all, txt_all},
{"", "a" + rep(0x0300, MaxSegmentSize/2), "a" + rep(0x0300, MaxSegmentSize/2)},
{"", "a" + rep(0x0300, segSize/2), "a" + rep(0x0300, segSize/2)},
}
var iterTestsD = []AppendTest{
{ // segment overflow on unchanged character
"",
"a" + rep(0x0300, MaxSegmentSize/2) + "\u0316",
"a" + rep(0x0300, MaxSegmentSize/2-1) + "\u0316\u0300",
"a" + rep(0x0300, segSize/2) + "\u0316",
"a" + rep(0x0300, segSize/2-1) + "\u0316\u0300",
},
{ // segment overflow on unchanged character + start value
"",
"a" + rep(0x0300, MaxSegmentSize/2+maxCombiningChars+4) + "\u0316",
"a" + rep(0x0300, MaxSegmentSize/2+maxCombiningChars) + "\u0316" + rep(0x300, 4),
"a" + rep(0x0300, segSize/2+maxCombiningChars+4) + "\u0316",
"a" + rep(0x0300, segSize/2+maxCombiningChars) + "\u0316" + rep(0x300, 4),
},
{ // segment overflow on decomposition
"",
"a" + rep(0x0300, MaxSegmentSize/2-1) + "\u0340",
"a" + rep(0x0300, MaxSegmentSize/2),
"a" + rep(0x0300, segSize/2-1) + "\u0340",
"a" + rep(0x0300, segSize/2),
},
{ // segment overflow on decomposition + start value
"",
"a" + rep(0x0300, MaxSegmentSize/2-1) + "\u0340" + rep(0x300, maxCombiningChars+4) + "\u0320",
"a" + rep(0x0300, MaxSegmentSize/2-1) + rep(0x300, maxCombiningChars+1) + "\u0320" + rep(0x300, 4),
"a" + rep(0x0300, segSize/2-1) + "\u0340" + rep(0x300, maxCombiningChars+4) + "\u0320",
"a" + rep(0x0300, segSize/2-1) + rep(0x300, maxCombiningChars+1) + "\u0320" + rep(0x300, 4),
},
{ // start value after ASCII overflow
"",
rep('a', MaxSegmentSize) + rep(0x300, maxCombiningChars+2) + "\u0320",
rep('a', MaxSegmentSize) + rep(0x300, maxCombiningChars) + "\u0320\u0300\u0300",
rep('a', segSize) + rep(0x300, maxCombiningChars+2) + "\u0320",
rep('a', segSize) + rep(0x300, maxCombiningChars) + "\u0320\u0300\u0300",
},
{ // start value after Hangul overflow
"",
rep(0xAC00, MaxSegmentSize/6) + rep(0x300, maxCombiningChars+2) + "\u0320",
strings.Repeat("\u1100\u1161", MaxSegmentSize/6) + rep(0x300, maxCombiningChars-1) + "\u0320" + rep(0x300, 3),
rep(0xAC00, segSize/6) + rep(0x300, maxCombiningChars+2) + "\u0320",
strings.Repeat("\u1100\u1161", segSize/6) + rep(0x300, maxCombiningChars+1) + "\u0320" + rep(0x300, 1),
},
{ // start value after cc=0
"",
@ -125,8 +116,8 @@ var iterTestsC = []AppendTest{
},
{ // segment overflow
"",
"a" + rep(0x0305, MaxSegmentSize/2+4) + "\u0316",
"a" + rep(0x0305, MaxSegmentSize/2-1) + "\u0316" + rep(0x305, 5),
"a" + rep(0x0305, segSize/2+4) + "\u0316",
"a" + rep(0x0305, segSize/2-1) + "\u0316" + rep(0x305, 5),
},
}
@ -148,27 +139,39 @@ type SegmentTest struct {
}
var segmentTests = []SegmentTest{
{rep('a', MaxSegmentSize), []string{rep('a', MaxSegmentSize), ""}},
{rep('a', MaxSegmentSize+2), []string{rep('a', MaxSegmentSize-1), "aaa", ""}},
{rep('a', MaxSegmentSize) + "\u0300aa", []string{rep('a', MaxSegmentSize-1), "a\u0300", "aa", ""}},
{"\u1E0A\u0323a", []string{"\x44\u0323\u0307", "a", ""}},
{rep('a', segSize), append(strings.Split(rep('a', segSize), ""), "")},
{rep('a', segSize+2), append(strings.Split(rep('a', segSize+2), ""), "")},
{rep('a', segSize) + "\u0300aa",
append(strings.Split(rep('a', segSize-1), ""), "a\u0300", "a", "a", "")},
}
var segmentTestsK = []SegmentTest{
{"\u3332", []string{"\u30D5", "\u30A1", "\u30E9", "\u30C3", "\u30C8\u3099", ""}},
// last segment of multi-segment decomposition needs normalization
{"\u3332\u093C", []string{"\u30D5", "\u30A1", "\u30E9", "\u30C3", "\u30C8\u093C\u3099", ""}},
// Hangul and Jamo are grouped together.
{"\uAC00", []string{"\u1100\u1161", ""}},
{"\uAC01", []string{"\u1100\u1161\u11A8", ""}},
{"\u1100\u1161", []string{"\u1100\u1161", ""}},
}
// Note that, by design, segmentation is equal for composing and decomposing forms.
func TestIterSegmentation(t *testing.T) {
segmentTest(t, "SegmentTestD", NFD, segmentTests)
segmentTest(t, "SegmentTestC", NFC, segmentTests)
segmentTest(t, "SegmentTestD", NFKD, segmentTestsK)
segmentTest(t, "SegmentTestC", NFKC, segmentTestsK)
}
func segmentTest(t *testing.T, name string, f Form, tests []SegmentTest) {
iter := Iter{}
for i, tt := range segmentTests {
buf := make([]byte, MaxSegmentSize)
iter.SetInputString(f, tt.in)
for i, tt := range tests {
iter.InitString(f, tt.in)
for j, seg := range tt.out {
if seg == "" {
if !iter.Done() {
n := iter.Next(buf)
res := string(buf[:n])
res := string(iter.Next())
t.Errorf(`%s:%d:%d: expected Done()==true, found segment "%s"`, name, i, j, res)
}
continue
@ -176,10 +179,9 @@ func segmentTest(t *testing.T, name string, f Form, tests []SegmentTest) {
if iter.Done() {
t.Errorf("%s:%d:%d: Done()==true, want false", name, i, j)
}
n := iter.Next(buf)
seg = f.String(seg)
if res := string(buf[:n]); res != seg {
t.Errorf(`%s:%d:%d" segment was "%s" (%d); want "%s" (%d)`, name, i, j, res, len(res), seg, len(seg))
if res := string(iter.Next()); res != seg {
t.Errorf(`%s:%d:%d" segment was "%s" (%d); want "%s" (%d) %X %X`, name, i, j, res, len(res), seg, len(seg), []rune(res), []rune(seg))
}
}
}

View File

@ -574,7 +574,19 @@ func makeEntry(f *FormInfo) uint16 {
// decompSet keeps track of unique decompositions, grouped by whether
// the decomposition is followed by a trailing and/or leading CCC.
type decompSet [4]map[string]bool
type decompSet [6]map[string]bool
const (
normalDecomp = iota
firstMulti
firstCCC
endMulti
firstLeadingCCC
firstCCCZeroExcept
lastDecomp
)
var cname = []string{"firstMulti", "firstCCC", "endMulti", "firstLeadingCCC", "firstCCCZeroExcept", "lastDecomp"}
func makeDecompSet() decompSet {
m := decompSet{}
@ -614,20 +626,30 @@ func printCharInfoTables() int {
const msg = "%U: lccc (%d) must be <= tcc (%d)"
logger.Fatalf(msg, r, lccc, tccc)
}
index := 0
index := normalDecomp
if tccc > 0 || lccc > 0 {
s += string([]byte{tccc})
index = 1
index = endMulti
for _, r := range d[1:] {
if ccc(r) == 0 {
index = firstCCC
}
}
if lccc > 0 {
s += string([]byte{lccc})
index = 2
if index == firstCCC {
logger.Fatalf("%U: multi-segment decomposition not supported for decompositions with leading CCC != 0", r)
}
index = firstLeadingCCC
}
if cc != lccc {
if cc != 0 {
logger.Fatalf("%U: for lccc != ccc, expected ccc to be 0; was %d", r, cc)
}
index = 3
index = firstCCCZeroExcept
}
} else if len(d) > 1 {
index = firstMulti
}
return index, s
}
@ -653,7 +675,6 @@ func printCharInfoTables() int {
size := 0
positionMap := make(map[string]uint16)
decompositions.WriteString("\000")
cname := []string{"firstCCC", "firstLeadingCCC", "firstCCCZeroExcept", "lastDecomp"}
fmt.Println("const (")
for i, m := range decompSet {
sa := []string{}

View File

@ -6,6 +6,7 @@ package norm
import (
"bytes"
"io"
"strings"
"testing"
)
@ -504,12 +505,35 @@ func appendBench(f Form, in []byte) func() {
}
func iterBench(f Form, in []byte) func() {
buf := make([]byte, 4*len(in))
iter := Iter{}
return func() {
iter.SetInput(f, in)
iter.Init(f, in)
for !iter.Done() {
iter.Next(buf)
iter.Next()
}
}
}
func readerBench(f Form, in []byte) func() {
buf := make([]byte, 4*len(in))
return func() {
r := f.Reader(bytes.NewReader(in))
var err error
for err == nil {
_, err = r.Read(buf)
}
if err != io.EOF {
panic("")
}
}
}
func writerBench(f Form, in []byte) func() {
buf := make([]byte, 0, 4*len(in))
return func() {
r := f.Writer(bytes.NewBuffer(buf))
if _, err := r.Write(in); err != nil {
panic("")
}
}
}
@ -517,6 +541,8 @@ func iterBench(f Form, in []byte) func() {
func appendBenchmarks(bm []func(), f Form, in []byte) []func() {
//bm = append(bm, appendBench(f, in))
bm = append(bm, iterBench(f, in))
//bm = append(bm, readerBench(f, in))
//bm = append(bm, writerBench(f, in))
return bm
}

View File

@ -223,13 +223,11 @@ func doTest(t *Test, f norm.Form, gold, test string) {
cmpResult(t, "Bytes", f, gold, test, string(result))
sresult := f.String(test)
cmpResult(t, "String", f, gold, test, sresult)
buf := make([]byte, norm.MaxSegmentSize)
acc := []byte{}
i := norm.Iter{}
i.SetInputString(f, test)
i.InitString(f, test)
for !i.Done() {
n := i.Next(buf)
acc = append(acc, buf[:n]...)
acc = append(acc, i.Next()...)
}
cmpResult(t, "Iter.Next", f, gold, test, string(acc))
for i := range test {

File diff suppressed because it is too large

View File

@ -0,0 +1,190 @@
package ssa
// Simple block optimisations to simplify the control flow graph.
// TODO(adonovan): instead of creating several "unreachable" blocks
// per function in the Builder, reuse a single one (e.g. at Blocks[1])
// to reduce garbage.
//
// TODO(adonovan): in the absence of multiway branch instructions,
// each BasicBlock has 0, 1, or 2 successors. We should preallocate
// the backing array for the Succs slice inline in BasicBlock.
import (
"fmt"
"os"
)
// If true, perform sanity checking and show progress at each
// successive iteration of optimizeBlocks. Very verbose.
const debugBlockOpt = false
func hasPhi(b *BasicBlock) bool {
_, ok := b.Instrs[0].(*Phi)
return ok
}
// prune attempts to prune block b if it is unreachable (i.e. has no
// predecessors other than itself), disconnecting it from the CFG.
// The result is true if the optimisation was applied. i is the block
// index within the function.
//
func prune(f *Function, i int, b *BasicBlock) bool {
if i == 0 {
return false // don't prune entry block
}
if len(b.Preds) == 0 || len(b.Preds) == 1 && b.Preds[0] == b {
// Disconnect it from its successors.
for _, c := range b.Succs {
c.removePred(b)
}
if debugBlockOpt {
fmt.Fprintln(os.Stderr, "prune", b.Name)
}
// Delete b.
f.Blocks[i] = nil
return true
}
return false
}
// jumpThreading attempts to apply simple jump-threading to block b,
// in which a->b->c become a->c if b is just a Jump.
// The result is true if the optimisation was applied.
// i is the block index within the function.
//
func jumpThreading(f *Function, i int, b *BasicBlock) bool {
if i == 0 {
return false // don't apply to entry block
}
if b.Instrs == nil {
fmt.Println("empty block ", b.Name)
return false
}
if _, ok := b.Instrs[0].(*Jump); !ok {
return false // not just a jump
}
c := b.Succs[0]
if c == b {
return false // don't apply to degenerate jump-to-self.
}
if hasPhi(c) {
return false // not sound without more effort
}
for j, a := range b.Preds {
a.replaceSucc(b, c)
// If a now has two edges to c, replace its degenerate If by Jump.
if len(a.Succs) == 2 && a.Succs[0] == c && a.Succs[1] == c {
jump := new(Jump)
jump.SetBlock(a)
a.Instrs[len(a.Instrs)-1] = jump
a.Succs = a.Succs[:1]
c.removePred(b)
} else {
if j == 0 {
c.replacePred(b, a)
} else {
c.Preds = append(c.Preds, a)
}
}
if debugBlockOpt {
fmt.Fprintln(os.Stderr, "jumpThreading", a.Name, b.Name, c.Name)
}
}
f.Blocks[i] = nil
return true
}
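// Editor's sketch, not part of the patch: the rewrite above on a toy graph.
// editorJumpThreadDemo and its local node type are hypothetical stand-ins
// for BasicBlock, kept self-contained for illustration.
func editorJumpThreadDemo() string {
	type node struct {
		name  string
		succs []*node
	}
	c := &node{name: "c"}
	b := &node{name: "b", succs: []*node{c}} // b holds only a Jump
	a := &node{name: "a", succs: []*node{b}}
	// a->b->c becomes a->c: redirect a's edge to b's sole successor.
	for i, s := range a.succs {
		if s == b && len(b.succs) == 1 {
			a.succs[i] = b.succs[0]
		}
	}
	return a.succs[0].name // "c"; b is left for prune to reclaim
}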
// fuseBlocks attempts to apply the block fusion optimisation to block
// a, in which a->b becomes ab if len(a.Succs)==len(b.Preds)==1.
// The result is true if the optimisation was applied.
//
func fuseBlocks(f *Function, a *BasicBlock) bool {
if len(a.Succs) != 1 {
return false
}
b := a.Succs[0]
if len(b.Preds) != 1 {
return false
}
// Eliminate jump at end of A, then copy all of B across.
a.Instrs = append(a.Instrs[:len(a.Instrs)-1], b.Instrs...)
for _, instr := range b.Instrs {
instr.SetBlock(a)
}
// A inherits B's successors
a.Succs = b.Succs
// Fix up Preds links of all successors of B.
for _, c := range b.Succs {
c.replacePred(b, a)
}
if debugBlockOpt {
fmt.Fprintln(os.Stderr, "fuseBlocks", a.Name, b.Name)
}
// Make b unreachable. Subsequent pruning will reclaim it.
b.Preds = nil
return true
}
// optimizeBlocks() performs some simple block optimizations on a
// completed function: dead block elimination, block fusion, jump
// threading.
//
func optimizeBlocks(f *Function) {
// Loop until no further progress.
changed := true
for changed {
changed = false
if debugBlockOpt {
f.DumpTo(os.Stderr)
MustSanityCheck(f, nil)
}
for i, b := range f.Blocks {
// f.Blocks will temporarily contain nils to indicate
// deleted blocks; we remove them at the end.
if b == nil {
continue
}
// Prune unreachable blocks (including all empty blocks).
if prune(f, i, b) {
changed = true
continue // (b was pruned)
}
// Fuse blocks. b->c becomes bc.
if fuseBlocks(f, b) {
changed = true
}
// a->b->c becomes a->c if b contains only a Jump.
if jumpThreading(f, i, b) {
changed = true
continue // (b was disconnected)
}
}
}
// Eliminate nils from Blocks.
j := 0
for _, b := range f.Blocks {
if b != nil {
f.Blocks[j] = b
j++
}
}
// Nil out f.Blocks[j:] to aid GC.
for i := j; i < len(f.Blocks); i++ {
f.Blocks[i] = nil
}
f.Blocks = f.Blocks[:j]
}

libgo/go/exp/ssa/doc.go Normal file
View File

@ -0,0 +1,113 @@
// Package ssa defines a representation of the elements of Go programs
// (packages, types, functions, variables and constants) using a
// static single-assignment (SSA) form intermediate representation
// (IR) for the bodies of functions.
//
// THIS INTERFACE IS EXPERIMENTAL AND IS LIKELY TO CHANGE.
//
// For an introduction to SSA form, see
// http://en.wikipedia.org/wiki/Static_single_assignment_form.
// This page provides a broader reading list:
// http://www.dcs.gla.ac.uk/~jsinger/ssa.html.
//
// The level of abstraction of the SSA form is intentionally close to
// the source language to facilitate construction of source analysis
// tools. It is not primarily intended for machine code generation.
//
// All looping, branching and switching constructs are replaced with
// unstructured control flow. We may add higher-level control flow
// primitives in the future to facilitate constant-time dispatch of
// switch statements, for example.
//
// Builder encapsulates the tasks of type-checking (using go/types)
// abstract syntax trees (as defined by go/ast) for the source files
// comprising a Go program, and the conversion of each function from
// Go ASTs to the SSA representation.
//
// By supplying an instance of the SourceLocator function prototype,
// clients may control how the builder locates, loads and parses Go
// sources files for imported packages. This package provides
// GorootLoader, which uses go/build to locate packages in the Go
// source distribution, and go/parser to parse them.
//
// The builder initially builds a naive SSA form in which all local
// variables are addresses of stack locations with explicit loads and
// stores. If desired, registerisation and φ-node insertion using
// dominance and dataflow can be performed as a later pass to improve
// the accuracy and performance of subsequent analyses; this pass is
// not yet implemented.
//
// The program representation constructed by this package is fully
// resolved internally, i.e. it does not rely on the names of Values,
// Packages, Functions, Types or BasicBlocks for the correct
// interpretation of the program. Only the identities of objects and
// the topology of the SSA and type graphs are semantically
// significant. (There is one exception: Ids, used to identify field
// and method names, contain strings.) Avoidance of name-based
// operations simplifies the implementation of subsequent passes and
// can make them very efficient. Many objects are nonetheless named
// to aid in debugging, but it is not essential that the names be
// either accurate or unambiguous. The public API exposes a number of
// name-based maps for client convenience.
//
// Given a Go source package such as this:
//
// package main
//
// import "fmt"
//
// const message = "Hello, World!"
//
// func hello() {
// fmt.Println(message)
// }
//
// The SSA Builder creates a *Program containing a main *Package such
// as this:
//
// Package(Name: "main")
// Members:
// "message": *Literal (Type: untyped string, Value: "Hello, World!")
// "init·guard": *Global (Type: *bool)
// "hello": *Function (Type: func())
// Init: *Function (Type: func())
//
// The printed representation of the function main.hello is shown
// below. Within the function listing, the name of each BasicBlock
// such as ".0.entry" is printed left-aligned, followed by the block's
// instructions, i.e. implementations of Instruction.
// For each instruction that defines an SSA virtual register
// (i.e. implements Value), the type of that value is shown in the
// right column.
//
// # Name: main.hello
// # Declared at hello.go:7:6
// # Type: func()
// func hello():
// .0.entry:
// t0 = new [1]interface{} *[1]interface{}
// t1 = &t0[0:untyped integer] *interface{}
// t2 = make interface interface{} <- string ("Hello, World!":string) interface{}
// *t1 = t2
// t3 = slice t0[:] []interface{}
// t4 = fmt.Println(t3) (n int, err error)
// ret
//
// TODO(adonovan): demonstrate more features in the example:
// parameters and control flow at the least.
//
// TODO(adonovan): Consider how token.Pos source location information
// should be made available generally. Currently it is only present in
// Package, Function and CallCommon.
//
// TODO(adonovan): Provide an example skeleton application that loads
// and dumps the SSA form of a program. Accommodate package-at-a-time
// vs. whole-program operation.
//
// TODO(adonovan): Consider the exceptional control-flow implications
// of defer and recover().
//
// TODO(adonovan): build tables/functions that relate source variables
// to SSA variables to assist user interfaces that make queries about
// specific source entities.
package ssa

libgo/go/exp/ssa/func.go Normal file
View File

@ -0,0 +1,416 @@
package ssa
// This file implements the Function and BasicBlock types.
import (
"fmt"
"go/ast"
"go/types"
"io"
"os"
)
// Mode bits for additional diagnostics and checking.
// TODO(adonovan): move these to builder.go once submitted.
type BuilderMode uint
const (
LogPackages BuilderMode = 1 << iota // Dump package inventory to stderr
LogFunctions // Dump function SSA code to stderr
LogSource // Show source locations as SSA builder progresses
SanityCheckFunctions // Perform sanity checking of function bodies
UseGCImporter // Ignore SourceLoader; use gc-compiled object code for all imports
)
// addEdge adds a control-flow graph edge from from to to.
func addEdge(from, to *BasicBlock) {
from.Succs = append(from.Succs, to)
to.Preds = append(to.Preds, from)
}
// emit appends an instruction to the current basic block.
// If the instruction defines a Value, it is returned.
//
func (b *BasicBlock) emit(i Instruction) Value {
i.SetBlock(b)
b.Instrs = append(b.Instrs, i)
v, _ := i.(Value)
return v
}
// phis returns the prefix of b.Instrs containing all the block's φ-nodes.
func (b *BasicBlock) phis() []Instruction {
for i, instr := range b.Instrs {
if _, ok := instr.(*Phi); !ok {
return b.Instrs[:i]
}
}
return nil // unreachable in well-formed blocks
}
// replacePred replaces all occurrences of p in b's predecessor list with q.
// Ordinarily there should be at most one.
//
func (b *BasicBlock) replacePred(p, q *BasicBlock) {
for i, pred := range b.Preds {
if pred == p {
b.Preds[i] = q
}
}
}
// replaceSucc replaces all occurrences of p in b's successor list with q.
// Ordinarily there should be at most one.
//
func (b *BasicBlock) replaceSucc(p, q *BasicBlock) {
for i, succ := range b.Succs {
if succ == p {
b.Succs[i] = q
}
}
}
// removePred removes all occurrences of p in b's
// predecessor list and φ-nodes.
// Ordinarily there should be at most one.
//
func (b *BasicBlock) removePred(p *BasicBlock) {
phis := b.phis()
// We must preserve edge order for φ-nodes.
j := 0
for i, pred := range b.Preds {
if pred != p {
b.Preds[j] = b.Preds[i]
// Strike out φ-edge too.
for _, instr := range phis {
phi := instr.(*Phi)
phi.Edges[j] = phi.Edges[i]
}
j++
}
}
// Nil out b.Preds[j:] and φ-edges[j:] to aid GC.
for i := j; i < len(b.Preds); i++ {
b.Preds[i] = nil
for _, instr := range phis {
instr.(*Phi).Edges[i] = nil
}
}
b.Preds = b.Preds[:j]
for _, instr := range phis {
phi := instr.(*Phi)
phi.Edges = phi.Edges[:j]
}
}
// Destinations associated with unlabelled for/switch/select stmts.
// We push/pop one of these as we enter/leave each construct and for
// each BranchStmt we scan for the innermost target of the right type.
//
type targets struct {
tail *targets // rest of stack
_break *BasicBlock
_continue *BasicBlock
_fallthrough *BasicBlock
}
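// Editor's sketch, not part of the patch: how a BranchStmt scan over this
// stack would find the innermost break target. findBreak is hypothetical;
// the real scan lives in the builder, which is not part of this file.
func findBreak(ts *targets) *BasicBlock {
	for t := ts; t != nil; t = t.tail {
		if t._break != nil {
			return t._break // innermost enclosing for/switch/select
		}
	}
	return nil // no enclosing construct: a syntax error upstream
}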
// Destinations associated with a labelled block.
// We populate these as labels are encountered in forward gotos or
// labelled statements.
//
type lblock struct {
_goto *BasicBlock
_break *BasicBlock
_continue *BasicBlock
}
// funcSyntax holds the syntax tree for the function declaration and body.
type funcSyntax struct {
recvField *ast.FieldList
paramFields *ast.FieldList
resultFields *ast.FieldList
body *ast.BlockStmt
}
// labelledBlock returns the branch target associated with the
// specified label, creating it if needed.
//
func (f *Function) labelledBlock(label *ast.Ident) *lblock {
lb := f.lblocks[label.Obj]
if lb == nil {
lb = &lblock{_goto: f.newBasicBlock("label." + label.Name)}
f.lblocks[label.Obj] = lb
}
return lb
}
// addParam adds a (non-escaping) parameter to f.Params of the
// specified name and type.
//
func (f *Function) addParam(name string, typ types.Type) *Parameter {
v := &Parameter{
Name_: name,
Type_: pointer(typ), // address of param
}
f.Params = append(f.Params, v)
return v
}
func (f *Function) addObjParam(obj types.Object) *Parameter {
p := f.addParam(obj.GetName(), obj.GetType())
f.objects[obj] = p
return p
}
// start initializes the function prior to generating SSA code for its body.
// Precondition: f.Type() already set.
//
// If f.syntax != nil, f is a Go source function and idents must be a
// mapping from syntactic identifiers to their canonical type objects;
// otherwise, idents is ignored and the usual set-up for Go source
// functions is skipped.
//
func (f *Function) start(mode BuilderMode, idents map[*ast.Ident]types.Object) {
if mode&LogSource != 0 {
fmt.Fprintf(os.Stderr, "build function %s @ %s\n", f.FullName(), f.Prog.Files.Position(f.Pos))
}
f.currentBlock = f.newBasicBlock("entry")
f.objects = make(map[types.Object]Value) // needed for some synthetics, e.g. init
if f.syntax == nil {
return // synthetic function; no syntax tree
}
f.lblocks = make(map[*ast.Object]*lblock)
// Receiver (at most one inner iteration).
if f.syntax.recvField != nil {
for _, field := range f.syntax.recvField.List {
for _, n := range field.Names {
f.addObjParam(idents[n])
}
if field.Names == nil {
f.addParam(f.Signature.Recv.Name, f.Signature.Recv.Type)
}
}
}
// Parameters.
if f.syntax.paramFields != nil {
for _, field := range f.syntax.paramFields.List {
for _, n := range field.Names {
f.addObjParam(idents[n])
}
}
}
// Results.
if f.syntax.resultFields != nil {
for _, field := range f.syntax.resultFields.List {
// Implicit "var" decl of locals for named results.
for _, n := range field.Names {
f.results = append(f.results, f.addNamedLocal(idents[n]))
}
}
}
}
// finish() finalizes the function after SSA code generation of its body.
func (f *Function) finish(mode BuilderMode) {
f.objects = nil
f.results = nil
f.currentBlock = nil
f.lblocks = nil
f.syntax = nil
// Remove any f.Locals that are now heap-allocated.
j := 0
for _, l := range f.Locals {
if !l.Heap {
f.Locals[j] = l
j++
}
}
// Nil out f.Locals[j:] to aid GC.
for i := j; i < len(f.Locals); i++ {
f.Locals[i] = nil
}
f.Locals = f.Locals[:j]
// Ensure all value-defining Instructions have register names.
// (Non-Instruction Values are named at construction.)
tmp := 0
for _, b := range f.Blocks {
for _, instr := range b.Instrs {
switch instr := instr.(type) {
case *Alloc:
// Local Allocs may already be named.
if instr.Name_ == "" {
instr.Name_ = fmt.Sprintf("t%d", tmp)
tmp++
}
case Value:
instr.(interface {
setNum(int)
}).setNum(tmp)
tmp++
}
}
}
optimizeBlocks(f)
if mode&LogFunctions != 0 {
f.DumpTo(os.Stderr)
}
if mode&SanityCheckFunctions != 0 {
MustSanityCheck(f, nil)
}
if mode&LogSource != 0 {
fmt.Fprintf(os.Stderr, "build function %s done\n", f.FullName())
}
}
// addNamedLocal creates a local variable, adds it to function f and
// returns it. Its name and type are taken from obj. Subsequent
// calls to f.lookup(obj) will return the same local.
//
// Precondition: f.syntax != nil (i.e. a Go source function).
//
func (f *Function) addNamedLocal(obj types.Object) *Alloc {
l := f.addLocal(obj.GetType())
l.Name_ = obj.GetName()
f.objects[obj] = l
return l
}
// addLocal creates an anonymous local variable of type typ, adds it
// to function f and returns it.
//
func (f *Function) addLocal(typ types.Type) *Alloc {
v := &Alloc{Type_: pointer(typ)}
f.Locals = append(f.Locals, v)
f.emit(v)
return v
}
// lookup returns the address of the named variable identified by obj
// that is local to function f or one of its enclosing functions.
// If escaping, the reference comes from a potentially escaping pointer
// expression and the referent must be heap-allocated.
//
func (f *Function) lookup(obj types.Object, escaping bool) Value {
if v, ok := f.objects[obj]; ok {
if escaping {
switch v := v.(type) {
case *Capture:
// TODO(adonovan): fix: we must support this case.
// Requires copying to a 'new' Alloc.
fmt.Fprintln(os.Stderr, "Error: escaping reference to Capture")
case *Parameter:
v.Heap = true
case *Alloc:
v.Heap = true
default:
panic(fmt.Sprintf("Unexpected Function.objects kind: %T", v))
}
}
return v // function-local var (address)
}
// Definition must be in an enclosing function;
// plumb it through intervening closures.
if f.Enclosing == nil {
panic("no Value for type.Object " + obj.GetName())
}
v := &Capture{f.Enclosing.lookup(obj, true)} // escaping
f.objects[obj] = v
f.FreeVars = append(f.FreeVars, v)
return v
}
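// Editor's note, not part of the patch: the Capture/escaping case above
// arises from ordinary Go closures, for example:
//
//	func counter() func() int {
//		n := 0              // local of counter
//		return func() int { // the inner function captures n,
//			n++             // so n must be heap-allocated
//			return n        // (Alloc.Heap is set to true).
//		}
//	}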
// emit emits the specified instruction to function f, updating the
// control-flow graph if required.
//
func (f *Function) emit(instr Instruction) Value {
return f.currentBlock.emit(instr)
}
// DumpTo prints to w a human readable "disassembly" of the SSA code of
// all basic blocks of function f.
//
func (f *Function) DumpTo(w io.Writer) {
fmt.Fprintf(w, "# Name: %s\n", f.FullName())
fmt.Fprintf(w, "# Declared at %s\n", f.Prog.Files.Position(f.Pos))
fmt.Fprintf(w, "# Type: %s\n", f.Type())
if f.Enclosing != nil {
fmt.Fprintf(w, "# Parent: %s\n", f.Enclosing.Name())
}
if f.FreeVars != nil {
io.WriteString(w, "# Free variables:\n")
for i, fv := range f.FreeVars {
fmt.Fprintf(w, "# % 3d:\t%s %s\n", i, fv.Name(), fv.Type())
}
}
params := f.Params
if f.Signature.Recv != nil {
fmt.Fprintf(w, "func (%s) %s(", params[0].Name(), f.Name())
params = params[1:]
} else {
fmt.Fprintf(w, "func %s(", f.Name())
}
for i, v := range params {
if i > 0 {
io.WriteString(w, ", ")
}
io.WriteString(w, v.Name())
}
io.WriteString(w, "):\n")
for _, b := range f.Blocks {
if b == nil {
// Corrupt CFG.
fmt.Fprintf(w, ".nil:\n")
continue
}
fmt.Fprintf(w, ".%s:\t\t\t\t\t\t\t P:%d S:%d\n", b.Name, len(b.Preds), len(b.Succs))
if false { // CFG debugging
fmt.Fprintf(w, "\t# CFG: %s --> %s --> %s\n", blockNames(b.Preds), b.Name, blockNames(b.Succs))
}
for _, instr := range b.Instrs {
io.WriteString(w, "\t")
if v, ok := instr.(Value); ok {
l := 80 // for old time's sake.
// Left-align the instruction.
if name := v.Name(); name != "" {
n, _ := fmt.Fprintf(w, "%s = ", name)
l -= n
}
n, _ := io.WriteString(w, instr.String())
l -= n
// Right-align the type.
if t := v.Type(); t != nil {
fmt.Fprintf(w, "%*s", l-9, t)
}
} else {
io.WriteString(w, instr.String())
}
io.WriteString(w, "\n")
}
}
fmt.Fprintf(w, "\n")
}
// newBasicBlock adds to f a new basic block with a unique name and
// returns it. It does not automatically become the current block for
// subsequent calls to emit.
//
func (f *Function) newBasicBlock(name string) *BasicBlock {
b := &BasicBlock{
Name: fmt.Sprintf("%d.%s", len(f.Blocks), name),
Func: f,
}
f.Blocks = append(f.Blocks, b)
return b
}

libgo/go/exp/ssa/literal.go Normal file
View File

@ -0,0 +1,137 @@
package ssa
// This file defines the Literal SSA value type.
import (
"fmt"
"go/types"
"math/big"
"strconv"
)
// newLiteral returns a new literal of the specified value and type.
// val must be valid according to the specification of Literal.Value.
//
func newLiteral(val interface{}, typ types.Type) *Literal {
// This constructor exists to provide a single place to
// insert logging/assertions during debugging.
return &Literal{typ, val}
}
// intLiteral returns an untyped integer literal that evaluates to i.
func intLiteral(i int64) *Literal {
return newLiteral(i, types.Typ[types.UntypedInt])
}
// nilLiteral returns a nil literal of the specified (reference) type.
func nilLiteral(typ types.Type) *Literal {
return newLiteral(types.NilType{}, typ)
}
func (l *Literal) Name() string {
var s string
switch x := l.Value.(type) {
case bool:
s = fmt.Sprintf("%v", l.Value)
case int64:
s = fmt.Sprintf("%d", l.Value)
case *big.Int:
s = x.String()
case *big.Rat:
s = x.FloatString(20)
case string:
if len(x) > 20 {
x = x[:17] + "..." // abbreviate
}
s = strconv.Quote(x)
case types.Complex:
r := x.Re.FloatString(20)
i := x.Im.FloatString(20)
s = fmt.Sprintf("%s+%si", r, i)
case types.NilType:
s = "nil"
default:
panic(fmt.Sprintf("unexpected literal value: %T", x))
}
return s + ":" + l.Type_.String()
}
func (l *Literal) Type() types.Type {
return l.Type_
}
// IsNil returns true if this literal represents a typed or untyped nil value.
func (l *Literal) IsNil() bool {
_, ok := l.Value.(types.NilType)
return ok
}
// Int64 returns the numeric value of this literal truncated to fit
// a signed 64-bit integer.
//
func (l *Literal) Int64() int64 {
switch x := l.Value.(type) {
case int64:
return x
case *big.Int:
return x.Int64()
case *big.Rat:
// TODO(adonovan): fix: is this the right rounding mode?
var q big.Int
return q.Quo(x.Num(), x.Denom()).Int64()
}
panic(fmt.Sprintf("unexpected literal value: %T", l.Value))
}
// Uint64 returns the numeric value of this literal truncated to fit
// an unsigned 64-bit integer.
//
func (l *Literal) Uint64() uint64 {
switch x := l.Value.(type) {
case int64:
if x < 0 {
return 0
}
return uint64(x)
case *big.Int:
return x.Uint64()
case *big.Rat:
// TODO(adonovan): fix: is this right?
var q big.Int
return q.Quo(x.Num(), x.Denom()).Uint64()
}
panic(fmt.Sprintf("unexpected literal value: %T", l.Value))
}
// Float64 returns the numeric value of this literal truncated to fit
// a float64.
//
func (l *Literal) Float64() float64 {
switch x := l.Value.(type) {
case int64:
return float64(x)
case *big.Int:
var r big.Rat
f, _ := r.SetInt(x).Float64()
return f
case *big.Rat:
f, _ := x.Float64()
return f
}
panic(fmt.Sprintf("unexpected literal value: %T", l.Value))
}
// Complex128 returns the complex value of this literal truncated to
// fit a complex128.
//
func (l *Literal) Complex128() complex128 {
switch x := l.Value.(type) {
case int64, *big.Int, *big.Rat:
return complex(l.Float64(), 0)
case types.Complex:
re64, _ := x.Re.Float64()
im64, _ := x.Im.Float64()
return complex(re64, im64)
}
panic(fmt.Sprintf("unexpected literal value: %T", l.Value))
}
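// Editor's sketch, not part of the patch: the truncation Int64 performs on a
// rational literal, spelled out with the same math/big calls:
//
//	x := big.NewRat(7, 2)     // 3.5
//	var q big.Int
//	q.Quo(x.Num(), x.Denom()) // 3; Quo truncates toward zero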

libgo/go/exp/ssa/print.go Normal file
View File

@ -0,0 +1,383 @@
package ssa
// This file implements the String() methods for all Value and
// Instruction types.
import (
"bytes"
"fmt"
"go/ast"
"go/types"
)
func (id Id) String() string {
if id.Pkg == nil {
return id.Name
}
return fmt.Sprintf("%s/%s", id.Pkg.Path, id.Name)
}
// relName returns the name of v relative to i.
// In most cases, this is identical to v.Name(), but for cross-package
// references to Functions (including methods) and Globals, the
// package-qualified FullName is used instead.
//
func relName(v Value, i Instruction) string {
switch v := v.(type) {
case *Global:
if v.Pkg == i.Block().Func.Pkg {
return v.Name()
}
return v.FullName()
case *Function:
if v.Pkg == nil || v.Pkg == i.Block().Func.Pkg {
return v.Name()
}
return v.FullName()
}
return v.Name()
}
// Value.String()
//
// This method is provided only for debugging.
// It never appears in disassembly, which uses Value.Name().
func (v *Literal) String() string {
return fmt.Sprintf("literal %s rep=%T", v.Name(), v.Value)
}
func (v *Parameter) String() string {
return fmt.Sprintf("parameter %s : %s", v.Name(), v.Type())
}
func (v *Capture) String() string {
return fmt.Sprintf("capture %s : %s", v.Name(), v.Type())
}
func (v *Global) String() string {
return fmt.Sprintf("global %s : %s", v.Name(), v.Type())
}
func (v *Builtin) String() string {
return fmt.Sprintf("builtin %s : %s", v.Name(), v.Type())
}
func (r *Function) String() string {
return fmt.Sprintf("function %s : %s", r.Name(), r.Type())
}
// FullName returns the name of this function qualified by the
// package name, unless it is anonymous or synthetic.
//
// TODO(adonovan): move to func.go when it's submitted.
//
func (f *Function) FullName() string {
if f.Enclosing != nil || f.Pkg == nil {
return f.Name_ // anonymous or synthetic
}
return fmt.Sprintf("%s.%s", f.Pkg.ImportPath, f.Name_)
}
// FullName returns g's package-qualified name.
func (g *Global) FullName() string {
return fmt.Sprintf("%s.%s", g.Pkg.ImportPath, g.Name_)
}
// Instruction.String()
func (v *Alloc) String() string {
op := "local"
if v.Heap {
op = "new"
}
return fmt.Sprintf("%s %s", op, indirectType(v.Type()))
}
func (v *Phi) String() string {
var b bytes.Buffer
b.WriteString("phi [")
for i, edge := range v.Edges {
if i > 0 {
b.WriteString(", ")
}
// Be robust against malformed CFG.
blockname := "?"
if v.Block_ != nil && i < len(v.Block_.Preds) {
blockname = v.Block_.Preds[i].Name
}
b.WriteString(blockname)
b.WriteString(": ")
b.WriteString(relName(edge, v))
}
b.WriteString("]")
return b.String()
}
func printCall(v *CallCommon, prefix string, instr Instruction) string {
var b bytes.Buffer
b.WriteString(prefix)
if v.Func != nil {
b.WriteString(relName(v.Func, instr))
} else {
name := underlyingType(v.Recv.Type()).(*types.Interface).Methods[v.Method].Name
fmt.Fprintf(&b, "invoke %s.%s [#%d]", relName(v.Recv, instr), name, v.Method)
}
b.WriteString("(")
for i, arg := range v.Args {
if i > 0 {
b.WriteString(", ")
}
b.WriteString(relName(arg, instr))
}
if v.HasEllipsis {
b.WriteString("...")
}
b.WriteString(")")
return b.String()
}
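// For example (illustrative, added): a direct call appears as
// "println(t0)", a deferred call as "defer println(t0)", and a call
// dispatched through an interface as "invoke t1.String [#0]()", where
// #0 is the method's index within the interface's method set.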
func (v *Call) String() string {
return printCall(&v.CallCommon, "", v)
}
func (v *BinOp) String() string {
return fmt.Sprintf("%s %s %s", relName(v.X, v), v.Op.String(), relName(v.Y, v))
}
func (v *UnOp) String() string {
return fmt.Sprintf("%s%s%s", v.Op, relName(v.X, v), commaOk(v.CommaOk))
}
func (v *Conv) String() string {
return fmt.Sprintf("convert %s <- %s (%s)", v.Type(), v.X.Type(), relName(v.X, v))
}
func (v *ChangeInterface) String() string {
return fmt.Sprintf("change interface %s <- %s (%s)", v.Type(), v.X.Type(), relName(v.X, v))
}
func (v *MakeInterface) String() string {
return fmt.Sprintf("make interface %s <- %s (%s)", v.Type(), v.X.Type(), relName(v.X, v))
}
func (v *MakeClosure) String() string {
var b bytes.Buffer
fmt.Fprintf(&b, "make closure %s", relName(v.Fn, v))
if v.Bindings != nil {
b.WriteString(" [")
for i, c := range v.Bindings {
if i > 0 {
b.WriteString(", ")
}
b.WriteString(relName(c, v))
}
b.WriteString("]")
}
return b.String()
}
func (v *MakeSlice) String() string {
var b bytes.Buffer
b.WriteString("make slice ")
b.WriteString(v.Type().String())
b.WriteString(" ")
b.WriteString(relName(v.Len, v))
b.WriteString(" ")
b.WriteString(relName(v.Cap, v))
return b.String()
}
func (v *Slice) String() string {
var b bytes.Buffer
b.WriteString("slice ")
b.WriteString(relName(v.X, v))
b.WriteString("[")
if v.Low != nil {
b.WriteString(relName(v.Low, v))
}
b.WriteString(":")
if v.High != nil {
b.WriteString(relName(v.High, v))
}
b.WriteString("]")
return b.String()
}
func (v *MakeMap) String() string {
res := ""
if v.Reserve != nil {
res = relName(v.Reserve, v)
}
return fmt.Sprintf("make %s %s", v.Type(), res)
}
func (v *MakeChan) String() string {
return fmt.Sprintf("make %s %s", v.Type(), relName(v.Size, v))
}
func (v *FieldAddr) String() string {
fields := underlyingType(indirectType(v.X.Type())).(*types.Struct).Fields
// Be robust against a bad index.
name := "?"
if v.Field >= 0 && v.Field < len(fields) {
name = fields[v.Field].Name
}
return fmt.Sprintf("&%s.%s [#%d]", relName(v.X, v), name, v.Field)
}
func (v *Field) String() string {
fields := underlyingType(v.X.Type()).(*types.Struct).Fields
// Be robust against a bad index.
name := "?"
if v.Field >= 0 && v.Field < len(fields) {
name = fields[v.Field].Name
}
return fmt.Sprintf("%s.%s [#%d]", relName(v.X, v), name, v.Field)
}
func (v *IndexAddr) String() string {
return fmt.Sprintf("&%s[%s]", relName(v.X, v), relName(v.Index, v))
}
func (v *Index) String() string {
return fmt.Sprintf("%s[%s]", relName(v.X, v), relName(v.Index, v))
}
func (v *Lookup) String() string {
return fmt.Sprintf("%s[%s]%s", relName(v.X, v), relName(v.Index, v), commaOk(v.CommaOk))
}
func (v *Range) String() string {
return "range " + relName(v.X, v)
}
func (v *Next) String() string {
return "next " + relName(v.Iter, v)
}
func (v *TypeAssert) String() string {
return fmt.Sprintf("typeassert%s %s.(%s)", commaOk(v.CommaOk), relName(v.X, v), v.AssertedType)
}
func (v *Extract) String() string {
return fmt.Sprintf("extract %s #%d", relName(v.Tuple, v), v.Index)
}
func (s *Jump) String() string {
// Be robust against malformed CFG.
blockname := "?"
if s.Block_ != nil && len(s.Block_.Succs) == 1 {
blockname = s.Block_.Succs[0].Name
}
return fmt.Sprintf("jump %s", blockname)
}
func (s *If) String() string {
// Be robust against malformed CFG.
tblockname, fblockname := "?", "?"
if s.Block_ != nil && len(s.Block_.Succs) == 2 {
tblockname = s.Block_.Succs[0].Name
fblockname = s.Block_.Succs[1].Name
}
return fmt.Sprintf("if %s goto %s else %s", relName(s.Cond, s), tblockname, fblockname)
}
func (s *Go) String() string {
return printCall(&s.CallCommon, "go ", s)
}
func (s *Ret) String() string {
var b bytes.Buffer
b.WriteString("ret")
for i, r := range s.Results {
if i == 0 {
b.WriteString(" ")
} else {
b.WriteString(", ")
}
b.WriteString(relName(r, s))
}
return b.String()
}
func (s *Send) String() string {
return fmt.Sprintf("send %s <- %s", relName(s.Chan, s), relName(s.X, s))
}
func (s *Defer) String() string {
return printCall(&s.CallCommon, "defer ", s)
}
func (s *Select) String() string {
var b bytes.Buffer
for i, st := range s.States {
if i > 0 {
b.WriteString(", ")
}
if st.Dir == ast.RECV {
b.WriteString("<-")
b.WriteString(relName(st.Chan, s))
} else {
b.WriteString(relName(st.Chan, s))
b.WriteString("<-")
b.WriteString(relName(st.Send, s))
}
}
non := ""
if !s.Blocking {
non = "non"
}
return fmt.Sprintf("select %sblocking [%s]", non, b.String())
}
func (s *Store) String() string {
return fmt.Sprintf("*%s = %s", relName(s.Addr, s), relName(s.Val, s))
}
func (s *MapUpdate) String() string {
return fmt.Sprintf("%s[%s] = %s", relName(s.Map, s), relName(s.Key, s), relName(s.Value, s))
}
func (p *Package) String() string {
// TODO(adonovan): prettify output.
var b bytes.Buffer
fmt.Fprintf(&b, "Package %s at %s:\n", p.ImportPath, p.Prog.Files.File(p.Pos).Name())
// TODO(adonovan): make order deterministic.
maxname := 0
for name := range p.Members {
if l := len(name); l > maxname {
maxname = l
}
}
for name, mem := range p.Members {
switch mem := mem.(type) {
case *Literal:
fmt.Fprintf(&b, " const %-*s %s\n", maxname, name, mem.Name())
case *Function:
fmt.Fprintf(&b, " func %-*s %s\n", maxname, name, mem.Type())
case *Type:
fmt.Fprintf(&b, " type %-*s %s\n", maxname, name, mem.NamedType.Underlying)
// TODO(adonovan): make order deterministic.
for name, method := range mem.Methods {
fmt.Fprintf(&b, " method %s %s\n", name, method.Signature)
}
case *Global:
fmt.Fprintf(&b, " var %-*s %s\n", maxname, name, mem.Type())
}
}
return b.String()
}
func commaOk(x bool) string {
if x {
return ",ok"
}
return ""
}

libgo/go/exp/ssa/sanity.go Normal file
View File

@@ -0,0 +1,263 @@
package ssa
// An optional pass for sanity checking invariants of the SSA representation.
// Currently it checks CFG invariants but little at the instruction level.
import (
"bytes"
"fmt"
"io"
"os"
)
type sanity struct {
reporter io.Writer
fn *Function
block *BasicBlock
insane bool
}
// SanityCheck performs integrity checking of the SSA representation
// of the function fn and returns true if it was valid. Diagnostics
// are written to reporter if non-nil, os.Stderr otherwise. Some
// diagnostics are only warnings and do not imply a negative result.
//
// Sanity checking is intended to facilitate the debugging of code
// transformation passes.
//
func SanityCheck(fn *Function, reporter io.Writer) bool {
if reporter == nil {
reporter = os.Stderr
}
return (&sanity{reporter: reporter}).checkFunction(fn)
}
// MustSanityCheck is like SanityCheck but panics instead of returning
// a negative result.
//
func MustSanityCheck(fn *Function, reporter io.Writer) {
if !SanityCheck(fn, reporter) {
panic("SanityCheck failed")
}
}
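// Usage sketch (added for exposition; fn stands for any *Function the
// caller has built or transformed). Diagnostics accumulate in the
// supplied writer and can be reported only on failure:
//
//	var buf bytes.Buffer
//	if !SanityCheck(fn, &buf) {
//		fmt.Fprint(os.Stderr, buf.String())
//	}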
// blockNames returns the names of the specified blocks as a
// human-readable string.
//
func blockNames(blocks []*BasicBlock) string {
var buf bytes.Buffer
for i, b := range blocks {
if i > 0 {
io.WriteString(&buf, ", ")
}
io.WriteString(&buf, b.Name)
}
return buf.String()
}
func (s *sanity) diagnostic(prefix, format string, args ...interface{}) {
fmt.Fprintf(s.reporter, "%s: function %s", prefix, s.fn.FullName())
if s.block != nil {
fmt.Fprintf(s.reporter, ", block %s", s.block.Name)
}
io.WriteString(s.reporter, ": ")
fmt.Fprintf(s.reporter, format, args...)
io.WriteString(s.reporter, "\n")
}
func (s *sanity) errorf(format string, args ...interface{}) {
s.insane = true
s.diagnostic("Error", format, args...)
}
func (s *sanity) warnf(format string, args ...interface{}) {
s.diagnostic("Warning", format, args...)
}
// findDuplicate returns an arbitrary basic block that appeared more
// than once in blocks, or nil if all were unique.
func findDuplicate(blocks []*BasicBlock) *BasicBlock {
if len(blocks) < 2 {
return nil
}
if blocks[0] == blocks[1] {
return blocks[0]
}
// Slow path:
m := make(map[*BasicBlock]bool)
for _, b := range blocks {
if m[b] {
return b
}
m[b] = true
}
return nil
}
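// For example (illustrative, added): given distinct blocks a and b,
// findDuplicate([]*BasicBlock{a, a}) returns a via the fast path
// without allocating, findDuplicate([]*BasicBlock{a, b, a}) returns a
// via the map-based slow path, and findDuplicate([]*BasicBlock{a, b})
// returns nil.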
func (s *sanity) checkInstr(idx int, instr Instruction) {
switch instr := instr.(type) {
case *If, *Jump, *Ret:
s.errorf("control flow instruction not at end of block")
case *Phi:
if idx == 0 {
// It suffices to apply this check to just the first phi node,
// since the predecessor list is a property of the block and is
// therefore the same for every phi node in it.
if dup := findDuplicate(s.block.Preds); dup != nil {
s.errorf("phi node in block with duplicate predecessor %s", dup.Name)
}
} else {
prev := s.block.Instrs[idx-1]
if _, ok := prev.(*Phi); !ok {
s.errorf("Phi instruction follows a non-Phi: %T", prev)
}
}
if ne, np := len(instr.Edges), len(s.block.Preds); ne != np {
s.errorf("phi node has %d edges but %d predecessors", ne, np)
}
case *Alloc:
case *Call:
case *BinOp:
case *UnOp:
case *MakeClosure:
case *MakeChan:
case *MakeMap:
case *MakeSlice:
case *Slice:
case *Field:
case *FieldAddr:
case *IndexAddr:
case *Index:
case *Select:
case *Range:
case *TypeAssert:
case *Extract:
case *Go:
case *Defer:
case *Send:
case *Store:
case *MapUpdate:
case *Next:
case *Lookup:
case *Conv:
case *ChangeInterface:
case *MakeInterface:
// TODO(adonovan): implement checks.
default:
panic(fmt.Sprintf("Unknown instruction type: %T", instr))
}
}
func (s *sanity) checkFinalInstr(idx int, instr Instruction) {
switch instr.(type) {
case *If:
if nsuccs := len(s.block.Succs); nsuccs != 2 {
s.errorf("If-terminated block has %d successors; expected 2", nsuccs)
return
}
if s.block.Succs[0] == s.block.Succs[1] {
s.errorf("If-instruction has same True, False target blocks: %s", s.block.Succs[0].Name)
return
}
case *Jump:
if nsuccs := len(s.block.Succs); nsuccs != 1 {
s.errorf("Jump-terminated block has %d successors; expected 1", nsuccs)
return
}
case *Ret:
if nsuccs := len(s.block.Succs); nsuccs != 0 {
s.errorf("Ret-terminated block has %d successors; expected none", nsuccs)
return
}
// TODO(adonovan): check number and types of results
default:
s.errorf("non-control flow instruction at end of block")
}
}
func (s *sanity) checkBlock(b *BasicBlock, isEntry bool) {
s.block = b
// Check all blocks are reachable.
// (The entry block is always implicitly reachable.)
if !isEntry && len(b.Preds) == 0 {
s.warnf("unreachable block")
if b.Instrs == nil {
// Since this block is about to be pruned,
// tolerating transient problems in it
// simplifies other optimisations.
return
}
}
// Check predecessor and successor relations are dual.
for _, a := range b.Preds {
found := false
for _, bb := range a.Succs {
if bb == b {
found = true
break
}
}
if !found {
s.errorf("expected successor edge in predecessor %s; found only: %s", a.Name, blockNames(a.Succs))
}
}
for _, c := range b.Succs {
found := false
for _, bb := range c.Preds {
if bb == b {
found = true
break
}
}
if !found {
s.errorf("expected predecessor edge in successor %s; found only: %s", c.Name, blockNames(c.Preds))
}
}
// Check each instruction is sane.
n := len(b.Instrs)
if n == 0 {
s.errorf("basic block contains no instructions")
}
for j, instr := range b.Instrs {
if b2 := instr.Block(); b2 == nil {
s.errorf("nil Block() for instruction at index %d", j)
continue
} else if b2 != b {
s.errorf("wrong Block() (%s) for instruction at index %d ", b2.Name, j)
continue
}
if j < n-1 {
s.checkInstr(j, instr)
} else {
s.checkFinalInstr(j, instr)
}
}
}
func (s *sanity) checkFunction(fn *Function) bool {
// TODO(adonovan): check Function invariants:
// - check owning Package (if any) contains this function.
// - check params match signature
// - check locals are all !Heap
// - check transient fields are nil
// - check block labels are unique (warning)
s.fn = fn
if fn.Prog == nil {
s.errorf("nil Prog")
}
for i, b := range fn.Blocks {
if b == nil {
s.warnf("nil *BasicBlock at f.Blocks[%d]", i)
continue
}
s.checkBlock(b, i == 0)
}
s.block = nil
s.fn = nil
return !s.insane
}

Some files were not shown because too many files have changed in this diff.