libgo: Update Go library to master revision 15489/921e53d4863c.
From-SVN: r195560
This commit is contained in:
parent
91bfca5909
commit
d6f2922e91
|
@ -1,4 +1,4 @@
|
|||
6fdc1974457c
|
||||
921e53d4863c
|
||||
|
||||
The first line of this file holds the Mercurial revision number of the
|
||||
last merge done from the master library sources.
|
||||
|
|
|
@ -227,8 +227,8 @@ toolexeclibgoexp_DATA = \
|
|||
$(exp_inotify_gox) \
|
||||
exp/norm.gox \
|
||||
exp/proxy.gox \
|
||||
exp/ssa.gox \
|
||||
exp/terminal.gox \
|
||||
exp/types.gox \
|
||||
exp/utf8string.gox
|
||||
|
||||
toolexeclibgoexphtmldir = $(toolexeclibgoexpdir)/html
|
||||
|
@ -256,7 +256,8 @@ toolexeclibgogo_DATA = \
|
|||
go/parser.gox \
|
||||
go/printer.gox \
|
||||
go/scanner.gox \
|
||||
go/token.gox
|
||||
go/token.gox \
|
||||
go/types.gox
|
||||
|
||||
toolexeclibgohashdir = $(toolexeclibgodir)/hash
|
||||
|
||||
|
@ -682,7 +683,7 @@ go_net_fd_os_file = go/net/fd_linux.go
|
|||
go_net_newpollserver_file = go/net/newpollserver_unix.go
|
||||
else # !LIBGO_IS_LINUX && !LIBGO_IS_RTEMS
|
||||
if LIBGO_IS_NETBSD
|
||||
go_net_fd_os_file = go/net/fd_netbsd.go
|
||||
go_net_fd_os_file = go/net/fd_bsd.go
|
||||
go_net_newpollserver_file = go/net/newpollserver_unix.go
|
||||
else # !LIBGO_IS_NETBSD && !LIBGO_IS_LINUX && !LIBGO_IS_RTEMS
|
||||
# By default use select with pipes. Most systems should have
|
||||
|
@ -753,9 +754,16 @@ go_net_interface_file = go/net/interface_stub.go
|
|||
endif
|
||||
endif
|
||||
|
||||
if LIBGO_IS_LINUX
|
||||
go_net_cloexec_file = go/net/sock_cloexec.go
|
||||
else
|
||||
go_net_cloexec_file = go/net/sys_cloexec.go
|
||||
endif
|
||||
|
||||
go_net_files = \
|
||||
go/net/cgo_unix.go \
|
||||
$(go_net_cgo_file) \
|
||||
$(go_net_cloexec_file) \
|
||||
go/net/dial.go \
|
||||
go/net/dnsclient.go \
|
||||
go/net/dnsclient_unix.go \
|
||||
|
@ -856,6 +864,12 @@ endif
|
|||
endif
|
||||
endif
|
||||
|
||||
if LIBGO_IS_LINUX
|
||||
go_os_pipe_file = go/os/pipe_linux.go
|
||||
else
|
||||
go_os_pipe_file = go/os/pipe_bsd.go
|
||||
endif
|
||||
|
||||
go_os_files = \
|
||||
$(go_os_dir_file) \
|
||||
go/os/dir.go \
|
||||
|
@ -872,6 +886,7 @@ go_os_files = \
|
|||
go/os/getwd.go \
|
||||
go/os/path.go \
|
||||
go/os/path_unix.go \
|
||||
$(go_os_pipe_file) \
|
||||
go/os/proc.go \
|
||||
$(go_os_stat_file) \
|
||||
go/os/str.go \
|
||||
|
@ -1026,6 +1041,7 @@ go_compress_bzip2_files = \
|
|||
go_compress_flate_files = \
|
||||
go/compress/flate/copy.go \
|
||||
go/compress/flate/deflate.go \
|
||||
go/compress/flate/fixedhuff.go \
|
||||
go/compress/flate/huffman_bit_writer.go \
|
||||
go/compress/flate/huffman_code.go \
|
||||
go/compress/flate/inflate.go \
|
||||
|
@ -1222,8 +1238,10 @@ go_exp_inotify_files = \
|
|||
go_exp_locale_collate_files = \
|
||||
go/exp/locale/collate/colelem.go \
|
||||
go/exp/locale/collate/collate.go \
|
||||
go/exp/locale/collate/colltab.go \
|
||||
go/exp/locale/collate/contract.go \
|
||||
go/exp/locale/collate/export.go \
|
||||
go/exp/locale/collate/sort.go \
|
||||
go/exp/locale/collate/table.go \
|
||||
go/exp/locale/collate/tables.go \
|
||||
go/exp/locale/collate/trie.go
|
||||
|
@ -1248,23 +1266,18 @@ go_exp_proxy_files = \
|
|||
go/exp/proxy/per_host.go \
|
||||
go/exp/proxy/proxy.go \
|
||||
go/exp/proxy/socks5.go
|
||||
go_exp_ssa_files = \
|
||||
go/exp/ssa/blockopt.go \
|
||||
go/exp/ssa/doc.go \
|
||||
go/exp/ssa/func.go \
|
||||
go/exp/ssa/sanity.go \
|
||||
go/exp/ssa/ssa.go \
|
||||
go/exp/ssa/literal.go \
|
||||
go/exp/ssa/print.go \
|
||||
go/exp/ssa/util.go
|
||||
go_exp_terminal_files = \
|
||||
go/exp/terminal/terminal.go \
|
||||
go/exp/terminal/util.go
|
||||
go_exp_types_files = \
|
||||
go/exp/types/builtins.go \
|
||||
go/exp/types/check.go \
|
||||
go/exp/types/const.go \
|
||||
go/exp/types/conversions.go \
|
||||
go/exp/types/errors.go \
|
||||
go/exp/types/exportdata.go \
|
||||
go/exp/types/expr.go \
|
||||
go/exp/types/gcimporter.go \
|
||||
go/exp/types/operand.go \
|
||||
go/exp/types/predicates.go \
|
||||
go/exp/types/stmt.go \
|
||||
go/exp/types/types.go \
|
||||
go/exp/types/universe.go
|
||||
go_exp_utf8string_files = \
|
||||
go/exp/utf8string/string.go
|
||||
|
||||
|
@ -1305,6 +1318,24 @@ go_go_token_files = \
|
|||
go/go/token/position.go \
|
||||
go/go/token/serialize.go \
|
||||
go/go/token/token.go
|
||||
go_go_types_files = \
|
||||
go/go/types/api.go \
|
||||
go/go/types/builtins.go \
|
||||
go/go/types/check.go \
|
||||
go/go/types/const.go \
|
||||
go/go/types/conversions.go \
|
||||
go/go/types/errors.go \
|
||||
go/go/types/exportdata.go \
|
||||
go/go/types/expr.go \
|
||||
go/go/types/gcimporter.go \
|
||||
go/go/types/objects.go \
|
||||
go/go/types/operand.go \
|
||||
go/go/types/predicates.go \
|
||||
go/go/types/resolve.go \
|
||||
go/go/types/scope.go \
|
||||
go/go/types/stmt.go \
|
||||
go/go/types/types.go \
|
||||
go/go/types/universe.go
|
||||
|
||||
go_hash_adler32_files = \
|
||||
go/hash/adler32/adler32.go
|
||||
|
@ -1848,8 +1879,8 @@ libgo_go_objs = \
|
|||
exp/locale/collate/build.lo \
|
||||
exp/norm.lo \
|
||||
exp/proxy.lo \
|
||||
exp/ssa.lo \
|
||||
exp/terminal.lo \
|
||||
exp/types.lo \
|
||||
exp/utf8string.lo \
|
||||
html/template.lo \
|
||||
go/ast.lo \
|
||||
|
@ -1860,6 +1891,7 @@ libgo_go_objs = \
|
|||
go/printer.lo \
|
||||
go/scanner.lo \
|
||||
go/token.lo \
|
||||
go/types.lo \
|
||||
hash/adler32.lo \
|
||||
hash/crc32.lo \
|
||||
hash/crc64.lo \
|
||||
|
@ -2751,6 +2783,15 @@ exp/proxy/check: $(CHECK_DEPS)
|
|||
@$(CHECK)
|
||||
.PHONY: exp/proxy/check
|
||||
|
||||
@go_include@ exp/ssa.lo.dep
|
||||
exp/ssa.lo.dep: $(go_exp_ssa_files)
|
||||
$(BUILDDEPS)
|
||||
exp/ssa.lo: $(go_exp_ssa_files)
|
||||
$(BUILDPACKAGE)
|
||||
exp/ssa/check: $(CHECK_DEPS)
|
||||
@$(CHECK)
|
||||
.PHONY: exp/ssa/check
|
||||
|
||||
@go_include@ exp/terminal.lo.dep
|
||||
exp/terminal.lo.dep: $(go_exp_terminal_files)
|
||||
$(BUILDDEPS)
|
||||
|
@ -2760,15 +2801,6 @@ exp/terminal/check: $(CHECK_DEPS)
|
|||
@$(CHECK)
|
||||
.PHONY: exp/terminal/check
|
||||
|
||||
@go_include@ exp/types.lo.dep
|
||||
exp/types.lo.dep: $(go_exp_types_files)
|
||||
$(BUILDDEPS)
|
||||
exp/types.lo: $(go_exp_types_files)
|
||||
$(BUILDPACKAGE)
|
||||
exp/types/check: $(CHECK_DEPS)
|
||||
@$(CHECK)
|
||||
.PHONY: exp/types/check
|
||||
|
||||
@go_include@ exp/utf8string.lo.dep
|
||||
exp/utf8string.lo.dep: $(go_exp_utf8string_files)
|
||||
$(BUILDDEPS)
|
||||
|
@ -2877,6 +2909,15 @@ go/token/check: $(CHECK_DEPS)
|
|||
@$(CHECK)
|
||||
.PHONY: go/token/check
|
||||
|
||||
@go_include@ go/types.lo.dep
|
||||
go/types.lo.dep: $(go_go_types_files)
|
||||
$(BUILDDEPS)
|
||||
go/types.lo: $(go_go_types_files)
|
||||
$(BUILDPACKAGE)
|
||||
go/types/check: $(CHECK_DEPS)
|
||||
@$(CHECK)
|
||||
.PHONY: go/types/check
|
||||
|
||||
@go_include@ hash/adler32.lo.dep
|
||||
hash/adler32.lo.dep: $(go_hash_adler32_files)
|
||||
$(BUILDDEPS)
|
||||
|
@ -3507,9 +3548,9 @@ exp/norm.gox: exp/norm.lo
|
|||
$(BUILDGOX)
|
||||
exp/proxy.gox: exp/proxy.lo
|
||||
$(BUILDGOX)
|
||||
exp/terminal.gox: exp/terminal.lo
|
||||
exp/ssa.gox: exp/ssa.lo
|
||||
$(BUILDGOX)
|
||||
exp/types.gox: exp/types.lo
|
||||
exp/terminal.gox: exp/terminal.lo
|
||||
$(BUILDGOX)
|
||||
exp/utf8string.gox: exp/utf8string.lo
|
||||
$(BUILDGOX)
|
||||
|
@ -3533,6 +3574,8 @@ go/scanner.gox: go/scanner.lo
|
|||
$(BUILDGOX)
|
||||
go/token.gox: go/token.lo
|
||||
$(BUILDGOX)
|
||||
go/types.gox: go/types.lo
|
||||
$(BUILDGOX)
|
||||
|
||||
hash/adler32.gox: hash/adler32.lo
|
||||
$(BUILDGOX)
|
||||
|
@ -3734,7 +3777,6 @@ TEST_PACKAGES = \
|
|||
exp/norm/check \
|
||||
exp/proxy/check \
|
||||
exp/terminal/check \
|
||||
exp/types/check \
|
||||
exp/utf8string/check \
|
||||
html/template/check \
|
||||
go/ast/check \
|
||||
|
@ -3745,7 +3787,7 @@ TEST_PACKAGES = \
|
|||
go/printer/check \
|
||||
go/scanner/check \
|
||||
go/token/check \
|
||||
$(go_types_check_omitted_since_it_calls_6g) \
|
||||
go/types/check \
|
||||
hash/adler32/check \
|
||||
hash/crc32/check \
|
||||
hash/crc64/check \
|
||||
|
|
|
@ -156,14 +156,14 @@ am__DEPENDENCIES_2 = bufio.lo bytes.lo bytes/index.lo crypto.lo \
|
|||
encoding/json.lo encoding/pem.lo encoding/xml.lo \
|
||||
exp/cookiejar.lo exp/ebnf.lo exp/html.lo exp/html/atom.lo \
|
||||
exp/locale/collate.lo exp/locale/collate/build.lo exp/norm.lo \
|
||||
exp/proxy.lo exp/terminal.lo exp/types.lo exp/utf8string.lo \
|
||||
exp/proxy.lo exp/ssa.lo exp/terminal.lo exp/utf8string.lo \
|
||||
html/template.lo go/ast.lo go/build.lo go/doc.lo go/format.lo \
|
||||
go/parser.lo go/printer.lo go/scanner.lo go/token.lo \
|
||||
hash/adler32.lo hash/crc32.lo hash/crc64.lo hash/fnv.lo \
|
||||
net/http/cgi.lo net/http/fcgi.lo net/http/httptest.lo \
|
||||
net/http/httputil.lo net/http/pprof.lo image/color.lo \
|
||||
image/draw.lo image/gif.lo image/jpeg.lo image/png.lo \
|
||||
index/suffixarray.lo io/ioutil.lo log/syslog.lo \
|
||||
go/types.lo hash/adler32.lo hash/crc32.lo hash/crc64.lo \
|
||||
hash/fnv.lo net/http/cgi.lo net/http/fcgi.lo \
|
||||
net/http/httptest.lo net/http/httputil.lo net/http/pprof.lo \
|
||||
image/color.lo image/draw.lo image/gif.lo image/jpeg.lo \
|
||||
image/png.lo index/suffixarray.lo io/ioutil.lo log/syslog.lo \
|
||||
log/syslog/syslog_c.lo math/big.lo math/cmplx.lo math/rand.lo \
|
||||
mime/multipart.lo net/http.lo net/mail.lo net/rpc.lo \
|
||||
net/smtp.lo net/textproto.lo net/url.lo old/netchan.lo \
|
||||
|
@ -617,8 +617,8 @@ toolexeclibgoexp_DATA = \
|
|||
$(exp_inotify_gox) \
|
||||
exp/norm.gox \
|
||||
exp/proxy.gox \
|
||||
exp/ssa.gox \
|
||||
exp/terminal.gox \
|
||||
exp/types.gox \
|
||||
exp/utf8string.gox
|
||||
|
||||
toolexeclibgoexphtmldir = $(toolexeclibgoexpdir)/html
|
||||
|
@ -642,7 +642,8 @@ toolexeclibgogo_DATA = \
|
|||
go/parser.gox \
|
||||
go/printer.gox \
|
||||
go/scanner.gox \
|
||||
go/token.gox
|
||||
go/token.gox \
|
||||
go/types.gox
|
||||
|
||||
toolexeclibgohashdir = $(toolexeclibgodir)/hash
|
||||
toolexeclibgohash_DATA = \
|
||||
|
@ -973,7 +974,7 @@ go_mime_files = \
|
|||
# By default use select with pipes. Most systems should have
|
||||
# something better.
|
||||
@LIBGO_IS_LINUX_FALSE@@LIBGO_IS_NETBSD_FALSE@@LIBGO_IS_RTEMS_FALSE@go_net_fd_os_file = go/net/fd_select.go
|
||||
@LIBGO_IS_LINUX_FALSE@@LIBGO_IS_NETBSD_TRUE@@LIBGO_IS_RTEMS_FALSE@go_net_fd_os_file = go/net/fd_netbsd.go
|
||||
@LIBGO_IS_LINUX_FALSE@@LIBGO_IS_NETBSD_TRUE@@LIBGO_IS_RTEMS_FALSE@go_net_fd_os_file = go/net/fd_bsd.go
|
||||
@LIBGO_IS_LINUX_TRUE@@LIBGO_IS_RTEMS_FALSE@go_net_fd_os_file = go/net/fd_linux.go
|
||||
@LIBGO_IS_RTEMS_TRUE@go_net_fd_os_file = go/net/fd_select.go
|
||||
@LIBGO_IS_LINUX_FALSE@@LIBGO_IS_NETBSD_FALSE@@LIBGO_IS_RTEMS_FALSE@go_net_newpollserver_file = go/net/newpollserver_unix.go
|
||||
|
@ -1010,9 +1011,12 @@ go_mime_files = \
|
|||
@LIBGO_IS_LINUX_FALSE@@LIBGO_IS_NETBSD_FALSE@go_net_interface_file = go/net/interface_stub.go
|
||||
@LIBGO_IS_LINUX_FALSE@@LIBGO_IS_NETBSD_TRUE@go_net_interface_file = go/net/interface_netbsd.go
|
||||
@LIBGO_IS_LINUX_TRUE@go_net_interface_file = go/net/interface_linux.go
|
||||
@LIBGO_IS_LINUX_FALSE@go_net_cloexec_file = go/net/sys_cloexec.go
|
||||
@LIBGO_IS_LINUX_TRUE@go_net_cloexec_file = go/net/sock_cloexec.go
|
||||
go_net_files = \
|
||||
go/net/cgo_unix.go \
|
||||
$(go_net_cgo_file) \
|
||||
$(go_net_cloexec_file) \
|
||||
go/net/dial.go \
|
||||
go/net/dnsclient.go \
|
||||
go/net/dnsclient_unix.go \
|
||||
|
@ -1068,6 +1072,8 @@ go_net_files = \
|
|||
@LIBGO_IS_LINUX_FALSE@@LIBGO_IS_OPENBSD_TRUE@@LIBGO_IS_SOLARIS_FALSE@go_os_stat_file = go/os/stat_atim.go
|
||||
@LIBGO_IS_LINUX_TRUE@@LIBGO_IS_SOLARIS_FALSE@go_os_stat_file = go/os/stat_atim.go
|
||||
@LIBGO_IS_SOLARIS_TRUE@go_os_stat_file = go/os/stat_solaris.go
|
||||
@LIBGO_IS_LINUX_FALSE@go_os_pipe_file = go/os/pipe_bsd.go
|
||||
@LIBGO_IS_LINUX_TRUE@go_os_pipe_file = go/os/pipe_linux.go
|
||||
go_os_files = \
|
||||
$(go_os_dir_file) \
|
||||
go/os/dir.go \
|
||||
|
@ -1084,6 +1090,7 @@ go_os_files = \
|
|||
go/os/getwd.go \
|
||||
go/os/path.go \
|
||||
go/os/path_unix.go \
|
||||
$(go_os_pipe_file) \
|
||||
go/os/proc.go \
|
||||
$(go_os_stat_file) \
|
||||
go/os/str.go \
|
||||
|
@ -1208,6 +1215,7 @@ go_compress_bzip2_files = \
|
|||
go_compress_flate_files = \
|
||||
go/compress/flate/copy.go \
|
||||
go/compress/flate/deflate.go \
|
||||
go/compress/flate/fixedhuff.go \
|
||||
go/compress/flate/huffman_bit_writer.go \
|
||||
go/compress/flate/huffman_code.go \
|
||||
go/compress/flate/inflate.go \
|
||||
|
@ -1439,8 +1447,10 @@ go_exp_inotify_files = \
|
|||
go_exp_locale_collate_files = \
|
||||
go/exp/locale/collate/colelem.go \
|
||||
go/exp/locale/collate/collate.go \
|
||||
go/exp/locale/collate/colltab.go \
|
||||
go/exp/locale/collate/contract.go \
|
||||
go/exp/locale/collate/export.go \
|
||||
go/exp/locale/collate/sort.go \
|
||||
go/exp/locale/collate/table.go \
|
||||
go/exp/locale/collate/tables.go \
|
||||
go/exp/locale/collate/trie.go
|
||||
|
@ -1469,25 +1479,20 @@ go_exp_proxy_files = \
|
|||
go/exp/proxy/proxy.go \
|
||||
go/exp/proxy/socks5.go
|
||||
|
||||
go_exp_ssa_files = \
|
||||
go/exp/ssa/blockopt.go \
|
||||
go/exp/ssa/doc.go \
|
||||
go/exp/ssa/func.go \
|
||||
go/exp/ssa/sanity.go \
|
||||
go/exp/ssa/ssa.go \
|
||||
go/exp/ssa/literal.go \
|
||||
go/exp/ssa/print.go \
|
||||
go/exp/ssa/util.go
|
||||
|
||||
go_exp_terminal_files = \
|
||||
go/exp/terminal/terminal.go \
|
||||
go/exp/terminal/util.go
|
||||
|
||||
go_exp_types_files = \
|
||||
go/exp/types/builtins.go \
|
||||
go/exp/types/check.go \
|
||||
go/exp/types/const.go \
|
||||
go/exp/types/conversions.go \
|
||||
go/exp/types/errors.go \
|
||||
go/exp/types/exportdata.go \
|
||||
go/exp/types/expr.go \
|
||||
go/exp/types/gcimporter.go \
|
||||
go/exp/types/operand.go \
|
||||
go/exp/types/predicates.go \
|
||||
go/exp/types/stmt.go \
|
||||
go/exp/types/types.go \
|
||||
go/exp/types/universe.go
|
||||
|
||||
go_exp_utf8string_files = \
|
||||
go/exp/utf8string/string.go
|
||||
|
||||
|
@ -1536,6 +1541,25 @@ go_go_token_files = \
|
|||
go/go/token/serialize.go \
|
||||
go/go/token/token.go
|
||||
|
||||
go_go_types_files = \
|
||||
go/go/types/api.go \
|
||||
go/go/types/builtins.go \
|
||||
go/go/types/check.go \
|
||||
go/go/types/const.go \
|
||||
go/go/types/conversions.go \
|
||||
go/go/types/errors.go \
|
||||
go/go/types/exportdata.go \
|
||||
go/go/types/expr.go \
|
||||
go/go/types/gcimporter.go \
|
||||
go/go/types/objects.go \
|
||||
go/go/types/operand.go \
|
||||
go/go/types/predicates.go \
|
||||
go/go/types/resolve.go \
|
||||
go/go/types/scope.go \
|
||||
go/go/types/stmt.go \
|
||||
go/go/types/types.go \
|
||||
go/go/types/universe.go
|
||||
|
||||
go_hash_adler32_files = \
|
||||
go/hash/adler32/adler32.go
|
||||
|
||||
|
@ -1976,8 +2000,8 @@ libgo_go_objs = \
|
|||
exp/locale/collate/build.lo \
|
||||
exp/norm.lo \
|
||||
exp/proxy.lo \
|
||||
exp/ssa.lo \
|
||||
exp/terminal.lo \
|
||||
exp/types.lo \
|
||||
exp/utf8string.lo \
|
||||
html/template.lo \
|
||||
go/ast.lo \
|
||||
|
@ -1988,6 +2012,7 @@ libgo_go_objs = \
|
|||
go/printer.lo \
|
||||
go/scanner.lo \
|
||||
go/token.lo \
|
||||
go/types.lo \
|
||||
hash/adler32.lo \
|
||||
hash/crc32.lo \
|
||||
hash/crc64.lo \
|
||||
|
@ -2233,7 +2258,6 @@ TEST_PACKAGES = \
|
|||
exp/norm/check \
|
||||
exp/proxy/check \
|
||||
exp/terminal/check \
|
||||
exp/types/check \
|
||||
exp/utf8string/check \
|
||||
html/template/check \
|
||||
go/ast/check \
|
||||
|
@ -2244,7 +2268,7 @@ TEST_PACKAGES = \
|
|||
go/printer/check \
|
||||
go/scanner/check \
|
||||
go/token/check \
|
||||
$(go_types_check_omitted_since_it_calls_6g) \
|
||||
go/types/check \
|
||||
hash/adler32/check \
|
||||
hash/crc32/check \
|
||||
hash/crc64/check \
|
||||
|
@ -5201,6 +5225,15 @@ exp/proxy/check: $(CHECK_DEPS)
|
|||
@$(CHECK)
|
||||
.PHONY: exp/proxy/check
|
||||
|
||||
@go_include@ exp/ssa.lo.dep
|
||||
exp/ssa.lo.dep: $(go_exp_ssa_files)
|
||||
$(BUILDDEPS)
|
||||
exp/ssa.lo: $(go_exp_ssa_files)
|
||||
$(BUILDPACKAGE)
|
||||
exp/ssa/check: $(CHECK_DEPS)
|
||||
@$(CHECK)
|
||||
.PHONY: exp/ssa/check
|
||||
|
||||
@go_include@ exp/terminal.lo.dep
|
||||
exp/terminal.lo.dep: $(go_exp_terminal_files)
|
||||
$(BUILDDEPS)
|
||||
|
@ -5210,15 +5243,6 @@ exp/terminal/check: $(CHECK_DEPS)
|
|||
@$(CHECK)
|
||||
.PHONY: exp/terminal/check
|
||||
|
||||
@go_include@ exp/types.lo.dep
|
||||
exp/types.lo.dep: $(go_exp_types_files)
|
||||
$(BUILDDEPS)
|
||||
exp/types.lo: $(go_exp_types_files)
|
||||
$(BUILDPACKAGE)
|
||||
exp/types/check: $(CHECK_DEPS)
|
||||
@$(CHECK)
|
||||
.PHONY: exp/types/check
|
||||
|
||||
@go_include@ exp/utf8string.lo.dep
|
||||
exp/utf8string.lo.dep: $(go_exp_utf8string_files)
|
||||
$(BUILDDEPS)
|
||||
|
@ -5327,6 +5351,15 @@ go/token/check: $(CHECK_DEPS)
|
|||
@$(CHECK)
|
||||
.PHONY: go/token/check
|
||||
|
||||
@go_include@ go/types.lo.dep
|
||||
go/types.lo.dep: $(go_go_types_files)
|
||||
$(BUILDDEPS)
|
||||
go/types.lo: $(go_go_types_files)
|
||||
$(BUILDPACKAGE)
|
||||
go/types/check: $(CHECK_DEPS)
|
||||
@$(CHECK)
|
||||
.PHONY: go/types/check
|
||||
|
||||
@go_include@ hash/adler32.lo.dep
|
||||
hash/adler32.lo.dep: $(go_hash_adler32_files)
|
||||
$(BUILDDEPS)
|
||||
|
@ -5949,9 +5982,9 @@ exp/norm.gox: exp/norm.lo
|
|||
$(BUILDGOX)
|
||||
exp/proxy.gox: exp/proxy.lo
|
||||
$(BUILDGOX)
|
||||
exp/terminal.gox: exp/terminal.lo
|
||||
exp/ssa.gox: exp/ssa.lo
|
||||
$(BUILDGOX)
|
||||
exp/types.gox: exp/types.lo
|
||||
exp/terminal.gox: exp/terminal.lo
|
||||
$(BUILDGOX)
|
||||
exp/utf8string.gox: exp/utf8string.lo
|
||||
$(BUILDGOX)
|
||||
|
@ -5975,6 +6008,8 @@ go/scanner.gox: go/scanner.lo
|
|||
$(BUILDGOX)
|
||||
go/token.gox: go/token.lo
|
||||
$(BUILDGOX)
|
||||
go/types.gox: go/types.lo
|
||||
$(BUILDGOX)
|
||||
|
||||
hash/adler32.gox: hash/adler32.lo
|
||||
$(BUILDGOX)
|
||||
|
|
|
@ -3,6 +3,9 @@
|
|||
/* Define if building universal (internal helper macro) */
|
||||
#undef AC_APPLE_UNIVERSAL_BUILD
|
||||
|
||||
/* Define to 1 if you have the `accept4' function. */
|
||||
#undef HAVE_ACCEPT4
|
||||
|
||||
/* Define to 1 if you have the `acosl' function. */
|
||||
#undef HAVE_ACOSL
|
||||
|
||||
|
@ -153,6 +156,9 @@
|
|||
/* Define to 1 if you have the `openat' function. */
|
||||
#undef HAVE_OPENAT
|
||||
|
||||
/* Define to 1 if you have the `pipe2' function. */
|
||||
#undef HAVE_PIPE2
|
||||
|
||||
/* Define to 1 if you have the `renameat' function. */
|
||||
#undef HAVE_RENAMEAT
|
||||
|
||||
|
@ -283,6 +289,9 @@
|
|||
/* Define to 1 if you have the <ustat.h> header file and it works. */
|
||||
#undef HAVE_USTAT_H
|
||||
|
||||
/* Define to 1 if you have the `utimensat' function. */
|
||||
#undef HAVE_UTIMENSAT
|
||||
|
||||
/* Define to 1 if you have the <utime.h> header file. */
|
||||
#undef HAVE_UTIME_H
|
||||
|
||||
|
|
|
@ -14635,7 +14635,7 @@ else
|
|||
fi
|
||||
|
||||
|
||||
for ac_func in epoll_create1 faccessat fallocate fchmodat fchownat futimesat inotify_add_watch inotify_init inotify_init1 inotify_rm_watch mkdirat mknodat openat renameat sync_file_range splice tee unlinkat unshare
|
||||
for ac_func in accept4 epoll_create1 faccessat fallocate fchmodat fchownat futimesat inotify_add_watch inotify_init inotify_init1 inotify_rm_watch mkdirat mknodat openat pipe2 renameat sync_file_range splice tee unlinkat unshare utimensat
|
||||
do :
|
||||
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
|
||||
ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
|
||||
|
|
|
@ -491,7 +491,7 @@ AC_CHECK_FUNCS(strerror_r strsignal wait4 mincore setenv dl_iterate_phdr)
|
|||
AM_CONDITIONAL(HAVE_STRERROR_R, test "$ac_cv_func_strerror_r" = yes)
|
||||
AM_CONDITIONAL(HAVE_WAIT4, test "$ac_cv_func_wait4" = yes)
|
||||
|
||||
AC_CHECK_FUNCS(epoll_create1 faccessat fallocate fchmodat fchownat futimesat inotify_add_watch inotify_init inotify_init1 inotify_rm_watch mkdirat mknodat openat renameat sync_file_range splice tee unlinkat unshare)
|
||||
AC_CHECK_FUNCS(accept4 epoll_create1 faccessat fallocate fchmodat fchownat futimesat inotify_add_watch inotify_init inotify_init1 inotify_rm_watch mkdirat mknodat openat pipe2 renameat sync_file_range splice tee unlinkat unshare utimensat)
|
||||
AC_TYPE_OFF_T
|
||||
AC_CHECK_TYPES([loff_t])
|
||||
|
||||
|
|
|
@ -18,8 +18,7 @@ import (
|
|||
|
||||
func TestOver65kFiles(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Logf("slow test; skipping")
|
||||
return
|
||||
t.Skip("slow test; skipping")
|
||||
}
|
||||
buf := new(bytes.Buffer)
|
||||
w := NewWriter(buf)
|
||||
|
@ -108,8 +107,7 @@ func TestFileHeaderRoundTrip64(t *testing.T) {
|
|||
|
||||
func TestZip64(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Logf("slow test; skipping")
|
||||
return
|
||||
t.Skip("slow test; skipping")
|
||||
}
|
||||
// write 2^32 bytes plus "END\n" to a zip file
|
||||
buf := new(bytes.Buffer)
|
||||
|
|
|
@ -76,13 +76,13 @@ func (b *Reader) fill() {
|
|||
}
|
||||
|
||||
// Read new data.
|
||||
n, e := b.rd.Read(b.buf[b.w:])
|
||||
n, err := b.rd.Read(b.buf[b.w:])
|
||||
if n < 0 {
|
||||
panic(errNegativeRead)
|
||||
}
|
||||
b.w += n
|
||||
if e != nil {
|
||||
b.err = e
|
||||
if err != nil {
|
||||
b.err = err
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -379,8 +379,8 @@ func (b *Reader) ReadBytes(delim byte) (line []byte, err error) {
|
|||
// ReadString returns err != nil if and only if the returned data does not end in
|
||||
// delim.
|
||||
func (b *Reader) ReadString(delim byte) (line string, err error) {
|
||||
bytes, e := b.ReadBytes(delim)
|
||||
return string(bytes), e
|
||||
bytes, err := b.ReadBytes(delim)
|
||||
return string(bytes), err
|
||||
}
|
||||
|
||||
// WriteTo implements io.WriterTo.
|
||||
|
@ -461,17 +461,17 @@ func (b *Writer) Flush() error {
|
|||
if b.n == 0 {
|
||||
return nil
|
||||
}
|
||||
n, e := b.wr.Write(b.buf[0:b.n])
|
||||
if n < b.n && e == nil {
|
||||
e = io.ErrShortWrite
|
||||
n, err := b.wr.Write(b.buf[0:b.n])
|
||||
if n < b.n && err == nil {
|
||||
err = io.ErrShortWrite
|
||||
}
|
||||
if e != nil {
|
||||
if err != nil {
|
||||
if n > 0 && n < b.n {
|
||||
copy(b.buf[0:b.n-n], b.buf[n:b.n])
|
||||
}
|
||||
b.n -= n
|
||||
b.err = e
|
||||
return e
|
||||
b.err = err
|
||||
return err
|
||||
}
|
||||
b.n = 0
|
||||
return nil
|
||||
|
|
|
@ -28,9 +28,9 @@ func newRot13Reader(r io.Reader) *rot13Reader {
|
|||
}
|
||||
|
||||
func (r13 *rot13Reader) Read(p []byte) (int, error) {
|
||||
n, e := r13.r.Read(p)
|
||||
if e != nil {
|
||||
return n, e
|
||||
n, err := r13.r.Read(p)
|
||||
if err != nil {
|
||||
return n, err
|
||||
}
|
||||
for i := 0; i < n; i++ {
|
||||
c := p[i] | 0x20 // lowercase byte
|
||||
|
@ -48,15 +48,15 @@ func readBytes(buf *Reader) string {
|
|||
var b [1000]byte
|
||||
nb := 0
|
||||
for {
|
||||
c, e := buf.ReadByte()
|
||||
if e == io.EOF {
|
||||
c, err := buf.ReadByte()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if e == nil {
|
||||
if err == nil {
|
||||
b[nb] = c
|
||||
nb++
|
||||
} else if e != iotest.ErrTimeout {
|
||||
panic("Data: " + e.Error())
|
||||
} else if err != iotest.ErrTimeout {
|
||||
panic("Data: " + err.Error())
|
||||
}
|
||||
}
|
||||
return string(b[0:nb])
|
||||
|
@ -93,12 +93,12 @@ var readMakers = []readMaker{
|
|||
func readLines(b *Reader) string {
|
||||
s := ""
|
||||
for {
|
||||
s1, e := b.ReadString('\n')
|
||||
if e == io.EOF {
|
||||
s1, err := b.ReadString('\n')
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if e != nil && e != iotest.ErrTimeout {
|
||||
panic("GetLines: " + e.Error())
|
||||
if err != nil && err != iotest.ErrTimeout {
|
||||
panic("GetLines: " + err.Error())
|
||||
}
|
||||
s += s1
|
||||
}
|
||||
|
@ -110,9 +110,9 @@ func reads(buf *Reader, m int) string {
|
|||
var b [1000]byte
|
||||
nb := 0
|
||||
for {
|
||||
n, e := buf.Read(b[nb : nb+m])
|
||||
n, err := buf.Read(b[nb : nb+m])
|
||||
nb += n
|
||||
if e == io.EOF {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
@ -748,7 +748,7 @@ func testReadLineNewlines(t *testing.T, input string, expect []readLineResult) {
|
|||
b := NewReaderSize(strings.NewReader(input), minReadBufferSize)
|
||||
for i, e := range expect {
|
||||
line, isPrefix, err := b.ReadLine()
|
||||
if bytes.Compare(line, e.line) != 0 {
|
||||
if !bytes.Equal(line, e.line) {
|
||||
t.Errorf("%q call %d, line == %q, want %q", input, i, line, e.line)
|
||||
return
|
||||
}
|
||||
|
|
|
@ -367,7 +367,7 @@ func (b *Buffer) ReadBytes(delim byte) (line []byte, err error) {
|
|||
return
|
||||
}
|
||||
|
||||
// readSlice is like readBytes but returns a reference to internal buffer data.
|
||||
// readSlice is like ReadBytes but returns a reference to internal buffer data.
|
||||
func (b *Buffer) readSlice(delim byte) (line []byte, err error) {
|
||||
i := IndexByte(b.buf[b.off:], delim)
|
||||
end := b.off + i + 1
|
||||
|
@ -377,6 +377,7 @@ func (b *Buffer) readSlice(delim byte) (line []byte, err error) {
|
|||
}
|
||||
line = b.buf[b.off:end]
|
||||
b.off = end
|
||||
b.lastRead = opRead
|
||||
return line, err
|
||||
}
|
||||
|
||||
|
|
|
@ -260,7 +260,7 @@ func TestWriteTo(t *testing.T) {
|
|||
|
||||
func TestRuneIO(t *testing.T) {
|
||||
const NRune = 1000
|
||||
// Built a test array while we write the data
|
||||
// Built a test slice while we write the data
|
||||
b := make([]byte, utf8.UTFMax*NRune)
|
||||
var buf Buffer
|
||||
n := 0
|
||||
|
@ -453,3 +453,25 @@ func TestReadEmptyAtEOF(t *testing.T) {
|
|||
t.Errorf("wrong count; got %d want 0", n)
|
||||
}
|
||||
}
|
||||
|
||||
func TestUnreadByte(t *testing.T) {
|
||||
b := new(Buffer)
|
||||
b.WriteString("abcdefghijklmnopqrstuvwxyz")
|
||||
|
||||
_, err := b.ReadBytes('m')
|
||||
if err != nil {
|
||||
t.Fatalf("ReadBytes: %v", err)
|
||||
}
|
||||
|
||||
err = b.UnreadByte()
|
||||
if err != nil {
|
||||
t.Fatalf("UnreadByte: %v", err)
|
||||
}
|
||||
c, err := b.ReadByte()
|
||||
if err != nil {
|
||||
t.Fatalf("ReadByte: %v", err)
|
||||
}
|
||||
if c != 'm' {
|
||||
t.Errorf("ReadByte = %q; want %q", c, 'm')
|
||||
}
|
||||
}
|
||||
|
|
|
@ -11,8 +11,8 @@ import (
|
|||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// Compare returns an integer comparing the two byte arrays lexicographically.
|
||||
// The result will be 0 if a==b, -1 if a < b, and +1 if a > b
|
||||
// Compare returns an integer comparing two byte slices lexicographically.
|
||||
// The result will be 0 if a==b, -1 if a < b, and +1 if a > b.
|
||||
// A nil argument is equivalent to an empty slice.
|
||||
func Compare(a, b []byte) int {
|
||||
m := len(a)
|
||||
|
@ -53,8 +53,8 @@ func equalPortable(a, b []byte) bool {
|
|||
return true
|
||||
}
|
||||
|
||||
// explode splits s into an array of UTF-8 sequences, one per Unicode character (still arrays of bytes),
|
||||
// up to a maximum of n byte arrays. Invalid UTF-8 sequences are chopped into individual bytes.
|
||||
// explode splits s into a slice of UTF-8 sequences, one per Unicode character (still slices of bytes),
|
||||
// up to a maximum of n byte slices. Invalid UTF-8 sequences are chopped into individual bytes.
|
||||
func explode(s []byte, n int) [][]byte {
|
||||
if n <= 0 {
|
||||
n = len(s)
|
||||
|
@ -226,7 +226,7 @@ func LastIndexAny(s []byte, chars string) int {
|
|||
}
|
||||
|
||||
// Generic split: splits after each instance of sep,
|
||||
// including sepSave bytes of sep in the subarrays.
|
||||
// including sepSave bytes of sep in the subslices.
|
||||
func genSplit(s, sep []byte, sepSave, n int) [][]byte {
|
||||
if n == 0 {
|
||||
return nil
|
||||
|
@ -287,15 +287,15 @@ func SplitAfter(s, sep []byte) [][]byte {
|
|||
return genSplit(s, sep, len(sep), -1)
|
||||
}
|
||||
|
||||
// Fields splits the array s around each instance of one or more consecutive white space
|
||||
// characters, returning a slice of subarrays of s or an empty list if s contains only white space.
|
||||
// Fields splits the slice s around each instance of one or more consecutive white space
|
||||
// characters, returning a slice of subslices of s or an empty list if s contains only white space.
|
||||
func Fields(s []byte) [][]byte {
|
||||
return FieldsFunc(s, unicode.IsSpace)
|
||||
}
|
||||
|
||||
// FieldsFunc interprets s as a sequence of UTF-8-encoded Unicode code points.
|
||||
// It splits the array s at each run of code points c satisfying f(c) and
|
||||
// returns a slice of subarrays of s. If no code points in s satisfy f(c), an
|
||||
// It splits the slice s at each run of code points c satisfying f(c) and
|
||||
// returns a slice of subslices of s. If no code points in s satisfy f(c), an
|
||||
// empty slice is returned.
|
||||
func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
|
||||
n := 0
|
||||
|
@ -333,46 +333,46 @@ func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
|
|||
return a[0:na]
|
||||
}
|
||||
|
||||
// Join concatenates the elements of a to create a new byte array. The separator
|
||||
// sep is placed between elements in the resulting array.
|
||||
func Join(a [][]byte, sep []byte) []byte {
|
||||
if len(a) == 0 {
|
||||
// Join concatenates the elements of s to create a new byte slice. The separator
|
||||
// sep is placed between elements in the resulting slice.
|
||||
func Join(s [][]byte, sep []byte) []byte {
|
||||
if len(s) == 0 {
|
||||
return []byte{}
|
||||
}
|
||||
if len(a) == 1 {
|
||||
if len(s) == 1 {
|
||||
// Just return a copy.
|
||||
return append([]byte(nil), a[0]...)
|
||||
return append([]byte(nil), s[0]...)
|
||||
}
|
||||
n := len(sep) * (len(a) - 1)
|
||||
for i := 0; i < len(a); i++ {
|
||||
n += len(a[i])
|
||||
n := len(sep) * (len(s) - 1)
|
||||
for _, v := range s {
|
||||
n += len(v)
|
||||
}
|
||||
|
||||
b := make([]byte, n)
|
||||
bp := copy(b, a[0])
|
||||
for _, s := range a[1:] {
|
||||
bp := copy(b, s[0])
|
||||
for _, v := range s[1:] {
|
||||
bp += copy(b[bp:], sep)
|
||||
bp += copy(b[bp:], s)
|
||||
bp += copy(b[bp:], v)
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
// HasPrefix tests whether the byte array s begins with prefix.
|
||||
// HasPrefix tests whether the byte slice s begins with prefix.
|
||||
func HasPrefix(s, prefix []byte) bool {
|
||||
return len(s) >= len(prefix) && Equal(s[0:len(prefix)], prefix)
|
||||
}
|
||||
|
||||
// HasSuffix tests whether the byte array s ends with suffix.
|
||||
// HasSuffix tests whether the byte slice s ends with suffix.
|
||||
func HasSuffix(s, suffix []byte) bool {
|
||||
return len(s) >= len(suffix) && Equal(s[len(s)-len(suffix):], suffix)
|
||||
}
|
||||
|
||||
// Map returns a copy of the byte array s with all its characters modified
|
||||
// Map returns a copy of the byte slice s with all its characters modified
|
||||
// according to the mapping function. If mapping returns a negative value, the character is
|
||||
// dropped from the string with no replacement. The characters in s and the
|
||||
// output are interpreted as UTF-8-encoded Unicode code points.
|
||||
func Map(mapping func(r rune) rune, s []byte) []byte {
|
||||
// In the worst case, the array can grow when mapped, making
|
||||
// In the worst case, the slice can grow when mapped, making
|
||||
// things unpleasant. But it's so rare we barge in assuming it's
|
||||
// fine. It could also shrink but that falls out naturally.
|
||||
maxbytes := len(s) // length of b
|
||||
|
@ -413,28 +413,28 @@ func Repeat(b []byte, count int) []byte {
|
|||
return nb
|
||||
}
|
||||
|
||||
// ToUpper returns a copy of the byte array s with all Unicode letters mapped to their upper case.
|
||||
// ToUpper returns a copy of the byte slice s with all Unicode letters mapped to their upper case.
|
||||
func ToUpper(s []byte) []byte { return Map(unicode.ToUpper, s) }
|
||||
|
||||
// ToLower returns a copy of the byte array s with all Unicode letters mapped to their lower case.
|
||||
// ToLower returns a copy of the byte slice s with all Unicode letters mapped to their lower case.
|
||||
func ToLower(s []byte) []byte { return Map(unicode.ToLower, s) }
|
||||
|
||||
// ToTitle returns a copy of the byte array s with all Unicode letters mapped to their title case.
|
||||
// ToTitle returns a copy of the byte slice s with all Unicode letters mapped to their title case.
|
||||
func ToTitle(s []byte) []byte { return Map(unicode.ToTitle, s) }
|
||||
|
||||
// ToUpperSpecial returns a copy of the byte array s with all Unicode letters mapped to their
|
||||
// ToUpperSpecial returns a copy of the byte slice s with all Unicode letters mapped to their
|
||||
// upper case, giving priority to the special casing rules.
|
||||
func ToUpperSpecial(_case unicode.SpecialCase, s []byte) []byte {
|
||||
return Map(func(r rune) rune { return _case.ToUpper(r) }, s)
|
||||
}
|
||||
|
||||
// ToLowerSpecial returns a copy of the byte array s with all Unicode letters mapped to their
|
||||
// ToLowerSpecial returns a copy of the byte slice s with all Unicode letters mapped to their
|
||||
// lower case, giving priority to the special casing rules.
|
||||
func ToLowerSpecial(_case unicode.SpecialCase, s []byte) []byte {
|
||||
return Map(func(r rune) rune { return _case.ToLower(r) }, s)
|
||||
}
|
||||
|
||||
// ToTitleSpecial returns a copy of the byte array s with all Unicode letters mapped to their
|
||||
// ToTitleSpecial returns a copy of the byte slice s with all Unicode letters mapped to their
|
||||
// title case, giving priority to the special casing rules.
|
||||
func ToTitleSpecial(_case unicode.SpecialCase, s []byte) []byte {
|
||||
return Map(func(r rune) rune { return _case.ToTitle(r) }, s)
|
||||
|
|
|
@ -25,16 +25,16 @@ func eq(a, b []string) bool {
|
|||
return true
|
||||
}
|
||||
|
||||
func arrayOfString(a [][]byte) []string {
|
||||
result := make([]string, len(a))
|
||||
for j := 0; j < len(a); j++ {
|
||||
result[j] = string(a[j])
|
||||
func sliceOfString(s [][]byte) []string {
|
||||
result := make([]string, len(s))
|
||||
for i, v := range s {
|
||||
result[i] = string(v)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// For ease of reading, the test cases use strings that are converted to byte
|
||||
// arrays before invoking the functions.
|
||||
// slices before invoking the functions.
|
||||
|
||||
var abcd = "abcd"
|
||||
var faces = "☺☻☹"
|
||||
|
@ -435,7 +435,7 @@ var explodetests = []ExplodeTest{
|
|||
func TestExplode(t *testing.T) {
|
||||
for _, tt := range explodetests {
|
||||
a := SplitN([]byte(tt.s), nil, tt.n)
|
||||
result := arrayOfString(a)
|
||||
result := sliceOfString(a)
|
||||
if !eq(result, tt.a) {
|
||||
t.Errorf(`Explode("%s", %d) = %v; want %v`, tt.s, tt.n, result, tt.a)
|
||||
continue
|
||||
|
@ -473,7 +473,7 @@ var splittests = []SplitTest{
|
|||
func TestSplit(t *testing.T) {
|
||||
for _, tt := range splittests {
|
||||
a := SplitN([]byte(tt.s), []byte(tt.sep), tt.n)
|
||||
result := arrayOfString(a)
|
||||
result := sliceOfString(a)
|
||||
if !eq(result, tt.a) {
|
||||
t.Errorf(`Split(%q, %q, %d) = %v; want %v`, tt.s, tt.sep, tt.n, result, tt.a)
|
||||
continue
|
||||
|
@ -519,7 +519,7 @@ var splitaftertests = []SplitTest{
|
|||
func TestSplitAfter(t *testing.T) {
|
||||
for _, tt := range splitaftertests {
|
||||
a := SplitAfterN([]byte(tt.s), []byte(tt.sep), tt.n)
|
||||
result := arrayOfString(a)
|
||||
result := sliceOfString(a)
|
||||
if !eq(result, tt.a) {
|
||||
t.Errorf(`Split(%q, %q, %d) = %v; want %v`, tt.s, tt.sep, tt.n, result, tt.a)
|
||||
continue
|
||||
|
@ -559,7 +559,7 @@ var fieldstests = []FieldsTest{
|
|||
func TestFields(t *testing.T) {
|
||||
for _, tt := range fieldstests {
|
||||
a := Fields([]byte(tt.s))
|
||||
result := arrayOfString(a)
|
||||
result := sliceOfString(a)
|
||||
if !eq(result, tt.a) {
|
||||
t.Errorf("Fields(%q) = %v; want %v", tt.s, a, tt.a)
|
||||
continue
|
||||
|
@ -570,7 +570,7 @@ func TestFields(t *testing.T) {
|
|||
func TestFieldsFunc(t *testing.T) {
|
||||
for _, tt := range fieldstests {
|
||||
a := FieldsFunc([]byte(tt.s), unicode.IsSpace)
|
||||
result := arrayOfString(a)
|
||||
result := sliceOfString(a)
|
||||
if !eq(result, tt.a) {
|
||||
t.Errorf("FieldsFunc(%q, unicode.IsSpace) = %v; want %v", tt.s, a, tt.a)
|
||||
continue
|
||||
|
@ -585,15 +585,15 @@ func TestFieldsFunc(t *testing.T) {
|
|||
}
|
||||
for _, tt := range fieldsFuncTests {
|
||||
a := FieldsFunc([]byte(tt.s), pred)
|
||||
result := arrayOfString(a)
|
||||
result := sliceOfString(a)
|
||||
if !eq(result, tt.a) {
|
||||
t.Errorf("FieldsFunc(%q) = %v, want %v", tt.s, a, tt.a)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Test case for any function which accepts and returns a byte array.
|
||||
// For ease of creation, we write the byte arrays as strings.
|
||||
// Test case for any function which accepts and returns a byte slice.
|
||||
// For ease of creation, we write the byte slices as strings.
|
||||
type StringTest struct {
|
||||
in, out string
|
||||
}
|
||||
|
|
|
@ -10,6 +10,7 @@ import (
|
|||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"sort"
|
||||
)
|
||||
|
||||
func ExampleBuffer() {
|
||||
|
@ -27,3 +28,41 @@ func ExampleBuffer_reader() {
|
|||
io.Copy(os.Stdout, dec)
|
||||
// Output: Gophers rule!
|
||||
}
|
||||
|
||||
func ExampleCompare() {
|
||||
// Interpret Compare's result by comparing it to zero.
|
||||
var a, b []byte
|
||||
if bytes.Compare(a, b) < 0 {
|
||||
// a less b
|
||||
}
|
||||
if bytes.Compare(a, b) <= 0 {
|
||||
// a less or equal b
|
||||
}
|
||||
if bytes.Compare(a, b) > 0 {
|
||||
// a greater b
|
||||
}
|
||||
if bytes.Compare(a, b) >= 0 {
|
||||
// a greater or equal b
|
||||
}
|
||||
|
||||
// Prefer Equal to Compare for equality comparisons.
|
||||
if bytes.Equal(a, b) {
|
||||
// a equal b
|
||||
}
|
||||
if !bytes.Equal(a, b) {
|
||||
// a not equal b
|
||||
}
|
||||
}
|
||||
|
||||
func ExampleCompare_search() {
|
||||
// Binary search to find a matching byte slice.
|
||||
var needle []byte
|
||||
var haystack [][]byte // Assume sorted
|
||||
i := sort.Search(len(haystack), func(i int) bool {
|
||||
// Return haystack[i] >= needle.
|
||||
return bytes.Compare(haystack[i], needle) >= 0
|
||||
})
|
||||
if i < len(haystack) && bytes.Equal(haystack[i], needle) {
|
||||
// Found it!
|
||||
}
|
||||
}
|
||||
|
|
|
@ -124,8 +124,7 @@ func (r *sparseReader) Read(b []byte) (n int, err error) {
|
|||
|
||||
func TestVeryLongSparseChunk(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Logf("skipping sparse chunk during short test")
|
||||
return
|
||||
t.Skip("skipping sparse chunk during short test")
|
||||
}
|
||||
w, err := NewWriter(ioutil.Discard, 1)
|
||||
if err != nil {
|
||||
|
|
|
@ -0,0 +1,74 @@
|
|||
package flate
|
||||
|
||||
// autogenerated by gen.go, DO NOT EDIT
|
||||
|
||||
var fixedHuffmanDecoder = huffmanDecoder{
|
||||
7,
|
||||
[huffmanNumChunks]uint32{
|
||||
0x1007, 0x0508, 0x0108, 0x1188, 0x1107, 0x0708, 0x0308, 0x0c09,
|
||||
0x1087, 0x0608, 0x0208, 0x0a09, 0x0008, 0x0808, 0x0408, 0x0e09,
|
||||
0x1047, 0x0588, 0x0188, 0x0909, 0x1147, 0x0788, 0x0388, 0x0d09,
|
||||
0x10c7, 0x0688, 0x0288, 0x0b09, 0x0088, 0x0888, 0x0488, 0x0f09,
|
||||
0x1027, 0x0548, 0x0148, 0x11c8, 0x1127, 0x0748, 0x0348, 0x0c89,
|
||||
0x10a7, 0x0648, 0x0248, 0x0a89, 0x0048, 0x0848, 0x0448, 0x0e89,
|
||||
0x1067, 0x05c8, 0x01c8, 0x0989, 0x1167, 0x07c8, 0x03c8, 0x0d89,
|
||||
0x10e7, 0x06c8, 0x02c8, 0x0b89, 0x00c8, 0x08c8, 0x04c8, 0x0f89,
|
||||
0x1017, 0x0528, 0x0128, 0x11a8, 0x1117, 0x0728, 0x0328, 0x0c49,
|
||||
0x1097, 0x0628, 0x0228, 0x0a49, 0x0028, 0x0828, 0x0428, 0x0e49,
|
||||
0x1057, 0x05a8, 0x01a8, 0x0949, 0x1157, 0x07a8, 0x03a8, 0x0d49,
|
||||
0x10d7, 0x06a8, 0x02a8, 0x0b49, 0x00a8, 0x08a8, 0x04a8, 0x0f49,
|
||||
0x1037, 0x0568, 0x0168, 0x11e8, 0x1137, 0x0768, 0x0368, 0x0cc9,
|
||||
0x10b7, 0x0668, 0x0268, 0x0ac9, 0x0068, 0x0868, 0x0468, 0x0ec9,
|
||||
0x1077, 0x05e8, 0x01e8, 0x09c9, 0x1177, 0x07e8, 0x03e8, 0x0dc9,
|
||||
0x10f7, 0x06e8, 0x02e8, 0x0bc9, 0x00e8, 0x08e8, 0x04e8, 0x0fc9,
|
||||
0x1007, 0x0518, 0x0118, 0x1198, 0x1107, 0x0718, 0x0318, 0x0c29,
|
||||
0x1087, 0x0618, 0x0218, 0x0a29, 0x0018, 0x0818, 0x0418, 0x0e29,
|
||||
0x1047, 0x0598, 0x0198, 0x0929, 0x1147, 0x0798, 0x0398, 0x0d29,
|
||||
0x10c7, 0x0698, 0x0298, 0x0b29, 0x0098, 0x0898, 0x0498, 0x0f29,
|
||||
0x1027, 0x0558, 0x0158, 0x11d8, 0x1127, 0x0758, 0x0358, 0x0ca9,
|
||||
0x10a7, 0x0658, 0x0258, 0x0aa9, 0x0058, 0x0858, 0x0458, 0x0ea9,
|
||||
0x1067, 0x05d8, 0x01d8, 0x09a9, 0x1167, 0x07d8, 0x03d8, 0x0da9,
|
||||
0x10e7, 0x06d8, 0x02d8, 0x0ba9, 0x00d8, 0x08d8, 0x04d8, 0x0fa9,
|
||||
0x1017, 0x0538, 0x0138, 0x11b8, 0x1117, 0x0738, 0x0338, 0x0c69,
|
||||
0x1097, 0x0638, 0x0238, 0x0a69, 0x0038, 0x0838, 0x0438, 0x0e69,
|
||||
0x1057, 0x05b8, 0x01b8, 0x0969, 0x1157, 0x07b8, 0x03b8, 0x0d69,
|
||||
0x10d7, 0x06b8, 0x02b8, 0x0b69, 0x00b8, 0x08b8, 0x04b8, 0x0f69,
|
||||
0x1037, 0x0578, 0x0178, 0x11f8, 0x1137, 0x0778, 0x0378, 0x0ce9,
|
||||
0x10b7, 0x0678, 0x0278, 0x0ae9, 0x0078, 0x0878, 0x0478, 0x0ee9,
|
||||
0x1077, 0x05f8, 0x01f8, 0x09e9, 0x1177, 0x07f8, 0x03f8, 0x0de9,
|
||||
0x10f7, 0x06f8, 0x02f8, 0x0be9, 0x00f8, 0x08f8, 0x04f8, 0x0fe9,
|
||||
0x1007, 0x0508, 0x0108, 0x1188, 0x1107, 0x0708, 0x0308, 0x0c19,
|
||||
0x1087, 0x0608, 0x0208, 0x0a19, 0x0008, 0x0808, 0x0408, 0x0e19,
|
||||
0x1047, 0x0588, 0x0188, 0x0919, 0x1147, 0x0788, 0x0388, 0x0d19,
|
||||
0x10c7, 0x0688, 0x0288, 0x0b19, 0x0088, 0x0888, 0x0488, 0x0f19,
|
||||
0x1027, 0x0548, 0x0148, 0x11c8, 0x1127, 0x0748, 0x0348, 0x0c99,
|
||||
0x10a7, 0x0648, 0x0248, 0x0a99, 0x0048, 0x0848, 0x0448, 0x0e99,
|
||||
0x1067, 0x05c8, 0x01c8, 0x0999, 0x1167, 0x07c8, 0x03c8, 0x0d99,
|
||||
0x10e7, 0x06c8, 0x02c8, 0x0b99, 0x00c8, 0x08c8, 0x04c8, 0x0f99,
|
||||
0x1017, 0x0528, 0x0128, 0x11a8, 0x1117, 0x0728, 0x0328, 0x0c59,
|
||||
0x1097, 0x0628, 0x0228, 0x0a59, 0x0028, 0x0828, 0x0428, 0x0e59,
|
||||
0x1057, 0x05a8, 0x01a8, 0x0959, 0x1157, 0x07a8, 0x03a8, 0x0d59,
|
||||
0x10d7, 0x06a8, 0x02a8, 0x0b59, 0x00a8, 0x08a8, 0x04a8, 0x0f59,
|
||||
0x1037, 0x0568, 0x0168, 0x11e8, 0x1137, 0x0768, 0x0368, 0x0cd9,
|
||||
0x10b7, 0x0668, 0x0268, 0x0ad9, 0x0068, 0x0868, 0x0468, 0x0ed9,
|
||||
0x1077, 0x05e8, 0x01e8, 0x09d9, 0x1177, 0x07e8, 0x03e8, 0x0dd9,
|
||||
0x10f7, 0x06e8, 0x02e8, 0x0bd9, 0x00e8, 0x08e8, 0x04e8, 0x0fd9,
|
||||
0x1007, 0x0518, 0x0118, 0x1198, 0x1107, 0x0718, 0x0318, 0x0c39,
|
||||
0x1087, 0x0618, 0x0218, 0x0a39, 0x0018, 0x0818, 0x0418, 0x0e39,
|
||||
0x1047, 0x0598, 0x0198, 0x0939, 0x1147, 0x0798, 0x0398, 0x0d39,
|
||||
0x10c7, 0x0698, 0x0298, 0x0b39, 0x0098, 0x0898, 0x0498, 0x0f39,
|
||||
0x1027, 0x0558, 0x0158, 0x11d8, 0x1127, 0x0758, 0x0358, 0x0cb9,
|
||||
0x10a7, 0x0658, 0x0258, 0x0ab9, 0x0058, 0x0858, 0x0458, 0x0eb9,
|
||||
0x1067, 0x05d8, 0x01d8, 0x09b9, 0x1167, 0x07d8, 0x03d8, 0x0db9,
|
||||
0x10e7, 0x06d8, 0x02d8, 0x0bb9, 0x00d8, 0x08d8, 0x04d8, 0x0fb9,
|
||||
0x1017, 0x0538, 0x0138, 0x11b8, 0x1117, 0x0738, 0x0338, 0x0c79,
|
||||
0x1097, 0x0638, 0x0238, 0x0a79, 0x0038, 0x0838, 0x0438, 0x0e79,
|
||||
0x1057, 0x05b8, 0x01b8, 0x0979, 0x1157, 0x07b8, 0x03b8, 0x0d79,
|
||||
0x10d7, 0x06b8, 0x02b8, 0x0b79, 0x00b8, 0x08b8, 0x04b8, 0x0f79,
|
||||
0x1037, 0x0578, 0x0178, 0x11f8, 0x1137, 0x0778, 0x0378, 0x0cf9,
|
||||
0x10b7, 0x0678, 0x0278, 0x0af9, 0x0078, 0x0878, 0x0478, 0x0ef9,
|
||||
0x1077, 0x05f8, 0x01f8, 0x09f9, 0x1177, 0x07f8, 0x03f8, 0x0df9,
|
||||
0x10f7, 0x06f8, 0x02f8, 0x0bf9, 0x00f8, 0x08f8, 0x04f8, 0x0ff9,
|
||||
},
|
||||
nil, 0,
|
||||
}
|
|
@ -10,122 +10,9 @@ package flate
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// The Huffman code lengths used by the fixed-format Huffman blocks.
|
||||
var fixedHuffmanBits = [...]int{
|
||||
// 0-143 length 8
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
|
||||
|
||||
// 144-255 length 9
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
||||
|
||||
// 256-279 length 7
|
||||
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
|
||||
7, 7, 7, 7, 7, 7, 7, 7,
|
||||
|
||||
// 280-287 length 8
|
||||
8, 8, 8, 8, 8, 8, 8, 8,
|
||||
}
|
||||
|
||||
type InitDecoderTest struct {
|
||||
in []int
|
||||
out huffmanDecoder
|
||||
ok bool
|
||||
}
|
||||
|
||||
var initDecoderTests = []*InitDecoderTest{
|
||||
// Example from Connell 1973,
|
||||
{
|
||||
[]int{3, 5, 2, 4, 3, 5, 5, 4, 4, 3, 4, 5},
|
||||
huffmanDecoder{
|
||||
2, 5,
|
||||
[maxCodeLen + 1]int{2: 0, 4, 13, 31},
|
||||
[maxCodeLen + 1]int{2: 0, 1, 6, 20},
|
||||
// Paper used different code assignment:
|
||||
// 2, 9, 4, 0, 10, 8, 3, 7, 1, 5, 11, 6
|
||||
// Reordered here so that codes of same length
|
||||
// are assigned to increasing numbers.
|
||||
[]int{2, 0, 4, 9, 3, 7, 8, 10, 1, 5, 6, 11},
|
||||
},
|
||||
true,
|
||||
},
|
||||
|
||||
// Example from RFC 1951 section 3.2.2
|
||||
{
|
||||
[]int{2, 1, 3, 3},
|
||||
huffmanDecoder{
|
||||
1, 3,
|
||||
[maxCodeLen + 1]int{1: 0, 2, 7},
|
||||
[maxCodeLen + 1]int{1: 0, 1, 4},
|
||||
[]int{1, 0, 2, 3},
|
||||
},
|
||||
true,
|
||||
},
|
||||
|
||||
// Second example from RFC 1951 section 3.2.2
|
||||
{
|
||||
[]int{3, 3, 3, 3, 3, 2, 4, 4},
|
||||
huffmanDecoder{
|
||||
2, 4,
|
||||
[maxCodeLen + 1]int{2: 0, 6, 15},
|
||||
[maxCodeLen + 1]int{2: 0, 1, 8},
|
||||
[]int{5, 0, 1, 2, 3, 4, 6, 7},
|
||||
},
|
||||
true,
|
||||
},
|
||||
|
||||
// Static Huffman codes (RFC 1951 section 3.2.6)
|
||||
{
|
||||
fixedHuffmanBits[0:],
|
||||
fixedHuffmanDecoder,
|
||||
true,
|
||||
},
|
||||
|
||||
// Illegal input.
|
||||
{
|
||||
[]int{},
|
||||
huffmanDecoder{},
|
||||
false,
|
||||
},
|
||||
|
||||
// Illegal input.
|
||||
{
|
||||
[]int{0, 0, 0, 0, 0, 0, 0},
|
||||
huffmanDecoder{},
|
||||
false,
|
||||
},
|
||||
}
|
||||
|
||||
func TestInitDecoder(t *testing.T) {
|
||||
for i, tt := range initDecoderTests {
|
||||
var h huffmanDecoder
|
||||
if h.init(tt.in) != tt.ok {
|
||||
t.Errorf("test %d: init = %v", i, !tt.ok)
|
||||
continue
|
||||
}
|
||||
if !reflect.DeepEqual(&h, &tt.out) {
|
||||
t.Errorf("test %d:\nhave %v\nwant %v", i, h, tt.out)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestUncompressedSource(t *testing.T) {
|
||||
decoder := NewReader(bytes.NewBuffer([]byte{0x01, 0x01, 0x00, 0xfe, 0xff, 0x11}))
|
||||
output := make([]byte, 1)
|
||||
|
|
|
@ -0,0 +1,165 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
// This program generates fixedhuff.go
|
||||
// Invoke as
|
||||
//
|
||||
// go run gen.go |gofmt >fixedhuff.go
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
const maxCodeLen = 16
|
||||
|
||||
// Note: the definition of the huffmanDecoder struct is copied from
|
||||
// inflate.go, as it is private to the implementation.
|
||||
|
||||
// chunk & 15 is number of bits
|
||||
// chunk >> 4 is value, including table link
|
||||
|
||||
const (
|
||||
huffmanChunkBits = 9
|
||||
huffmanNumChunks = 1 << huffmanChunkBits
|
||||
huffmanCountMask = 15
|
||||
huffmanValueShift = 4
|
||||
)
|
||||
|
||||
type huffmanDecoder struct {
|
||||
min int // the minimum code length
|
||||
chunks [huffmanNumChunks]uint32 // chunks as described above
|
||||
links [][]uint32 // overflow links
|
||||
linkMask uint32 // mask the width of the link table
|
||||
}
|
||||
|
||||
// Initialize Huffman decoding tables from array of code lengths.
|
||||
func (h *huffmanDecoder) init(bits []int) bool {
|
||||
// Count number of codes of each length,
|
||||
// compute min and max length.
|
||||
var count [maxCodeLen]int
|
||||
var min, max int
|
||||
for _, n := range bits {
|
||||
if n == 0 {
|
||||
continue
|
||||
}
|
||||
if min == 0 || n < min {
|
||||
min = n
|
||||
}
|
||||
if n > max {
|
||||
max = n
|
||||
}
|
||||
count[n]++
|
||||
}
|
||||
if max == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
h.min = min
|
||||
var linkBits uint
|
||||
var numLinks int
|
||||
if max > huffmanChunkBits {
|
||||
linkBits = uint(max) - huffmanChunkBits
|
||||
numLinks = 1 << linkBits
|
||||
h.linkMask = uint32(numLinks - 1)
|
||||
}
|
||||
code := 0
|
||||
var nextcode [maxCodeLen]int
|
||||
for i := min; i <= max; i++ {
|
||||
if i == huffmanChunkBits+1 {
|
||||
// create link tables
|
||||
link := code >> 1
|
||||
h.links = make([][]uint32, huffmanNumChunks-link)
|
||||
for j := uint(link); j < huffmanNumChunks; j++ {
|
||||
reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8
|
||||
reverse >>= uint(16 - huffmanChunkBits)
|
||||
off := j - uint(link)
|
||||
h.chunks[reverse] = uint32(off<<huffmanValueShift + uint(i))
|
||||
h.links[off] = make([]uint32, 1<<linkBits)
|
||||
}
|
||||
}
|
||||
n := count[i]
|
||||
nextcode[i] = code
|
||||
code += n
|
||||
code <<= 1
|
||||
}
|
||||
|
||||
for i, n := range bits {
|
||||
if n == 0 {
|
||||
continue
|
||||
}
|
||||
code := nextcode[n]
|
||||
nextcode[n]++
|
||||
chunk := uint32(i<<huffmanValueShift | n)
|
||||
reverse := int(reverseByte[code>>8]) | int(reverseByte[code&0xff])<<8
|
||||
reverse >>= uint(16 - n)
|
||||
if n <= huffmanChunkBits {
|
||||
for off := reverse; off < huffmanNumChunks; off += 1 << uint(n) {
|
||||
h.chunks[off] = chunk
|
||||
}
|
||||
} else {
|
||||
linktab := h.links[h.chunks[reverse&(huffmanNumChunks-1)]>>huffmanValueShift]
|
||||
reverse >>= huffmanChunkBits
|
||||
for off := reverse; off < numLinks; off += 1 << uint(n-huffmanChunkBits) {
|
||||
linktab[off] = chunk
|
||||
}
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func main() {
|
||||
var h huffmanDecoder
|
||||
var bits [288]int
|
||||
initReverseByte()
|
||||
for i := 0; i < 144; i++ {
|
||||
bits[i] = 8
|
||||
}
|
||||
for i := 144; i < 256; i++ {
|
||||
bits[i] = 9
|
||||
}
|
||||
for i := 256; i < 280; i++ {
|
||||
bits[i] = 7
|
||||
}
|
||||
for i := 280; i < 288; i++ {
|
||||
bits[i] = 8
|
||||
}
|
||||
h.init(bits[:])
|
||||
fmt.Println("package flate")
|
||||
fmt.Println()
|
||||
fmt.Println("// autogenerated by gen.go, DO NOT EDIT")
|
||||
fmt.Println()
|
||||
fmt.Println("var fixedHuffmanDecoder = huffmanDecoder{")
|
||||
fmt.Printf("\t%d,\n", h.min)
|
||||
fmt.Println("\t[huffmanNumChunks]uint32{")
|
||||
for i := 0; i < huffmanNumChunks; i++ {
|
||||
if i&7 == 0 {
|
||||
fmt.Printf("\t\t")
|
||||
} else {
|
||||
fmt.Printf(" ")
|
||||
}
|
||||
fmt.Printf("0x%04x,", h.chunks[i])
|
||||
if i&7 == 7 {
|
||||
fmt.Println()
|
||||
}
|
||||
}
|
||||
fmt.Println("\t},")
|
||||
fmt.Println("\tnil, 0,")
|
||||
fmt.Println("}")
|
||||
}
|
||||
|
||||
var reverseByte [256]byte
|
||||
|
||||
func initReverseByte() {
|
||||
for x := 0; x < 256; x++ {
|
||||
var result byte
|
||||
for i := uint(0); i < 8; i++ {
|
||||
result |= byte(((x >> i) & 1) << (7 - i))
|
||||
}
|
||||
reverseByte[x] = result
|
||||
}
|
||||
}
|
|
@ -54,32 +54,46 @@ func (e *WriteError) Error() string {
|
|||
return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
|
||||
}
|
||||
|
||||
// Huffman decoder is based on
|
||||
// J. Brian Connell, ``A Huffman-Shannon-Fano Code,''
|
||||
// Proceedings of the IEEE, 61(7) (July 1973), pp 1046-1047.
|
||||
// Note that much of the implemenation of huffmanDecoder is also copied
|
||||
// into gen.go (in package main) for the purpose of precomputing the
|
||||
// fixed huffman tables so they can be included statically.
|
||||
|
||||
// The data structure for decoding Huffman tables is based on that of
|
||||
// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits),
|
||||
// For codes smaller than the table width, there are multiple entries
|
||||
// (each combination of trailing bits has the same value). For codes
|
||||
// larger than the table width, the table contains a link to an overflow
|
||||
// table. The width of each entry in the link table is the maximum code
|
||||
// size minus the chunk width.
|
||||
|
||||
// Note that you can do a lookup in the table even without all bits
|
||||
// filled. Since the extra bits are zero, and the DEFLATE Huffman codes
|
||||
// have the property that shorter codes come before longer ones, the
|
||||
// bit length estimate in the result is a lower bound on the actual
|
||||
// number of bits.
|
||||
|
||||
// chunk & 15 is number of bits
|
||||
// chunk >> 4 is value, including table link
|
||||
|
||||
const (
|
||||
huffmanChunkBits = 9
|
||||
huffmanNumChunks = 1 << huffmanChunkBits
|
||||
huffmanCountMask = 15
|
||||
huffmanValueShift = 4
|
||||
)
|
||||
|
||||
type huffmanDecoder struct {
|
||||
// min, max code length
|
||||
min, max int
|
||||
|
||||
// limit[i] = largest code word of length i
|
||||
// Given code v of length n,
|
||||
// need more bits if v > limit[n].
|
||||
limit [maxCodeLen + 1]int
|
||||
|
||||
// base[i] = smallest code word of length i - seq number
|
||||
base [maxCodeLen + 1]int
|
||||
|
||||
// codes[seq number] = output code.
|
||||
// Given code v of length n, value is
|
||||
// codes[v - base[n]].
|
||||
codes []int
|
||||
min int // the minimum code length
|
||||
chunks [huffmanNumChunks]uint32 // chunks as described above
|
||||
links [][]uint32 // overflow links
|
||||
linkMask uint32 // mask the width of the link table
|
||||
}
|
||||
|
||||
// Initialize Huffman decoding tables from array of code lengths.
|
||||
func (h *huffmanDecoder) init(bits []int) bool {
|
||||
// Count number of codes of each length,
|
||||
// compute min and max length.
|
||||
var count [maxCodeLen + 1]int
|
||||
var count [maxCodeLen]int
|
||||
var min, max int
|
||||
for _, n := range bits {
|
||||
if n == 0 {
|
||||
|
@ -98,93 +112,58 @@ func (h *huffmanDecoder) init(bits []int) bool {
|
|||
}
|
||||
|
||||
h.min = min
|
||||
h.max = max
|
||||
|
||||
// For each code range, compute
|
||||
// nextcode (first code of that length),
|
||||
// limit (last code of that length), and
|
||||
// base (offset from first code to sequence number).
|
||||
var linkBits uint
|
||||
var numLinks int
|
||||
if max > huffmanChunkBits {
|
||||
linkBits = uint(max) - huffmanChunkBits
|
||||
numLinks = 1 << linkBits
|
||||
h.linkMask = uint32(numLinks - 1)
|
||||
}
|
||||
code := 0
|
||||
seq := 0
|
||||
var nextcode [maxCodeLen]int
|
||||
for i := min; i <= max; i++ {
|
||||
if i == huffmanChunkBits+1 {
|
||||
// create link tables
|
||||
link := code >> 1
|
||||
h.links = make([][]uint32, huffmanNumChunks-link)
|
||||
for j := uint(link); j < huffmanNumChunks; j++ {
|
||||
reverse := int(reverseByte[j>>8]) | int(reverseByte[j&0xff])<<8
|
||||
reverse >>= uint(16 - huffmanChunkBits)
|
||||
off := j - uint(link)
|
||||
h.chunks[reverse] = uint32(off<<huffmanValueShift + uint(i))
|
||||
h.links[off] = make([]uint32, 1<<linkBits)
|
||||
}
|
||||
}
|
||||
n := count[i]
|
||||
nextcode[i] = code
|
||||
h.base[i] = code - seq
|
||||
code += n
|
||||
seq += n
|
||||
h.limit[i] = code - 1
|
||||
code <<= 1
|
||||
}
|
||||
|
||||
// Make array mapping sequence numbers to codes.
|
||||
if len(h.codes) < len(bits) {
|
||||
h.codes = make([]int, len(bits))
|
||||
}
|
||||
for i, n := range bits {
|
||||
if n == 0 {
|
||||
continue
|
||||
}
|
||||
code := nextcode[n]
|
||||
nextcode[n]++
|
||||
seq := code - h.base[n]
|
||||
h.codes[seq] = i
|
||||
chunk := uint32(i<<huffmanValueShift | n)
|
||||
reverse := int(reverseByte[code>>8]) | int(reverseByte[code&0xff])<<8
|
||||
reverse >>= uint(16 - n)
|
||||
if n <= huffmanChunkBits {
|
||||
for off := reverse; off < huffmanNumChunks; off += 1 << uint(n) {
|
||||
h.chunks[off] = chunk
|
||||
}
|
||||
} else {
|
||||
linktab := h.links[h.chunks[reverse&(huffmanNumChunks-1)]>>huffmanValueShift]
|
||||
reverse >>= huffmanChunkBits
|
||||
for off := reverse; off < numLinks; off += 1 << uint(n-huffmanChunkBits) {
|
||||
linktab[off] = chunk
|
||||
}
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Hard-coded Huffman tables for DEFLATE algorithm.
|
||||
// See RFC 1951, section 3.2.6.
|
||||
var fixedHuffmanDecoder = huffmanDecoder{
|
||||
7, 9,
|
||||
[maxCodeLen + 1]int{7: 23, 199, 511},
|
||||
[maxCodeLen + 1]int{7: 0, 24, 224},
|
||||
[]int{
|
||||
// length 7: 256-279
|
||||
256, 257, 258, 259, 260, 261, 262,
|
||||
263, 264, 265, 266, 267, 268, 269,
|
||||
270, 271, 272, 273, 274, 275, 276,
|
||||
277, 278, 279,
|
||||
|
||||
// length 8: 0-143
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
|
||||
12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
|
||||
22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
|
||||
42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
|
||||
52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
|
||||
62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
|
||||
72, 73, 74, 75, 76, 77, 78, 79, 80, 81,
|
||||
82, 83, 84, 85, 86, 87, 88, 89, 90, 91,
|
||||
92, 93, 94, 95, 96, 97, 98, 99, 100,
|
||||
101, 102, 103, 104, 105, 106, 107, 108,
|
||||
109, 110, 111, 112, 113, 114, 115, 116,
|
||||
117, 118, 119, 120, 121, 122, 123, 124,
|
||||
125, 126, 127, 128, 129, 130, 131, 132,
|
||||
133, 134, 135, 136, 137, 138, 139, 140,
|
||||
141, 142, 143,
|
||||
|
||||
// length 8: 280-287
|
||||
280, 281, 282, 283, 284, 285, 286, 287,
|
||||
|
||||
// length 9: 144-255
|
||||
144, 145, 146, 147, 148, 149, 150, 151,
|
||||
152, 153, 154, 155, 156, 157, 158, 159,
|
||||
160, 161, 162, 163, 164, 165, 166, 167,
|
||||
168, 169, 170, 171, 172, 173, 174, 175,
|
||||
176, 177, 178, 179, 180, 181, 182, 183,
|
||||
184, 185, 186, 187, 188, 189, 190, 191,
|
||||
192, 193, 194, 195, 196, 197, 198, 199,
|
||||
200, 201, 202, 203, 204, 205, 206, 207,
|
||||
208, 209, 210, 211, 212, 213, 214, 215,
|
||||
216, 217, 218, 219, 220, 221, 222, 223,
|
||||
224, 225, 226, 227, 228, 229, 230, 231,
|
||||
232, 233, 234, 235, 236, 237, 238, 239,
|
||||
240, 241, 242, 243, 244, 245, 246, 247,
|
||||
248, 249, 250, 251, 252, 253, 254, 255,
|
||||
},
|
||||
}
|
||||
|
||||
// The actual read interface needed by NewReader.
|
||||
// If the passed in io.Reader does not also have ReadByte,
|
||||
// the NewReader will introduce its own buffering.
|
||||
|
@ -644,23 +623,23 @@ func (f *decompressor) moreBits() error {
|
|||
|
||||
// Read the next Huffman-encoded symbol from f according to h.
|
||||
func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) {
|
||||
for n := uint(h.min); n <= uint(h.max); n++ {
|
||||
lim := h.limit[n]
|
||||
if lim == -1 {
|
||||
continue
|
||||
}
|
||||
n := uint(h.min)
|
||||
for {
|
||||
for f.nb < n {
|
||||
if err := f.moreBits(); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
v := int(f.b & uint32(1<<n-1))
|
||||
v <<= 16 - n
|
||||
v = int(reverseByte[v>>8]) | int(reverseByte[v&0xFF])<<8 // reverse bits
|
||||
if v <= lim {
|
||||
chunk := h.chunks[f.b&(huffmanNumChunks-1)]
|
||||
n = uint(chunk & huffmanCountMask)
|
||||
if n > huffmanChunkBits {
|
||||
chunk = h.links[chunk>>huffmanValueShift][(f.b>>huffmanChunkBits)&h.linkMask]
|
||||
n = uint(chunk & huffmanCountMask)
|
||||
}
|
||||
if n <= f.nb {
|
||||
f.b >>= n
|
||||
f.nb -= n
|
||||
return h.codes[v-h.base[n]], nil
|
||||
return int(chunk >> huffmanValueShift), nil
|
||||
}
|
||||
}
|
||||
return 0, CorruptInputError(f.roffset)
|
||||
|
|
|
@ -13,7 +13,7 @@ import (
|
|||
|
||||
// A writer is a buffered, flushable writer.
|
||||
type writer interface {
|
||||
WriteByte(byte) error
|
||||
io.ByteWriter
|
||||
Flush() error
|
||||
}
|
||||
|
||||
|
|
|
@ -108,6 +108,8 @@ func (l *List) insertValue(v interface{}, at *Element) *Element {
|
|||
func (l *List) remove(e *Element) *Element {
|
||||
e.prev.next = e.next
|
||||
e.next.prev = e.prev
|
||||
e.next = nil // avoid memory leaks
|
||||
e.prev = nil // avoid memory leaks
|
||||
e.list = nil
|
||||
l.len--
|
||||
return e
|
||||
|
|
|
@ -28,13 +28,10 @@ func (r StreamReader) Read(dst []byte) (n int, err error) {
|
|||
type StreamWriter struct {
|
||||
S Stream
|
||||
W io.Writer
|
||||
Err error
|
||||
Err error // unused
|
||||
}
|
||||
|
||||
func (w StreamWriter) Write(src []byte) (n int, err error) {
|
||||
if w.Err != nil {
|
||||
return 0, w.Err
|
||||
}
|
||||
c := make([]byte, len(src))
|
||||
w.S.XORKeyStream(c, src)
|
||||
n, err = w.W.Write(c)
|
||||
|
@ -42,7 +39,6 @@ func (w StreamWriter) Write(src []byte) (n int, err error) {
|
|||
if err == nil { // should never happen
|
||||
err = io.ErrShortWrite
|
||||
}
|
||||
w.Err = err
|
||||
}
|
||||
return
|
||||
}
|
||||
|
|
|
@ -1503,3 +1503,21 @@ func TestSubstitutionTableKnownAnswerDecrypt(t *testing.T) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
func ExampleNewTripleDESCipher() {
|
||||
// NewTripleDESCipher can also be used when EDE2 is required by
|
||||
// duplicating the first 8 bytes of the 16-byte key.
|
||||
ede2Key := []byte("example key 1234")
|
||||
|
||||
var tripleDESKey []byte
|
||||
tripleDESKey = append(tripleDESKey, ede2Key[:16]...)
|
||||
tripleDESKey = append(tripleDESKey, ede2Key[:8]...)
|
||||
|
||||
_, err := NewTripleDESCipher(tripleDESKey)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
// See crypto/cipher for how to use a cipher.Block for encryption and
|
||||
// decryption.
|
||||
}
|
||||
|
|
|
@ -10,6 +10,21 @@ import (
|
|||
"math/big"
|
||||
)
|
||||
|
||||
// smallPrimes is a list of small, prime numbers that allows us to rapidly
|
||||
// exclude some fraction of composite candidates when searching for a random
|
||||
// prime. This list is truncated at the point where smallPrimesProduct exceeds
|
||||
// a uint64. It does not include two because we ensure that the candidates are
|
||||
// odd by construction.
|
||||
var smallPrimes = []uint8{
|
||||
3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53,
|
||||
}
|
||||
|
||||
// smallPrimesProduct is the product of the values in smallPrimes and allows us
|
||||
// to reduce a candidate prime by this number and then determine whether it's
|
||||
// coprime to all the elements of smallPrimes without further big.Int
|
||||
// operations.
|
||||
var smallPrimesProduct = new(big.Int).SetUint64(16294579238595022365)
|
||||
|
||||
// Prime returns a number, p, of the given size, such that p is prime
|
||||
// with high probability.
|
||||
func Prime(rand io.Reader, bits int) (p *big.Int, err error) {
|
||||
|
@ -25,6 +40,8 @@ func Prime(rand io.Reader, bits int) (p *big.Int, err error) {
|
|||
bytes := make([]byte, (bits+7)/8)
|
||||
p = new(big.Int)
|
||||
|
||||
bigMod := new(big.Int)
|
||||
|
||||
for {
|
||||
_, err = io.ReadFull(rand, bytes)
|
||||
if err != nil {
|
||||
|
@ -33,13 +50,51 @@ func Prime(rand io.Reader, bits int) (p *big.Int, err error) {
|
|||
|
||||
// Clear bits in the first byte to make sure the candidate has a size <= bits.
|
||||
bytes[0] &= uint8(int(1<<b) - 1)
|
||||
// Don't let the value be too small, i.e, set the most significant bit.
|
||||
bytes[0] |= 1 << (b - 1)
|
||||
// Don't let the value be too small, i.e, set the most significant two bits.
|
||||
// Setting the top two bits, rather than just the top bit,
|
||||
// means that when two of these values are multiplied together,
|
||||
// the result isn't ever one bit short.
|
||||
if b >= 2 {
|
||||
bytes[0] |= 3 << (b - 2)
|
||||
} else {
|
||||
// Here b==1, because b cannot be zero.
|
||||
bytes[0] |= 1
|
||||
if len(bytes) > 1 {
|
||||
bytes[1] |= 0x80
|
||||
}
|
||||
}
|
||||
// Make the value odd since an even number this large certainly isn't prime.
|
||||
bytes[len(bytes)-1] |= 1
|
||||
|
||||
p.SetBytes(bytes)
|
||||
if p.ProbablyPrime(20) {
|
||||
|
||||
// Calculate the value mod the product of smallPrimes. If it's
|
||||
// a multiple of any of these primes we add two until it isn't.
|
||||
// The probability of overflowing is minimal and can be ignored
|
||||
// because we still perform Miller-Rabin tests on the result.
|
||||
bigMod.Mod(p, smallPrimesProduct)
|
||||
mod := bigMod.Uint64()
|
||||
|
||||
NextDelta:
|
||||
for delta := uint64(0); delta < 1<<20; delta += 2 {
|
||||
m := mod + delta
|
||||
for _, prime := range smallPrimes {
|
||||
if m%uint64(prime) == 0 {
|
||||
continue NextDelta
|
||||
}
|
||||
}
|
||||
|
||||
if delta > 0 {
|
||||
bigMod.SetUint64(delta)
|
||||
p.Add(p, bigMod)
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
// There is a tiny possibility that, by adding delta, we caused
|
||||
// the number to be one bit too long. Thus we check BitLen
|
||||
// here.
|
||||
if p.ProbablyPrime(20) && p.BitLen() == bits {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
|
|
@ -57,7 +57,7 @@ func TestDecryptPKCS1v15(t *testing.T) {
|
|||
t.Errorf("#%d error decrypting", i)
|
||||
}
|
||||
want := []byte(test.out)
|
||||
if bytes.Compare(out, want) != 0 {
|
||||
if !bytes.Equal(out, want) {
|
||||
t.Errorf("#%d got:%#v want:%#v", i, out, want)
|
||||
}
|
||||
}
|
||||
|
@ -90,7 +90,7 @@ func TestEncryptPKCS1v15(t *testing.T) {
|
|||
return false
|
||||
}
|
||||
|
||||
if bytes.Compare(plaintext, in) != 0 {
|
||||
if !bytes.Equal(plaintext, in) {
|
||||
t.Errorf("output mismatch: %#v %#v", plaintext, in)
|
||||
return false
|
||||
}
|
||||
|
@ -132,7 +132,7 @@ func TestEncryptPKCS1v15SessionKey(t *testing.T) {
|
|||
t.Errorf("#%d error decrypting", i)
|
||||
}
|
||||
want := []byte(test.out)
|
||||
if bytes.Compare(key, want) != 0 {
|
||||
if !bytes.Equal(key, want) {
|
||||
t.Errorf("#%d got:%#v want:%#v", i, key, want)
|
||||
}
|
||||
}
|
||||
|
@ -176,7 +176,7 @@ func TestSignPKCS1v15(t *testing.T) {
|
|||
}
|
||||
|
||||
expected, _ := hex.DecodeString(test.out)
|
||||
if bytes.Compare(s, expected) != 0 {
|
||||
if !bytes.Equal(s, expected) {
|
||||
t.Errorf("#%d got: %x want: %x", i, s, expected)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -175,6 +175,11 @@ NextSetOfPrimes:
|
|||
pminus1.Sub(prime, bigOne)
|
||||
totient.Mul(totient, pminus1)
|
||||
}
|
||||
if n.BitLen() != bits {
|
||||
// This should never happen because crypto/rand should
|
||||
// set the top two bits in each prime.
|
||||
continue NextSetOfPrimes
|
||||
}
|
||||
|
||||
g := new(big.Int)
|
||||
priv.D = new(big.Int)
|
||||
|
|
|
@ -21,6 +21,9 @@ func TestKeyGeneration(t *testing.T) {
|
|||
if err != nil {
|
||||
t.Errorf("failed to generate key")
|
||||
}
|
||||
if bits := priv.N.BitLen(); bits != size {
|
||||
t.Errorf("key too short (%d vs %d)", bits, size)
|
||||
}
|
||||
testKeyBasics(t, priv)
|
||||
}
|
||||
|
||||
|
@ -176,7 +179,7 @@ func TestEncryptOAEP(t *testing.T) {
|
|||
if err != nil {
|
||||
t.Errorf("#%d,%d error: %s", i, j, err)
|
||||
}
|
||||
if bytes.Compare(out, message.out) != 0 {
|
||||
if !bytes.Equal(out, message.out) {
|
||||
t.Errorf("#%d,%d bad result: %x (want %x)", i, j, out, message.out)
|
||||
}
|
||||
}
|
||||
|
@ -200,7 +203,7 @@ func TestDecryptOAEP(t *testing.T) {
|
|||
out, err := DecryptOAEP(sha1, nil, private, message.out, nil)
|
||||
if err != nil {
|
||||
t.Errorf("#%d,%d error: %s", i, j, err)
|
||||
} else if bytes.Compare(out, message.in) != 0 {
|
||||
} else if !bytes.Equal(out, message.in) {
|
||||
t.Errorf("#%d,%d bad result: %#v (want %#v)", i, j, out, message.in)
|
||||
}
|
||||
|
||||
|
@ -208,7 +211,7 @@ func TestDecryptOAEP(t *testing.T) {
|
|||
out, err = DecryptOAEP(sha1, random, private, message.out, nil)
|
||||
if err != nil {
|
||||
t.Errorf("#%d,%d (blind) error: %s", i, j, err)
|
||||
} else if bytes.Compare(out, message.in) != 0 {
|
||||
} else if !bytes.Equal(out, message.in) {
|
||||
t.Errorf("#%d,%d (blind) bad result: %#v (want %#v)", i, j, out, message.in)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -184,6 +184,12 @@ type Config struct {
|
|||
// is nil, TLS uses a list of suites supported by the implementation.
|
||||
CipherSuites []uint16
|
||||
|
||||
// PreferServerCipherSuites controls whether the server selects the
|
||||
// client's most preferred ciphersuite, or the server's most preferred
|
||||
// ciphersuite. If true then the server's preference, as expressed in
|
||||
// the order of elements in CipherSuites, is used.
|
||||
PreferServerCipherSuites bool
|
||||
|
||||
// SessionTicketsDisabled may be set to true to disable session ticket
|
||||
// (resumption) support.
|
||||
SessionTicketsDisabled bool
|
||||
|
|
|
@ -180,8 +180,17 @@ Curves:
|
|||
return true, nil
|
||||
}
|
||||
|
||||
for _, id := range hs.clientHello.cipherSuites {
|
||||
if hs.suite = c.tryCipherSuite(id, hs.ellipticOk); hs.suite != nil {
|
||||
var preferenceList, supportedList []uint16
|
||||
if c.config.PreferServerCipherSuites {
|
||||
preferenceList = c.config.cipherSuites()
|
||||
supportedList = hs.clientHello.cipherSuites
|
||||
} else {
|
||||
preferenceList = hs.clientHello.cipherSuites
|
||||
supportedList = c.config.cipherSuites()
|
||||
}
|
||||
|
||||
for _, id := range preferenceList {
|
||||
if hs.suite = c.tryCipherSuite(id, supportedList, hs.ellipticOk); hs.suite != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
@ -222,7 +231,7 @@ func (hs *serverHandshakeState) checkForResumption() bool {
|
|||
}
|
||||
|
||||
// Check that we also support the ciphersuite from the session.
|
||||
hs.suite = c.tryCipherSuite(hs.sessionState.cipherSuite, hs.ellipticOk)
|
||||
hs.suite = c.tryCipherSuite(hs.sessionState.cipherSuite, c.config.cipherSuites(), hs.ellipticOk)
|
||||
if hs.suite == nil {
|
||||
return false
|
||||
}
|
||||
|
@ -568,8 +577,8 @@ func (hs *serverHandshakeState) processCertsFromClient(certificates [][]byte) (*
|
|||
|
||||
// tryCipherSuite returns a cipherSuite with the given id if that cipher suite
|
||||
// is acceptable to use.
|
||||
func (c *Conn) tryCipherSuite(id uint16, ellipticOk bool) *cipherSuite {
|
||||
for _, supported := range c.config.cipherSuites() {
|
||||
func (c *Conn) tryCipherSuite(id uint16, supportedCipherSuites []uint16, ellipticOk bool) *cipherSuite {
|
||||
for _, supported := range supportedCipherSuites {
|
||||
if id == supported {
|
||||
var candidate *cipherSuite
|
||||
|
||||
|
|
|
@ -125,6 +125,50 @@ func TestClose(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func testHandshake(clientConfig, serverConfig *Config) (state ConnectionState, err error) {
|
||||
c, s := net.Pipe()
|
||||
go func() {
|
||||
cli := Client(c, clientConfig)
|
||||
cli.Handshake()
|
||||
c.Close()
|
||||
}()
|
||||
server := Server(s, serverConfig)
|
||||
err = server.Handshake()
|
||||
if err == nil {
|
||||
state = server.ConnectionState()
|
||||
}
|
||||
s.Close()
|
||||
return
|
||||
}
|
||||
|
||||
func TestCipherSuitePreference(t *testing.T) {
|
||||
serverConfig := &Config{
|
||||
CipherSuites: []uint16{TLS_RSA_WITH_RC4_128_SHA, TLS_RSA_WITH_AES_128_CBC_SHA, TLS_ECDHE_RSA_WITH_RC4_128_SHA},
|
||||
Certificates: testConfig.Certificates,
|
||||
}
|
||||
clientConfig := &Config{
|
||||
CipherSuites: []uint16{TLS_RSA_WITH_AES_128_CBC_SHA, TLS_RSA_WITH_RC4_128_SHA},
|
||||
InsecureSkipVerify: true,
|
||||
}
|
||||
state, err := testHandshake(clientConfig, serverConfig)
|
||||
if err != nil {
|
||||
t.Fatalf("handshake failed: %s", err)
|
||||
}
|
||||
if state.CipherSuite != TLS_RSA_WITH_AES_128_CBC_SHA {
|
||||
// By default the server should use the client's preference.
|
||||
t.Fatalf("Client's preference was not used, got %x", state.CipherSuite)
|
||||
}
|
||||
|
||||
serverConfig.PreferServerCipherSuites = true
|
||||
state, err = testHandshake(clientConfig, serverConfig)
|
||||
if err != nil {
|
||||
t.Fatalf("handshake failed: %s", err)
|
||||
}
|
||||
if state.CipherSuite != TLS_RSA_WITH_RC4_128_SHA {
|
||||
t.Fatalf("Server's preference was not used, got %x", state.CipherSuite)
|
||||
}
|
||||
}
|
||||
|
||||
func testServerScript(t *testing.T, name string, serverScript [][]byte, config *Config, peers []*x509.Certificate) {
|
||||
c, s := net.Pipe()
|
||||
srv := Server(s, config)
|
||||
|
|
|
@ -70,11 +70,12 @@ func initSystemRoots() {
|
|||
|
||||
var data C.CFDataRef = nil
|
||||
err := C.FetchPEMRoots(&data)
|
||||
if err != -1 {
|
||||
defer C.CFRelease(C.CFTypeRef(data))
|
||||
buf := C.GoBytes(unsafe.Pointer(C.CFDataGetBytePtr(data)), C.int(C.CFDataGetLength(data)))
|
||||
roots.AppendCertsFromPEM(buf)
|
||||
if err == -1 {
|
||||
return
|
||||
}
|
||||
|
||||
defer C.CFRelease(C.CFTypeRef(data))
|
||||
buf := C.GoBytes(unsafe.Pointer(C.CFDataGetBytePtr(data)), C.int(C.CFDataGetLength(data)))
|
||||
roots.AppendCertsFromPEM(buf)
|
||||
systemRoots = roots
|
||||
}
|
||||
|
|
|
@ -23,9 +23,11 @@ func initSystemRoots() {
|
|||
data, err := ioutil.ReadFile(file)
|
||||
if err == nil {
|
||||
roots.AppendCertsFromPEM(data)
|
||||
break
|
||||
systemRoots = roots
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
systemRoots = roots
|
||||
// All of the files failed to load. systemRoots will be nil which will
|
||||
// trigger a specific error at verification time.
|
||||
}
|
||||
|
|
|
@ -11,5 +11,4 @@ func (c *Certificate) systemVerify(opts *VerifyOptions) (chains [][]*Certificate
|
|||
}
|
||||
|
||||
func initSystemRoots() {
|
||||
systemRoots = NewCertPool()
|
||||
}
|
||||
|
|
|
@ -27,9 +27,11 @@ func initSystemRoots() {
|
|||
data, err := ioutil.ReadFile(file)
|
||||
if err == nil {
|
||||
roots.AppendCertsFromPEM(data)
|
||||
break
|
||||
systemRoots = roots
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
systemRoots = roots
|
||||
// All of the files failed to load. systemRoots will be nil which will
|
||||
// trigger a specific error at verification time.
|
||||
}
|
||||
|
|
|
@ -226,5 +226,4 @@ func (c *Certificate) systemVerify(opts *VerifyOptions) (chains [][]*Certificate
|
|||
}
|
||||
|
||||
func initSystemRoots() {
|
||||
systemRoots = NewCertPool()
|
||||
}
|
||||
|
|
|
@ -82,6 +82,14 @@ func (e UnknownAuthorityError) Error() string {
|
|||
return "x509: certificate signed by unknown authority"
|
||||
}
|
||||
|
||||
// SystemRootsError results when we fail to load the system root certificates.
|
||||
type SystemRootsError struct {
|
||||
}
|
||||
|
||||
func (e SystemRootsError) Error() string {
|
||||
return "x509: failed to load system roots and no roots provided"
|
||||
}
|
||||
|
||||
// VerifyOptions contains parameters for Certificate.Verify. It's a structure
|
||||
// because other PKIX verification APIs have ended up needing many options.
|
||||
type VerifyOptions struct {
|
||||
|
@ -170,6 +178,9 @@ func (c *Certificate) Verify(opts VerifyOptions) (chains [][]*Certificate, err e
|
|||
|
||||
if opts.Roots == nil {
|
||||
opts.Roots = systemRootsPool()
|
||||
if opts.Roots == nil {
|
||||
return nil, SystemRootsError{}
|
||||
}
|
||||
}
|
||||
|
||||
err = c.isValid(leafCertificate, nil, &opts)
|
||||
|
|
|
@ -15,19 +15,31 @@ import (
|
|||
)
|
||||
|
||||
type verifyTest struct {
|
||||
leaf string
|
||||
intermediates []string
|
||||
roots []string
|
||||
currentTime int64
|
||||
dnsName string
|
||||
systemSkip bool
|
||||
keyUsages []ExtKeyUsage
|
||||
leaf string
|
||||
intermediates []string
|
||||
roots []string
|
||||
currentTime int64
|
||||
dnsName string
|
||||
systemSkip bool
|
||||
keyUsages []ExtKeyUsage
|
||||
testSystemRootsError bool
|
||||
|
||||
errorCallback func(*testing.T, int, error) bool
|
||||
expectedChains [][]string
|
||||
}
|
||||
|
||||
var verifyTests = []verifyTest{
|
||||
{
|
||||
leaf: googleLeaf,
|
||||
intermediates: []string{thawteIntermediate},
|
||||
currentTime: 1302726541,
|
||||
dnsName: "www.google.com",
|
||||
testSystemRootsError: true,
|
||||
|
||||
// Without any roots specified we should get a system roots
|
||||
// error.
|
||||
errorCallback: expectSystemRootsError,
|
||||
},
|
||||
{
|
||||
leaf: googleLeaf,
|
||||
intermediates: []string{thawteIntermediate},
|
||||
|
@ -180,6 +192,14 @@ func expectAuthorityUnknown(t *testing.T, i int, err error) (ok bool) {
|
|||
return true
|
||||
}
|
||||
|
||||
func expectSystemRootsError(t *testing.T, i int, err error) bool {
|
||||
if _, ok := err.(SystemRootsError); !ok {
|
||||
t.Errorf("#%d: error was not SystemRootsError: %s", i, err)
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func certificateFromPEM(pemBytes string) (*Certificate, error) {
|
||||
block, _ := pem.Decode([]byte(pemBytes))
|
||||
if block == nil {
|
||||
|
@ -193,6 +213,9 @@ func testVerify(t *testing.T, useSystemRoots bool) {
|
|||
if useSystemRoots && test.systemSkip {
|
||||
continue
|
||||
}
|
||||
if runtime.GOOS == "windows" && test.testSystemRootsError {
|
||||
continue
|
||||
}
|
||||
|
||||
opts := VerifyOptions{
|
||||
Intermediates: NewCertPool(),
|
||||
|
@ -226,8 +249,19 @@ func testVerify(t *testing.T, useSystemRoots bool) {
|
|||
return
|
||||
}
|
||||
|
||||
var oldSystemRoots *CertPool
|
||||
if test.testSystemRootsError {
|
||||
oldSystemRoots = systemRootsPool()
|
||||
systemRoots = nil
|
||||
opts.Roots = nil
|
||||
}
|
||||
|
||||
chains, err := leaf.Verify(opts)
|
||||
|
||||
if test.testSystemRootsError {
|
||||
systemRoots = oldSystemRoots
|
||||
}
|
||||
|
||||
if test.errorCallback == nil && err != nil {
|
||||
t.Errorf("#%d: unexpected error: %s", i, err)
|
||||
}
|
||||
|
@ -275,8 +309,7 @@ func TestGoVerify(t *testing.T) {
|
|||
|
||||
func TestSystemVerify(t *testing.T) {
|
||||
if runtime.GOOS != "windows" {
|
||||
t.Logf("skipping verify test using system APIs on %q", runtime.GOOS)
|
||||
return
|
||||
t.Skipf("skipping verify test using system APIs on %q", runtime.GOOS)
|
||||
}
|
||||
|
||||
testVerify(t, true)
|
||||
|
|
|
@ -369,19 +369,11 @@ func (db *DB) exec(query string, args []interface{}) (res Result, err error) {
|
|||
}
|
||||
defer sti.Close()
|
||||
|
||||
dargs, err := driverArgs(sti, args)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resi, err := sti.Exec(dargs)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return result{resi}, nil
|
||||
return resultFromStatement(sti, args...)
|
||||
}
|
||||
|
||||
// Query executes a query that returns rows, typically a SELECT.
|
||||
// The args are for any placeholder parameters in the query.
|
||||
func (db *DB) Query(query string, args ...interface{}) (*Rows, error) {
|
||||
stmt, err := db.Prepare(query)
|
||||
if err != nil {
|
||||
|
@ -608,16 +600,7 @@ func (tx *Tx) Exec(query string, args ...interface{}) (Result, error) {
|
|||
}
|
||||
defer sti.Close()
|
||||
|
||||
dargs, err := driverArgs(sti, args)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resi, err := sti.Exec(dargs)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return result{resi}, nil
|
||||
return resultFromStatement(sti, args...)
|
||||
}
|
||||
|
||||
// Query executes a query that returns rows, typically a SELECT.
|
||||
|
@ -682,6 +665,10 @@ func (s *Stmt) Exec(args ...interface{}) (Result, error) {
|
|||
}
|
||||
defer releaseConn(nil)
|
||||
|
||||
return resultFromStatement(si, args...)
|
||||
}
|
||||
|
||||
func resultFromStatement(si driver.Stmt, args ...interface{}) (Result, error) {
|
||||
// -1 means the driver doesn't know how to count the number of
|
||||
// placeholders, so we won't sanity check input here and instead let the
|
||||
// driver deal with errors.
|
||||
|
|
|
@ -52,6 +52,14 @@ func dotest() bool {
|
|||
return true
|
||||
}
|
||||
|
||||
func endtest() {
|
||||
if pclineTempDir != "" {
|
||||
os.RemoveAll(pclineTempDir)
|
||||
pclineTempDir = ""
|
||||
pclinetestBinary = ""
|
||||
}
|
||||
}
|
||||
|
||||
func getTable(t *testing.T) *Table {
|
||||
f, tab := crack(os.Args[0], t)
|
||||
f.Close()
|
||||
|
@ -95,6 +103,7 @@ func TestLineFromAline(t *testing.T) {
|
|||
if !dotest() {
|
||||
return
|
||||
}
|
||||
defer endtest()
|
||||
|
||||
tab := getTable(t)
|
||||
|
||||
|
@ -129,7 +138,7 @@ func TestLineFromAline(t *testing.T) {
|
|||
if !ok {
|
||||
t.Errorf("file %s starts on line %d", path, line)
|
||||
} else if line != ll+1 {
|
||||
t.Errorf("expected next line of file %s to be %d, got %d", path, ll+1, line)
|
||||
t.Fatalf("expected next line of file %s to be %d, got %d", path, ll+1, line)
|
||||
}
|
||||
lastline[path] = line
|
||||
}
|
||||
|
@ -142,6 +151,7 @@ func TestLineAline(t *testing.T) {
|
|||
if !dotest() {
|
||||
return
|
||||
}
|
||||
defer endtest()
|
||||
|
||||
tab := getTable(t)
|
||||
|
||||
|
@ -183,7 +193,7 @@ func TestPCLine(t *testing.T) {
|
|||
if !dotest() {
|
||||
return
|
||||
}
|
||||
defer os.RemoveAll(pclineTempDir)
|
||||
defer endtest()
|
||||
|
||||
f, tab := crack(pclinetestBinary, t)
|
||||
text := f.Section(".text")
|
||||
|
|
|
@ -13,6 +13,7 @@ package gosym
|
|||
// and the Go format is the runtime source, specifically ../../runtime/symtab.c.
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"strconv"
|
||||
|
@ -104,11 +105,18 @@ type sym struct {
|
|||
name []byte
|
||||
}
|
||||
|
||||
var littleEndianSymtab = []byte{0xFE, 0xFF, 0xFF, 0xFF, 0x00, 0x00}
|
||||
|
||||
func walksymtab(data []byte, fn func(sym) error) error {
|
||||
var order binary.ByteOrder = binary.BigEndian
|
||||
if bytes.HasPrefix(data, littleEndianSymtab) {
|
||||
data = data[6:]
|
||||
order = binary.LittleEndian
|
||||
}
|
||||
var s sym
|
||||
p := data
|
||||
for len(p) >= 6 {
|
||||
s.value = binary.BigEndian.Uint32(p[0:4])
|
||||
s.value = order.Uint32(p[0:4])
|
||||
typ := p[4]
|
||||
if typ&0x80 == 0 {
|
||||
return &DecodingError{len(data) - len(p) + 4, "bad symbol type", typ}
|
||||
|
@ -139,7 +147,7 @@ func walksymtab(data []byte, fn func(sym) error) error {
|
|||
}
|
||||
s.name = p[0:i]
|
||||
i += nnul
|
||||
s.gotype = binary.BigEndian.Uint32(p[i : i+4])
|
||||
s.gotype = order.Uint32(p[i : i+4])
|
||||
p = p[i+4:]
|
||||
fn(s)
|
||||
}
|
||||
|
|
|
@ -124,7 +124,7 @@ func TestBitString(t *testing.T) {
|
|||
t.Errorf("#%d: Incorrect error result (did fail? %v, expected: %v)", i, err == nil, test.ok)
|
||||
}
|
||||
if err == nil {
|
||||
if test.bitLength != ret.BitLength || bytes.Compare(ret.Bytes, test.out) != 0 {
|
||||
if test.bitLength != ret.BitLength || !bytes.Equal(ret.Bytes, test.out) {
|
||||
t.Errorf("#%d: Bad result: %v (expected %v %v)", i, ret, test.out, test.bitLength)
|
||||
}
|
||||
}
|
||||
|
@ -166,7 +166,7 @@ func TestBitStringRightAlign(t *testing.T) {
|
|||
for i, test := range bitStringRightAlignTests {
|
||||
bs := BitString{test.in, test.inlen}
|
||||
out := bs.RightAlign()
|
||||
if bytes.Compare(out, test.out) != 0 {
|
||||
if !bytes.Equal(out, test.out) {
|
||||
t.Errorf("#%d got: %x want: %x", i, out, test.out)
|
||||
}
|
||||
}
|
||||
|
@ -477,7 +477,7 @@ func TestRawStructs(t *testing.T) {
|
|||
if s.A != 0x50 {
|
||||
t.Errorf("bad value for A: got %d want %d", s.A, 0x50)
|
||||
}
|
||||
if bytes.Compare([]byte(s.Raw), input) != 0 {
|
||||
if !bytes.Equal([]byte(s.Raw), input) {
|
||||
t.Errorf("bad value for Raw: got %x want %x", s.Raw, input)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -132,7 +132,7 @@ func TestMarshal(t *testing.T) {
|
|||
t.Errorf("#%d failed: %s", i, err)
|
||||
}
|
||||
out, _ := hex.DecodeString(test.out)
|
||||
if bytes.Compare(out, data) != 0 {
|
||||
if !bytes.Equal(out, data) {
|
||||
t.Errorf("#%d got: %x want %x\n\t%q\n\t%q", i, data, out, data, out)
|
||||
|
||||
}
|
||||
|
|
|
@ -67,11 +67,13 @@ point values may be received into any floating point variable. However,
|
|||
the destination variable must be able to represent the value or the decode
|
||||
operation will fail.
|
||||
|
||||
Structs, arrays and slices are also supported. Strings and arrays of bytes are
|
||||
supported with a special, efficient representation (see below). When a slice is
|
||||
decoded, if the existing slice has capacity the slice will be extended in place;
|
||||
if not, a new array is allocated. Regardless, the length of the resulting slice
|
||||
reports the number of elements decoded.
|
||||
Structs, arrays and slices are also supported. Structs encode and
|
||||
decode only exported fields. Strings and arrays of bytes are supported
|
||||
with a special, efficient representation (see below). When a slice
|
||||
is decoded, if the existing slice has capacity the slice will be
|
||||
extended in place; if not, a new array is allocated. Regardless,
|
||||
the length of the resulting slice reports the number of elements
|
||||
decoded.
|
||||
|
||||
Functions and channels cannot be sent in a gob. Attempting
|
||||
to encode a value that contains one will fail.
|
||||
|
|
|
@ -137,8 +137,8 @@ func (enc *Encoder) sendType(w io.Writer, state *encoderState, origt reflect.Typ
|
|||
ut := userType(origt)
|
||||
if ut.isGobEncoder {
|
||||
// The rules are different: regardless of the underlying type's representation,
|
||||
// we need to tell the other side that this exact type is a GobEncoder.
|
||||
return enc.sendActualType(w, state, ut, ut.user)
|
||||
// we need to tell the other side that the base type is a GobEncoder.
|
||||
return enc.sendActualType(w, state, ut, ut.base)
|
||||
}
|
||||
|
||||
// It's a concrete value, so drill down to the base type.
|
||||
|
|
|
@ -142,6 +142,18 @@ type GobTest5 struct {
|
|||
V *ValueGobber
|
||||
}
|
||||
|
||||
type GobTest6 struct {
|
||||
X int // guarantee we have something in common with GobTest*
|
||||
V ValueGobber
|
||||
W *ValueGobber
|
||||
}
|
||||
|
||||
type GobTest7 struct {
|
||||
X int // guarantee we have something in common with GobTest*
|
||||
V *ValueGobber
|
||||
W ValueGobber
|
||||
}
|
||||
|
||||
type GobTestIgnoreEncoder struct {
|
||||
X int // guarantee we have something in common with GobTest*
|
||||
}
|
||||
|
@ -360,6 +372,61 @@ func TestGobEncoderValueEncoder(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
// Test that we can use a value then a pointer type of a GobEncoder
|
||||
// in the same encoded value. Bug 4647.
|
||||
func TestGobEncoderValueThenPointer(t *testing.T) {
|
||||
v := ValueGobber("forty-two")
|
||||
w := ValueGobber("six-by-nine")
|
||||
|
||||
// this was a bug: encoding a GobEncoder by value before a GobEncoder
|
||||
// pointer would cause duplicate type definitions to be sent.
|
||||
|
||||
b := new(bytes.Buffer)
|
||||
enc := NewEncoder(b)
|
||||
if err := enc.Encode(GobTest6{42, v, &w}); err != nil {
|
||||
t.Fatal("encode error:", err)
|
||||
}
|
||||
dec := NewDecoder(b)
|
||||
x := new(GobTest6)
|
||||
if err := dec.Decode(x); err != nil {
|
||||
t.Fatal("decode error:", err)
|
||||
}
|
||||
if got, want := x.V, v; got != want {
|
||||
t.Errorf("v = %q, want %q", got, want)
|
||||
}
|
||||
if got, want := x.W, w; got == nil {
|
||||
t.Errorf("w = nil, want %q", want)
|
||||
} else if *got != want {
|
||||
t.Errorf("w = %q, want %q", *got, want)
|
||||
}
|
||||
}
|
||||
|
||||
// Test that we can use a pointer then a value type of a GobEncoder
|
||||
// in the same encoded value.
|
||||
func TestGobEncoderPointerThenValue(t *testing.T) {
|
||||
v := ValueGobber("forty-two")
|
||||
w := ValueGobber("six-by-nine")
|
||||
|
||||
b := new(bytes.Buffer)
|
||||
enc := NewEncoder(b)
|
||||
if err := enc.Encode(GobTest7{42, &v, w}); err != nil {
|
||||
t.Fatal("encode error:", err)
|
||||
}
|
||||
dec := NewDecoder(b)
|
||||
x := new(GobTest7)
|
||||
if err := dec.Decode(x); err != nil {
|
||||
t.Fatal("decode error:", err)
|
||||
}
|
||||
if got, want := x.V, v; got == nil {
|
||||
t.Errorf("v = nil, want %q", want)
|
||||
} else if *got != want {
|
||||
t.Errorf("v = %q, want %q", got, want)
|
||||
}
|
||||
if got, want := x.W, w; got != want {
|
||||
t.Errorf("w = %q, want %q", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGobEncoderFieldTypeError(t *testing.T) {
|
||||
// GobEncoder to non-decoder: error
|
||||
b := new(bytes.Buffer)
|
||||
|
|
|
@ -65,7 +65,7 @@ func TestDecodeString(t *testing.T) {
|
|||
t.Errorf("#%d: unexpected err value: %s", i, err)
|
||||
continue
|
||||
}
|
||||
if bytes.Compare(dst, test.dec) != 0 {
|
||||
if !bytes.Equal(dst, test.dec) {
|
||||
t.Errorf("#%d: got: %#v want: #%v", i, dst, test.dec)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -52,6 +52,25 @@ import (
|
|||
// an UnmarshalTypeError describing the earliest such error.
|
||||
//
|
||||
func Unmarshal(data []byte, v interface{}) error {
|
||||
|
||||
// skip heavy processing for primitive values
|
||||
var first byte
|
||||
var i int
|
||||
for i, first = range data {
|
||||
if !isSpace(rune(first)) {
|
||||
break
|
||||
}
|
||||
}
|
||||
if first != '{' && first != '[' {
|
||||
rv := reflect.ValueOf(v)
|
||||
if rv.Kind() != reflect.Ptr || rv.IsNil() {
|
||||
return &InvalidUnmarshalError{reflect.TypeOf(v)}
|
||||
}
|
||||
var d decodeState
|
||||
d.literalStore(data[i:], rv.Elem(), false)
|
||||
return d.savedError
|
||||
}
|
||||
|
||||
d := new(decodeState).init(data)
|
||||
|
||||
// Quick check for well-formedness.
|
||||
|
@ -87,6 +106,7 @@ func (e *UnmarshalTypeError) Error() string {
|
|||
|
||||
// An UnmarshalFieldError describes a JSON object key that
|
||||
// led to an unexported (and therefore unwritable) struct field.
|
||||
// (No longer used; kept for compatibility.)
|
||||
type UnmarshalFieldError struct {
|
||||
Key string
|
||||
Type reflect.Type
|
||||
|
@ -328,15 +348,19 @@ func (d *decodeState) array(v reflect.Value) {
|
|||
|
||||
// Check type of target.
|
||||
switch v.Kind() {
|
||||
case reflect.Interface:
|
||||
if v.NumMethod() == 0 {
|
||||
// Decoding into nil interface? Switch to non-reflect code.
|
||||
v.Set(reflect.ValueOf(d.arrayInterface()))
|
||||
return
|
||||
}
|
||||
// Otherwise it's invalid.
|
||||
fallthrough
|
||||
default:
|
||||
d.saveError(&UnmarshalTypeError{"array", v.Type()})
|
||||
d.off--
|
||||
d.next()
|
||||
return
|
||||
case reflect.Interface:
|
||||
// Decoding into nil interface? Switch to non-reflect code.
|
||||
v.Set(reflect.ValueOf(d.arrayInterface()))
|
||||
return
|
||||
case reflect.Array:
|
||||
case reflect.Slice:
|
||||
break
|
||||
|
@ -422,7 +446,7 @@ func (d *decodeState) object(v reflect.Value) {
|
|||
v = pv
|
||||
|
||||
// Decoding into nil interface? Switch to non-reflect code.
|
||||
if v.Kind() == reflect.Interface {
|
||||
if v.Kind() == reflect.Interface && v.NumMethod() == 0 {
|
||||
v.Set(reflect.ValueOf(d.objectInterface()))
|
||||
return
|
||||
}
|
||||
|
@ -430,9 +454,9 @@ func (d *decodeState) object(v reflect.Value) {
|
|||
// Check type of target: struct or map[string]T
|
||||
switch v.Kind() {
|
||||
case reflect.Map:
|
||||
// map must have string type
|
||||
// map must have string kind
|
||||
t := v.Type()
|
||||
if t.Key() != reflect.TypeOf("") {
|
||||
if t.Key().Kind() != reflect.String {
|
||||
d.saveError(&UnmarshalTypeError{"object", v.Type()})
|
||||
break
|
||||
}
|
||||
|
@ -440,11 +464,9 @@ func (d *decodeState) object(v reflect.Value) {
|
|||
v.Set(reflect.MakeMap(t))
|
||||
}
|
||||
case reflect.Struct:
|
||||
|
||||
default:
|
||||
d.saveError(&UnmarshalTypeError{"object", v.Type()})
|
||||
}
|
||||
|
||||
if !v.IsValid() {
|
||||
d.off--
|
||||
d.next() // skip over { } in input
|
||||
return
|
||||
|
@ -509,15 +531,6 @@ func (d *decodeState) object(v reflect.Value) {
|
|||
}
|
||||
subv = subv.Field(i)
|
||||
}
|
||||
} else {
|
||||
// To give a good error, a quick scan for unexported fields in top level.
|
||||
st := v.Type()
|
||||
for i := 0; i < st.NumField(); i++ {
|
||||
f := st.Field(i)
|
||||
if f.PkgPath != "" && strings.EqualFold(f.Name, key) {
|
||||
d.saveError(&UnmarshalFieldError{key, st, f})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -536,10 +549,12 @@ func (d *decodeState) object(v reflect.Value) {
|
|||
} else {
|
||||
d.value(subv)
|
||||
}
|
||||
|
||||
// Write value back to map;
|
||||
// if using struct, subv points into struct already.
|
||||
if v.Kind() == reflect.Map {
|
||||
v.SetMapIndex(reflect.ValueOf(key), subv)
|
||||
kv := reflect.ValueOf(key).Convert(v.Type().Key())
|
||||
v.SetMapIndex(kv, subv)
|
||||
}
|
||||
|
||||
// Next token must be , or }.
|
||||
|
@ -625,7 +640,11 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
|
|||
case reflect.Bool:
|
||||
v.SetBool(value)
|
||||
case reflect.Interface:
|
||||
v.Set(reflect.ValueOf(value))
|
||||
if v.NumMethod() == 0 {
|
||||
v.Set(reflect.ValueOf(value))
|
||||
} else {
|
||||
d.saveError(&UnmarshalTypeError{"bool", v.Type()})
|
||||
}
|
||||
}
|
||||
|
||||
case '"': // string
|
||||
|
@ -655,7 +674,11 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
|
|||
case reflect.String:
|
||||
v.SetString(string(s))
|
||||
case reflect.Interface:
|
||||
v.Set(reflect.ValueOf(string(s)))
|
||||
if v.NumMethod() == 0 {
|
||||
v.Set(reflect.ValueOf(string(s)))
|
||||
} else {
|
||||
d.saveError(&UnmarshalTypeError{"string", v.Type()})
|
||||
}
|
||||
}
|
||||
|
||||
default: // number
|
||||
|
@ -684,6 +707,10 @@ func (d *decodeState) literalStore(item []byte, v reflect.Value, fromQuoted bool
|
|||
d.saveError(err)
|
||||
break
|
||||
}
|
||||
if v.NumMethod() != 0 {
|
||||
d.saveError(&UnmarshalTypeError{"number", v.Type()})
|
||||
break
|
||||
}
|
||||
v.Set(reflect.ValueOf(n))
|
||||
|
||||
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
|
||||
|
|
|
@ -199,12 +199,19 @@ var unmarshalTests = []unmarshalTest{
|
|||
{in: `"invalid: \uD834x\uDD1E"`, ptr: new(string), out: "invalid: \uFFFDx\uFFFD"},
|
||||
{in: "null", ptr: new(interface{}), out: nil},
|
||||
{in: `{"X": [1,2,3], "Y": 4}`, ptr: new(T), out: T{Y: 4}, err: &UnmarshalTypeError{"array", reflect.TypeOf("")}},
|
||||
{in: `{"x": 1}`, ptr: new(tx), out: tx{}, err: &UnmarshalFieldError{"x", txType, txType.Field(0)}},
|
||||
{in: `{"x": 1}`, ptr: new(tx), out: tx{}},
|
||||
{in: `{"F1":1,"F2":2,"F3":3}`, ptr: new(V), out: V{F1: float64(1), F2: int32(2), F3: Number("3")}},
|
||||
{in: `{"F1":1,"F2":2,"F3":3}`, ptr: new(V), out: V{F1: Number("1"), F2: int32(2), F3: Number("3")}, useNumber: true},
|
||||
{in: `{"k1":1,"k2":"s","k3":[1,2.0,3e-3],"k4":{"kk1":"s","kk2":2}}`, ptr: new(interface{}), out: ifaceNumAsFloat64},
|
||||
{in: `{"k1":1,"k2":"s","k3":[1,2.0,3e-3],"k4":{"kk1":"s","kk2":2}}`, ptr: new(interface{}), out: ifaceNumAsNumber, useNumber: true},
|
||||
|
||||
// raw values with whitespace
|
||||
{in: "\n true ", ptr: new(bool), out: true},
|
||||
{in: "\t 1 ", ptr: new(int), out: 1},
|
||||
{in: "\r 1.2 ", ptr: new(float64), out: 1.2},
|
||||
{in: "\t -5 \n", ptr: new(int16), out: int16(-5)},
|
||||
{in: "\t \"a\\u1234\" \n", ptr: new(string), out: "a\u1234"},
|
||||
|
||||
// Z has a "-" tag.
|
||||
{in: `{"Y": 1, "Z": 2}`, ptr: new(T), out: T{Y: 1}},
|
||||
|
||||
|
@ -217,6 +224,16 @@ var unmarshalTests = []unmarshalTest{
|
|||
{in: `[1, 2, 3+]`, err: &SyntaxError{"invalid character '+' after array element", 9}},
|
||||
{in: `{"X":12x}`, err: &SyntaxError{"invalid character 'x' after object key:value pair", 8}, useNumber: true},
|
||||
|
||||
// raw value errors
|
||||
{in: "\x01 42", err: &SyntaxError{"invalid character '\\x01' looking for beginning of value", 1}},
|
||||
{in: " 42 \x01", err: &SyntaxError{"invalid character '\\x01' after top-level value", 5}},
|
||||
{in: "\x01 true", err: &SyntaxError{"invalid character '\\x01' looking for beginning of value", 1}},
|
||||
{in: " false \x01", err: &SyntaxError{"invalid character '\\x01' after top-level value", 8}},
|
||||
{in: "\x01 1.2", err: &SyntaxError{"invalid character '\\x01' looking for beginning of value", 1}},
|
||||
{in: " 3.4 \x01", err: &SyntaxError{"invalid character '\\x01' after top-level value", 6}},
|
||||
{in: "\x01 \"string\"", err: &SyntaxError{"invalid character '\\x01' looking for beginning of value", 1}},
|
||||
{in: " \"string\" \x01", err: &SyntaxError{"invalid character '\\x01' after top-level value", 11}},
|
||||
|
||||
// array tests
|
||||
{in: `[1, 2, 3]`, ptr: new([3]int), out: [3]int{1, 2, 3}},
|
||||
{in: `[1, 2, 3]`, ptr: new([1]int), out: [1]int{1}},
|
||||
|
@ -422,7 +439,7 @@ func TestUnmarshalMarshal(t *testing.T) {
|
|||
if err != nil {
|
||||
t.Fatalf("Marshal: %v", err)
|
||||
}
|
||||
if bytes.Compare(jsonBig, b) != 0 {
|
||||
if !bytes.Equal(jsonBig, b) {
|
||||
t.Errorf("Marshal jsonBig")
|
||||
diff(t, b, jsonBig)
|
||||
return
|
||||
|
@ -474,7 +491,7 @@ func TestLargeByteSlice(t *testing.T) {
|
|||
if err := Unmarshal(b, &s1); err != nil {
|
||||
t.Fatalf("Unmarshal: %v", err)
|
||||
}
|
||||
if bytes.Compare(s0, s1) != 0 {
|
||||
if !bytes.Equal(s0, s1) {
|
||||
t.Errorf("Marshal large byte slice")
|
||||
diff(t, s0, s1)
|
||||
}
|
||||
|
@ -1000,3 +1017,72 @@ func TestUnmarshalNulls(t *testing.T) {
|
|||
t.Errorf("Unmarshal of null values affected primitives")
|
||||
}
|
||||
}
|
||||
|
||||
func TestStringKind(t *testing.T) {
|
||||
type stringKind string
|
||||
type aMap map[stringKind]int
|
||||
|
||||
var m1, m2 map[stringKind]int
|
||||
m1 = map[stringKind]int{
|
||||
"foo": 42,
|
||||
}
|
||||
|
||||
data, err := Marshal(m1)
|
||||
if err != nil {
|
||||
t.Errorf("Unexpected error marshalling: %v", err)
|
||||
}
|
||||
|
||||
err = Unmarshal(data, &m2)
|
||||
if err != nil {
|
||||
t.Errorf("Unexpected error unmarshalling: %v", err)
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(m1, m2) {
|
||||
t.Error("Items should be equal after encoding and then decoding")
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
var decodeTypeErrorTests = []struct {
|
||||
dest interface{}
|
||||
src string
|
||||
}{
|
||||
{new(string), `{"user": "name"}`}, // issue 4628.
|
||||
{new(error), `{}`}, // issue 4222
|
||||
{new(error), `[]`},
|
||||
{new(error), `""`},
|
||||
{new(error), `123`},
|
||||
{new(error), `true`},
|
||||
}
|
||||
|
||||
func TestUnmarshalTypeError(t *testing.T) {
|
||||
for _, item := range decodeTypeErrorTests {
|
||||
err := Unmarshal([]byte(item.src), item.dest)
|
||||
if _, ok := err.(*UnmarshalTypeError); !ok {
|
||||
t.Errorf("expected type error for Unmarshal(%q, type %T): got %v instead",
|
||||
item.src, item.dest, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Test handling of unexported fields that should be ignored.
|
||||
// Issue 4660
|
||||
type unexportedFields struct {
|
||||
Name string
|
||||
m map[string]interface{} `json:"-"`
|
||||
m2 map[string]interface{} `json:"abcd"`
|
||||
}
|
||||
|
||||
func TestUnmarshalUnexported(t *testing.T) {
|
||||
input := `{"Name": "Bob", "m": {"x": 123}, "m2": {"y": 456}, "abcd": {"z": 789}}`
|
||||
want := &unexportedFields{Name: "Bob"}
|
||||
|
||||
out := &unexportedFields{}
|
||||
err := Unmarshal([]byte(input), out)
|
||||
if err != nil {
|
||||
t.Errorf("got error %v, expected nil", err)
|
||||
}
|
||||
if !reflect.DeepEqual(out, want) {
|
||||
t.Errorf("got %q, want %q", out, want)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -75,8 +75,9 @@ import (
|
|||
// Field int `json:",omitempty"`
|
||||
//
|
||||
// The "string" option signals that a field is stored as JSON inside a
|
||||
// JSON-encoded string. This extra level of encoding is sometimes
|
||||
// used when communicating with JavaScript programs:
|
||||
// JSON-encoded string. It applies only to fields of string, floating point,
|
||||
// or integer types. This extra level of encoding is sometimes used when
|
||||
// communicating with JavaScript programs:
|
||||
//
|
||||
// Int64String int64 `json:",string"`
|
||||
//
|
||||
|
@ -437,7 +438,7 @@ func isValidTag(s string) bool {
|
|||
}
|
||||
for _, c := range s {
|
||||
switch {
|
||||
case strings.ContainsRune("!#$%&()*+-./:<=>?@[]^_{|}~", c):
|
||||
case strings.ContainsRune("!#$%&()*+-./:<=>?@[]^_{|}~ ", c):
|
||||
// Backslash and quote chars are reserved, but
|
||||
// otherwise any punctuation chars are allowed
|
||||
// in a tag name.
|
||||
|
@ -617,13 +618,20 @@ func typeFields(t reflect.Type) []field {
|
|||
index := make([]int, len(f.index)+1)
|
||||
copy(index, f.index)
|
||||
index[len(f.index)] = i
|
||||
|
||||
ft := sf.Type
|
||||
if ft.Name() == "" && ft.Kind() == reflect.Ptr {
|
||||
// Follow pointer.
|
||||
ft = ft.Elem()
|
||||
}
|
||||
|
||||
// Record found field and index sequence.
|
||||
if name != "" || !sf.Anonymous {
|
||||
if name != "" || !sf.Anonymous || ft.Kind() != reflect.Struct {
|
||||
tagged := name != ""
|
||||
if name == "" {
|
||||
name = sf.Name
|
||||
}
|
||||
fields = append(fields, field{name, tagged, index, sf.Type,
|
||||
fields = append(fields, field{name, tagged, index, ft,
|
||||
opts.Contains("omitempty"), opts.Contains("string")})
|
||||
if count[f.typ] > 1 {
|
||||
// If there were multiple instances, add a second,
|
||||
|
@ -636,11 +644,6 @@ func typeFields(t reflect.Type) []field {
|
|||
}
|
||||
|
||||
// Record new anonymous struct to explore in next round.
|
||||
ft := sf.Type
|
||||
if ft.Name() == "" {
|
||||
// Must be pointer.
|
||||
ft = ft.Elem()
|
||||
}
|
||||
nextCount[ft]++
|
||||
if nextCount[ft] == 1 {
|
||||
next = append(next, field{name: ft.Name(), index: index, typ: ft})
|
||||
|
|
|
@ -186,3 +186,23 @@ func TestMarshalerEscaping(t *testing.T) {
|
|||
t.Errorf("got %q, want %q", got, want)
|
||||
}
|
||||
}
|
||||
|
||||
type IntType int
|
||||
|
||||
type MyStruct struct {
|
||||
IntType
|
||||
}
|
||||
|
||||
func TestAnonymousNonstruct(t *testing.T) {
|
||||
var i IntType = 11
|
||||
a := MyStruct{i}
|
||||
const want = `{"IntType":11}`
|
||||
|
||||
b, err := Marshal(a)
|
||||
if err != nil {
|
||||
t.Fatalf("Marshal: %v", err)
|
||||
}
|
||||
if got := string(b); got != want {
|
||||
t.Errorf("got %q, want %q", got, want)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -92,7 +92,7 @@ func TestCompactBig(t *testing.T) {
|
|||
t.Fatalf("Compact: %v", err)
|
||||
}
|
||||
b := buf.Bytes()
|
||||
if bytes.Compare(b, jsonBig) != 0 {
|
||||
if !bytes.Equal(b, jsonBig) {
|
||||
t.Error("Compact(jsonBig) != jsonBig")
|
||||
diff(t, b, jsonBig)
|
||||
return
|
||||
|
@ -118,7 +118,7 @@ func TestIndentBig(t *testing.T) {
|
|||
t.Fatalf("Indent2: %v", err)
|
||||
}
|
||||
b1 := buf1.Bytes()
|
||||
if bytes.Compare(b1, b) != 0 {
|
||||
if !bytes.Equal(b1, b) {
|
||||
t.Error("Indent(Indent(jsonBig)) != Indent(jsonBig)")
|
||||
diff(t, b1, b)
|
||||
return
|
||||
|
@ -130,7 +130,7 @@ func TestIndentBig(t *testing.T) {
|
|||
t.Fatalf("Compact: %v", err)
|
||||
}
|
||||
b1 = buf1.Bytes()
|
||||
if bytes.Compare(b1, jsonBig) != 0 {
|
||||
if !bytes.Equal(b1, jsonBig) {
|
||||
t.Error("Compact(Indent(jsonBig)) != jsonBig")
|
||||
diff(t, b1, jsonBig)
|
||||
return
|
||||
|
|
|
@ -60,6 +60,14 @@ type badCodeTag struct {
|
|||
Z string `json:" !\"#&'()*+,."`
|
||||
}
|
||||
|
||||
type spaceTag struct {
|
||||
Q string `json:"With space"`
|
||||
}
|
||||
|
||||
type unicodeTag struct {
|
||||
W string `json:"Ελλάδα"`
|
||||
}
|
||||
|
||||
var structTagObjectKeyTests = []struct {
|
||||
raw interface{}
|
||||
value string
|
||||
|
@ -78,6 +86,8 @@ var structTagObjectKeyTests = []struct {
|
|||
{badCodeTag{"Reliable Man"}, "Reliable Man", "Z"},
|
||||
{percentSlashTag{"brut"}, "brut", "text/html%"},
|
||||
{punctuationTag{"Union Rags"}, "Union Rags", "!#$%&()*+-./:<=>?@[]^_{|}~"},
|
||||
{spaceTag{"Perreddu"}, "Perreddu", "With space"},
|
||||
{unicodeTag{"Loukanikos"}, "Loukanikos", "Ελλάδα"},
|
||||
}
|
||||
|
||||
func TestStructTagObjectKey(t *testing.T) {
|
||||
|
|
|
@ -241,7 +241,7 @@ func (p *printer) marshalSimple(typ reflect.Type, val reflect.Value) error {
|
|||
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
|
||||
p.WriteString(strconv.FormatUint(val.Uint(), 10))
|
||||
case reflect.Float32, reflect.Float64:
|
||||
p.WriteString(strconv.FormatFloat(val.Float(), 'g', -1, 64))
|
||||
p.WriteString(strconv.FormatFloat(val.Float(), 'g', -1, val.Type().Bits()))
|
||||
case reflect.String:
|
||||
// TODO: Add EscapeString.
|
||||
Escape(p, []byte(val.String()))
|
||||
|
@ -273,19 +273,32 @@ func (p *printer) marshalStruct(tinfo *typeInfo, val reflect.Value) error {
|
|||
s := parentStack{printer: p}
|
||||
for i := range tinfo.fields {
|
||||
finfo := &tinfo.fields[i]
|
||||
if finfo.flags&(fAttr|fAny) != 0 {
|
||||
if finfo.flags&(fAttr) != 0 {
|
||||
continue
|
||||
}
|
||||
vf := finfo.value(val)
|
||||
switch finfo.flags & fMode {
|
||||
case fCharData:
|
||||
var scratch [64]byte
|
||||
switch vf.Kind() {
|
||||
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
|
||||
Escape(p, strconv.AppendInt(scratch[:0], vf.Int(), 10))
|
||||
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
|
||||
Escape(p, strconv.AppendUint(scratch[:0], vf.Uint(), 10))
|
||||
case reflect.Float32, reflect.Float64:
|
||||
Escape(p, strconv.AppendFloat(scratch[:0], vf.Float(), 'g', -1, vf.Type().Bits()))
|
||||
case reflect.Bool:
|
||||
Escape(p, strconv.AppendBool(scratch[:0], vf.Bool()))
|
||||
case reflect.String:
|
||||
Escape(p, []byte(vf.String()))
|
||||
case reflect.Slice:
|
||||
if elem, ok := vf.Interface().([]byte); ok {
|
||||
Escape(p, elem)
|
||||
}
|
||||
case reflect.Struct:
|
||||
if vf.Type() == timeType {
|
||||
Escape(p, []byte(vf.Interface().(time.Time).Format(time.RFC3339Nano)))
|
||||
}
|
||||
}
|
||||
continue
|
||||
|
||||
|
@ -340,7 +353,7 @@ func (p *printer) marshalStruct(tinfo *typeInfo, val reflect.Value) error {
|
|||
continue
|
||||
}
|
||||
|
||||
case fElement:
|
||||
case fElement, fElement | fAny:
|
||||
s.trim(finfo.parents)
|
||||
if len(finfo.parents) > len(s.stack) {
|
||||
if vf.Kind() != reflect.Ptr && vf.Kind() != reflect.Interface || !vf.IsNil() {
|
||||
|
|
|
@ -59,6 +59,36 @@ type Book struct {
|
|||
Title string `xml:",chardata"`
|
||||
}
|
||||
|
||||
type Event struct {
|
||||
XMLName struct{} `xml:"event"`
|
||||
Year int `xml:",chardata"`
|
||||
}
|
||||
|
||||
type Movie struct {
|
||||
XMLName struct{} `xml:"movie"`
|
||||
Length uint `xml:",chardata"`
|
||||
}
|
||||
|
||||
type Pi struct {
|
||||
XMLName struct{} `xml:"pi"`
|
||||
Approximation float32 `xml:",chardata"`
|
||||
}
|
||||
|
||||
type Universe struct {
|
||||
XMLName struct{} `xml:"universe"`
|
||||
Visible float64 `xml:",chardata"`
|
||||
}
|
||||
|
||||
type Particle struct {
|
||||
XMLName struct{} `xml:"particle"`
|
||||
HasMass bool `xml:",chardata"`
|
||||
}
|
||||
|
||||
type Departure struct {
|
||||
XMLName struct{} `xml:"departure"`
|
||||
When time.Time `xml:",chardata"`
|
||||
}
|
||||
|
||||
type SecretAgent struct {
|
||||
XMLName struct{} `xml:"agent"`
|
||||
Handle string `xml:"handle,attr"`
|
||||
|
@ -188,6 +218,18 @@ type AnyTest struct {
|
|||
AnyField AnyHolder `xml:",any"`
|
||||
}
|
||||
|
||||
type AnyOmitTest struct {
|
||||
XMLName struct{} `xml:"a"`
|
||||
Nested string `xml:"nested>value"`
|
||||
AnyField *AnyHolder `xml:",any,omitempty"`
|
||||
}
|
||||
|
||||
type AnySliceTest struct {
|
||||
XMLName struct{} `xml:"a"`
|
||||
Nested string `xml:"nested>value"`
|
||||
AnyField []AnyHolder `xml:",any"`
|
||||
}
|
||||
|
||||
type AnyHolder struct {
|
||||
XMLName Name
|
||||
XML string `xml:",innerxml"`
|
||||
|
@ -333,6 +375,12 @@ var marshalTests = []struct {
|
|||
{Value: &Domain{Name: []byte("google.com&friends")}, ExpectXML: `<domain>google.com&friends</domain>`},
|
||||
{Value: &Domain{Name: []byte("google.com"), Comment: []byte(" &friends ")}, ExpectXML: `<domain>google.com<!-- &friends --></domain>`},
|
||||
{Value: &Book{Title: "Pride & Prejudice"}, ExpectXML: `<book>Pride & Prejudice</book>`},
|
||||
{Value: &Event{Year: -3114}, ExpectXML: `<event>-3114</event>`},
|
||||
{Value: &Movie{Length: 13440}, ExpectXML: `<movie>13440</movie>`},
|
||||
{Value: &Pi{Approximation: 3.14159265}, ExpectXML: `<pi>3.1415927</pi>`},
|
||||
{Value: &Universe{Visible: 9.3e13}, ExpectXML: `<universe>9.3e+13</universe>`},
|
||||
{Value: &Particle{HasMass: true}, ExpectXML: `<particle>true</particle>`},
|
||||
{Value: &Departure{When: ParseTime("2013-01-09T00:15:00-09:00")}, ExpectXML: `<departure>2013-01-09T00:15:00-09:00</departure>`},
|
||||
{Value: atomValue, ExpectXML: atomXml},
|
||||
{
|
||||
Value: &Ship{
|
||||
|
@ -652,12 +700,43 @@ var marshalTests = []struct {
|
|||
XML: "<sub>unknown</sub>",
|
||||
},
|
||||
},
|
||||
UnmarshalOnly: true,
|
||||
},
|
||||
{
|
||||
Value: &AnyTest{Nested: "known", AnyField: AnyHolder{XML: "<unknown/>"}},
|
||||
ExpectXML: `<a><nested><value>known</value></nested></a>`,
|
||||
MarshalOnly: true,
|
||||
Value: &AnyTest{Nested: "known",
|
||||
AnyField: AnyHolder{
|
||||
XML: "<unknown/>",
|
||||
XMLName: Name{Local: "AnyField"},
|
||||
},
|
||||
},
|
||||
ExpectXML: `<a><nested><value>known</value></nested><AnyField><unknown/></AnyField></a>`,
|
||||
},
|
||||
{
|
||||
ExpectXML: `<a><nested><value>b</value></nested></a>`,
|
||||
Value: &AnyOmitTest{
|
||||
Nested: "b",
|
||||
},
|
||||
},
|
||||
{
|
||||
ExpectXML: `<a><nested><value>b</value></nested><c><d>e</d></c><g xmlns="f"><h>i</h></g></a>`,
|
||||
Value: &AnySliceTest{
|
||||
Nested: "b",
|
||||
AnyField: []AnyHolder{
|
||||
{
|
||||
XMLName: Name{Local: "c"},
|
||||
XML: "<d>e</d>",
|
||||
},
|
||||
{
|
||||
XMLName: Name{Space: "f", Local: "g"},
|
||||
XML: "<h>i</h>",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
ExpectXML: `<a><nested><value>b</value></nested></a>`,
|
||||
Value: &AnySliceTest{
|
||||
Nested: "b",
|
||||
},
|
||||
},
|
||||
|
||||
// Test recursive types.
|
||||
|
@ -690,15 +769,17 @@ var marshalTests = []struct {
|
|||
|
||||
// Test escaping.
|
||||
{
|
||||
ExpectXML: `<a><nested><value>dquote: "; squote: '; ampersand: &; less: <; greater: >;</value></nested></a>`,
|
||||
ExpectXML: `<a><nested><value>dquote: "; squote: '; ampersand: &; less: <; greater: >;</value></nested><empty></empty></a>`,
|
||||
Value: &AnyTest{
|
||||
Nested: `dquote: "; squote: '; ampersand: &; less: <; greater: >;`,
|
||||
Nested: `dquote: "; squote: '; ampersand: &; less: <; greater: >;`,
|
||||
AnyField: AnyHolder{XMLName: Name{Local: "empty"}},
|
||||
},
|
||||
},
|
||||
{
|
||||
ExpectXML: `<a><nested><value>newline: 
; cr: 
; tab: 	;</value></nested></a>`,
|
||||
ExpectXML: `<a><nested><value>newline: 
; cr: 
; tab: 	;</value></nested><AnyField></AnyField></a>`,
|
||||
Value: &AnyTest{
|
||||
Nested: "newline: \n; cr: \r; tab: \t;",
|
||||
Nested: "newline: \n; cr: \r; tab: \t;",
|
||||
AnyField: AnyHolder{XMLName: Name{Local: "AnyField"}},
|
||||
},
|
||||
},
|
||||
{
|
||||
|
|
|
@ -279,7 +279,7 @@ func (p *Decoder) unmarshal(val reflect.Value, start *StartElement) error {
|
|||
saveComment = finfo.value(sv)
|
||||
}
|
||||
|
||||
case fAny:
|
||||
case fAny, fAny | fElement:
|
||||
if !saveAny.IsValid() {
|
||||
saveAny = finfo.value(sv)
|
||||
}
|
||||
|
@ -374,68 +374,58 @@ Loop:
|
|||
}
|
||||
|
||||
func copyValue(dst reflect.Value, src []byte) (err error) {
|
||||
// Helper functions for integer and unsigned integer conversions
|
||||
var itmp int64
|
||||
getInt64 := func() bool {
|
||||
itmp, err = strconv.ParseInt(string(src), 10, 64)
|
||||
// TODO: should check sizes
|
||||
return err == nil
|
||||
}
|
||||
var utmp uint64
|
||||
getUint64 := func() bool {
|
||||
utmp, err = strconv.ParseUint(string(src), 10, 64)
|
||||
// TODO: check for overflow?
|
||||
return err == nil
|
||||
}
|
||||
var ftmp float64
|
||||
getFloat64 := func() bool {
|
||||
ftmp, err = strconv.ParseFloat(string(src), 64)
|
||||
// TODO: check for overflow?
|
||||
return err == nil
|
||||
if dst.Kind() == reflect.Ptr {
|
||||
if dst.IsNil() {
|
||||
dst.Set(reflect.New(dst.Type().Elem()))
|
||||
}
|
||||
dst = dst.Elem()
|
||||
}
|
||||
|
||||
// Save accumulated data.
|
||||
switch t := dst; t.Kind() {
|
||||
switch dst.Kind() {
|
||||
case reflect.Invalid:
|
||||
// Probably a comment.
|
||||
// Probably a commendst.
|
||||
default:
|
||||
return errors.New("cannot happen: unknown type " + t.Type().String())
|
||||
return errors.New("cannot happen: unknown type " + dst.Type().String())
|
||||
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
|
||||
if !getInt64() {
|
||||
itmp, err := strconv.ParseInt(string(src), 10, dst.Type().Bits())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
t.SetInt(itmp)
|
||||
dst.SetInt(itmp)
|
||||
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
|
||||
if !getUint64() {
|
||||
utmp, err := strconv.ParseUint(string(src), 10, dst.Type().Bits())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
t.SetUint(utmp)
|
||||
dst.SetUint(utmp)
|
||||
case reflect.Float32, reflect.Float64:
|
||||
if !getFloat64() {
|
||||
ftmp, err := strconv.ParseFloat(string(src), dst.Type().Bits())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
t.SetFloat(ftmp)
|
||||
dst.SetFloat(ftmp)
|
||||
case reflect.Bool:
|
||||
value, err := strconv.ParseBool(strings.TrimSpace(string(src)))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
t.SetBool(value)
|
||||
dst.SetBool(value)
|
||||
case reflect.String:
|
||||
t.SetString(string(src))
|
||||
dst.SetString(string(src))
|
||||
case reflect.Slice:
|
||||
if len(src) == 0 {
|
||||
// non-nil to flag presence
|
||||
src = []byte{}
|
||||
}
|
||||
t.SetBytes(src)
|
||||
dst.SetBytes(src)
|
||||
case reflect.Struct:
|
||||
if t.Type() == timeType {
|
||||
if dst.Type() == timeType {
|
||||
tv, err := time.Parse(time.RFC3339, string(src))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
t.Set(reflect.ValueOf(tv))
|
||||
dst.Set(reflect.ValueOf(tv))
|
||||
}
|
||||
}
|
||||
return nil
|
||||
|
|
|
@ -355,3 +355,47 @@ func TestUnmarshalWithoutNameType(t *testing.T) {
|
|||
t.Fatalf("have %v\nwant %v", x.Attr, OK)
|
||||
}
|
||||
}
|
||||
|
||||
func TestUnmarshalAttr(t *testing.T) {
|
||||
type ParamVal struct {
|
||||
Int int `xml:"int,attr"`
|
||||
}
|
||||
|
||||
type ParamPtr struct {
|
||||
Int *int `xml:"int,attr"`
|
||||
}
|
||||
|
||||
type ParamStringPtr struct {
|
||||
Int *string `xml:"int,attr"`
|
||||
}
|
||||
|
||||
x := []byte(`<Param int="1" />`)
|
||||
|
||||
p1 := &ParamPtr{}
|
||||
if err := Unmarshal(x, p1); err != nil {
|
||||
t.Fatalf("Unmarshal: %s", err)
|
||||
}
|
||||
if p1.Int == nil {
|
||||
t.Fatalf("Unmarshal failed in to *int field")
|
||||
} else if *p1.Int != 1 {
|
||||
t.Fatalf("Unmarshal with %s failed:\nhave %#v,\n want %#v", x, p1.Int, 1)
|
||||
}
|
||||
|
||||
p2 := &ParamVal{}
|
||||
if err := Unmarshal(x, p2); err != nil {
|
||||
t.Fatalf("Unmarshal: %s", err)
|
||||
}
|
||||
if p2.Int != 1 {
|
||||
t.Fatalf("Unmarshal with %s failed:\nhave %#v,\n want %#v", x, p2.Int, 1)
|
||||
}
|
||||
|
||||
p3 := &ParamStringPtr{}
|
||||
if err := Unmarshal(x, p3); err != nil {
|
||||
t.Fatalf("Unmarshal: %s", err)
|
||||
}
|
||||
if p3.Int == nil {
|
||||
t.Fatalf("Unmarshal failed in to *string field")
|
||||
} else if *p3.Int != "1" {
|
||||
t.Fatalf("Unmarshal with %s failed:\nhave %#v,\n want %#v", x, p3.Int, 1)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -154,6 +154,9 @@ func structFieldInfo(typ reflect.Type, f *reflect.StructField) (*fieldInfo, erro
|
|||
// This will also catch multiple modes in a single field.
|
||||
valid = false
|
||||
}
|
||||
if finfo.flags&fMode == fAny {
|
||||
finfo.flags |= fElement
|
||||
}
|
||||
if finfo.flags&fOmitEmpty != 0 && finfo.flags&(fElement|fAttr) == 0 {
|
||||
valid = false
|
||||
}
|
||||
|
|
|
@ -5,14 +5,13 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"exp/types"
|
||||
"flag"
|
||||
"fmt"
|
||||
"go/ast"
|
||||
"go/parser"
|
||||
"go/scanner"
|
||||
"go/token"
|
||||
"go/types"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
@ -92,8 +91,7 @@ func parse(fset *token.FileSet, filename string, src []byte) *ast.File {
|
|||
return file
|
||||
}
|
||||
|
||||
func parseStdin(fset *token.FileSet) (files map[string]*ast.File) {
|
||||
files = make(map[string]*ast.File)
|
||||
func parseStdin(fset *token.FileSet) (files []*ast.File) {
|
||||
src, err := ioutil.ReadAll(os.Stdin)
|
||||
if err != nil {
|
||||
report(err)
|
||||
|
@ -101,13 +99,12 @@ func parseStdin(fset *token.FileSet) (files map[string]*ast.File) {
|
|||
}
|
||||
const filename = "<standard input>"
|
||||
if file := parse(fset, filename, src); file != nil {
|
||||
files[filename] = file
|
||||
files = []*ast.File{file}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func parseFiles(fset *token.FileSet, filenames []string) (files map[string]*ast.File) {
|
||||
files = make(map[string]*ast.File)
|
||||
func parseFiles(fset *token.FileSet, filenames []string) (files []*ast.File) {
|
||||
for _, filename := range filenames {
|
||||
src, err := ioutil.ReadFile(filename)
|
||||
if err != nil {
|
||||
|
@ -115,11 +112,7 @@ func parseFiles(fset *token.FileSet, filenames []string) (files map[string]*ast.
|
|||
continue
|
||||
}
|
||||
if file := parse(fset, filename, src); file != nil {
|
||||
if files[filename] != nil {
|
||||
report(errors.New(fmt.Sprintf("%q: duplicate file", filename)))
|
||||
continue
|
||||
}
|
||||
files[filename] = file
|
||||
files = append(files, file)
|
||||
}
|
||||
}
|
||||
return
|
||||
|
@ -169,15 +162,10 @@ func processFiles(filenames []string, allFiles bool) {
|
|||
processPackage(fset, parseFiles(fset, filenames[0:i]))
|
||||
}
|
||||
|
||||
func processPackage(fset *token.FileSet, files map[string]*ast.File) {
|
||||
// make a package (resolve all identifiers)
|
||||
pkg, err := ast.NewPackage(fset, files, types.GcImport, types.Universe)
|
||||
func processPackage(fset *token.FileSet, files []*ast.File) {
|
||||
_, err := types.Check(fset, files)
|
||||
if err != nil {
|
||||
report(err)
|
||||
return
|
||||
}
|
||||
if err = types.Check(fset, pkg, nil, nil); err != nil {
|
||||
report(err)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -51,17 +51,20 @@ var tests = []string{
|
|||
"exp/gotype/testdata/test1.go",
|
||||
|
||||
// directories
|
||||
// Note: packages that don't typecheck yet are commented out
|
||||
// Note: Packages that don't typecheck yet are commented out.
|
||||
// Unless there is a comment next to the commented out packages,
|
||||
// the package doesn't typecheck due to errors in the shift
|
||||
// expression checker.
|
||||
"archive/tar",
|
||||
"archive/zip",
|
||||
|
||||
"bufio",
|
||||
"bytes",
|
||||
|
||||
"compress/bzip2",
|
||||
// "compress/bzip2",
|
||||
"compress/flate",
|
||||
"compress/gzip",
|
||||
"compress/lzw",
|
||||
// "compress/lzw",
|
||||
"compress/zlib",
|
||||
|
||||
"container/heap",
|
||||
|
@ -77,7 +80,7 @@ var tests = []string{
|
|||
"crypto/elliptic",
|
||||
"crypto/hmac",
|
||||
"crypto/md5",
|
||||
"crypto/rand",
|
||||
// "crypto/rand",
|
||||
"crypto/rc4",
|
||||
// "crypto/rsa", // intermittent failure: /home/gri/go2/src/pkg/crypto/rsa/pkcs1v15.go:21:27: undeclared name: io
|
||||
"crypto/sha1",
|
||||
|
@ -91,14 +94,14 @@ var tests = []string{
|
|||
"database/sql",
|
||||
"database/sql/driver",
|
||||
|
||||
"debug/dwarf",
|
||||
// "debug/dwarf",
|
||||
"debug/elf",
|
||||
"debug/gosym",
|
||||
"debug/macho",
|
||||
"debug/pe",
|
||||
|
||||
"encoding/ascii85",
|
||||
"encoding/asn1",
|
||||
// "encoding/asn1",
|
||||
"encoding/base32",
|
||||
"encoding/base64",
|
||||
"encoding/binary",
|
||||
|
@ -114,7 +117,6 @@ var tests = []string{
|
|||
"flag",
|
||||
"fmt",
|
||||
|
||||
"exp/types",
|
||||
"exp/gotype",
|
||||
|
||||
"go/ast",
|
||||
|
@ -124,7 +126,8 @@ var tests = []string{
|
|||
"go/parser",
|
||||
"go/printer",
|
||||
"go/scanner",
|
||||
"go/token",
|
||||
// "go/token",
|
||||
"go/types",
|
||||
|
||||
"hash/adler32",
|
||||
"hash/crc32",
|
||||
|
@ -135,7 +138,7 @@ var tests = []string{
|
|||
"image/color",
|
||||
"image/draw",
|
||||
"image/gif",
|
||||
"image/jpeg",
|
||||
// "image/jpeg",
|
||||
"image/png",
|
||||
|
||||
"index/suffixarray",
|
||||
|
@ -146,15 +149,15 @@ var tests = []string{
|
|||
"log",
|
||||
"log/syslog",
|
||||
|
||||
"math",
|
||||
"math/big",
|
||||
// "math",
|
||||
//"math/big",
|
||||
"math/cmplx",
|
||||
"math/rand",
|
||||
|
||||
"mime",
|
||||
"mime/multipart",
|
||||
|
||||
// "net", // c:\go\root\src\pkg\net\interface_windows.go:54:13: invalid operation: division by zero
|
||||
// "net",
|
||||
"net/http",
|
||||
"net/http/cgi",
|
||||
"net/http/fcgi",
|
||||
|
@ -165,41 +168,41 @@ var tests = []string{
|
|||
"net/rpc",
|
||||
"net/rpc/jsonrpc",
|
||||
"net/smtp",
|
||||
"net/textproto",
|
||||
// "net/textproto",
|
||||
"net/url",
|
||||
|
||||
"path",
|
||||
"path/filepath",
|
||||
|
||||
// "reflect", // unsafe.Sizeof must return size > 0 for pointer types
|
||||
"reflect",
|
||||
|
||||
"regexp",
|
||||
"regexp/syntax",
|
||||
|
||||
"runtime",
|
||||
// "runtime",
|
||||
"runtime/cgo",
|
||||
"runtime/debug",
|
||||
"runtime/pprof",
|
||||
|
||||
"sort",
|
||||
// "strconv", // bug in switch case duplicate detection
|
||||
// "strconv",
|
||||
"strings",
|
||||
|
||||
"sync",
|
||||
"sync/atomic",
|
||||
|
||||
// "syscall", c:\go\root\src\pkg\syscall\syscall_windows.go:35:16: cannot convert EINVAL (constant 536870951) to error
|
||||
// "syscall",
|
||||
|
||||
"testing",
|
||||
"testing/iotest",
|
||||
"testing/quick",
|
||||
|
||||
"text/scanner",
|
||||
// "text/scanner",
|
||||
"text/tabwriter",
|
||||
"text/template",
|
||||
"text/template/parse",
|
||||
|
||||
// "time", // local const decls without initialization expressions
|
||||
"time",
|
||||
"unicode",
|
||||
"unicode/utf16",
|
||||
"unicode/utf8",
|
||||
|
|
|
@ -4,7 +4,6 @@
|
|||
|
||||
/*
|
||||
Package html implements an HTML5-compliant tokenizer and parser.
|
||||
INCOMPLETE.
|
||||
|
||||
Tokenization is done by creating a Tokenizer for an io.Reader r. It is the
|
||||
caller's responsibility to ensure that r provides UTF-8 encoded HTML.
|
||||
|
|
|
@ -382,15 +382,9 @@ func BenchmarkParser(b *testing.B) {
|
|||
}
|
||||
b.SetBytes(int64(len(buf)))
|
||||
runtime.GC()
|
||||
var ms runtime.MemStats
|
||||
runtime.ReadMemStats(&ms)
|
||||
mallocs := ms.Mallocs
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
Parse(bytes.NewBuffer(buf))
|
||||
}
|
||||
b.StopTimer()
|
||||
runtime.ReadMemStats(&ms)
|
||||
mallocs = ms.Mallocs - mallocs
|
||||
b.Logf("%d iterations, %d mallocs per iteration\n", b.N, int(mallocs)/b.N)
|
||||
}
|
||||
|
|
|
@ -14,7 +14,7 @@ import (
|
|||
|
||||
type writer interface {
|
||||
io.Writer
|
||||
WriteByte(byte) error
|
||||
io.ByteWriter
|
||||
WriteString(string) (int, error)
|
||||
}
|
||||
|
||||
|
|
|
@ -634,9 +634,7 @@ func benchmarkTokenizer(b *testing.B, level int) {
|
|||
}
|
||||
b.SetBytes(int64(len(buf)))
|
||||
runtime.GC()
|
||||
var ms runtime.MemStats
|
||||
runtime.ReadMemStats(&ms)
|
||||
mallocs := ms.Mallocs
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
z := NewTokenizer(bytes.NewBuffer(buf))
|
||||
|
@ -674,10 +672,6 @@ func benchmarkTokenizer(b *testing.B, level int) {
|
|||
}
|
||||
}
|
||||
}
|
||||
b.StopTimer()
|
||||
runtime.ReadMemStats(&ms)
|
||||
mallocs = ms.Mallocs - mallocs
|
||||
b.Logf("%d iterations, %d mallocs per iteration\n", b.N, int(mallocs)/b.N)
|
||||
}
|
||||
|
||||
func BenchmarkRawLevelTokenizer(b *testing.B) { benchmarkTokenizer(b, rawLevel) }
|
||||
|
|
|
@ -98,24 +98,24 @@ func (b *Builder) Tailoring(locale string) *Tailoring {
|
|||
// a value for each colelem that is a variable. (See the reference above.)
|
||||
func (b *Builder) Add(runes []rune, colelems [][]int, variables []int) error {
|
||||
str := string(runes)
|
||||
elems := make([][]int, len(colelems))
|
||||
elems := make([]rawCE, len(colelems))
|
||||
for i, ce := range colelems {
|
||||
elems[i] = append(elems[i], ce...)
|
||||
if len(ce) == 0 {
|
||||
elems[i] = append(elems[i], []int{0, 0, 0, 0}...)
|
||||
break
|
||||
}
|
||||
elems[i] = makeRawCE(ce, 0)
|
||||
if len(ce) == 1 {
|
||||
elems[i] = append(elems[i], defaultSecondary)
|
||||
elems[i].w[1] = defaultSecondary
|
||||
}
|
||||
if len(ce) <= 2 {
|
||||
elems[i] = append(elems[i], defaultTertiary)
|
||||
elems[i].w[2] = defaultTertiary
|
||||
}
|
||||
if len(ce) <= 3 {
|
||||
elems[i] = append(elems[i], ce[0])
|
||||
elems[i].w[3] = ce[0]
|
||||
}
|
||||
}
|
||||
for i, ce := range elems {
|
||||
p := ce.w[0]
|
||||
isvar := false
|
||||
for _, j := range variables {
|
||||
if i == j {
|
||||
|
@ -123,18 +123,18 @@ func (b *Builder) Add(runes []rune, colelems [][]int, variables []int) error {
|
|||
}
|
||||
}
|
||||
if isvar {
|
||||
if ce[0] >= b.minNonVar && b.minNonVar > 0 {
|
||||
return fmt.Errorf("primary value %X of variable is larger than the smallest non-variable %X", ce[0], b.minNonVar)
|
||||
if p >= b.minNonVar && b.minNonVar > 0 {
|
||||
return fmt.Errorf("primary value %X of variable is larger than the smallest non-variable %X", p, b.minNonVar)
|
||||
}
|
||||
if ce[0] > b.varTop {
|
||||
b.varTop = ce[0]
|
||||
if p > b.varTop {
|
||||
b.varTop = p
|
||||
}
|
||||
} else if ce[0] > 1 { // 1 is a special primary value reserved for FFFE
|
||||
if ce[0] <= b.varTop {
|
||||
return fmt.Errorf("primary value %X of non-variable is smaller than the highest variable %X", ce[0], b.varTop)
|
||||
} else if p > 1 { // 1 is a special primary value reserved for FFFE
|
||||
if p <= b.varTop {
|
||||
return fmt.Errorf("primary value %X of non-variable is smaller than the highest variable %X", p, b.varTop)
|
||||
}
|
||||
if b.minNonVar == 0 || ce[0] < b.minNonVar {
|
||||
b.minNonVar = ce[0]
|
||||
if b.minNonVar == 0 || p < b.minNonVar {
|
||||
b.minNonVar = p
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -142,16 +142,42 @@ func (b *Builder) Add(runes []rune, colelems [][]int, variables []int) error {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
cccs := []uint8{}
|
||||
nfd := norm.NFD.String(str)
|
||||
for i := range nfd {
|
||||
cccs = append(cccs, norm.NFD.PropertiesString(nfd[i:]).CCC())
|
||||
}
|
||||
if len(cccs) < len(elems) {
|
||||
if len(cccs) > 2 {
|
||||
return fmt.Errorf("number of decomposed characters should be greater or equal to the number of collation elements for len(colelems) > 3 (%d < %d)", len(cccs), len(elems))
|
||||
}
|
||||
p := len(elems) - 1
|
||||
for ; p > 0 && elems[p].w[0] == 0; p-- {
|
||||
elems[p].ccc = cccs[len(cccs)-1]
|
||||
}
|
||||
for ; p >= 0; p-- {
|
||||
elems[p].ccc = cccs[0]
|
||||
}
|
||||
} else {
|
||||
for i := range elems {
|
||||
elems[i].ccc = cccs[i]
|
||||
}
|
||||
}
|
||||
// doNorm in collate.go assumes that the following conditions hold.
|
||||
if len(elems) > 1 && len(cccs) > 1 && cccs[0] != 0 && cccs[0] != cccs[len(cccs)-1] {
|
||||
return fmt.Errorf("incompatible CCC values for expansion %X (%d)", runes, cccs)
|
||||
}
|
||||
b.root.newEntry(str, elems)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (t *Tailoring) setAnchor(anchor string) error {
|
||||
anchor = norm.NFD.String(anchor)
|
||||
anchor = norm.NFC.String(anchor)
|
||||
a := t.index.find(anchor)
|
||||
if a == nil {
|
||||
a = t.index.newEntry(anchor, nil)
|
||||
a.implicit = true
|
||||
a.modified = true
|
||||
for _, r := range []rune(anchor) {
|
||||
e := t.index.find(string(r))
|
||||
e.lock = true
|
||||
|
@ -221,7 +247,7 @@ func (t *Tailoring) Insert(level collate.Level, str, extend string) error {
|
|||
if t.anchor == nil {
|
||||
return fmt.Errorf("%s:Insert: no anchor point set for tailoring of %s", t.id, str)
|
||||
}
|
||||
str = norm.NFD.String(str)
|
||||
str = norm.NFC.String(str)
|
||||
e := t.index.find(str)
|
||||
if e == nil {
|
||||
e = t.index.newEntry(str, nil)
|
||||
|
@ -262,12 +288,13 @@ func (t *Tailoring) Insert(level collate.Level, str, extend string) error {
|
|||
}
|
||||
e.extend = norm.NFD.String(extend)
|
||||
e.exclude = false
|
||||
e.modified = true
|
||||
e.elems = nil
|
||||
t.anchor = e
|
||||
return nil
|
||||
}
|
||||
|
||||
func (o *ordering) getWeight(e *entry) [][]int {
|
||||
func (o *ordering) getWeight(e *entry) []rawCE {
|
||||
if len(e.elems) == 0 && e.logical == noAnchor {
|
||||
if e.implicit {
|
||||
for _, r := range e.runes {
|
||||
|
@ -279,11 +306,10 @@ func (o *ordering) getWeight(e *entry) [][]int {
|
|||
for ; a.elems == nil && !a.implicit; a = a.next {
|
||||
count[a.level]++
|
||||
}
|
||||
e.elems = append([][]int(nil), make([]int, len(a.elems[0])))
|
||||
copy(e.elems[0], a.elems[0])
|
||||
e.elems = []rawCE{makeRawCE(a.elems[0].w, a.elems[0].ccc)}
|
||||
for i := collate.Primary; i < collate.Quaternary; i++ {
|
||||
if count[i] != 0 {
|
||||
e.elems[0][i] -= count[i]
|
||||
e.elems[0].w[i] -= count[i]
|
||||
break
|
||||
}
|
||||
}
|
||||
|
@ -315,11 +341,11 @@ func (o *ordering) verifyWeights(a, b *entry, level collate.Level) error {
|
|||
return nil
|
||||
}
|
||||
for i := collate.Primary; i < level; i++ {
|
||||
if a.elems[0][i] < b.elems[0][i] {
|
||||
if a.elems[0].w[i] < b.elems[0].w[i] {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
if a.elems[0][level] >= b.elems[0][level] {
|
||||
if a.elems[0].w[level] >= b.elems[0].w[level] {
|
||||
err := fmt.Errorf("%s:overflow: collation elements of %q (%X) overflows those of %q (%X) at level %d (%X >= %X)", o.id, a.str, a.runes, b.str, b.runes, level, a.elems, b.elems)
|
||||
log.Println(err)
|
||||
// TODO: return the error instead, or better, fix the conflicting entry by making room.
|
||||
|
@ -339,6 +365,54 @@ func (b *Builder) errorID(locale string, e error) {
|
|||
}
|
||||
}
|
||||
|
||||
// patchNorm ensures that NFC and NFD counterparts are consistent.
|
||||
func (o *ordering) patchNorm() {
|
||||
// Insert the NFD counterparts, if necessary.
|
||||
for _, e := range o.ordered {
|
||||
nfd := norm.NFD.String(e.str)
|
||||
if nfd != e.str {
|
||||
if e0 := o.find(nfd); e0 != nil && !e0.modified {
|
||||
e0.elems = e.elems
|
||||
} else if e.modified && !equalCEArrays(o.genColElems(nfd), e.elems) {
|
||||
e := o.newEntry(nfd, e.elems)
|
||||
e.modified = true
|
||||
}
|
||||
}
|
||||
}
|
||||
// Update unchanged composed forms if one of their parts changed.
|
||||
for _, e := range o.ordered {
|
||||
nfd := norm.NFD.String(e.str)
|
||||
if e.modified || nfd == e.str {
|
||||
continue
|
||||
}
|
||||
if e0 := o.find(nfd); e0 != nil {
|
||||
e.elems = e0.elems
|
||||
} else {
|
||||
e.elems = o.genColElems(nfd)
|
||||
if norm.NFD.LastBoundary([]byte(nfd)) == 0 {
|
||||
r := []rune(nfd)
|
||||
head := string(r[0])
|
||||
tail := ""
|
||||
for i := 1; i < len(r); i++ {
|
||||
s := norm.NFC.String(head + string(r[i]))
|
||||
if e0 := o.find(s); e0 != nil && e0.modified {
|
||||
head = s
|
||||
} else {
|
||||
tail += string(r[i])
|
||||
}
|
||||
}
|
||||
e.elems = append(o.genColElems(head), o.genColElems(tail)...)
|
||||
}
|
||||
}
|
||||
}
|
||||
// Exclude entries for which the individual runes generate the same collation elements.
|
||||
for _, e := range o.ordered {
|
||||
if len(e.runes) > 1 && equalCEArrays(o.genColElems(e.str), e.elems) {
|
||||
e.exclude = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (b *Builder) buildOrdering(o *ordering) {
|
||||
for _, e := range o.ordered {
|
||||
o.getWeight(e)
|
||||
|
@ -346,6 +420,7 @@ func (b *Builder) buildOrdering(o *ordering) {
|
|||
for _, e := range o.ordered {
|
||||
o.addExtension(e)
|
||||
}
|
||||
o.patchNorm()
|
||||
o.sort()
|
||||
simplify(o)
|
||||
b.processExpansions(o) // requires simplify
|
||||
|
@ -392,11 +467,11 @@ func (b *Builder) Build() (*collate.Collator, error) {
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
c := collate.Init(t)
|
||||
if c == nil {
|
||||
table := collate.Init(t)
|
||||
if table == nil {
|
||||
panic("generated table of incompatible type")
|
||||
}
|
||||
return c, nil
|
||||
return collate.NewFromTable(table), nil
|
||||
}
|
||||
|
||||
// Build builds a Collator for Tailoring t.
|
||||
|
@ -436,20 +511,20 @@ func (b *Builder) Print(w io.Writer) (n int, err error) {
|
|||
|
||||
// reproducibleFromNFKD checks whether the given expansion could be generated
|
||||
// from an NFKD expansion.
|
||||
func reproducibleFromNFKD(e *entry, exp, nfkd [][]int) bool {
|
||||
func reproducibleFromNFKD(e *entry, exp, nfkd []rawCE) bool {
|
||||
// Length must be equal.
|
||||
if len(exp) != len(nfkd) {
|
||||
return false
|
||||
}
|
||||
for i, ce := range exp {
|
||||
// Primary and secondary values should be equal.
|
||||
if ce[0] != nfkd[i][0] || ce[1] != nfkd[i][1] {
|
||||
if ce.w[0] != nfkd[i].w[0] || ce.w[1] != nfkd[i].w[1] {
|
||||
return false
|
||||
}
|
||||
// Tertiary values should be equal to maxTertiary for third element onwards.
|
||||
// TODO: there seem to be a lot of cases in CLDR (e.g. ㏭ in zh.xml) that can
|
||||
// simply be dropped. Try this out by dropping the following code.
|
||||
if i >= 2 && ce[2] != maxTertiary {
|
||||
if i >= 2 && ce.w[2] != maxTertiary {
|
||||
return false
|
||||
}
|
||||
if _, err := makeCE(ce); err != nil {
|
||||
|
@ -469,22 +544,12 @@ func simplify(o *ordering) {
|
|||
keep[e.runes[0]] = true
|
||||
}
|
||||
}
|
||||
// Remove entries for which the runes normalize (using NFD) to identical values.
|
||||
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
|
||||
s := e.str
|
||||
nfd := norm.NFD.String(s)
|
||||
if len(e.runes) > 1 || keep[e.runes[0]] || nfd == s {
|
||||
continue
|
||||
}
|
||||
if equalCEArrays(o.genColElems(nfd), e.elems) {
|
||||
e.remove()
|
||||
}
|
||||
}
|
||||
// Tag entries for which the runes NFKD decompose to identical values.
|
||||
for e := o.front(); e != nil; e, _ = e.nextIndexed() {
|
||||
s := e.str
|
||||
nfkd := norm.NFKD.String(s)
|
||||
if e.decompose || len(e.runes) > 1 || len(e.elems) == 1 || keep[e.runes[0]] || nfkd == s {
|
||||
nfd := norm.NFD.String(s)
|
||||
if e.decompose || len(e.runes) > 1 || len(e.elems) == 1 || keep[e.runes[0]] || nfkd == nfd {
|
||||
continue
|
||||
}
|
||||
if reproducibleFromNFKD(e, e.elems, o.genColElems(nfkd)) {
|
||||
|
@ -589,18 +654,18 @@ func (b *Builder) processContractions(o *ordering) {
|
|||
// Bucket sort entries in index order.
|
||||
es := make([]*entry, len(l))
|
||||
for _, e := range l {
|
||||
var o, sn int
|
||||
var p, sn int
|
||||
if len(e.runes) > 1 {
|
||||
str := []byte(string(e.runes[1:]))
|
||||
o, sn = t.contractTries.lookup(handle, str)
|
||||
p, sn = t.contractTries.lookup(handle, str)
|
||||
if sn != len(str) {
|
||||
log.Fatalf("processContractions: unexpected length for '%X'; len=%d; want %d", e.runes, sn, len(str))
|
||||
log.Fatalf("%s: processContractions: unexpected length for '%X'; len=%d; want %d", o.id, e.runes, sn, len(str))
|
||||
}
|
||||
}
|
||||
if es[o] != nil {
|
||||
log.Fatalf("Multiple contractions for position %d for rune %U", o, e.runes[0])
|
||||
if es[p] != nil {
|
||||
log.Fatalf("%s: multiple contractions for position %d for rune %U", o.id, p, e.runes[0])
|
||||
}
|
||||
es[o] = e
|
||||
es[p] = e
|
||||
}
|
||||
// Create collation elements for contractions.
|
||||
elems := []uint32{}
|
||||
|
|
|
@ -7,48 +7,64 @@ package build
|
|||
import "testing"
|
||||
|
||||
// cjk returns an implicit collation element for a CJK rune.
|
||||
func cjk(r rune) [][]int {
|
||||
func cjk(r rune) []rawCE {
|
||||
// A CJK character C is represented in the DUCET as
|
||||
// [.AAAA.0020.0002.C][.BBBB.0000.0000.C]
|
||||
// Where AAAA is the most significant 15 bits plus a base value.
|
||||
// Any base value will work for the test, so we pick the common value of FB40.
|
||||
const base = 0xFB40
|
||||
return [][]int{
|
||||
{base + int(r>>15), defaultSecondary, defaultTertiary, int(r)},
|
||||
{int(r&0x7FFF) | 0x8000, 0, 0, int(r)},
|
||||
return []rawCE{
|
||||
{w: []int{base + int(r>>15), defaultSecondary, defaultTertiary, int(r)}},
|
||||
{w: []int{int(r&0x7FFF) | 0x8000, 0, 0, int(r)}},
|
||||
}
|
||||
}
|
||||
|
||||
func pCE(p int) [][]int {
|
||||
return [][]int{{p, defaultSecondary, defaultTertiary, 0}}
|
||||
func pCE(p int) []rawCE {
|
||||
return mkCE([]int{p, defaultSecondary, defaultTertiary, 0}, 0)
|
||||
}
|
||||
|
||||
func pqCE(p, q int) [][]int {
|
||||
return [][]int{{p, defaultSecondary, defaultTertiary, q}}
|
||||
func pqCE(p, q int) []rawCE {
|
||||
return mkCE([]int{p, defaultSecondary, defaultTertiary, q}, 0)
|
||||
}
|
||||
|
||||
func ptCE(p, t int) [][]int {
|
||||
return [][]int{{p, defaultSecondary, t, 0}}
|
||||
func ptCE(p, t int) []rawCE {
|
||||
return mkCE([]int{p, defaultSecondary, t, 0}, 0)
|
||||
}
|
||||
|
||||
func sCE(s int) [][]int {
|
||||
return [][]int{{0, s, defaultTertiary, 0}}
|
||||
func ptcCE(p, t int, ccc uint8) []rawCE {
|
||||
return mkCE([]int{p, defaultSecondary, t, 0}, ccc)
|
||||
}
|
||||
|
||||
func stCE(s, t int) [][]int {
|
||||
return [][]int{{0, s, t, 0}}
|
||||
func sCE(s int) []rawCE {
|
||||
return mkCE([]int{0, s, defaultTertiary, 0}, 0)
|
||||
}
|
||||
|
||||
func stCE(s, t int) []rawCE {
|
||||
return mkCE([]int{0, s, t, 0}, 0)
|
||||
}
|
||||
|
||||
func scCE(s int, ccc uint8) []rawCE {
|
||||
return mkCE([]int{0, s, defaultTertiary, 0}, ccc)
|
||||
}
|
||||
|
||||
func mkCE(w []int, ccc uint8) []rawCE {
|
||||
return []rawCE{rawCE{w, ccc}}
|
||||
}
|
||||
|
||||
// ducetElem is used to define test data that is used to generate a table.
|
||||
type ducetElem struct {
|
||||
str string
|
||||
ces [][]int
|
||||
ces []rawCE
|
||||
}
|
||||
|
||||
func newBuilder(t *testing.T, ducet []ducetElem) *Builder {
|
||||
b := NewBuilder()
|
||||
for _, e := range ducet {
|
||||
if err := b.Add([]rune(e.str), e.ces, nil); err != nil {
|
||||
ces := [][]int{}
|
||||
for _, ce := range e.ces {
|
||||
ces = append(ces, ce.w)
|
||||
}
|
||||
if err := b.Add([]rune(e.str), ces, nil); err != nil {
|
||||
t.Errorf(err.Error())
|
||||
}
|
||||
}
|
||||
|
@ -58,7 +74,7 @@ func newBuilder(t *testing.T, ducet []ducetElem) *Builder {
|
|||
}
|
||||
|
||||
type convertTest struct {
|
||||
in, out [][]int
|
||||
in, out []rawCE
|
||||
err bool
|
||||
}
|
||||
|
||||
|
@ -73,7 +89,10 @@ var convLargeTests = []convertTest{
|
|||
|
||||
func TestConvertLarge(t *testing.T) {
|
||||
for i, tt := range convLargeTests {
|
||||
e := &entry{elems: tt.in}
|
||||
e := new(entry)
|
||||
for _, ce := range tt.in {
|
||||
e.elems = append(e.elems, makeRawCE(ce.w, ce.ccc))
|
||||
}
|
||||
elems, err := convertLargeWeights(e.elems)
|
||||
if tt.err {
|
||||
if err == nil {
|
||||
|
@ -173,16 +192,18 @@ func TestSimplify(t *testing.T) {
|
|||
}
|
||||
|
||||
var expandTest = []ducetElem{
|
||||
{"\u00C0", append(ptCE(100, 8), sCE(30)...)},
|
||||
{"\u00C8", append(ptCE(105, 8), sCE(30)...)},
|
||||
{"\u00C9", append(ptCE(105, 8), sCE(30)...)}, // identical expansion
|
||||
{"\u0300", append(scCE(29, 230), scCE(30, 230)...)},
|
||||
{"\u00C0", append(ptCE(100, 8), scCE(30, 230)...)},
|
||||
{"\u00C8", append(ptCE(105, 8), scCE(30, 230)...)},
|
||||
{"\u00C9", append(ptCE(105, 8), scCE(30, 230)...)}, // identical expansion
|
||||
{"\u05F2", append(ptCE(200, 4), ptCE(200, 4)[0], ptCE(200, 4)[0])},
|
||||
{"\u01FF", append(ptCE(200, 4), ptcCE(201, 4, 0)[0], scCE(30, 230)[0])},
|
||||
}
|
||||
|
||||
func TestExpand(t *testing.T) {
|
||||
const (
|
||||
totalExpansions = 3
|
||||
totalElements = 2 + 2 + 3 + totalExpansions
|
||||
totalExpansions = 5
|
||||
totalElements = 2 + 2 + 2 + 3 + 3 + totalExpansions
|
||||
)
|
||||
b := newBuilder(t, expandTest)
|
||||
o := &b.root
|
||||
|
|
|
@ -16,6 +16,17 @@ const (
|
|||
maxTertiary = 0x1F
|
||||
)
|
||||
|
||||
type rawCE struct {
|
||||
w []int
|
||||
ccc uint8
|
||||
}
|
||||
|
||||
func makeRawCE(w []int, ccc uint8) rawCE {
|
||||
ce := rawCE{w: make([]int, 4), ccc: ccc}
|
||||
copy(ce.w, w)
|
||||
return ce
|
||||
}
|
||||
|
||||
// A collation element is represented as an uint32.
|
||||
// In the typical case, a rune maps to a single collation element. If a rune
|
||||
// can be the start of a contraction or expands into multiple collation elements,
|
||||
|
@ -29,29 +40,36 @@ const (
|
|||
// 01pppppp pppppppp ppppppp0 ssssssss
|
||||
// - p* is primary collation value
|
||||
// - s* is the secondary collation value
|
||||
// or
|
||||
// 00pppppp pppppppp ppppppps sssttttt, where
|
||||
// - p* is primary collation value
|
||||
// - s* offset of secondary from default value.
|
||||
// - t* is the tertiary collation value
|
||||
// 100ttttt cccccccc pppppppp pppppppp
|
||||
// - t* is the tertiar collation value
|
||||
// - c* is the cannonical combining class
|
||||
// - p* is the primary collation value
|
||||
// Collation elements with a secondary value are of the form
|
||||
// 10000000 0000ssss ssssssss tttttttt, where
|
||||
// - 16 BMP implicit -> weight
|
||||
// - 8 bit s
|
||||
// - default tertiary
|
||||
// 1010cccc ccccssss ssssssss tttttttt, where
|
||||
// - c* is the canonical combining class
|
||||
// - s* is the secondary collation value
|
||||
// - t* is the tertiary collation value
|
||||
const (
|
||||
maxPrimaryBits = 21
|
||||
maxPrimaryCompactBits = 16
|
||||
maxSecondaryBits = 12
|
||||
maxSecondaryCompactBits = 8
|
||||
maxCCCBits = 8
|
||||
maxSecondaryDiffBits = 4
|
||||
maxTertiaryBits = 8
|
||||
maxTertiaryCompactBits = 5
|
||||
|
||||
isSecondary = 0x80000000
|
||||
isPrimary = 0x40000000
|
||||
isPrimary = 0x40000000
|
||||
isPrimaryCCC = 0x80000000
|
||||
isSecondary = 0xA0000000
|
||||
)
|
||||
|
||||
func makeCE(weights []int) (uint32, error) {
|
||||
func makeCE(rce rawCE) (uint32, error) {
|
||||
weights := rce.w
|
||||
if w := weights[0]; w >= 1<<maxPrimaryBits || w < 0 {
|
||||
return 0, fmt.Errorf("makeCE: primary weight out of bounds: %x >= %x", w, 1<<maxPrimaryBits)
|
||||
}
|
||||
|
@ -63,14 +81,25 @@ func makeCE(weights []int) (uint32, error) {
|
|||
}
|
||||
ce := uint32(0)
|
||||
if weights[0] != 0 {
|
||||
if weights[2] == defaultTertiary {
|
||||
if rce.ccc != 0 {
|
||||
if weights[0] >= 1<<maxPrimaryCompactBits {
|
||||
return 0, fmt.Errorf("makeCE: primary weight with non-zero CCC out of bounds: %x >= %x", weights[0], 1<<maxPrimaryCompactBits)
|
||||
}
|
||||
if weights[1] != defaultSecondary {
|
||||
return 0, fmt.Errorf("makeCE: cannot combine non-default secondary value (%x) with non-zero CCC (%x)", weights[1], rce.ccc)
|
||||
}
|
||||
ce = uint32(weights[2] << (maxPrimaryCompactBits + maxCCCBits))
|
||||
ce |= uint32(rce.ccc) << maxPrimaryCompactBits
|
||||
ce |= uint32(weights[0])
|
||||
ce |= isPrimaryCCC
|
||||
} else if weights[2] == defaultTertiary {
|
||||
if weights[1] >= 1<<maxSecondaryCompactBits {
|
||||
return 0, fmt.Errorf("makeCE: secondary weight with non-zero primary out of bounds: %x >= %x", weights[1], 1<<maxSecondaryCompactBits)
|
||||
}
|
||||
ce = uint32(weights[0]<<(maxSecondaryCompactBits+1) + weights[1])
|
||||
ce |= isPrimary
|
||||
} else {
|
||||
d := weights[1] - defaultSecondary + 4
|
||||
d := weights[1] - defaultSecondary + maxSecondaryDiffBits
|
||||
if d >= 1<<maxSecondaryDiffBits || d < 0 {
|
||||
return 0, fmt.Errorf("makeCE: secondary weight diff out of bounds: %x < 0 || %x > %x", d, d, 1<<maxSecondaryDiffBits)
|
||||
}
|
||||
|
@ -82,6 +111,7 @@ func makeCE(weights []int) (uint32, error) {
|
|||
}
|
||||
} else {
|
||||
ce = uint32(weights[1]<<maxTertiaryBits + weights[2])
|
||||
ce += uint32(rce.ccc) << (maxSecondaryBits + maxTertiaryBits)
|
||||
ce |= isSecondary
|
||||
}
|
||||
return ce, nil
|
||||
|
@ -207,7 +237,7 @@ func implicitPrimary(r rune) int {
|
|||
// We will rewrite these characters to a single CE.
|
||||
// We assume the CJK values start at 0x8000.
|
||||
// See http://unicode.org/reports/tr10/#Implicit_Weights
|
||||
func convertLargeWeights(elems [][]int) (res [][]int, err error) {
|
||||
func convertLargeWeights(elems []rawCE) (res []rawCE, err error) {
|
||||
const (
|
||||
cjkPrimaryStart = 0xFB40
|
||||
rarePrimaryStart = 0xFB80
|
||||
|
@ -219,7 +249,7 @@ func convertLargeWeights(elems [][]int) (res [][]int, err error) {
|
|||
shiftBits = 15
|
||||
)
|
||||
for i := 0; i < len(elems); i++ {
|
||||
ce := elems[i]
|
||||
ce := elems[i].w
|
||||
p := ce[0]
|
||||
if p < cjkPrimaryStart {
|
||||
continue
|
||||
|
@ -233,10 +263,10 @@ func convertLargeWeights(elems [][]int) (res [][]int, err error) {
|
|||
if i+1 >= len(elems) {
|
||||
return elems, fmt.Errorf("second part of double primary weight missing: %v", elems)
|
||||
}
|
||||
if elems[i+1][0]&lowBitsFlag == 0 {
|
||||
if elems[i+1].w[0]&lowBitsFlag == 0 {
|
||||
return elems, fmt.Errorf("malformed second part of double primary weight: %v", elems)
|
||||
}
|
||||
np := ((p & highBitsMask) << shiftBits) + elems[i+1][0]&lowBitsMask
|
||||
np := ((p & highBitsMask) << shiftBits) + elems[i+1].w[0]&lowBitsMask
|
||||
switch {
|
||||
case p < rarePrimaryStart:
|
||||
np += commonUnifiedOffset
|
||||
|
@ -257,26 +287,25 @@ func convertLargeWeights(elems [][]int) (res [][]int, err error) {
|
|||
|
||||
// nextWeight computes the first possible collation weights following elems
|
||||
// for the given level.
|
||||
func nextWeight(level collate.Level, elems [][]int) [][]int {
|
||||
func nextWeight(level collate.Level, elems []rawCE) []rawCE {
|
||||
if level == collate.Identity {
|
||||
next := make([][]int, len(elems))
|
||||
next := make([]rawCE, len(elems))
|
||||
copy(next, elems)
|
||||
return next
|
||||
}
|
||||
next := [][]int{make([]int, len(elems[0]))}
|
||||
copy(next[0], elems[0])
|
||||
next[0][level]++
|
||||
next := []rawCE{makeRawCE(elems[0].w, elems[0].ccc)}
|
||||
next[0].w[level]++
|
||||
if level < collate.Secondary {
|
||||
next[0][collate.Secondary] = defaultSecondary
|
||||
next[0].w[collate.Secondary] = defaultSecondary
|
||||
}
|
||||
if level < collate.Tertiary {
|
||||
next[0][collate.Tertiary] = defaultTertiary
|
||||
next[0].w[collate.Tertiary] = defaultTertiary
|
||||
}
|
||||
// Filter entries that cannot influence ordering.
|
||||
for _, ce := range elems[1:] {
|
||||
skip := true
|
||||
for i := collate.Primary; i < level; i++ {
|
||||
skip = skip && ce[i] == 0
|
||||
skip = skip && ce.w[i] == 0
|
||||
}
|
||||
if !skip {
|
||||
next = append(next, ce)
|
||||
|
@ -285,18 +314,18 @@ func nextWeight(level collate.Level, elems [][]int) [][]int {
|
|||
return next
|
||||
}
|
||||
|
||||
func nextVal(elems [][]int, i int, level collate.Level) (index, value int) {
|
||||
for ; i < len(elems) && elems[i][level] == 0; i++ {
|
||||
func nextVal(elems []rawCE, i int, level collate.Level) (index, value int) {
|
||||
for ; i < len(elems) && elems[i].w[level] == 0; i++ {
|
||||
}
|
||||
if i < len(elems) {
|
||||
return i, elems[i][level]
|
||||
return i, elems[i].w[level]
|
||||
}
|
||||
return i, 0
|
||||
}
|
||||
|
||||
// compareWeights returns -1 if a < b, 1 if a > b, or 0 otherwise.
|
||||
// It also returns the collation level at which the difference is found.
|
||||
func compareWeights(a, b [][]int) (result int, level collate.Level) {
|
||||
func compareWeights(a, b []rawCE) (result int, level collate.Level) {
|
||||
for level := collate.Primary; level < collate.Identity; level++ {
|
||||
var va, vb int
|
||||
for ia, ib := 0, 0; ia < len(a) || ib < len(b); ia, ib = ia+1, ib+1 {
|
||||
|
@ -314,19 +343,16 @@ func compareWeights(a, b [][]int) (result int, level collate.Level) {
|
|||
return 0, collate.Identity
|
||||
}
|
||||
|
||||
func equalCE(a, b []int) bool {
|
||||
if len(a) != len(b) {
|
||||
return false
|
||||
}
|
||||
func equalCE(a, b rawCE) bool {
|
||||
for i := 0; i < 3; i++ {
|
||||
if b[i] != a[i] {
|
||||
if b.w[i] != a.w[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func equalCEArrays(a, b [][]int) bool {
|
||||
func equalCEArrays(a, b []rawCE) bool {
|
||||
if len(a) != len(b) {
|
||||
return false
|
||||
}
|
||||
|
|
|
@ -16,7 +16,7 @@ type ceTest struct {
|
|||
}
|
||||
|
||||
func normalCE(in []int) (ce uint32, err error) {
|
||||
return makeCE(in)
|
||||
return makeCE(rawCE{w: in[:3], ccc: uint8(in[3])})
|
||||
}
|
||||
|
||||
func expandCE(in []int) (ce uint32, err error) {
|
||||
|
@ -32,17 +32,20 @@ func decompCE(in []int) (ce uint32, err error) {
|
|||
}
|
||||
|
||||
var ceTests = []ceTest{
|
||||
{normalCE, []int{0, 0, 0}, 0x80000000},
|
||||
{normalCE, []int{0, 0x28, 3}, 0x80002803},
|
||||
{normalCE, []int{100, defaultSecondary, 3}, 0x0000C883},
|
||||
{normalCE, []int{0, 0, 0, 0}, 0xA0000000},
|
||||
{normalCE, []int{0, 0x28, 3, 0}, 0xA0002803},
|
||||
{normalCE, []int{0, 0x28, 3, 0xFF}, 0xAFF02803},
|
||||
{normalCE, []int{100, defaultSecondary, 3, 0}, 0x0000C883},
|
||||
// non-ignorable primary with non-default secondary
|
||||
{normalCE, []int{100, 0x28, defaultTertiary}, 0x4000C828},
|
||||
{normalCE, []int{100, defaultSecondary + 8, 3}, 0x0000C983},
|
||||
{normalCE, []int{100, 0, 3}, 0xFFFF}, // non-ignorable primary with non-supported secondary
|
||||
{normalCE, []int{100, 1, 3}, 0xFFFF},
|
||||
{normalCE, []int{1 << maxPrimaryBits, defaultSecondary, 0}, 0xFFFF},
|
||||
{normalCE, []int{0, 1 << maxSecondaryBits, 0}, 0xFFFF},
|
||||
{normalCE, []int{100, defaultSecondary, 1 << maxTertiaryBits}, 0xFFFF},
|
||||
{normalCE, []int{100, 0x28, defaultTertiary, 0}, 0x4000C828},
|
||||
{normalCE, []int{100, defaultSecondary + 8, 3, 0}, 0x0000C983},
|
||||
{normalCE, []int{100, 0, 3, 0}, 0xFFFF}, // non-ignorable primary with non-supported secondary
|
||||
{normalCE, []int{100, 1, 3, 0}, 0xFFFF},
|
||||
{normalCE, []int{1 << maxPrimaryBits, defaultSecondary, 0, 0}, 0xFFFF},
|
||||
{normalCE, []int{0, 1 << maxSecondaryBits, 0, 0}, 0xFFFF},
|
||||
{normalCE, []int{100, defaultSecondary, 1 << maxTertiaryBits, 0}, 0xFFFF},
|
||||
{normalCE, []int{0x123, defaultSecondary, 8, 0xFF}, 0x88FF0123},
|
||||
{normalCE, []int{0x123, defaultSecondary + 1, 8, 0xFF}, 0xFFFF},
|
||||
|
||||
{contractCE, []int{0, 0, 0}, 0xC0000000},
|
||||
{contractCE, []int{1, 1, 1}, 0xC0010011},
|
||||
|
@ -85,6 +88,14 @@ func TestColElem(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func mkRawCES(in [][]int) []rawCE {
|
||||
out := []rawCE{}
|
||||
for _, w := range in {
|
||||
out = append(out, rawCE{w: w})
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
type weightsTest struct {
|
||||
a, b [][]int
|
||||
level collate.Level
|
||||
|
@ -119,8 +130,8 @@ var extra = [][]int{{200, 32, 8, 0}, {0, 32, 8, 0}, {0, 0, 8, 0}, {0, 0, 0, 0}}
|
|||
func TestNextWeight(t *testing.T) {
|
||||
for i, tt := range nextWeightTests {
|
||||
test := func(l collate.Level, tt weightsTest, a, gold [][]int) {
|
||||
res := nextWeight(tt.level, a)
|
||||
if !equalCEArrays(gold, res) {
|
||||
res := nextWeight(tt.level, mkRawCES(a))
|
||||
if !equalCEArrays(mkRawCES(gold), res) {
|
||||
t.Errorf("%d:%d: expected weights %d; found %d", i, l, gold, res)
|
||||
}
|
||||
}
|
||||
|
@ -189,7 +200,7 @@ var compareTests = []weightsTest{
|
|||
func TestCompareWeights(t *testing.T) {
|
||||
for i, tt := range compareTests {
|
||||
test := func(tt weightsTest, a, b [][]int) {
|
||||
res, level := compareWeights(a, b)
|
||||
res, level := compareWeights(mkRawCES(a), mkRawCES(b))
|
||||
if res != tt.result {
|
||||
t.Errorf("%d: expected comparisson result %d; found %d", i, tt.result, res)
|
||||
}
|
||||
|
|
|
@ -6,6 +6,7 @@ package build
|
|||
|
||||
import (
|
||||
"exp/locale/collate"
|
||||
"exp/norm"
|
||||
"fmt"
|
||||
"log"
|
||||
"sort"
|
||||
|
@ -28,7 +29,7 @@ const (
|
|||
type entry struct {
|
||||
str string // same as string(runes)
|
||||
runes []rune
|
||||
elems [][]int // the collation elements
|
||||
elems []rawCE // the collation elements
|
||||
extend string // weights of extend to be appended to elems
|
||||
before bool // weights relative to next instead of previous.
|
||||
lock bool // entry is used in extension and can no longer be moved.
|
||||
|
@ -41,6 +42,7 @@ type entry struct {
|
|||
decompose bool // can use NFKD decomposition to generate elems
|
||||
exclude bool // do not include in table
|
||||
implicit bool // derived, is not included in the list
|
||||
modified bool // entry was modified in tailoring
|
||||
logical logicalAnchor
|
||||
|
||||
expansionIndex int // used to store index into expansion table
|
||||
|
@ -162,10 +164,10 @@ func (e *entry) encode() (ce uint32, err error) {
|
|||
}
|
||||
switch {
|
||||
case e.decompose:
|
||||
t1 := e.elems[0][2]
|
||||
t1 := e.elems[0].w[2]
|
||||
t2 := 0
|
||||
if len(e.elems) > 1 {
|
||||
t2 = e.elems[1][2]
|
||||
t2 = e.elems[1].w[2]
|
||||
}
|
||||
ce, err = makeDecompose(t1, t2)
|
||||
case e.contractionStarter():
|
||||
|
@ -231,7 +233,7 @@ func (o *ordering) insert(e *entry) {
|
|||
|
||||
// newEntry creates a new entry for the given info and inserts it into
|
||||
// the index.
|
||||
func (o *ordering) newEntry(s string, ces [][]int) *entry {
|
||||
func (o *ordering) newEntry(s string, ces []rawCE) *entry {
|
||||
e := &entry{
|
||||
runes: []rune(s),
|
||||
elems: ces,
|
||||
|
@ -249,14 +251,29 @@ func (o *ordering) find(str string) *entry {
|
|||
if e == nil {
|
||||
r := []rune(str)
|
||||
if len(r) == 1 {
|
||||
e = o.newEntry(string(r[0]), [][]int{
|
||||
{
|
||||
implicitPrimary(r[0]),
|
||||
defaultSecondary,
|
||||
defaultTertiary,
|
||||
int(r[0]),
|
||||
},
|
||||
})
|
||||
const (
|
||||
firstHangul = 0xAC00
|
||||
lastHangul = 0xD7A3
|
||||
)
|
||||
if r[0] >= firstHangul && r[0] <= lastHangul {
|
||||
ce := []rawCE{}
|
||||
nfd := norm.NFD.String(str)
|
||||
for _, r := range nfd {
|
||||
ce = append(ce, o.find(string(r)).elems...)
|
||||
}
|
||||
e = o.newEntry(nfd, ce)
|
||||
} else {
|
||||
e = o.newEntry(string(r[0]), []rawCE{
|
||||
{w: []int{
|
||||
implicitPrimary(r[0]),
|
||||
defaultSecondary,
|
||||
defaultTertiary,
|
||||
int(r[0]),
|
||||
},
|
||||
},
|
||||
})
|
||||
e.modified = true
|
||||
}
|
||||
e.exclude = true // do not index implicits
|
||||
}
|
||||
}
|
||||
|
@ -275,7 +292,7 @@ func makeRootOrdering() ordering {
|
|||
}
|
||||
insert := func(typ logicalAnchor, s string, ce []int) {
|
||||
e := &entry{
|
||||
elems: [][]int{ce},
|
||||
elems: []rawCE{{w: ce}},
|
||||
str: s,
|
||||
exclude: true,
|
||||
logical: typ,
|
||||
|
@ -362,10 +379,14 @@ func (o *ordering) sort() {
|
|||
|
||||
// genColElems generates a collation element array from the runes in str. This
|
||||
// assumes that all collation elements have already been added to the Builder.
|
||||
func (o *ordering) genColElems(str string) [][]int {
|
||||
elems := [][]int{}
|
||||
func (o *ordering) genColElems(str string) []rawCE {
|
||||
elems := []rawCE{}
|
||||
for _, r := range []rune(str) {
|
||||
elems = append(elems, o.find(string(r)).elems...)
|
||||
for _, ce := range o.find(string(r)).elems {
|
||||
if ce.w[0] != 0 || ce.w[1] != 0 || ce.w[2] != 0 {
|
||||
elems = append(elems, ce)
|
||||
}
|
||||
}
|
||||
}
|
||||
return elems
|
||||
}
|
||||
|
|
|
@ -20,7 +20,7 @@ type entryTest struct {
|
|||
// entries plus a leading and trailing anchor.
|
||||
func makeList(n int) []*entry {
|
||||
es := make([]*entry, n+2)
|
||||
weights := [][]int{{100, 20, 5, 0}}
|
||||
weights := []rawCE{{w: []int{100, 20, 5, 0}}}
|
||||
for i := range es {
|
||||
runes := []rune{rune(i)}
|
||||
es[i] = &entry{
|
||||
|
@ -176,8 +176,8 @@ type entryLessTest struct {
|
|||
}
|
||||
|
||||
var (
|
||||
w1 = [][]int{{100, 20, 5, 5}}
|
||||
w2 = [][]int{{101, 20, 5, 5}}
|
||||
w1 = []rawCE{{w: []int{100, 20, 5, 5}}}
|
||||
w2 = []rawCE{{w: []int{101, 20, 5, 5}}}
|
||||
)
|
||||
|
||||
var entryLessTests = []entryLessTest{
|
||||
|
|
|
@ -69,30 +69,14 @@ func (t *table) fprint(w io.Writer, name string) (n, size int, err error) {
|
|||
}
|
||||
size += sz
|
||||
}
|
||||
p := func(f string, a ...interface{}) {
|
||||
nn, e := fmt.Fprintf(w, f, a...)
|
||||
update(nn, 0, e)
|
||||
}
|
||||
// Write main table.
|
||||
size += int(reflect.TypeOf(*t).Size())
|
||||
p("var %sTable = table{\n", name)
|
||||
update(t.index.printStruct(w, t.root, name))
|
||||
p(",\n")
|
||||
p("%sExpandElem[:],\n", name)
|
||||
update(t.contractTries.printStruct(w, name))
|
||||
p(",\n")
|
||||
p("%sContractElem[:],\n", name)
|
||||
p("%d,\n", t.maxContractLen)
|
||||
p("0x%X,\n", t.variableTop)
|
||||
p("}\n\n")
|
||||
|
||||
// Write arrays needed for the structure.
|
||||
update(printColElems(w, t.expandElem, name+"ExpandElem"))
|
||||
update(printColElems(w, t.contractElem, name+"ContractElem"))
|
||||
update(t.index.printArrays(w, name))
|
||||
update(t.contractTries.printArray(w, name))
|
||||
|
||||
p("// Total size of %sTable is %d bytes\n", name, size)
|
||||
nn, e := fmt.Fprintf(w, "// Total size of %sTable is %d bytes\n", name, size)
|
||||
update(nn, 0, e)
|
||||
return
|
||||
}
|
||||
|
||||
|
|
|
@ -8,27 +8,43 @@ import (
|
|||
"unicode"
|
||||
)
|
||||
|
||||
// Level identifies the collation comparison level.
|
||||
// The primary level corresponds to the basic sorting of text.
|
||||
// The secondary level corresponds to accents and related linguistic elements.
|
||||
// The tertiary level corresponds to casing and related concepts.
|
||||
// The quaternary level is derived from the other levels by the
|
||||
// various algorithms for handling variable elements.
|
||||
type Level int
|
||||
|
||||
const (
|
||||
Primary Level = iota
|
||||
Secondary
|
||||
Tertiary
|
||||
Quaternary
|
||||
Identity
|
||||
)
|
||||
|
||||
const (
|
||||
defaultSecondary = 0x20
|
||||
defaultTertiary = 0x2
|
||||
maxTertiary = 0x1F
|
||||
maxQuaternary = 0x1FFFFF // 21 bits.
|
||||
MaxQuaternary = 0x1FFFFF // 21 bits.
|
||||
)
|
||||
|
||||
// colElem is a representation of a collation element.
|
||||
// In the typical case, a rune maps to a single collation element. If a rune
|
||||
// can be the start of a contraction or expands into multiple collation elements,
|
||||
// then the colElem that is associated with a rune will have a special form to represent
|
||||
// such m to n mappings. Such special colElems have a value >= 0x80000000.
|
||||
type colElem uint32
|
||||
// Elem is a representation of a collation element. This API provides ways to encode
|
||||
// and decode Elems. Implementations of collation tables may use values greater
|
||||
// or equal to PrivateUse for their own purposes. However, these should never be
|
||||
// returned by AppendNext.
|
||||
type Elem uint32
|
||||
|
||||
const (
|
||||
maxCE colElem = 0x80FFFFFF
|
||||
minContract = 0xC0000000
|
||||
maxContract = 0xDFFFFFFF
|
||||
minExpand = 0xE0000000
|
||||
maxExpand = 0xEFFFFFFF
|
||||
minDecomp = 0xF0000000
|
||||
maxCE Elem = 0xAFFFFFFF
|
||||
PrivateUse = minContract
|
||||
minContract = 0xC0000000
|
||||
maxContract = 0xDFFFFFFF
|
||||
minExpand = 0xE0000000
|
||||
maxExpand = 0xEFFFFFFF
|
||||
minDecomp = 0xF0000000
|
||||
)
|
||||
|
||||
type ceType int
|
||||
|
@ -40,7 +56,7 @@ const (
|
|||
ceDecompose // rune expands using NFKC decomposition
|
||||
)
|
||||
|
||||
func (ce colElem) ctype() ceType {
|
||||
func (ce Elem) ctype() ceType {
|
||||
if ce <= maxCE {
|
||||
return ceNormal
|
||||
}
|
||||
|
@ -62,69 +78,115 @@ func (ce colElem) ctype() ceType {
|
|||
// 01pppppp pppppppp ppppppp0 ssssssss
|
||||
// - p* is primary collation value
|
||||
// - s* is the secondary collation value
|
||||
// or
|
||||
// 00pppppp pppppppp ppppppps sssttttt, where
|
||||
// - p* is primary collation value
|
||||
// - s* offset of secondary from default value.
|
||||
// - t* is the tertiary collation value
|
||||
// 100ttttt cccccccc pppppppp pppppppp
|
||||
// - t* is the tertiar collation value
|
||||
// - c* is the cannonical combining class
|
||||
// - p* is the primary collation value
|
||||
// Collation elements with a secondary value are of the form
|
||||
// 10000000 0000ssss ssssssss tttttttt, where
|
||||
// - 16 BMP implicit -> weight
|
||||
// - 8 bit s
|
||||
// - default tertiary
|
||||
// 1010cccc ccccssss ssssssss tttttttt, where
|
||||
// - c* is the canonical combining class
|
||||
// - s* is the secondary collation value
|
||||
// - t* is the tertiary collation value
|
||||
// 11qqqqqq qqqqqqqq qqqqqqq0 00000000
|
||||
// - q* quaternary value
|
||||
const (
|
||||
ceTypeMask = 0xC0000000
|
||||
ceTypeMaskExt = 0xE0000000
|
||||
ceType1 = 0x40000000
|
||||
ceType2 = 0x00000000
|
||||
ceType3 = 0x80000000
|
||||
ceType3or4 = 0x80000000
|
||||
ceType4 = 0xA0000000
|
||||
ceTypeQ = 0xC0000000
|
||||
ceIgnore = ceType3
|
||||
ceIgnore = ceType4
|
||||
firstNonPrimary = 0x80000000
|
||||
lastSpecialPrimary = 0xA0000000
|
||||
secondaryMask = 0x80000000
|
||||
hasTertiaryMask = 0x40000000
|
||||
primaryValueMask = 0x3FFFFE00
|
||||
primaryShift = 9
|
||||
compactPrimaryBits = 16
|
||||
compactSecondaryShift = 5
|
||||
minCompactSecondary = defaultSecondary - 4
|
||||
)
|
||||
|
||||
func makeImplicitCE(primary int) colElem {
|
||||
return ceType1 | colElem(primary<<primaryShift) | defaultSecondary
|
||||
func makeImplicitCE(primary int) Elem {
|
||||
return ceType1 | Elem(primary<<primaryShift) | defaultSecondary
|
||||
}
|
||||
|
||||
func makeQuaternary(primary int) colElem {
|
||||
return ceTypeQ | colElem(primary<<primaryShift)
|
||||
// MakeElem returns an Elem for the given values. It will return an error
|
||||
// if the given combination of values is invalid.
|
||||
func MakeElem(primary, secondary, tertiary int, ccc uint8) (Elem, error) {
|
||||
// TODO: implement
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
func (ce colElem) primary() int {
|
||||
// MakeQuaternary returns an Elem with the given quaternary value.
|
||||
func MakeQuaternary(v int) Elem {
|
||||
return ceTypeQ | Elem(v<<primaryShift)
|
||||
}
|
||||
|
||||
// Mask sets weights for any level smaller than l to 0.
|
||||
// The resulting Elem can be used to test for equality with
|
||||
// other Elems to which the same mask has been applied.
|
||||
func (ce Elem) Mask(l Level) uint32 {
|
||||
return 0
|
||||
}
|
||||
|
||||
// CCC returns the canoncial combining class associated with the underlying character,
|
||||
// if applicable, or 0 otherwise.
|
||||
func (ce Elem) CCC() uint8 {
|
||||
if ce&ceType3or4 != 0 {
|
||||
if ce&ceType4 == ceType3or4 {
|
||||
return uint8(ce >> 16)
|
||||
}
|
||||
return uint8(ce >> 20)
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// Primary returns the primary collation weight for ce.
|
||||
func (ce Elem) Primary() int {
|
||||
if ce >= firstNonPrimary {
|
||||
return 0
|
||||
if ce > lastSpecialPrimary {
|
||||
return 0
|
||||
}
|
||||
return int(uint16(ce))
|
||||
}
|
||||
return int(ce&primaryValueMask) >> primaryShift
|
||||
}
|
||||
|
||||
func (ce colElem) secondary() int {
|
||||
// Secondary returns the secondary collation weight for ce.
|
||||
func (ce Elem) Secondary() int {
|
||||
switch ce & ceTypeMask {
|
||||
case ceType1:
|
||||
return int(uint8(ce))
|
||||
case ceType2:
|
||||
return minCompactSecondary + int((ce>>compactSecondaryShift)&0xF)
|
||||
case ceType3:
|
||||
return int(uint16(ce >> 8))
|
||||
case ceType3or4:
|
||||
if ce < ceType4 {
|
||||
return defaultSecondary
|
||||
}
|
||||
return int(ce>>8) & 0xFFF
|
||||
case ceTypeQ:
|
||||
return 0
|
||||
}
|
||||
panic("should not reach here")
|
||||
}
|
||||
|
||||
func (ce colElem) tertiary() uint8 {
|
||||
// Tertiary returns the tertiary collation weight for ce.
|
||||
func (ce Elem) Tertiary() uint8 {
|
||||
if ce&hasTertiaryMask == 0 {
|
||||
if ce&ceType3 == 0 {
|
||||
if ce&ceType3or4 == 0 {
|
||||
return uint8(ce & 0x1F)
|
||||
}
|
||||
return uint8(ce)
|
||||
if ce&ceType4 == ceType4 {
|
||||
return uint8(ce)
|
||||
}
|
||||
return uint8(ce>>24) & 0x1F // type 2
|
||||
} else if ce&ceTypeMask == ceType1 {
|
||||
return defaultTertiary
|
||||
}
|
||||
|
@ -132,27 +194,47 @@ func (ce colElem) tertiary() uint8 {
|
|||
return 0
|
||||
}
|
||||
|
||||
func (ce colElem) updateTertiary(t uint8) colElem {
|
||||
func (ce Elem) updateTertiary(t uint8) Elem {
|
||||
if ce&ceTypeMask == ceType1 {
|
||||
// convert to type 4
|
||||
nce := ce & primaryValueMask
|
||||
nce |= colElem(uint8(ce)-minCompactSecondary) << compactSecondaryShift
|
||||
nce |= Elem(uint8(ce)-minCompactSecondary) << compactSecondaryShift
|
||||
ce = nce
|
||||
} else if ce&ceTypeMaskExt == ceType3or4 {
|
||||
ce &= ^Elem(maxTertiary << 24)
|
||||
return ce | (Elem(t) << 24)
|
||||
} else {
|
||||
ce &= ^colElem(maxTertiary)
|
||||
// type 2 or 4
|
||||
ce &= ^Elem(maxTertiary)
|
||||
}
|
||||
return ce | colElem(t)
|
||||
return ce | Elem(t)
|
||||
}
|
||||
|
||||
// quaternary returns the quaternary value if explicitly specified,
|
||||
// 0 if ce == ceIgnore, or maxQuaternary otherwise.
|
||||
// Quaternary returns the quaternary value if explicitly specified,
|
||||
// 0 if ce == ceIgnore, or MaxQuaternary otherwise.
|
||||
// Quaternary values are used only for shifted variants.
|
||||
func (ce colElem) quaternary() int {
|
||||
func (ce Elem) Quaternary() int {
|
||||
if ce&ceTypeMask == ceTypeQ {
|
||||
return int(ce&primaryValueMask) >> primaryShift
|
||||
} else if ce == ceIgnore {
|
||||
return 0
|
||||
}
|
||||
return maxQuaternary
|
||||
return MaxQuaternary
|
||||
}
|
||||
|
||||
// Weight returns the collation weight for the given level.
|
||||
func (ce Elem) Weight(l Level) int {
|
||||
switch l {
|
||||
case Primary:
|
||||
return ce.Primary()
|
||||
case Secondary:
|
||||
return ce.Secondary()
|
||||
case Tertiary:
|
||||
return int(ce.Tertiary())
|
||||
case Quaternary:
|
||||
return ce.Quaternary()
|
||||
}
|
||||
return 0 // return 0 (ignore) for undefined levels.
|
||||
}
|
||||
|
||||
// For contractions, collation elements are of the form
|
||||
|
@ -167,7 +249,7 @@ const (
|
|||
maxContractOffsetBits = 13
|
||||
)
|
||||
|
||||
func splitContractIndex(ce colElem) (index, n, offset int) {
|
||||
func splitContractIndex(ce Elem) (index, n, offset int) {
|
||||
n = int(ce & (1<<maxNBits - 1))
|
||||
ce >>= maxNBits
|
||||
index = int(ce & (1<<maxTrieIndexBits - 1))
|
||||
|
@ -176,23 +258,23 @@ func splitContractIndex(ce colElem) (index, n, offset int) {
|
|||
return
|
||||
}
|
||||
|
||||
// For expansions, colElems are of the form 11100000 00000000 bbbbbbbb bbbbbbbb,
|
||||
// For expansions, Elems are of the form 11100000 00000000 bbbbbbbb bbbbbbbb,
|
||||
// where b* is the index into the expansion sequence table.
|
||||
const maxExpandIndexBits = 16
|
||||
|
||||
func splitExpandIndex(ce colElem) (index int) {
|
||||
func splitExpandIndex(ce Elem) (index int) {
|
||||
return int(uint16(ce))
|
||||
}
|
||||
|
||||
// Some runes can be expanded using NFKD decomposition. Instead of storing the full
|
||||
// sequence of collation elements, we decompose the rune and lookup the collation
|
||||
// elements for each rune in the decomposition and modify the tertiary weights.
|
||||
// The colElem, in this case, is of the form 11110000 00000000 wwwwwwww vvvvvvvv, where
|
||||
// The Elem, in this case, is of the form 11110000 00000000 wwwwwwww vvvvvvvv, where
|
||||
// - v* is the replacement tertiary weight for the first rune,
|
||||
// - w* is the replacement tertiary weight for the second rune,
|
||||
// Tertiary weights of subsequent runes should be replaced with maxTertiary.
|
||||
// See http://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details.
|
||||
func splitDecompose(ce colElem) (t1, t2 uint8) {
|
||||
func splitDecompose(ce Elem) (t1, t2 uint8) {
|
||||
return uint8(ce), uint8(ce >> 8)
|
||||
}
|
||||
|
||||
|
|
|
@ -10,12 +10,12 @@ import (
|
|||
)
|
||||
|
||||
type ceTest struct {
|
||||
f func(inout []int) (colElem, ceType)
|
||||
f func(inout []int) (Elem, ceType)
|
||||
arg []int
|
||||
}
|
||||
|
||||
// The make* funcs are simplified versions of the functions in build/colelem.go
|
||||
func makeCE(weights []int) colElem {
|
||||
func makeCE(weights []int) Elem {
|
||||
const (
|
||||
maxPrimaryBits = 21
|
||||
maxSecondaryBits = 12
|
||||
|
@ -23,72 +23,81 @@ func makeCE(weights []int) colElem {
|
|||
maxSecondaryDiffBits = 4
|
||||
maxTertiaryBits = 8
|
||||
maxTertiaryCompactBits = 5
|
||||
isSecondary = 0x80000000
|
||||
isPrimary = 0x40000000
|
||||
isPrimaryCCC = 0x80000000
|
||||
isSecondary = 0xA0000000
|
||||
)
|
||||
var ce colElem
|
||||
var ce Elem
|
||||
ccc := weights[3]
|
||||
if weights[0] != 0 {
|
||||
if weights[2] == defaultTertiary {
|
||||
ce = colElem(weights[0]<<(maxSecondaryCompactBits+1) + weights[1])
|
||||
if ccc != 0 {
|
||||
ce = Elem(weights[2] << 24)
|
||||
ce |= Elem(ccc) << 16
|
||||
ce |= Elem(weights[0])
|
||||
ce |= isPrimaryCCC
|
||||
} else if weights[2] == defaultTertiary {
|
||||
ce = Elem(weights[0]<<(maxSecondaryCompactBits+1) + weights[1])
|
||||
ce |= isPrimary
|
||||
} else {
|
||||
d := weights[1] - defaultSecondary + 4
|
||||
ce = colElem(weights[0]<<maxSecondaryDiffBits + d)
|
||||
ce = ce<<maxTertiaryCompactBits + colElem(weights[2])
|
||||
ce = Elem(weights[0]<<maxSecondaryDiffBits + d)
|
||||
ce = ce<<maxTertiaryCompactBits + Elem(weights[2])
|
||||
}
|
||||
} else {
|
||||
ce = colElem(weights[1]<<maxTertiaryBits + weights[2])
|
||||
ce = Elem(weights[1]<<maxTertiaryBits + weights[2])
|
||||
ce += Elem(ccc) << 20
|
||||
ce |= isSecondary
|
||||
}
|
||||
return ce
|
||||
}
|
||||
|
||||
func makeContractIndex(index, n, offset int) colElem {
|
||||
func makeContractIndex(index, n, offset int) Elem {
|
||||
const (
|
||||
contractID = 0xC0000000
|
||||
maxNBits = 4
|
||||
maxTrieIndexBits = 12
|
||||
maxContractOffsetBits = 13
|
||||
)
|
||||
ce := colElem(contractID)
|
||||
ce += colElem(offset << (maxNBits + maxTrieIndexBits))
|
||||
ce += colElem(index << maxNBits)
|
||||
ce += colElem(n)
|
||||
ce := Elem(contractID)
|
||||
ce += Elem(offset << (maxNBits + maxTrieIndexBits))
|
||||
ce += Elem(index << maxNBits)
|
||||
ce += Elem(n)
|
||||
return ce
|
||||
}
|
||||
|
||||
func makeExpandIndex(index int) colElem {
|
||||
func makeExpandIndex(index int) Elem {
|
||||
const expandID = 0xE0000000
|
||||
return expandID + colElem(index)
|
||||
return expandID + Elem(index)
|
||||
}
|
||||
|
||||
func makeDecompose(t1, t2 int) colElem {
|
||||
func makeDecompose(t1, t2 int) Elem {
|
||||
const decompID = 0xF0000000
|
||||
return colElem(t2<<8+t1) + decompID
|
||||
return Elem(t2<<8+t1) + decompID
|
||||
}
|
||||
|
||||
func normalCE(inout []int) (ce colElem, t ceType) {
|
||||
w := makeCE(inout)
|
||||
inout[0] = w.primary()
|
||||
inout[1] = w.secondary()
|
||||
inout[2] = int(w.tertiary())
|
||||
func normalCE(inout []int) (ce Elem, t ceType) {
|
||||
ce = makeCE(inout)
|
||||
inout[0] = ce.Primary()
|
||||
inout[1] = ce.Secondary()
|
||||
inout[2] = int(ce.Tertiary())
|
||||
inout[3] = int(ce.CCC())
|
||||
return ce, ceNormal
|
||||
}
|
||||
|
||||
func expandCE(inout []int) (ce colElem, t ceType) {
|
||||
func expandCE(inout []int) (ce Elem, t ceType) {
|
||||
ce = makeExpandIndex(inout[0])
|
||||
inout[0] = splitExpandIndex(ce)
|
||||
return ce, ceExpansionIndex
|
||||
}
|
||||
|
||||
func contractCE(inout []int) (ce colElem, t ceType) {
|
||||
func contractCE(inout []int) (ce Elem, t ceType) {
|
||||
ce = makeContractIndex(inout[0], inout[1], inout[2])
|
||||
i, n, o := splitContractIndex(ce)
|
||||
inout[0], inout[1], inout[2] = i, n, o
|
||||
return ce, ceContractionIndex
|
||||
}
|
||||
|
||||
func decompCE(inout []int) (ce colElem, t ceType) {
|
||||
func decompCE(inout []int) (ce Elem, t ceType) {
|
||||
ce = makeDecompose(inout[0], inout[1])
|
||||
t1, t2 := splitDecompose(ce)
|
||||
inout[0], inout[1] = int(t1), int(t2)
|
||||
|
@ -102,9 +111,13 @@ const (
|
|||
)
|
||||
|
||||
var ceTests = []ceTest{
|
||||
{normalCE, []int{0, 0, 0}},
|
||||
{normalCE, []int{0, 30, 3}},
|
||||
{normalCE, []int{100, defaultSecondary, 3}},
|
||||
{normalCE, []int{0, 0, 0, 0}},
|
||||
{normalCE, []int{0, 30, 3, 0}},
|
||||
{normalCE, []int{0, 30, 3, 0xFF}},
|
||||
{normalCE, []int{100, defaultSecondary, defaultTertiary, 0}},
|
||||
{normalCE, []int{100, defaultSecondary, defaultTertiary, 0xFF}},
|
||||
{normalCE, []int{100, defaultSecondary, 3, 0}},
|
||||
{normalCE, []int{0x123, defaultSecondary, 8, 0xFF}},
|
||||
|
||||
{contractCE, []int{0, 0, 0}},
|
||||
{contractCE, []int{1, 1, 1}},
|
||||
|
@ -127,11 +140,11 @@ func TestColElem(t *testing.T) {
|
|||
copy(inout, tt.arg)
|
||||
ce, typ := tt.f(inout)
|
||||
if ce.ctype() != typ {
|
||||
t.Errorf("%d: type is %d; want %d", i, ce.ctype(), typ)
|
||||
t.Errorf("%d: type is %d; want %d (ColElem: %X)", i, ce.ctype(), typ, ce)
|
||||
}
|
||||
for j, a := range tt.arg {
|
||||
if inout[j] != a {
|
||||
t.Errorf("%d: argument %d is %X; want %X", i, j, inout[j], a)
|
||||
t.Errorf("%d: argument %d is %X; want %X (ColElem: %X)", i, j, inout[j], a, ce)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -170,13 +183,14 @@ func TestImplicit(t *testing.T) {
|
|||
|
||||
func TestUpdateTertiary(t *testing.T) {
|
||||
tests := []struct {
|
||||
in, out colElem
|
||||
in, out Elem
|
||||
t uint8
|
||||
}{
|
||||
{0x4000FE20, 0x0000FE8A, 0x0A},
|
||||
{0x4000FE21, 0x0000FEAA, 0x0A},
|
||||
{0x0000FE8B, 0x0000FE83, 0x03},
|
||||
{0x8000CC02, 0x8000CC1B, 0x1B},
|
||||
{0x82FF0188, 0x9BFF0188, 0x1B},
|
||||
{0xAFF0CC02, 0xAFF0CC1B, 0x1B},
|
||||
}
|
||||
for i, tt := range tests {
|
||||
if out := tt.in.updateTertiary(tt.t); out != tt.out {
|
||||
|
@ -184,3 +198,77 @@ func TestUpdateTertiary(t *testing.T) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDoNorm(t *testing.T) {
|
||||
const div = -1 // The insertion point of the next block.
|
||||
tests := []struct {
|
||||
in, out []int
|
||||
}{
|
||||
{in: []int{4, div, 3},
|
||||
out: []int{3, 4},
|
||||
},
|
||||
{in: []int{4, div, 3, 3, 3},
|
||||
out: []int{3, 3, 3, 4},
|
||||
},
|
||||
{in: []int{0, 4, div, 3},
|
||||
out: []int{0, 3, 4},
|
||||
},
|
||||
{in: []int{0, 0, 4, 5, div, 3, 3},
|
||||
out: []int{0, 0, 3, 3, 4, 5},
|
||||
},
|
||||
{in: []int{0, 0, 1, 4, 5, div, 3, 3},
|
||||
out: []int{0, 0, 1, 3, 3, 4, 5},
|
||||
},
|
||||
{in: []int{0, 0, 1, 4, 5, div, 4, 4},
|
||||
out: []int{0, 0, 1, 4, 4, 4, 5},
|
||||
},
|
||||
}
|
||||
for j, tt := range tests {
|
||||
i := iter{}
|
||||
var w, p, s int
|
||||
for k, cc := range tt.in {
|
||||
if cc == 0 {
|
||||
s = 0
|
||||
}
|
||||
if cc == div {
|
||||
w = 100
|
||||
p = k
|
||||
i.pStarter = s
|
||||
continue
|
||||
}
|
||||
i.ce = append(i.ce, makeCE([]int{w, 20, 2, cc}))
|
||||
}
|
||||
i.prevCCC = i.ce[p-1].CCC()
|
||||
i.doNorm(p, i.ce[p].CCC())
|
||||
if len(i.ce) != len(tt.out) {
|
||||
t.Errorf("%d: length was %d; want %d", j, len(i.ce), len(tt.out))
|
||||
}
|
||||
prevCCC := uint8(0)
|
||||
for k, ce := range i.ce {
|
||||
if int(ce.CCC()) != tt.out[k] {
|
||||
t.Errorf("%d:%d: unexpected CCC. Was %d; want %d", j, k, ce.CCC(), tt.out[k])
|
||||
}
|
||||
if k > 0 && ce.CCC() == prevCCC && i.ce[k-1].Primary() > ce.Primary() {
|
||||
t.Errorf("%d:%d: normalization crossed across CCC boundary.", j, k)
|
||||
}
|
||||
}
|
||||
}
|
||||
// test cutoff of large sequence of combining characters.
|
||||
result := []uint8{8, 8, 8, 5, 5}
|
||||
for o := -2; o <= 2; o++ {
|
||||
i := iter{pStarter: 2, prevCCC: 8}
|
||||
n := maxCombiningCharacters + 1 + o
|
||||
for j := 1; j < n+i.pStarter; j++ {
|
||||
i.ce = append(i.ce, makeCE([]int{100, 20, 2, 8}))
|
||||
}
|
||||
p := len(i.ce)
|
||||
i.ce = append(i.ce, makeCE([]int{0, 20, 2, 5}))
|
||||
i.doNorm(p, 5)
|
||||
if i.prevCCC != result[o+2] {
|
||||
t.Errorf("%d: i.prevCCC was %d; want %d", n, i.prevCCC, result[o+2])
|
||||
}
|
||||
if result[o+2] == 5 && i.pStarter != p {
|
||||
t.Errorf("%d: i.pStarter was %d; want %d", n, i.pStarter, p)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -12,22 +12,6 @@ import (
|
|||
"exp/norm"
|
||||
)
|
||||
|
||||
// Level identifies the collation comparison level.
|
||||
// The primary level corresponds to the basic sorting of text.
|
||||
// The secondary level corresponds to accents and related linguistic elements.
|
||||
// The tertiary level corresponds to casing and related concepts.
|
||||
// The quaternary level is derived from the other levels by the
|
||||
// various algorithms for handling variable elements.
|
||||
type Level int
|
||||
|
||||
const (
|
||||
Primary Level = iota
|
||||
Secondary
|
||||
Tertiary
|
||||
Quaternary
|
||||
Identity
|
||||
)
|
||||
|
||||
// AlternateHandling identifies the various ways in which variables are handled.
|
||||
// A rune with a primary weight lower than the variable top is considered a
|
||||
// variable.
|
||||
|
@ -55,6 +39,12 @@ const (
|
|||
// Collator provides functionality for comparing strings for a given
|
||||
// collation order.
|
||||
type Collator struct {
|
||||
// TODO: hide most of these options. Low-level options are set through the locale
|
||||
// identifier (as defined by LDML) while high-level options are set through SetOptions.
|
||||
// Using high-level options allows us to be more flexible (such as not ignoring
|
||||
// Thai vowels for IgnoreDiacriticals) and more user-friendly (such as allowing
|
||||
// diacritical marks to be ignored but not case without having to fiddle with levels).
|
||||
|
||||
// Strength sets the maximum level to use in comparison.
|
||||
Strength Level
|
||||
|
||||
|
@ -80,13 +70,39 @@ type Collator struct {
|
|||
// at a primary level with its numeric value. For example, "A-21" < "A-123".
|
||||
Numeric bool
|
||||
|
||||
// The largest primary value that is considered to be variable.
|
||||
variableTop uint32
|
||||
|
||||
f norm.Form
|
||||
|
||||
t *table
|
||||
t Weigher
|
||||
|
||||
sorter sorter
|
||||
|
||||
_iter [2]iter
|
||||
}
|
||||
|
||||
// An Option is used to change the behavior of Collator. They override the
|
||||
// settings passed through the locale identifier.
|
||||
type Option int
|
||||
|
||||
const (
|
||||
Numeric Option = 1 << iota // Sort numbers numerically ("2" < "12").
|
||||
IgnoreCase // Case-insensitive search.
|
||||
IgnoreDiacritics // Ignore diacritical marks. ("o" == "ö").
|
||||
IgnoreWidth // Ignore full versus normal width.
|
||||
UpperFirst // Sort upper case before lower case.
|
||||
LowerFirst // Sort lower case before upper case.
|
||||
Force // Force ordering if strings are equivalent but not equal.
|
||||
|
||||
Loose = IgnoreDiacritics | IgnoreWidth | IgnoreCase
|
||||
)
|
||||
|
||||
// SetOptions accepts a Options or-ed together. All previous calls to SetOptions are ignored.
|
||||
func (c *Collator) SetOptions(o Option) {
|
||||
// TODO: implement
|
||||
}
|
||||
|
||||
func (c *Collator) iter(i int) *iter {
|
||||
// TODO: evaluate performance for making the second iterator optional.
|
||||
return &c._iter[i]
|
||||
|
@ -101,18 +117,20 @@ func Locales() []string {
|
|||
// New returns a new Collator initialized for the given locale.
|
||||
func New(loc string) *Collator {
|
||||
// TODO: handle locale selection according to spec.
|
||||
t := &mainTable
|
||||
var t tableIndex
|
||||
if loc != "" {
|
||||
if idx, ok := locales[loc]; ok {
|
||||
t = mainTable.indexedTable(idx)
|
||||
t = idx
|
||||
} else {
|
||||
t = locales["root"]
|
||||
}
|
||||
}
|
||||
return newCollator(t)
|
||||
return NewFromTable(Init(t))
|
||||
}
|
||||
|
||||
func newCollator(t *table) *Collator {
|
||||
func NewFromTable(t Weigher) *Collator {
|
||||
c := &Collator{
|
||||
Strength: Quaternary,
|
||||
Strength: Tertiary,
|
||||
f: norm.NFD,
|
||||
t: t,
|
||||
}
|
||||
|
@ -121,12 +139,6 @@ func newCollator(t *table) *Collator {
|
|||
return c
|
||||
}
|
||||
|
||||
// SetVariableTop sets all runes with primary strength less than the primary
|
||||
// strength of r to be variable and thus affected by alternate handling.
|
||||
func (c *Collator) SetVariableTop(r rune) {
|
||||
// TODO: implement
|
||||
}
|
||||
|
||||
// Buffer holds keys generated by Key and KeyString.
|
||||
type Buffer struct {
|
||||
buf [4096]byte
|
||||
|
@ -149,8 +161,8 @@ func (b *Buffer) Reset() {
|
|||
func (c *Collator) Compare(a, b []byte) int {
|
||||
// TODO: skip identical prefixes once we have a fast way to detect if a rune is
|
||||
// part of a contraction. This would lead to roughly a 10% speedup for the colcmp regtest.
|
||||
c.iter(0).setInput(c, a)
|
||||
c.iter(1).setInput(c, b)
|
||||
c.iter(0).setInput(a)
|
||||
c.iter(1).setInput(b)
|
||||
if res := c.compare(); res != 0 {
|
||||
return res
|
||||
}
|
||||
|
@ -165,8 +177,8 @@ func (c *Collator) Compare(a, b []byte) int {
|
|||
func (c *Collator) CompareString(a, b string) int {
|
||||
// TODO: skip identical prefixes once we have a fast way to detect if a rune is
|
||||
// part of a contraction. This would lead to roughly a 10% speedup for the colcmp regtest.
|
||||
c.iter(0).setInputString(c, a)
|
||||
c.iter(1).setInputString(c, b)
|
||||
c.iter(0).setInputString(a)
|
||||
c.iter(1).setInputString(b)
|
||||
if res := c.compare(); res != 0 {
|
||||
return res
|
||||
}
|
||||
|
@ -234,11 +246,6 @@ func (c *Collator) compare() int {
|
|||
return 0
|
||||
}
|
||||
|
||||
func (c *Collator) Prefix(s, prefix []byte) int {
|
||||
// iterate over s, track bytes consumed.
|
||||
return 0
|
||||
}
|
||||
|
||||
// Key returns the collation key for str.
|
||||
// Passing the buffer buf may avoid memory allocations.
|
||||
// The returned slice will point to an allocation in Buffer and will remain
|
||||
|
@ -259,114 +266,184 @@ func (c *Collator) KeyFromString(buf *Buffer, str string) []byte {
|
|||
return c.key(buf, c.getColElemsString(str))
|
||||
}
|
||||
|
||||
func (c *Collator) key(buf *Buffer, w []colElem) []byte {
|
||||
processWeights(c.Alternate, c.t.variableTop, w)
|
||||
func (c *Collator) key(buf *Buffer, w []Elem) []byte {
|
||||
processWeights(c.Alternate, c.variableTop, w)
|
||||
kn := len(buf.key)
|
||||
c.keyFromElems(buf, w)
|
||||
return buf.key[kn:]
|
||||
}
|
||||
|
||||
func (c *Collator) getColElems(str []byte) []colElem {
|
||||
func (c *Collator) getColElems(str []byte) []Elem {
|
||||
i := c.iter(0)
|
||||
i.setInput(c, str)
|
||||
for !i.done() {
|
||||
i.next()
|
||||
i.setInput(str)
|
||||
for i.next() {
|
||||
}
|
||||
return i.ce
|
||||
}
|
||||
|
||||
func (c *Collator) getColElemsString(str string) []colElem {
|
||||
func (c *Collator) getColElemsString(str string) []Elem {
|
||||
i := c.iter(0)
|
||||
i.setInputString(c, str)
|
||||
for !i.done() {
|
||||
i.next()
|
||||
i.setInputString(str)
|
||||
for i.next() {
|
||||
}
|
||||
return i.ce
|
||||
}
|
||||
|
||||
type iter struct {
|
||||
src norm.Iter
|
||||
norm [1024]byte
|
||||
buf []byte
|
||||
p int
|
||||
minBufSize int
|
||||
bytes []byte
|
||||
str string
|
||||
|
||||
wa [512]colElem
|
||||
ce []colElem
|
||||
wa [512]Elem
|
||||
ce []Elem
|
||||
pce int
|
||||
nce int // nce <= len(nce)
|
||||
|
||||
t *table
|
||||
_done, eof bool
|
||||
prevCCC uint8
|
||||
pStarter int
|
||||
|
||||
t Weigher
|
||||
}
|
||||
|
||||
func (i *iter) init(c *Collator) {
|
||||
i.t = c.t
|
||||
i.minBufSize = c.t.maxContractLen
|
||||
i.ce = i.wa[:0]
|
||||
i.buf = i.norm[:0]
|
||||
}
|
||||
|
||||
func (i *iter) reset() {
|
||||
i.ce = i.ce[:0]
|
||||
i.buf = i.buf[:0]
|
||||
i.p = 0
|
||||
i.eof = i.src.Done()
|
||||
i._done = i.eof
|
||||
i.nce = 0
|
||||
i.prevCCC = 0
|
||||
i.pStarter = 0
|
||||
}
|
||||
|
||||
func (i *iter) setInput(c *Collator, s []byte) *iter {
|
||||
i.src.SetInput(c.f, s)
|
||||
func (i *iter) setInput(s []byte) *iter {
|
||||
i.bytes = s
|
||||
i.str = ""
|
||||
i.reset()
|
||||
return i
|
||||
}
|
||||
|
||||
func (i *iter) setInputString(c *Collator, s string) *iter {
|
||||
i.src.SetInputString(c.f, s)
|
||||
func (i *iter) setInputString(s string) *iter {
|
||||
i.str = s
|
||||
i.bytes = nil
|
||||
i.reset()
|
||||
return i
|
||||
}
|
||||
|
||||
func (i *iter) done() bool {
|
||||
return i._done
|
||||
return len(i.str) == 0 && len(i.bytes) == 0
|
||||
}
|
||||
|
||||
func (i *iter) next() {
|
||||
if !i.eof && len(i.buf)-i.p < i.minBufSize {
|
||||
// replenish buffer
|
||||
n := copy(i.buf, i.buf[i.p:])
|
||||
n += i.src.Next(i.buf[n:cap(i.buf)])
|
||||
i.buf = i.buf[:n]
|
||||
i.p = 0
|
||||
i.eof = i.src.Done()
|
||||
func (i *iter) tail(n int) {
|
||||
if i.bytes == nil {
|
||||
i.str = i.str[n:]
|
||||
} else {
|
||||
i.bytes = i.bytes[n:]
|
||||
}
|
||||
if i.p == len(i.buf) {
|
||||
i._done = true
|
||||
}
|
||||
|
||||
func (i *iter) appendNext() int {
|
||||
var sz int
|
||||
if i.bytes == nil {
|
||||
i.ce, sz = i.t.AppendNextString(i.ce, i.str)
|
||||
} else {
|
||||
i.ce, sz = i.t.AppendNext(i.ce, i.bytes)
|
||||
}
|
||||
return sz
|
||||
}
|
||||
|
||||
// next appends Elems to the internal array until it adds an element with CCC=0.
|
||||
// In the majority of cases, a Elem with a primary value > 0 will have
|
||||
// a CCC of 0. The CCC values of colation elements are also used to detect if the
|
||||
// input string was not normalized and to adjust the result accordingly.
|
||||
func (i *iter) next() bool {
|
||||
for !i.done() {
|
||||
p0 := len(i.ce)
|
||||
sz := i.appendNext()
|
||||
i.tail(sz)
|
||||
last := len(i.ce) - 1
|
||||
if ccc := i.ce[last].CCC(); ccc == 0 {
|
||||
i.nce = len(i.ce)
|
||||
i.pStarter = last
|
||||
i.prevCCC = 0
|
||||
return true
|
||||
} else if p0 < last && i.ce[p0].CCC() == 0 {
|
||||
// set i.nce to only cover part of i.ce for which ccc == 0 and
|
||||
// use rest the next call to next.
|
||||
for p0++; p0 < last && i.ce[p0].CCC() == 0; p0++ {
|
||||
}
|
||||
i.nce = p0
|
||||
i.pStarter = p0 - 1
|
||||
i.prevCCC = ccc
|
||||
return true
|
||||
} else if ccc < i.prevCCC {
|
||||
i.doNorm(p0, ccc) // should be rare for most common cases
|
||||
} else {
|
||||
i.prevCCC = ccc
|
||||
}
|
||||
}
|
||||
if len(i.ce) != i.nce {
|
||||
i.nce = len(i.ce)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// nextPlain is the same as next, but does not "normalize" the collation
|
||||
// elements.
|
||||
// TODO: remove this function. Using this instead of next does not seem
|
||||
// to improve performance in any significant way. We retain this until
|
||||
// later for evaluation purposes.
|
||||
func (i *iter) nextPlain() bool {
|
||||
if i.done() {
|
||||
return false
|
||||
}
|
||||
sz := i.appendNext()
|
||||
i.tail(sz)
|
||||
i.nce = len(i.ce)
|
||||
return true
|
||||
}
|
||||
|
||||
const maxCombiningCharacters = 30
|
||||
|
||||
// doNorm reorders the collation elements in i.ce.
|
||||
// It assumes that blocks of collation elements added with appendNext
|
||||
// either start and end with the same CCC or start with CCC == 0.
|
||||
// This allows for a single insertion point for the entire block.
|
||||
// The correctness of this assumption is verified in builder.go.
|
||||
func (i *iter) doNorm(p int, ccc uint8) {
|
||||
if p-i.pStarter > maxCombiningCharacters {
|
||||
i.prevCCC = i.ce[len(i.ce)-1].CCC()
|
||||
i.pStarter = len(i.ce) - 1
|
||||
return
|
||||
}
|
||||
sz := 0
|
||||
i.ce, sz = i.t.appendNext(i.ce, i.buf[i.p:])
|
||||
i.p += sz
|
||||
n := len(i.ce)
|
||||
k := p
|
||||
for p--; p > i.pStarter && ccc < i.ce[p-1].CCC(); p-- {
|
||||
}
|
||||
i.ce = append(i.ce, i.ce[p:k]...)
|
||||
copy(i.ce[p:], i.ce[k:])
|
||||
i.ce = i.ce[:n]
|
||||
}
|
||||
|
||||
func (i *iter) nextPrimary() int {
|
||||
for {
|
||||
for ; i.pce < len(i.ce); i.pce++ {
|
||||
if v := i.ce[i.pce].primary(); v != 0 {
|
||||
for ; i.pce < i.nce; i.pce++ {
|
||||
if v := i.ce[i.pce].Primary(); v != 0 {
|
||||
i.pce++
|
||||
return v
|
||||
}
|
||||
}
|
||||
if i.done() {
|
||||
if !i.next() {
|
||||
return 0
|
||||
}
|
||||
i.next()
|
||||
}
|
||||
panic("should not reach here")
|
||||
}
|
||||
|
||||
func (i *iter) nextSecondary() int {
|
||||
for ; i.pce < len(i.ce); i.pce++ {
|
||||
if v := i.ce[i.pce].secondary(); v != 0 {
|
||||
if v := i.ce[i.pce].Secondary(); v != 0 {
|
||||
i.pce++
|
||||
return v
|
||||
}
|
||||
|
@ -376,7 +453,7 @@ func (i *iter) nextSecondary() int {
|
|||
|
||||
func (i *iter) prevSecondary() int {
|
||||
for ; i.pce < len(i.ce); i.pce++ {
|
||||
if v := i.ce[len(i.ce)-i.pce-1].secondary(); v != 0 {
|
||||
if v := i.ce[len(i.ce)-i.pce-1].Secondary(); v != 0 {
|
||||
i.pce++
|
||||
return v
|
||||
}
|
||||
|
@ -386,7 +463,7 @@ func (i *iter) prevSecondary() int {
|
|||
|
||||
func (i *iter) nextTertiary() int {
|
||||
for ; i.pce < len(i.ce); i.pce++ {
|
||||
if v := i.ce[i.pce].tertiary(); v != 0 {
|
||||
if v := i.ce[i.pce].Tertiary(); v != 0 {
|
||||
i.pce++
|
||||
return int(v)
|
||||
}
|
||||
|
@ -396,7 +473,7 @@ func (i *iter) nextTertiary() int {
|
|||
|
||||
func (i *iter) nextQuaternary() int {
|
||||
for ; i.pce < len(i.ce); i.pce++ {
|
||||
if v := i.ce[i.pce].quaternary(); v != 0 {
|
||||
if v := i.ce[i.pce].Quaternary(); v != 0 {
|
||||
i.pce++
|
||||
return v
|
||||
}
|
||||
|
@ -416,9 +493,9 @@ func appendPrimary(key []byte, p int) []byte {
|
|||
|
||||
// keyFromElems converts the weights ws to a compact sequence of bytes.
|
||||
// The result will be appended to the byte buffer in buf.
|
||||
func (c *Collator) keyFromElems(buf *Buffer, ws []colElem) {
|
||||
func (c *Collator) keyFromElems(buf *Buffer, ws []Elem) {
|
||||
for _, v := range ws {
|
||||
if w := v.primary(); w > 0 {
|
||||
if w := v.Primary(); w > 0 {
|
||||
buf.key = appendPrimary(buf.key, w)
|
||||
}
|
||||
}
|
||||
|
@ -427,13 +504,13 @@ func (c *Collator) keyFromElems(buf *Buffer, ws []colElem) {
|
|||
// TODO: we can use one 0 if we can guarantee that all non-zero weights are > 0xFF.
|
||||
if !c.Backwards {
|
||||
for _, v := range ws {
|
||||
if w := v.secondary(); w > 0 {
|
||||
if w := v.Secondary(); w > 0 {
|
||||
buf.key = append(buf.key, uint8(w>>8), uint8(w))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for i := len(ws) - 1; i >= 0; i-- {
|
||||
if w := ws[i].secondary(); w > 0 {
|
||||
if w := ws[i].Secondary(); w > 0 {
|
||||
buf.key = append(buf.key, uint8(w>>8), uint8(w))
|
||||
}
|
||||
}
|
||||
|
@ -444,12 +521,12 @@ func (c *Collator) keyFromElems(buf *Buffer, ws []colElem) {
|
|||
if Tertiary <= c.Strength || c.CaseLevel {
|
||||
buf.key = append(buf.key, 0, 0)
|
||||
for _, v := range ws {
|
||||
if w := v.tertiary(); w > 0 {
|
||||
if w := v.Tertiary(); w > 0 {
|
||||
buf.key = append(buf.key, uint8(w))
|
||||
}
|
||||
}
|
||||
// Derive the quaternary weights from the options and other levels.
|
||||
// Note that we represent maxQuaternary as 0xFF. The first byte of the
|
||||
// Note that we represent MaxQuaternary as 0xFF. The first byte of the
|
||||
// representation of a primary weight is always smaller than 0xFF,
|
||||
// so using this single byte value will compare correctly.
|
||||
if Quaternary <= c.Strength && c.Alternate >= AltShifted {
|
||||
|
@ -457,7 +534,7 @@ func (c *Collator) keyFromElems(buf *Buffer, ws []colElem) {
|
|||
lastNonFFFF := len(buf.key)
|
||||
buf.key = append(buf.key, 0)
|
||||
for _, v := range ws {
|
||||
if w := v.quaternary(); w == maxQuaternary {
|
||||
if w := v.Quaternary(); w == MaxQuaternary {
|
||||
buf.key = append(buf.key, 0xFF)
|
||||
} else if w > 0 {
|
||||
buf.key = appendPrimary(buf.key, w)
|
||||
|
@ -468,7 +545,7 @@ func (c *Collator) keyFromElems(buf *Buffer, ws []colElem) {
|
|||
} else {
|
||||
buf.key = append(buf.key, 0)
|
||||
for _, v := range ws {
|
||||
if w := v.quaternary(); w == maxQuaternary {
|
||||
if w := v.Quaternary(); w == MaxQuaternary {
|
||||
buf.key = append(buf.key, 0xFF)
|
||||
} else if w > 0 {
|
||||
buf.key = appendPrimary(buf.key, w)
|
||||
|
@ -479,14 +556,14 @@ func (c *Collator) keyFromElems(buf *Buffer, ws []colElem) {
|
|||
}
|
||||
}
|
||||
|
||||
func processWeights(vw AlternateHandling, top uint32, wa []colElem) {
|
||||
func processWeights(vw AlternateHandling, top uint32, wa []Elem) {
|
||||
ignore := false
|
||||
vtop := int(top)
|
||||
switch vw {
|
||||
case AltShifted, AltShiftTrimmed:
|
||||
for i := range wa {
|
||||
if p := wa[i].primary(); p <= vtop && p != 0 {
|
||||
wa[i] = makeQuaternary(p)
|
||||
if p := wa[i].Primary(); p <= vtop && p != 0 {
|
||||
wa[i] = MakeQuaternary(p)
|
||||
ignore = true
|
||||
} else if p == 0 {
|
||||
if ignore {
|
||||
|
@ -498,7 +575,7 @@ func processWeights(vw AlternateHandling, top uint32, wa []colElem) {
|
|||
}
|
||||
case AltBlanked:
|
||||
for i := range wa {
|
||||
if p := wa[i].primary(); p <= vtop && (ignore || p != 0) {
|
||||
if p := wa[i].Primary(); p <= vtop && (ignore || p != 0) {
|
||||
wa[i] = ceIgnore
|
||||
ignore = true
|
||||
} else {
|
||||
|
|
|
@ -0,0 +1,28 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package collate
|
||||
|
||||
// A Weigher can be used as a source for Collator and Searcher.
|
||||
type Weigher interface {
|
||||
// Start finds the start of the segment that includes position p.
|
||||
Start(p int, b []byte) int
|
||||
|
||||
// StartString finds the start of the segment that includes position p.
|
||||
StartString(p int, s string) int
|
||||
|
||||
// AppendNext appends Elems to buf corresponding to the longest match
|
||||
// of a single character or contraction from the start of s.
|
||||
// It returns the new buf and the number of bytes consumed.
|
||||
AppendNext(buf []Elem, s []byte) (ce []Elem, n int)
|
||||
|
||||
// AppendNextString appends Elems to buf corresponding to the longest match
|
||||
// of a single character or contraction from the start of s.
|
||||
// It returns the new buf and the number of bytes consumed.
|
||||
AppendNextString(buf []Elem, s string) (ce []Elem, n int)
|
||||
|
||||
// Domain returns a slice of all single characters and contractions for which
|
||||
// collation elements are defined in this table.
|
||||
Domain() []string
|
||||
}
|
|
@ -27,8 +27,21 @@ type ctScanner struct {
|
|||
done bool
|
||||
}
|
||||
|
||||
type ctScannerString struct {
|
||||
states contractTrieSet
|
||||
s string
|
||||
n int
|
||||
index int
|
||||
pindex int
|
||||
done bool
|
||||
}
|
||||
|
||||
func (t contractTrieSet) scanner(index, n int, b []byte) ctScanner {
|
||||
return ctScanner{states: t[index:], s: b, n: n}
|
||||
return ctScanner{s: b, states: t[index:], n: n}
|
||||
}
|
||||
|
||||
func (t contractTrieSet) scannerString(index, n int, str string) ctScannerString {
|
||||
return ctScannerString{s: str, states: t[index:], n: n}
|
||||
}
|
||||
|
||||
// result returns the offset i and bytes consumed p so far. If no suffix
|
||||
|
@ -37,6 +50,10 @@ func (s *ctScanner) result() (i, p int) {
|
|||
return s.index, s.pindex
|
||||
}
|
||||
|
||||
func (s *ctScannerString) result() (i, p int) {
|
||||
return s.index, s.pindex
|
||||
}
|
||||
|
||||
const (
|
||||
final = 0
|
||||
noIndex = 0xFF
|
||||
|
@ -84,3 +101,45 @@ func (s *ctScanner) scan(p int) int {
|
|||
}
|
||||
return pr
|
||||
}
|
||||
|
||||
// scan is a verbatim copy of ctScanner.scan.
|
||||
func (s *ctScannerString) scan(p int) int {
|
||||
pr := p // the p at the rune start
|
||||
str := s.s
|
||||
states, n := s.states, s.n
|
||||
for i := 0; i < n && p < len(str); {
|
||||
e := states[i]
|
||||
c := str[p]
|
||||
// TODO: a significant number of contractions are of a form that
|
||||
// cannot match discontiguous UTF-8 in a normalized string. We could let
|
||||
// a negative value of e.n mean that we can set s.done = true and avoid
|
||||
// the need for additional matches.
|
||||
if c >= e.l {
|
||||
if e.l == c {
|
||||
p++
|
||||
if e.i != noIndex {
|
||||
s.index = int(e.i)
|
||||
s.pindex = p
|
||||
}
|
||||
if e.n != final {
|
||||
i, states, n = 0, states[int(e.h)+n:], int(e.n)
|
||||
if p >= len(str) || utf8.RuneStart(str[p]) {
|
||||
s.states, s.n, pr = states, n, p
|
||||
}
|
||||
} else {
|
||||
s.done = true
|
||||
return p
|
||||
}
|
||||
continue
|
||||
} else if e.n == final && c <= e.h {
|
||||
p++
|
||||
s.done = true
|
||||
s.index = int(c-e.l) + int(e.i)
|
||||
s.pindex = p
|
||||
return p
|
||||
}
|
||||
}
|
||||
i++
|
||||
}
|
||||
return pr
|
||||
}
|
||||
|
|
|
@ -4,9 +4,8 @@
|
|||
|
||||
package collate
|
||||
|
||||
// Init is used by type Builder in exp/locale/collate/build/
|
||||
// to create Collator instances. It is for internal use only.
|
||||
func Init(data interface{}) *Collator {
|
||||
// Init is for internal use only.
|
||||
func Init(data interface{}) Weigher {
|
||||
init, ok := data.(tableInitializer)
|
||||
if !ok {
|
||||
return nil
|
||||
|
@ -14,15 +13,15 @@ func Init(data interface{}) *Collator {
|
|||
t := &table{}
|
||||
loff, voff := init.FirstBlockOffsets()
|
||||
t.index.index = init.TrieIndex()
|
||||
t.index.index0 = t.index.index[blockSize*loff:]
|
||||
t.index.index0 = t.index.index[blockSize*int(loff):]
|
||||
t.index.values = init.TrieValues()
|
||||
t.index.values0 = t.index.values[blockSize*voff:]
|
||||
t.index.values0 = t.index.values[blockSize*int(voff):]
|
||||
t.expandElem = init.ExpandElems()
|
||||
t.contractTries = init.ContractTries()
|
||||
t.contractElem = init.ContractElems()
|
||||
t.maxContractLen = init.MaxContractLen()
|
||||
t.variableTop = init.VariableTop()
|
||||
return newCollator(t)
|
||||
return t
|
||||
}
|
||||
|
||||
type tableInitializer interface {
|
||||
|
|
|
@ -25,43 +25,43 @@ func W(ce ...int) Weights {
|
|||
if len(ce) > 3 {
|
||||
w.Quaternary = ce[3]
|
||||
} else if w.Tertiary != 0 {
|
||||
w.Quaternary = maxQuaternary
|
||||
w.Quaternary = MaxQuaternary
|
||||
}
|
||||
return w
|
||||
}
|
||||
func (w Weights) String() string {
|
||||
return fmt.Sprintf("[%d.%d.%d.%d]", w.Primary, w.Secondary, w.Tertiary, w.Quaternary)
|
||||
return fmt.Sprintf("[%X.%X.%X.%X]", w.Primary, w.Secondary, w.Tertiary, w.Quaternary)
|
||||
}
|
||||
|
||||
type Table struct {
|
||||
t *table
|
||||
t Weigher
|
||||
}
|
||||
|
||||
func GetTable(c *Collator) *Table {
|
||||
return &Table{c.t}
|
||||
}
|
||||
|
||||
func convertToWeights(ws []colElem) []Weights {
|
||||
func convertToWeights(ws []Elem) []Weights {
|
||||
out := make([]Weights, len(ws))
|
||||
for i, w := range ws {
|
||||
out[i] = Weights{int(w.primary()), int(w.secondary()), int(w.tertiary()), int(w.quaternary())}
|
||||
out[i] = Weights{int(w.Primary()), int(w.Secondary()), int(w.Tertiary()), int(w.Quaternary())}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func convertFromWeights(ws []Weights) []colElem {
|
||||
out := make([]colElem, len(ws))
|
||||
func convertFromWeights(ws []Weights) []Elem {
|
||||
out := make([]Elem, len(ws))
|
||||
for i, w := range ws {
|
||||
out[i] = makeCE([]int{w.Primary, w.Secondary, w.Tertiary})
|
||||
out[i] = makeCE([]int{w.Primary, w.Secondary, w.Tertiary, 0})
|
||||
if out[i] == ceIgnore && w.Quaternary > 0 {
|
||||
out[i] = makeQuaternary(w.Quaternary)
|
||||
out[i] = MakeQuaternary(w.Quaternary)
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func (t *Table) AppendNext(s []byte) ([]Weights, int) {
|
||||
w, n := t.t.appendNext(nil, s)
|
||||
w, n := t.t.AppendNext(nil, s)
|
||||
return convertToWeights(w), n
|
||||
}
|
||||
|
||||
|
@ -69,7 +69,7 @@ func SetTop(c *Collator, top int) {
|
|||
if c.t == nil {
|
||||
c.t = &table{}
|
||||
}
|
||||
c.t.variableTop = uint32(top)
|
||||
c.variableTop = uint32(top)
|
||||
}
|
||||
|
||||
func GetColElems(c *Collator, str []byte) []Weights {
|
||||
|
|
|
@ -674,7 +674,7 @@ func testCollator(c *collate.Collator) {
|
|||
for _, str := range testInput.values() {
|
||||
k0 := c0.KeyFromString(&buf, str)
|
||||
k := c.KeyFromString(&buf, str)
|
||||
if bytes.Compare(k0, k) != 0 {
|
||||
if !bytes.Equal(k0, k) {
|
||||
failOnError(fmt.Errorf("test:%U: keys differ (%x vs %x)", []rune(str), k0, k))
|
||||
}
|
||||
buf.Reset()
|
||||
|
|
|
@ -0,0 +1,90 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package collate
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"sort"
|
||||
)
|
||||
|
||||
const (
|
||||
maxSortBuffer = 40960
|
||||
maxSortEntries = 4096
|
||||
)
|
||||
|
||||
type swapper interface {
|
||||
Swap(i, j int)
|
||||
}
|
||||
|
||||
type sorter struct {
|
||||
buf *Buffer
|
||||
keys [][]byte
|
||||
src swapper
|
||||
}
|
||||
|
||||
func (s *sorter) init(n int) {
|
||||
if s.buf == nil {
|
||||
s.buf = &Buffer{}
|
||||
s.buf.init()
|
||||
}
|
||||
if cap(s.keys) < n {
|
||||
s.keys = make([][]byte, n)
|
||||
}
|
||||
s.keys = s.keys[0:n]
|
||||
}
|
||||
|
||||
func (s *sorter) clean() {
|
||||
if len(s.buf.key) > maxSortBuffer {
|
||||
s.buf.key = s.buf.buf[:0]
|
||||
}
|
||||
if len(s.keys) > maxSortEntries {
|
||||
s.keys = nil
|
||||
}
|
||||
}
|
||||
|
||||
func (s *sorter) sort(src swapper) {
|
||||
s.src = src
|
||||
sort.Sort(s)
|
||||
}
|
||||
|
||||
func (s sorter) Len() int {
|
||||
return len(s.keys)
|
||||
}
|
||||
|
||||
func (s sorter) Less(i, j int) bool {
|
||||
return bytes.Compare(s.keys[i], s.keys[j]) == -1
|
||||
}
|
||||
|
||||
func (s sorter) Swap(i, j int) {
|
||||
s.keys[i], s.keys[j] = s.keys[j], s.keys[i]
|
||||
s.src.Swap(i, j)
|
||||
}
|
||||
|
||||
// A Lister can be sorted by Collator's Sort method.
|
||||
type Lister interface {
|
||||
Len() int
|
||||
Swap(i, j int)
|
||||
// Bytes returns the bytes of the text at index i.
|
||||
Bytes(i int) []byte
|
||||
}
|
||||
|
||||
// Sort uses sort.Sort to sort the strings represented by x using the rules of c.
|
||||
func (c *Collator) Sort(x Lister) {
|
||||
n := x.Len()
|
||||
c.sorter.init(n)
|
||||
for i := 0; i < n; i++ {
|
||||
c.sorter.keys[i] = c.Key(c.sorter.buf, x.Bytes(i))
|
||||
}
|
||||
c.sorter.sort(x)
|
||||
}
|
||||
|
||||
// Strings sorts x using the rules of c.
|
||||
func (c *Collator) Strings(x []string) {
|
||||
c.sorter.init(len(x))
|
||||
for i, s := range x {
|
||||
c.sorter.keys[i] = c.KeyFromString(c.sorter.buf, s)
|
||||
}
|
||||
c.sorter.sort(sort.StringSlice(x))
|
||||
}
|
|
@ -0,0 +1,52 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package collate_test
|
||||
|
||||
import (
|
||||
"exp/locale/collate"
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func ExampleCollator_Strings() {
|
||||
c := collate.New("root")
|
||||
strings := []string{
|
||||
"ad",
|
||||
"äb",
|
||||
"ac",
|
||||
}
|
||||
c.Strings(strings)
|
||||
fmt.Println(strings)
|
||||
// Output: [äb ac ad]
|
||||
}
|
||||
|
||||
type sorter []string
|
||||
|
||||
func (s sorter) Len() int {
|
||||
return len(s)
|
||||
}
|
||||
|
||||
func (s sorter) Swap(i, j int) {
|
||||
s[j], s[i] = s[i], s[j]
|
||||
}
|
||||
|
||||
func (s sorter) Bytes(i int) []byte {
|
||||
return []byte(s[i])
|
||||
}
|
||||
|
||||
func TestSort(t *testing.T) {
|
||||
c := collate.New("en")
|
||||
strings := []string{
|
||||
"bcd",
|
||||
"abc",
|
||||
"ddd",
|
||||
}
|
||||
c.Sort(sorter(strings))
|
||||
res := fmt.Sprint(strings)
|
||||
want := "[abc bcd ddd]"
|
||||
if res != want {
|
||||
t.Errorf("found %s; want %s", res, want)
|
||||
}
|
||||
}
|
|
@ -37,18 +37,96 @@ func (t *table) indexedTable(idx tableIndex) *table {
|
|||
return &nt
|
||||
}
|
||||
|
||||
func (t *table) AppendNext(w []Elem, b []byte) (res []Elem, n int) {
|
||||
return t.appendNext(w, source{bytes: b})
|
||||
}
|
||||
|
||||
func (t *table) AppendNextString(w []Elem, s string) (res []Elem, n int) {
|
||||
return t.appendNext(w, source{str: s})
|
||||
}
|
||||
|
||||
func (t *table) Start(p int, b []byte) int {
|
||||
// TODO: implement
|
||||
panic("not implemented")
|
||||
}
|
||||
|
||||
func (t *table) StartString(p int, s string) int {
|
||||
// TODO: implement
|
||||
panic("not implemented")
|
||||
}
|
||||
|
||||
func (t *table) Domain() []string {
|
||||
// TODO: implement
|
||||
panic("not implemented")
|
||||
}
|
||||
|
||||
type source struct {
|
||||
str string
|
||||
bytes []byte
|
||||
}
|
||||
|
||||
func (src *source) lookup(t *table) (ce Elem, sz int) {
|
||||
if src.bytes == nil {
|
||||
return t.index.lookupString(src.str)
|
||||
}
|
||||
return t.index.lookup(src.bytes)
|
||||
}
|
||||
|
||||
func (src *source) tail(sz int) {
|
||||
if src.bytes == nil {
|
||||
src.str = src.str[sz:]
|
||||
} else {
|
||||
src.bytes = src.bytes[sz:]
|
||||
}
|
||||
}
|
||||
|
||||
func (src *source) nfd(buf []byte, end int) []byte {
|
||||
if src.bytes == nil {
|
||||
return norm.NFD.AppendString(buf[:0], src.str[:end])
|
||||
}
|
||||
return norm.NFD.Append(buf[:0], src.bytes[:end]...)
|
||||
}
|
||||
|
||||
func (src *source) rune() (r rune, sz int) {
|
||||
if src.bytes == nil {
|
||||
return utf8.DecodeRuneInString(src.str)
|
||||
}
|
||||
return utf8.DecodeRune(src.bytes)
|
||||
}
|
||||
|
||||
func (src *source) properties(f norm.Form) norm.Properties {
|
||||
if src.bytes == nil {
|
||||
return f.PropertiesString(src.str)
|
||||
}
|
||||
return f.Properties(src.bytes)
|
||||
}
|
||||
|
||||
// appendNext appends the weights corresponding to the next rune or
|
||||
// contraction in s. If a contraction is matched to a discontinuous
|
||||
// sequence of runes, the weights for the interstitial runes are
|
||||
// appended as well. It returns a new slice that includes the appended
|
||||
// weights and the number of bytes consumed from s.
|
||||
func (t *table) appendNext(w []colElem, s []byte) ([]colElem, int) {
|
||||
v, sz := t.index.lookup(s)
|
||||
ce := colElem(v)
|
||||
func (t *table) appendNext(w []Elem, src source) (res []Elem, n int) {
|
||||
ce, sz := src.lookup(t)
|
||||
tp := ce.ctype()
|
||||
if tp == ceNormal {
|
||||
if ce == 0 {
|
||||
r, _ := utf8.DecodeRune(s)
|
||||
r, _ := src.rune()
|
||||
const (
|
||||
hangulSize = 3
|
||||
firstHangul = 0xAC00
|
||||
lastHangul = 0xD7A3
|
||||
)
|
||||
if r >= firstHangul && r <= lastHangul {
|
||||
// TODO: performance can be considerably improved here.
|
||||
n = sz
|
||||
var buf [16]byte // Used for decomposing Hangul.
|
||||
for b := src.nfd(buf[:0], hangulSize); len(b) > 0; b = b[sz:] {
|
||||
ce, sz = t.index.lookup(b)
|
||||
w = append(w, ce)
|
||||
}
|
||||
return w, n
|
||||
}
|
||||
ce = makeImplicitCE(implicitPrimary(r))
|
||||
}
|
||||
w = append(w, ce)
|
||||
|
@ -56,15 +134,20 @@ func (t *table) appendNext(w []colElem, s []byte) ([]colElem, int) {
|
|||
w = t.appendExpansion(w, ce)
|
||||
} else if tp == ceContractionIndex {
|
||||
n := 0
|
||||
w, n = t.matchContraction(w, ce, s[sz:])
|
||||
src.tail(sz)
|
||||
if src.bytes == nil {
|
||||
w, n = t.matchContractionString(w, ce, src.str)
|
||||
} else {
|
||||
w, n = t.matchContraction(w, ce, src.bytes)
|
||||
}
|
||||
sz += n
|
||||
} else if tp == ceDecompose {
|
||||
// Decompose using NFCK and replace tertiary weights.
|
||||
// Decompose using NFKD and replace tertiary weights.
|
||||
t1, t2 := splitDecompose(ce)
|
||||
i := len(w)
|
||||
nfkd := norm.NFKD.Properties(s).Decomposition()
|
||||
nfkd := src.properties(norm.NFKD).Decomposition()
|
||||
for p := 0; len(nfkd) > 0; nfkd = nfkd[p:] {
|
||||
w, p = t.appendNext(w, nfkd)
|
||||
w, p = t.appendNext(w, source{bytes: nfkd})
|
||||
}
|
||||
w[i] = w[i].updateTertiary(t1)
|
||||
if i++; i < len(w) {
|
||||
|
@ -77,17 +160,17 @@ func (t *table) appendNext(w []colElem, s []byte) ([]colElem, int) {
|
|||
return w, sz
|
||||
}
|
||||
|
||||
func (t *table) appendExpansion(w []colElem, ce colElem) []colElem {
|
||||
func (t *table) appendExpansion(w []Elem, ce Elem) []Elem {
|
||||
i := splitExpandIndex(ce)
|
||||
n := int(t.expandElem[i])
|
||||
i++
|
||||
for _, ce := range t.expandElem[i : i+n] {
|
||||
w = append(w, colElem(ce))
|
||||
w = append(w, Elem(ce))
|
||||
}
|
||||
return w
|
||||
}
|
||||
|
||||
func (t *table) matchContraction(w []colElem, ce colElem, suffix []byte) ([]colElem, int) {
|
||||
func (t *table) matchContraction(w []Elem, ce Elem, suffix []byte) ([]Elem, int) {
|
||||
index, n, offset := splitContractIndex(ce)
|
||||
|
||||
scan := t.contractTries.scanner(index, n, suffix)
|
||||
|
@ -99,16 +182,17 @@ func (t *table) matchContraction(w []colElem, ce colElem, suffix []byte) ([]colE
|
|||
// By now we should have filtered most cases.
|
||||
p0 := p
|
||||
bufn := 0
|
||||
rune := norm.NFC.Properties(suffix[p:])
|
||||
rune := norm.NFD.Properties(suffix[p:])
|
||||
p += rune.Size()
|
||||
if prevCC := rune.TrailCCC(); prevCC != 0 {
|
||||
if rune.LeadCCC() != 0 {
|
||||
prevCC := rune.TrailCCC()
|
||||
// A gap may only occur in the last normalization segment.
|
||||
// This also ensures that len(scan.s) < norm.MaxSegmentSize.
|
||||
if end := norm.NFC.FirstBoundary(suffix[p:]); end != -1 {
|
||||
if end := norm.NFD.FirstBoundary(suffix[p:]); end != -1 {
|
||||
scan.s = suffix[:p+end]
|
||||
}
|
||||
for p < len(suffix) && !scan.done && suffix[p] >= utf8.RuneSelf {
|
||||
rune = norm.NFC.Properties(suffix[p:])
|
||||
rune = norm.NFD.Properties(suffix[p:])
|
||||
if ccc := rune.LeadCCC(); ccc == 0 || prevCC >= ccc {
|
||||
break
|
||||
}
|
||||
|
@ -128,7 +212,7 @@ func (t *table) matchContraction(w []colElem, ce colElem, suffix []byte) ([]colE
|
|||
}
|
||||
// Append weights for the matched contraction, which may be an expansion.
|
||||
i, n := scan.result()
|
||||
ce = colElem(t.contractElem[i+offset])
|
||||
ce = Elem(t.contractElem[i+offset])
|
||||
if ce.ctype() == ceNormal {
|
||||
w = append(w, ce)
|
||||
} else {
|
||||
|
@ -136,7 +220,98 @@ func (t *table) matchContraction(w []colElem, ce colElem, suffix []byte) ([]colE
|
|||
}
|
||||
// Append weights for the runes in the segment not part of the contraction.
|
||||
for b, p := buf[:bufp], 0; len(b) > 0; b = b[p:] {
|
||||
w, p = t.appendNext(w, b)
|
||||
w, p = t.appendNext(w, source{bytes: b})
|
||||
}
|
||||
return w, n
|
||||
}
|
||||
|
||||
// TODO: unify the two implementations. This is best done after first simplifying
|
||||
// the algorithm taking into account the inclusion of both NFC and NFD forms
|
||||
// in the table.
|
||||
func (t *table) matchContractionString(w []Elem, ce Elem, suffix string) ([]Elem, int) {
|
||||
index, n, offset := splitContractIndex(ce)
|
||||
|
||||
scan := t.contractTries.scannerString(index, n, suffix)
|
||||
buf := [norm.MaxSegmentSize]byte{}
|
||||
bufp := 0
|
||||
p := scan.scan(0)
|
||||
|
||||
if !scan.done && p < len(suffix) && suffix[p] >= utf8.RuneSelf {
|
||||
// By now we should have filtered most cases.
|
||||
p0 := p
|
||||
bufn := 0
|
||||
rune := norm.NFD.PropertiesString(suffix[p:])
|
||||
p += rune.Size()
|
||||
if rune.LeadCCC() != 0 {
|
||||
prevCC := rune.TrailCCC()
|
||||
// A gap may only occur in the last normalization segment.
|
||||
// This also ensures that len(scan.s) < norm.MaxSegmentSize.
|
||||
if end := norm.NFD.FirstBoundaryInString(suffix[p:]); end != -1 {
|
||||
scan.s = suffix[:p+end]
|
||||
}
|
||||
for p < len(suffix) && !scan.done && suffix[p] >= utf8.RuneSelf {
|
||||
rune = norm.NFD.PropertiesString(suffix[p:])
|
||||
if ccc := rune.LeadCCC(); ccc == 0 || prevCC >= ccc {
|
||||
break
|
||||
}
|
||||
prevCC = rune.TrailCCC()
|
||||
if pp := scan.scan(p); pp != p {
|
||||
// Copy the interstitial runes for later processing.
|
||||
bufn += copy(buf[bufn:], suffix[p0:p])
|
||||
if scan.pindex == pp {
|
||||
bufp = bufn
|
||||
}
|
||||
p, p0 = pp, pp
|
||||
} else {
|
||||
p += rune.Size()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Append weights for the matched contraction, which may be an expansion.
|
||||
i, n := scan.result()
|
||||
ce = Elem(t.contractElem[i+offset])
|
||||
if ce.ctype() == ceNormal {
|
||||
w = append(w, ce)
|
||||
} else {
|
||||
w = t.appendExpansion(w, ce)
|
||||
}
|
||||
// Append weights for the runes in the segment not part of the contraction.
|
||||
for b, p := buf[:bufp], 0; len(b) > 0; b = b[p:] {
|
||||
w, p = t.appendNext(w, source{bytes: b})
|
||||
}
|
||||
return w, n
|
||||
}
|
||||
|
||||
// TODO: this should stay after the rest of this file is moved to colltab
|
||||
func (t tableIndex) TrieIndex() []uint16 {
|
||||
return mainLookup[:]
|
||||
}
|
||||
|
||||
func (t tableIndex) TrieValues() []uint32 {
|
||||
return mainValues[:]
|
||||
}
|
||||
|
||||
func (t tableIndex) FirstBlockOffsets() (lookup, value uint16) {
|
||||
return uint16(t.lookupOffset), uint16(t.valuesOffset)
|
||||
}
|
||||
|
||||
func (t tableIndex) ExpandElems() []uint32 {
|
||||
return mainExpandElem[:]
|
||||
}
|
||||
|
||||
func (t tableIndex) ContractTries() []struct{ l, h, n, i uint8 } {
|
||||
return mainCTEntries[:]
|
||||
}
|
||||
|
||||
func (t tableIndex) ContractElems() []uint32 {
|
||||
return mainContractElem[:]
|
||||
}
|
||||
|
||||
func (t tableIndex) MaxContractLen() int {
|
||||
return 18
|
||||
}
|
||||
|
||||
func (t tableIndex) VariableTop() uint32 {
|
||||
return 0x30E
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -31,18 +31,79 @@ const (
|
|||
te = 0xFE // 1111 1110
|
||||
)
|
||||
|
||||
func (t *trie) lookupValue(n uint16, b byte) colElem {
|
||||
return colElem(t.values[int(n)<<6+int(b)])
|
||||
func (t *trie) lookupValue(n uint16, b byte) Elem {
|
||||
return Elem(t.values[int(n)<<6+int(b)])
|
||||
}
|
||||
|
||||
// lookup returns the trie value for the first UTF-8 encoding in s and
|
||||
// the width in bytes of this encoding. The size will be 0 if s does not
|
||||
// hold enough bytes to complete the encoding. len(s) must be greater than 0.
|
||||
func (t *trie) lookup(s []byte) (v colElem, sz int) {
|
||||
func (t *trie) lookup(s []byte) (v Elem, sz int) {
|
||||
c0 := s[0]
|
||||
switch {
|
||||
case c0 < tx:
|
||||
return colElem(t.values0[c0]), 1
|
||||
return Elem(t.values0[c0]), 1
|
||||
case c0 < t2:
|
||||
return 0, 1
|
||||
case c0 < t3:
|
||||
if len(s) < 2 {
|
||||
return 0, 0
|
||||
}
|
||||
i := t.index0[c0]
|
||||
c1 := s[1]
|
||||
if c1 < tx || t2 <= c1 {
|
||||
return 0, 1
|
||||
}
|
||||
return t.lookupValue(i, c1), 2
|
||||
case c0 < t4:
|
||||
if len(s) < 3 {
|
||||
return 0, 0
|
||||
}
|
||||
i := t.index0[c0]
|
||||
c1 := s[1]
|
||||
if c1 < tx || t2 <= c1 {
|
||||
return 0, 1
|
||||
}
|
||||
o := int(i)<<6 + int(c1)
|
||||
i = t.index[o]
|
||||
c2 := s[2]
|
||||
if c2 < tx || t2 <= c2 {
|
||||
return 0, 2
|
||||
}
|
||||
return t.lookupValue(i, c2), 3
|
||||
case c0 < t5:
|
||||
if len(s) < 4 {
|
||||
return 0, 0
|
||||
}
|
||||
i := t.index0[c0]
|
||||
c1 := s[1]
|
||||
if c1 < tx || t2 <= c1 {
|
||||
return 0, 1
|
||||
}
|
||||
o := int(i)<<6 + int(c1)
|
||||
i = t.index[o]
|
||||
c2 := s[2]
|
||||
if c2 < tx || t2 <= c2 {
|
||||
return 0, 2
|
||||
}
|
||||
o = int(i)<<6 + int(c2)
|
||||
i = t.index[o]
|
||||
c3 := s[3]
|
||||
if c3 < tx || t2 <= c3 {
|
||||
return 0, 3
|
||||
}
|
||||
return t.lookupValue(i, c3), 4
|
||||
}
|
||||
// Illegal rune
|
||||
return 0, 1
|
||||
}
|
||||
|
||||
// The body of lookupString is a verbatim copy of that of lookup.
|
||||
func (t *trie) lookupString(s string) (v Elem, sz int) {
|
||||
c0 := s[0]
|
||||
switch {
|
||||
case c0 < tx:
|
||||
return Elem(t.values0[c0]), 1
|
||||
case c0 < t2:
|
||||
return 0, 1
|
||||
case c0 < t3:
|
||||
|
|
|
@ -28,24 +28,20 @@ type reorderBuffer struct {
|
|||
nbyte uint8 // Number or bytes.
|
||||
f formInfo
|
||||
|
||||
src input
|
||||
nsrc int
|
||||
srcBytes inputBytes
|
||||
srcString inputString
|
||||
tmpBytes inputBytes
|
||||
src input
|
||||
nsrc int
|
||||
tmpBytes input
|
||||
}
|
||||
|
||||
func (rb *reorderBuffer) init(f Form, src []byte) {
|
||||
rb.f = *formTable[f]
|
||||
rb.srcBytes = inputBytes(src)
|
||||
rb.src = &rb.srcBytes
|
||||
rb.src.setBytes(src)
|
||||
rb.nsrc = len(src)
|
||||
}
|
||||
|
||||
func (rb *reorderBuffer) initString(f Form, src string) {
|
||||
rb.f = *formTable[f]
|
||||
rb.srcString = inputString(src)
|
||||
rb.src = &rb.srcString
|
||||
rb.src.setString(src)
|
||||
rb.nsrc = len(src)
|
||||
}
|
||||
|
||||
|
@ -121,9 +117,9 @@ func (rb *reorderBuffer) insert(src input, i int, info Properties) bool {
|
|||
// in dcomp. dcomp must be a sequence of decomposed UTF-8-encoded runes.
|
||||
func (rb *reorderBuffer) insertDecomposed(dcomp []byte) bool {
|
||||
saveNrune, saveNbyte := rb.nrune, rb.nbyte
|
||||
rb.tmpBytes = inputBytes(dcomp)
|
||||
rb.tmpBytes.setBytes(dcomp)
|
||||
for i := 0; i < len(dcomp); {
|
||||
info := rb.f.info(&rb.tmpBytes, i)
|
||||
info := rb.f.info(rb.tmpBytes, i)
|
||||
pos := rb.nbyte
|
||||
if !rb.insertOrdered(info) {
|
||||
rb.nrune, rb.nbyte = saveNrune, saveNbyte
|
||||
|
|
|
@ -81,7 +81,7 @@ func flushF(rb *reorderBuffer) []byte {
|
|||
}
|
||||
|
||||
func flushCopyF(rb *reorderBuffer) []byte {
|
||||
out := make([]byte, MaxSegmentSize)
|
||||
out := make([]byte, maxByteBufferSize)
|
||||
n := rb.flushCopy(out)
|
||||
return out[:n]
|
||||
}
|
||||
|
|
|
@ -0,0 +1,81 @@
|
|||
// Copyright 2012 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package norm_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"exp/norm"
|
||||
"fmt"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// EqualSimple uses a norm.Iter to compare two non-normalized
|
||||
// strings for equivalence.
|
||||
func EqualSimple(a, b string) bool {
|
||||
var ia, ib norm.Iter
|
||||
ia.InitString(norm.NFKD, a)
|
||||
ib.InitString(norm.NFKD, b)
|
||||
for !ia.Done() && !ib.Done() {
|
||||
if !bytes.Equal(ia.Next(), ib.Next()) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return ia.Done() && ib.Done()
|
||||
}
|
||||
|
||||
// FindPrefix finds the longest common prefix of ASCII characters
|
||||
// of a and b.
|
||||
func FindPrefix(a, b string) int {
|
||||
i := 0
|
||||
for ; i < len(a) && i < len(b) && a[i] < utf8.RuneSelf && a[i] == b[i]; i++ {
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
// EqualOpt is like EqualSimple, but optimizes the special
|
||||
// case for ASCII characters.
|
||||
func EqualOpt(a, b string) bool {
|
||||
n := FindPrefix(a, b)
|
||||
a, b = a[n:], b[n:]
|
||||
var ia, ib norm.Iter
|
||||
ia.InitString(norm.NFKD, a)
|
||||
ib.InitString(norm.NFKD, b)
|
||||
for !ia.Done() && !ib.Done() {
|
||||
if !bytes.Equal(ia.Next(), ib.Next()) {
|
||||
return false
|
||||
}
|
||||
if n := int64(FindPrefix(a[ia.Pos():], b[ib.Pos():])); n != 0 {
|
||||
ia.Seek(n, 1)
|
||||
ib.Seek(n, 1)
|
||||
}
|
||||
}
|
||||
return ia.Done() && ib.Done()
|
||||
}
|
||||
|
||||
var compareTests = []struct{ a, b string }{
|
||||
{"aaa", "aaa"},
|
||||
{"aaa", "aab"},
|
||||
{"a\u0300a", "\u00E0a"},
|
||||
{"a\u0300\u0320b", "a\u0320\u0300b"},
|
||||
{"\u1E0A\u0323", "\x44\u0323\u0307"},
|
||||
// A character that decomposes into multiple segments
|
||||
// spans several iterations.
|
||||
{"\u3304", "\u30A4\u30CB\u30F3\u30AF\u3099"},
|
||||
}
|
||||
|
||||
func ExampleIter() {
|
||||
for i, t := range compareTests {
|
||||
r0 := EqualSimple(t.a, t.b)
|
||||
r1 := EqualOpt(t.a, t.b)
|
||||
fmt.Printf("%d: %v %v\n", i, r0, r1)
|
||||
}
|
||||
// Output:
|
||||
// 0: true true
|
||||
// 1: false false
|
||||
// 2: true true
|
||||
// 3: true true
|
||||
// 4: true true
|
||||
// 5: true true
|
||||
}
|
|
@ -50,6 +50,7 @@ type formInfo struct {
|
|||
form Form
|
||||
composing, compatibility bool // form type
|
||||
info lookupFunc
|
||||
nextMain iterFunc
|
||||
}
|
||||
|
||||
var formTable []*formInfo
|
||||
|
@ -67,7 +68,9 @@ func init() {
|
|||
} else {
|
||||
f.info = lookupInfoNFC
|
||||
}
|
||||
f.nextMain = nextDecomposed
|
||||
if Form(i) == NFC || Form(i) == NFKC {
|
||||
f.nextMain = nextComposed
|
||||
f.composing = true
|
||||
}
|
||||
}
|
||||
|
@ -117,6 +120,10 @@ func (p Properties) isInert() bool {
|
|||
return p.flags&0xf == 0 && p.ccc == 0
|
||||
}
|
||||
|
||||
func (p Properties) multiSegment() bool {
|
||||
return p.index >= firstMulti && p.index < endMulti
|
||||
}
|
||||
|
||||
// Decomposition returns the decomposition for the underlying rune
|
||||
// or nil if there is none.
|
||||
func (p Properties) Decomposition() []byte {
|
||||
|
|
|
@ -6,91 +6,100 @@ package norm
|
|||
|
||||
import "unicode/utf8"
|
||||
|
||||
type input interface {
|
||||
skipASCII(p, max int) int
|
||||
skipNonStarter(p int) int
|
||||
appendSlice(buf []byte, s, e int) []byte
|
||||
copySlice(buf []byte, s, e int)
|
||||
charinfoNFC(p int) (uint16, int)
|
||||
charinfoNFKC(p int) (uint16, int)
|
||||
hangul(p int) rune
|
||||
type input struct {
|
||||
str string
|
||||
bytes []byte
|
||||
}
|
||||
|
||||
type inputString string
|
||||
func inputBytes(str []byte) input {
|
||||
return input{bytes: str}
|
||||
}
|
||||
|
||||
func (s inputString) skipASCII(p, max int) int {
|
||||
for ; p < max && s[p] < utf8.RuneSelf; p++ {
|
||||
func inputString(str string) input {
|
||||
return input{str: str}
|
||||
}
|
||||
|
||||
func (in *input) setBytes(str []byte) {
|
||||
in.str = ""
|
||||
in.bytes = str
|
||||
}
|
||||
|
||||
func (in *input) setString(str string) {
|
||||
in.str = str
|
||||
in.bytes = nil
|
||||
}
|
||||
|
||||
func (in *input) _byte(p int) byte {
|
||||
if in.bytes == nil {
|
||||
return in.str[p]
|
||||
}
|
||||
return in.bytes[p]
|
||||
}
|
||||
|
||||
func (in *input) skipASCII(p, max int) int {
|
||||
if in.bytes == nil {
|
||||
for ; p < max && in.str[p] < utf8.RuneSelf; p++ {
|
||||
}
|
||||
} else {
|
||||
for ; p < max && in.bytes[p] < utf8.RuneSelf; p++ {
|
||||
}
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
func (s inputString) skipNonStarter(p int) int {
|
||||
for ; p < len(s) && !utf8.RuneStart(s[p]); p++ {
|
||||
func (in *input) skipNonStarter(p int) int {
|
||||
if in.bytes == nil {
|
||||
for ; p < len(in.str) && !utf8.RuneStart(in.str[p]); p++ {
|
||||
}
|
||||
} else {
|
||||
for ; p < len(in.bytes) && !utf8.RuneStart(in.bytes[p]); p++ {
|
||||
}
|
||||
}
|
||||
return p
|
||||
}
|
||||
|
||||
func (s inputString) appendSlice(buf []byte, b, e int) []byte {
|
||||
func (in *input) appendSlice(buf []byte, b, e int) []byte {
|
||||
if in.bytes != nil {
|
||||
return append(buf, in.bytes[b:e]...)
|
||||
}
|
||||
for i := b; i < e; i++ {
|
||||
buf = append(buf, s[i])
|
||||
buf = append(buf, in.str[i])
|
||||
}
|
||||
return buf
|
||||
}
|
||||
|
||||
func (s inputString) copySlice(buf []byte, b, e int) {
|
||||
copy(buf, s[b:e])
|
||||
}
|
||||
|
||||
func (s inputString) charinfoNFC(p int) (uint16, int) {
|
||||
return nfcTrie.lookupString(string(s[p:]))
|
||||
}
|
||||
|
||||
func (s inputString) charinfoNFKC(p int) (uint16, int) {
|
||||
return nfkcTrie.lookupString(string(s[p:]))
|
||||
}
|
||||
|
||||
func (s inputString) hangul(p int) rune {
|
||||
if !isHangulString(string(s[p:])) {
|
||||
return 0
|
||||
func (in *input) copySlice(buf []byte, b, e int) int {
|
||||
if in.bytes == nil {
|
||||
return copy(buf, in.str[b:e])
|
||||
}
|
||||
rune, _ := utf8.DecodeRuneInString(string(s[p:]))
|
||||
return rune
|
||||
return copy(buf, in.bytes[b:e])
|
||||
}
|
||||
|
||||
type inputBytes []byte
|
||||
|
||||
func (s inputBytes) skipASCII(p, max int) int {
|
||||
for ; p < max && s[p] < utf8.RuneSelf; p++ {
|
||||
func (in *input) charinfoNFC(p int) (uint16, int) {
|
||||
if in.bytes == nil {
|
||||
return nfcTrie.lookupString(in.str[p:])
|
||||
}
|
||||
return p
|
||||
return nfcTrie.lookup(in.bytes[p:])
|
||||
}
|
||||
|
||||
func (s inputBytes) skipNonStarter(p int) int {
|
||||
for ; p < len(s) && !utf8.RuneStart(s[p]); p++ {
|
||||
func (in *input) charinfoNFKC(p int) (uint16, int) {
|
||||
if in.bytes == nil {
|
||||
return nfkcTrie.lookupString(in.str[p:])
|
||||
}
|
||||
return p
|
||||
return nfkcTrie.lookup(in.bytes[p:])
|
||||
}
|
||||
|
||||
func (s inputBytes) appendSlice(buf []byte, b, e int) []byte {
|
||||
return append(buf, s[b:e]...)
|
||||
}
|
||||
|
||||
func (s inputBytes) copySlice(buf []byte, b, e int) {
|
||||
copy(buf, s[b:e])
|
||||
}
|
||||
|
||||
func (s inputBytes) charinfoNFC(p int) (uint16, int) {
|
||||
return nfcTrie.lookup(s[p:])
|
||||
}
|
||||
|
||||
func (s inputBytes) charinfoNFKC(p int) (uint16, int) {
|
||||
return nfkcTrie.lookup(s[p:])
|
||||
}
|
||||
|
||||
func (s inputBytes) hangul(p int) rune {
|
||||
if !isHangul(s[p:]) {
|
||||
return 0
|
||||
func (in *input) hangul(p int) (r rune) {
|
||||
if in.bytes == nil {
|
||||
if !isHangulString(in.str[p:]) {
|
||||
return 0
|
||||
}
|
||||
r, _ = utf8.DecodeRuneInString(in.str[p:])
|
||||
} else {
|
||||
if !isHangul(in.bytes[p:]) {
|
||||
return 0
|
||||
}
|
||||
r, _ = utf8.DecodeRune(in.bytes[p:])
|
||||
}
|
||||
rune, _ := utf8.DecodeRune(s[p:])
|
||||
return rune
|
||||
return r
|
||||
}
|
||||
|
|
|
@ -4,53 +4,96 @@
|
|||
|
||||
package norm
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
const MaxSegmentSize = maxByteBufferSize
|
||||
|
||||
// An Iter iterates over a string or byte slice, while normalizing it
|
||||
// to a given Form.
|
||||
type Iter struct {
|
||||
rb reorderBuffer
|
||||
info Properties // first character saved from previous iteration
|
||||
next iterFunc // implementation of next depends on form
|
||||
rb reorderBuffer
|
||||
buf [maxByteBufferSize]byte
|
||||
info Properties // first character saved from previous iteration
|
||||
next iterFunc // implementation of next depends on form
|
||||
asciiF iterFunc
|
||||
|
||||
p int // current position in input source
|
||||
outStart int // start of current segment in output buffer
|
||||
inStart int // start of current segment in input source
|
||||
maxp int // position in output buffer after which not to start a new segment
|
||||
maxseg int // for tracking an excess of combining characters
|
||||
|
||||
tccc uint8
|
||||
done bool
|
||||
p int // current position in input source
|
||||
multiSeg []byte // remainder of multi-segment decomposition
|
||||
}
|
||||
|
||||
type iterFunc func(*Iter, []byte) int
|
||||
type iterFunc func(*Iter) []byte
|
||||
|
||||
// SetInput initializes i to iterate over src after normalizing it to Form f.
|
||||
func (i *Iter) SetInput(f Form, src []byte) {
|
||||
// Init initializes i to iterate over src after normalizing it to Form f.
|
||||
func (i *Iter) Init(f Form, src []byte) {
|
||||
i.p = 0
|
||||
if len(src) == 0 {
|
||||
i.setDone()
|
||||
i.rb.nsrc = 0
|
||||
return
|
||||
}
|
||||
i.multiSeg = nil
|
||||
i.rb.init(f, src)
|
||||
if i.rb.f.composing {
|
||||
i.next = nextComposed
|
||||
} else {
|
||||
i.next = nextDecomposed
|
||||
}
|
||||
i.p = 0
|
||||
if i.done = len(src) == 0; !i.done {
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
}
|
||||
i.next = i.rb.f.nextMain
|
||||
i.asciiF = nextASCIIBytes
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
}
|
||||
|
||||
// SetInputString initializes i to iterate over src after normalizing it to Form f.
|
||||
func (i *Iter) SetInputString(f Form, src string) {
|
||||
i.rb.initString(f, src)
|
||||
if i.rb.f.composing {
|
||||
i.next = nextComposed
|
||||
} else {
|
||||
i.next = nextDecomposed
|
||||
}
|
||||
// InitString initializes i to iterate over src after normalizing it to Form f.
|
||||
func (i *Iter) InitString(f Form, src string) {
|
||||
i.p = 0
|
||||
if i.done = len(src) == 0; !i.done {
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
if len(src) == 0 {
|
||||
i.setDone()
|
||||
i.rb.nsrc = 0
|
||||
return
|
||||
}
|
||||
i.multiSeg = nil
|
||||
i.rb.initString(f, src)
|
||||
i.next = i.rb.f.nextMain
|
||||
i.asciiF = nextASCIIString
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
}
|
||||
|
||||
// Seek sets the segment to be returned by the next call to Next to start
|
||||
// at position p. It is the responsibility of the caller to set p to the
|
||||
// start of a UTF8 rune.
|
||||
func (i *Iter) Seek(offset int64, whence int) (int64, error) {
|
||||
var abs int64
|
||||
switch whence {
|
||||
case 0:
|
||||
abs = offset
|
||||
case 1:
|
||||
abs = int64(i.p) + offset
|
||||
case 2:
|
||||
abs = int64(i.rb.nsrc) + offset
|
||||
default:
|
||||
return 0, fmt.Errorf("norm: invalid whence")
|
||||
}
|
||||
if abs < 0 {
|
||||
return 0, fmt.Errorf("norm: negative position")
|
||||
}
|
||||
if int(abs) >= i.rb.nsrc {
|
||||
i.setDone()
|
||||
return int64(i.p), nil
|
||||
}
|
||||
i.p = int(abs)
|
||||
i.multiSeg = nil
|
||||
i.next = i.rb.f.nextMain
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
return abs, nil
|
||||
}
|
||||
|
||||
// returnSlice returns a slice of the underlying input type as a byte slice.
|
||||
// If the underlying is of type []byte, it will simply return a slice.
|
||||
// If the underlying is of type string, it will copy the slice to the buffer
|
||||
// and return that.
|
||||
func (i *Iter) returnSlice(a, b int) []byte {
|
||||
if i.rb.src.bytes == nil {
|
||||
return i.buf[:copy(i.buf[:], i.rb.src.str[a:b])]
|
||||
}
|
||||
return i.rb.src.bytes[a:b]
|
||||
}
|
||||
|
||||
// Pos returns the byte position at which the next call to Next will commence processing.
|
||||
|
@ -58,140 +101,232 @@ func (i *Iter) Pos() int {
|
|||
return i.p
|
||||
}
|
||||
|
||||
func (i *Iter) setDone() {
|
||||
i.next = nextDone
|
||||
i.p = i.rb.nsrc
|
||||
}
|
||||
|
||||
// Done returns true if there is no more input to process.
|
||||
func (i *Iter) Done() bool {
|
||||
return i.done
|
||||
return i.p >= i.rb.nsrc
|
||||
}
|
||||
|
||||
// Next writes f(i.input[i.Pos():n]...) to buffer buf, where n is the
|
||||
// largest boundary of i.input such that the result fits in buf.
|
||||
// It returns the number of bytes written to buf.
|
||||
// len(buf) should be at least MaxSegmentSize.
|
||||
// Done must be false before calling Next.
|
||||
func (i *Iter) Next(buf []byte) int {
|
||||
return i.next(i, buf)
|
||||
// Next returns f(i.input[i.Pos():n]), where n is a boundary of i.input.
|
||||
// For any input a and b for which f(a) == f(b), subsequent calls
|
||||
// to Next will return the same segments.
|
||||
// Modifying runes are grouped together with the preceding starter, if such a starter exists.
|
||||
// Although not guaranteed, n will typically be the smallest possible n.
|
||||
func (i *Iter) Next() []byte {
|
||||
return i.next(i)
|
||||
}
|
||||
|
||||
func (i *Iter) initNext(outn, inStart int) {
|
||||
i.outStart = 0
|
||||
i.inStart = inStart
|
||||
i.maxp = outn - MaxSegmentSize
|
||||
i.maxseg = MaxSegmentSize
|
||||
}
|
||||
|
||||
// setStart resets the start of the new segment to the given position.
|
||||
// It returns true if there is not enough room for the new segment.
|
||||
func (i *Iter) setStart(outp, inp int) bool {
|
||||
if outp > i.maxp {
|
||||
return true
|
||||
func nextASCIIBytes(i *Iter) []byte {
|
||||
p := i.p + 1
|
||||
if p >= i.rb.nsrc {
|
||||
i.setDone()
|
||||
return i.rb.src.bytes[i.p:p]
|
||||
}
|
||||
i.outStart = outp
|
||||
i.inStart = inp
|
||||
i.maxseg = outp + MaxSegmentSize
|
||||
return false
|
||||
if i.rb.src.bytes[p] < utf8.RuneSelf {
|
||||
p0 := i.p
|
||||
i.p = p
|
||||
return i.rb.src.bytes[p0:p]
|
||||
}
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
i.next = i.rb.f.nextMain
|
||||
return i.next(i)
|
||||
}
|
||||
|
||||
func min(a, b int) int {
|
||||
if a < b {
|
||||
return a
|
||||
func nextASCIIString(i *Iter) []byte {
|
||||
p := i.p + 1
|
||||
if p >= i.rb.nsrc {
|
||||
i.buf[0] = i.rb.src.str[i.p]
|
||||
i.setDone()
|
||||
return i.buf[:1]
|
||||
}
|
||||
return b
|
||||
if i.rb.src.str[p] < utf8.RuneSelf {
|
||||
i.buf[0] = i.rb.src.str[i.p]
|
||||
i.p = p
|
||||
return i.buf[:1]
|
||||
}
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
i.next = i.rb.f.nextMain
|
||||
return i.next(i)
|
||||
}
|
||||
|
||||
func nextHangul(i *Iter) []byte {
|
||||
if r := i.rb.src.hangul(i.p); r != 0 {
|
||||
i.p += hangulUTF8Size
|
||||
if i.p >= i.rb.nsrc {
|
||||
i.setDone()
|
||||
}
|
||||
return i.buf[:decomposeHangul(i.buf[:], r)]
|
||||
}
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
i.next = i.rb.f.nextMain
|
||||
return i.next(i)
|
||||
}
|
||||
|
||||
func nextDone(i *Iter) []byte {
|
||||
return nil
|
||||
}
|
||||
|
||||
// nextMulti is used for iterating over multi-segment decompositions
|
||||
// for decomposing normal forms.
|
||||
func nextMulti(i *Iter) []byte {
|
||||
j := 0
|
||||
d := i.multiSeg
|
||||
// skip first rune
|
||||
for j = 1; j < len(d) && !utf8.RuneStart(d[j]); j++ {
|
||||
}
|
||||
for j < len(d) {
|
||||
info := i.rb.f.info(input{bytes: d}, j)
|
||||
if info.ccc == 0 {
|
||||
i.multiSeg = d[j:]
|
||||
return d[:j]
|
||||
}
|
||||
j += int(info.size)
|
||||
}
|
||||
// treat last segment as normal decomposition
|
||||
i.next = i.rb.f.nextMain
|
||||
return i.next(i)
|
||||
}
|
||||
|
||||
// nextMultiNorm is used for iterating over multi-segment decompositions
|
||||
// for composing normal forms.
|
||||
func nextMultiNorm(i *Iter) []byte {
|
||||
j := 0
|
||||
d := i.multiSeg
|
||||
// skip first rune
|
||||
for j = 1; j < len(d) && !utf8.RuneStart(d[j]); j++ {
|
||||
}
|
||||
for j < len(d) {
|
||||
info := i.rb.f.info(input{bytes: d}, j)
|
||||
if info.ccc == 0 {
|
||||
i.multiSeg = d[j:]
|
||||
return d[:j]
|
||||
}
|
||||
j += int(info.size)
|
||||
}
|
||||
i.multiSeg = nil
|
||||
i.next = nextComposed
|
||||
i.p++ // restore old valud of i.p. See nextComposed.
|
||||
if i.p >= i.rb.nsrc {
|
||||
i.setDone()
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
// nextDecomposed is the implementation of Next for forms NFD and NFKD.
|
||||
func nextDecomposed(i *Iter, out []byte) int {
|
||||
var outp int
|
||||
i.initNext(len(out), i.p)
|
||||
doFast:
|
||||
inCopyStart, outCopyStart := i.p, outp // invariant xCopyStart <= i.xStart
|
||||
func nextDecomposed(i *Iter) (next []byte) {
|
||||
startp, outp := i.p, 0
|
||||
inCopyStart, outCopyStart := i.p, 0
|
||||
for {
|
||||
if sz := int(i.info.size); sz <= 1 {
|
||||
// ASCII or illegal byte. Either way, advance by 1.
|
||||
i.p++
|
||||
p := i.p
|
||||
i.p++ // ASCII or illegal byte. Either way, advance by 1.
|
||||
if i.p >= i.rb.nsrc {
|
||||
i.setDone()
|
||||
return i.returnSlice(p, i.p)
|
||||
} else if i.rb.src._byte(i.p) < utf8.RuneSelf {
|
||||
i.next = i.asciiF
|
||||
return i.returnSlice(p, i.p)
|
||||
}
|
||||
outp++
|
||||
max := min(i.rb.nsrc, len(out)-outp+i.p)
|
||||
if np := i.rb.src.skipASCII(i.p, max); np > i.p {
|
||||
outp += np - i.p
|
||||
i.p = np
|
||||
if i.p >= i.rb.nsrc {
|
||||
break
|
||||
}
|
||||
// ASCII may combine with consecutive runes.
|
||||
if i.setStart(outp-1, i.p-1) {
|
||||
i.p--
|
||||
outp--
|
||||
i.info.size = 1
|
||||
break
|
||||
}
|
||||
}
|
||||
} else if d := i.info.Decomposition(); d != nil {
|
||||
i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.p)
|
||||
// Note: If leading CCC != 0, then len(d) == 2 and last is also non-zero.
|
||||
// Case 1: there is a leftover to copy. In this case the decomposition
|
||||
// must begin with a modifier and should always be appended.
|
||||
// Case 2: no leftover. Simply return d if followed by a ccc == 0 value.
|
||||
p := outp + len(d)
|
||||
if p > i.maxseg && i.setStart(outp, i.p) {
|
||||
return outp
|
||||
if outp > 0 {
|
||||
i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
|
||||
if p > len(i.buf) {
|
||||
return i.buf[:outp]
|
||||
}
|
||||
} else if i.info.multiSegment() {
|
||||
// outp must be 0 as multi-segment decompositions always
|
||||
// start a new segment.
|
||||
if i.multiSeg == nil {
|
||||
i.multiSeg = d
|
||||
i.next = nextMulti
|
||||
return nextMulti(i)
|
||||
}
|
||||
// We are in the last segment. Treat as normal decomposition.
|
||||
d = i.multiSeg
|
||||
i.multiSeg = nil
|
||||
p = len(d)
|
||||
}
|
||||
copy(out[outp:], d)
|
||||
prevCC := i.info.tccc
|
||||
if i.p += sz; i.p >= i.rb.nsrc {
|
||||
i.setDone()
|
||||
i.info = Properties{} // Force BoundaryBefore to succeed.
|
||||
} else {
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
}
|
||||
if i.info.BoundaryBefore() {
|
||||
if outp > 0 {
|
||||
copy(i.buf[outp:], d)
|
||||
return i.buf[:p]
|
||||
}
|
||||
return d
|
||||
}
|
||||
copy(i.buf[outp:], d)
|
||||
outp = p
|
||||
i.p += sz
|
||||
inCopyStart, outCopyStart = i.p, outp
|
||||
} else if r := i.rb.src.hangul(i.p); r != 0 {
|
||||
i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.p)
|
||||
for {
|
||||
outp += decomposeHangul(out[outp:], r)
|
||||
i.p += hangulUTF8Size
|
||||
if r = i.rb.src.hangul(i.p); r == 0 {
|
||||
break
|
||||
}
|
||||
if i.setStart(outp, i.p) {
|
||||
return outp
|
||||
}
|
||||
if i.info.ccc < prevCC {
|
||||
goto doNorm
|
||||
}
|
||||
inCopyStart, outCopyStart = i.p, outp
|
||||
continue
|
||||
} else if r := i.rb.src.hangul(i.p); r != 0 {
|
||||
i.next = nextHangul
|
||||
i.p += hangulUTF8Size
|
||||
if i.p >= i.rb.nsrc {
|
||||
i.setDone()
|
||||
}
|
||||
return i.buf[:decomposeHangul(i.buf[:], r)]
|
||||
} else {
|
||||
p := outp + sz
|
||||
if p > i.maxseg && i.setStart(outp, i.p) {
|
||||
if p > len(i.buf) {
|
||||
break
|
||||
}
|
||||
outp = p
|
||||
i.p += sz
|
||||
}
|
||||
if i.p >= i.rb.nsrc {
|
||||
i.setDone()
|
||||
break
|
||||
}
|
||||
prevCC := i.info.tccc
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
if cc := i.info.ccc; cc == 0 {
|
||||
if i.setStart(outp, i.p) {
|
||||
break
|
||||
}
|
||||
} else if cc < prevCC {
|
||||
if i.info.BoundaryBefore() {
|
||||
break
|
||||
} else if i.info.ccc < prevCC {
|
||||
goto doNorm
|
||||
}
|
||||
}
|
||||
if inCopyStart != i.p {
|
||||
i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.p)
|
||||
if outCopyStart == 0 {
|
||||
return i.returnSlice(inCopyStart, i.p)
|
||||
} else if inCopyStart < i.p {
|
||||
i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
|
||||
}
|
||||
i.done = i.p >= i.rb.nsrc
|
||||
return outp
|
||||
return i.buf[:outp]
|
||||
doNorm:
|
||||
// Insert what we have decomposed so far in the reorderBuffer.
|
||||
// As we will only reorder, there will always be enough room.
|
||||
i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.p)
|
||||
if !i.rb.insertDecomposed(out[i.outStart:outp]) {
|
||||
i.rb.src.copySlice(i.buf[outCopyStart:], inCopyStart, i.p)
|
||||
if !i.rb.insertDecomposed(i.buf[0:outp]) {
|
||||
// Start over to prevent decompositions from crossing segment boundaries.
|
||||
// This is a rare occurrence.
|
||||
i.p = i.inStart
|
||||
i.p = startp
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
}
|
||||
outp = i.outStart
|
||||
for {
|
||||
if !i.rb.insert(i.rb.src, i.p, i.info) {
|
||||
break
|
||||
}
|
||||
if i.p += int(i.info.size); i.p >= i.rb.nsrc {
|
||||
outp += i.rb.flushCopy(out[outp:])
|
||||
i.done = true
|
||||
return outp
|
||||
i.setDone()
|
||||
break
|
||||
}
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
if i.info.ccc == 0 {
|
||||
|
@ -199,27 +334,19 @@ doNorm:
|
|||
}
|
||||
}
|
||||
// new segment or too many combining characters: exit normalization
|
||||
if outp += i.rb.flushCopy(out[outp:]); i.setStart(outp, i.p) {
|
||||
return outp
|
||||
}
|
||||
goto doFast
|
||||
return i.buf[:i.rb.flushCopy(i.buf[:])]
|
||||
}
|
||||
|
||||
// nextComposed is the implementation of Next for forms NFC and NFKC.
|
||||
func nextComposed(i *Iter, out []byte) int {
|
||||
var outp int
|
||||
i.initNext(len(out), i.p)
|
||||
doFast:
|
||||
inCopyStart, outCopyStart := i.p, outp // invariant xCopyStart <= i.xStart
|
||||
func nextComposed(i *Iter) []byte {
|
||||
outp, startp := 0, i.p
|
||||
var prevCC uint8
|
||||
for {
|
||||
if !i.info.isYesC() {
|
||||
goto doNorm
|
||||
}
|
||||
if cc := i.info.ccc; cc == 0 {
|
||||
if i.setStart(outp, i.p) {
|
||||
break
|
||||
}
|
||||
if cc := i.info.ccc; cc == 0 && outp > 0 {
|
||||
break
|
||||
} else if cc < prevCC {
|
||||
goto doNorm
|
||||
}
|
||||
|
@ -229,49 +356,33 @@ doFast:
|
|||
sz = 1 // illegal rune: copy byte-by-byte
|
||||
}
|
||||
p := outp + sz
|
||||
if p > i.maxseg && i.setStart(outp, i.p) {
|
||||
if p > len(i.buf) {
|
||||
break
|
||||
}
|
||||
outp = p
|
||||
i.p += sz
|
||||
max := min(i.rb.nsrc, len(out)-outp+i.p)
|
||||
if np := i.rb.src.skipASCII(i.p, max); np > i.p {
|
||||
outp += np - i.p
|
||||
i.p = np
|
||||
if i.p >= i.rb.nsrc {
|
||||
break
|
||||
}
|
||||
// ASCII may combine with consecutive runes.
|
||||
if i.setStart(outp-1, i.p-1) {
|
||||
i.p--
|
||||
outp--
|
||||
i.info = Properties{size: 1}
|
||||
break
|
||||
}
|
||||
}
|
||||
if i.p >= i.rb.nsrc {
|
||||
i.setDone()
|
||||
break
|
||||
} else if i.rb.src._byte(i.p) < utf8.RuneSelf {
|
||||
i.next = i.asciiF
|
||||
break
|
||||
}
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
}
|
||||
if inCopyStart != i.p {
|
||||
i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.p)
|
||||
}
|
||||
i.done = i.p >= i.rb.nsrc
|
||||
return outp
|
||||
return i.returnSlice(startp, i.p)
|
||||
doNorm:
|
||||
i.rb.src.copySlice(out[outCopyStart:], inCopyStart, i.inStart)
|
||||
outp, i.p = i.outStart, i.inStart
|
||||
multi := false
|
||||
i.p = startp
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
for {
|
||||
if !i.rb.insert(i.rb.src, i.p, i.info) {
|
||||
break
|
||||
}
|
||||
multi = multi || i.info.multiSegment()
|
||||
if i.p += int(i.info.size); i.p >= i.rb.nsrc {
|
||||
i.rb.compose()
|
||||
outp += i.rb.flushCopy(out[outp:])
|
||||
i.done = true
|
||||
return outp
|
||||
i.setDone()
|
||||
break
|
||||
}
|
||||
i.info = i.rb.f.info(i.rb.src, i.p)
|
||||
if i.info.BoundaryBefore() {
|
||||
|
@ -279,8 +390,12 @@ doNorm:
|
|||
}
|
||||
}
|
||||
i.rb.compose()
|
||||
if outp += i.rb.flushCopy(out[outp:]); i.setStart(outp, i.p) {
|
||||
return outp
|
||||
seg := i.buf[:i.rb.flushCopy(i.buf[:])]
|
||||
if multi {
|
||||
i.p-- // fake not being done yet
|
||||
i.multiSeg = seg
|
||||
i.next = nextMultiNorm
|
||||
return nextMultiNorm(i)
|
||||
}
|
||||
goto doFast
|
||||
return seg
|
||||
}
|
||||
|
|
|
@ -9,21 +9,12 @@ import (
|
|||
"testing"
|
||||
)
|
||||
|
||||
var iterBufSizes = []int{
|
||||
MaxSegmentSize,
|
||||
1.5 * MaxSegmentSize,
|
||||
2 * MaxSegmentSize,
|
||||
3 * MaxSegmentSize,
|
||||
100 * MaxSegmentSize,
|
||||
}
|
||||
|
||||
func doIterNorm(f Form, buf []byte, s string) []byte {
|
||||
func doIterNorm(f Form, s string) []byte {
|
||||
acc := []byte{}
|
||||
i := Iter{}
|
||||
i.SetInputString(f, s)
|
||||
i.InitString(f, s)
|
||||
for !i.Done() {
|
||||
n := i.Next(buf)
|
||||
acc = append(acc, buf[:n]...)
|
||||
acc = append(acc, i.Next()...)
|
||||
}
|
||||
return acc
|
||||
}
|
||||
|
@ -35,30 +26,28 @@ func runIterTests(t *testing.T, name string, f Form, tests []AppendTest, norm bo
|
|||
if norm {
|
||||
gold = string(f.AppendString(nil, test.out))
|
||||
}
|
||||
for _, sz := range iterBufSizes {
|
||||
buf := make([]byte, sz)
|
||||
out := string(doIterNorm(f, buf, in))
|
||||
if len(out) != len(gold) {
|
||||
const msg = "%s:%d:%d: length is %d; want %d"
|
||||
t.Errorf(msg, name, i, sz, len(out), len(gold))
|
||||
}
|
||||
if out != gold {
|
||||
// Find first rune that differs and show context.
|
||||
ir := []rune(out)
|
||||
ig := []rune(gold)
|
||||
for j := 0; j < len(ir) && j < len(ig); j++ {
|
||||
if ir[j] == ig[j] {
|
||||
continue
|
||||
}
|
||||
if j -= 3; j < 0 {
|
||||
j = 0
|
||||
}
|
||||
for e := j + 7; j < e && j < len(ir) && j < len(ig); j++ {
|
||||
const msg = "%s:%d:%d: runeAt(%d) = %U; want %U"
|
||||
t.Errorf(msg, name, i, sz, j, ir[j], ig[j])
|
||||
}
|
||||
break
|
||||
out := string(doIterNorm(f, in))
|
||||
if len(out) != len(gold) {
|
||||
const msg = "%s:%d: length is %d; want %d"
|
||||
t.Errorf(msg, name, i, len(out), len(gold))
|
||||
}
|
||||
if out != gold {
|
||||
// Find first rune that differs and show context.
|
||||
ir := []rune(out)
|
||||
ig := []rune(gold)
|
||||
t.Errorf("\n%X != \n%X", ir, ig)
|
||||
for j := 0; j < len(ir) && j < len(ig); j++ {
|
||||
if ir[j] == ig[j] {
|
||||
continue
|
||||
}
|
||||
if j -= 3; j < 0 {
|
||||
j = 0
|
||||
}
|
||||
for e := j + 7; j < e && j < len(ir) && j < len(ig); j++ {
|
||||
const msg = "%s:%d: runeAt(%d) = %U; want %U"
|
||||
t.Errorf(msg, name, i, j, ir[j], ig[j])
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -68,42 +57,44 @@ func rep(r rune, n int) string {
|
|||
return strings.Repeat(string(r), n)
|
||||
}
|
||||
|
||||
const segSize = maxByteBufferSize
|
||||
|
||||
var iterTests = []AppendTest{
|
||||
{"", ascii, ascii},
|
||||
{"", txt_all, txt_all},
|
||||
{"", "a" + rep(0x0300, MaxSegmentSize/2), "a" + rep(0x0300, MaxSegmentSize/2)},
|
||||
{"", "a" + rep(0x0300, segSize/2), "a" + rep(0x0300, segSize/2)},
|
||||
}
|
||||
|
||||
var iterTestsD = []AppendTest{
|
||||
{ // segment overflow on unchanged character
|
||||
"",
|
||||
"a" + rep(0x0300, MaxSegmentSize/2) + "\u0316",
|
||||
"a" + rep(0x0300, MaxSegmentSize/2-1) + "\u0316\u0300",
|
||||
"a" + rep(0x0300, segSize/2) + "\u0316",
|
||||
"a" + rep(0x0300, segSize/2-1) + "\u0316\u0300",
|
||||
},
|
||||
{ // segment overflow on unchanged character + start value
|
||||
"",
|
||||
"a" + rep(0x0300, MaxSegmentSize/2+maxCombiningChars+4) + "\u0316",
|
||||
"a" + rep(0x0300, MaxSegmentSize/2+maxCombiningChars) + "\u0316" + rep(0x300, 4),
|
||||
"a" + rep(0x0300, segSize/2+maxCombiningChars+4) + "\u0316",
|
||||
"a" + rep(0x0300, segSize/2+maxCombiningChars) + "\u0316" + rep(0x300, 4),
|
||||
},
|
||||
{ // segment overflow on decomposition
|
||||
"",
|
||||
"a" + rep(0x0300, MaxSegmentSize/2-1) + "\u0340",
|
||||
"a" + rep(0x0300, MaxSegmentSize/2),
|
||||
"a" + rep(0x0300, segSize/2-1) + "\u0340",
|
||||
"a" + rep(0x0300, segSize/2),
|
||||
},
|
||||
{ // segment overflow on decomposition + start value
|
||||
"",
|
||||
"a" + rep(0x0300, MaxSegmentSize/2-1) + "\u0340" + rep(0x300, maxCombiningChars+4) + "\u0320",
|
||||
"a" + rep(0x0300, MaxSegmentSize/2-1) + rep(0x300, maxCombiningChars+1) + "\u0320" + rep(0x300, 4),
|
||||
"a" + rep(0x0300, segSize/2-1) + "\u0340" + rep(0x300, maxCombiningChars+4) + "\u0320",
|
||||
"a" + rep(0x0300, segSize/2-1) + rep(0x300, maxCombiningChars+1) + "\u0320" + rep(0x300, 4),
|
||||
},
|
||||
{ // start value after ASCII overflow
|
||||
"",
|
||||
rep('a', MaxSegmentSize) + rep(0x300, maxCombiningChars+2) + "\u0320",
|
||||
rep('a', MaxSegmentSize) + rep(0x300, maxCombiningChars) + "\u0320\u0300\u0300",
|
||||
rep('a', segSize) + rep(0x300, maxCombiningChars+2) + "\u0320",
|
||||
rep('a', segSize) + rep(0x300, maxCombiningChars) + "\u0320\u0300\u0300",
|
||||
},
|
||||
{ // start value after Hangul overflow
|
||||
"",
|
||||
rep(0xAC00, MaxSegmentSize/6) + rep(0x300, maxCombiningChars+2) + "\u0320",
|
||||
strings.Repeat("\u1100\u1161", MaxSegmentSize/6) + rep(0x300, maxCombiningChars-1) + "\u0320" + rep(0x300, 3),
|
||||
rep(0xAC00, segSize/6) + rep(0x300, maxCombiningChars+2) + "\u0320",
|
||||
strings.Repeat("\u1100\u1161", segSize/6) + rep(0x300, maxCombiningChars+1) + "\u0320" + rep(0x300, 1),
|
||||
},
|
||||
{ // start value after cc=0
|
||||
"",
|
||||
|
@ -125,8 +116,8 @@ var iterTestsC = []AppendTest{
|
|||
},
|
||||
{ // segment overflow
|
||||
"",
|
||||
"a" + rep(0x0305, MaxSegmentSize/2+4) + "\u0316",
|
||||
"a" + rep(0x0305, MaxSegmentSize/2-1) + "\u0316" + rep(0x305, 5),
|
||||
"a" + rep(0x0305, segSize/2+4) + "\u0316",
|
||||
"a" + rep(0x0305, segSize/2-1) + "\u0316" + rep(0x305, 5),
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -148,27 +139,39 @@ type SegmentTest struct {
|
|||
}
|
||||
|
||||
var segmentTests = []SegmentTest{
|
||||
{rep('a', MaxSegmentSize), []string{rep('a', MaxSegmentSize), ""}},
|
||||
{rep('a', MaxSegmentSize+2), []string{rep('a', MaxSegmentSize-1), "aaa", ""}},
|
||||
{rep('a', MaxSegmentSize) + "\u0300aa", []string{rep('a', MaxSegmentSize-1), "a\u0300", "aa", ""}},
|
||||
{"\u1E0A\u0323a", []string{"\x44\u0323\u0307", "a", ""}},
|
||||
{rep('a', segSize), append(strings.Split(rep('a', segSize), ""), "")},
|
||||
{rep('a', segSize+2), append(strings.Split(rep('a', segSize+2), ""), "")},
|
||||
{rep('a', segSize) + "\u0300aa",
|
||||
append(strings.Split(rep('a', segSize-1), ""), "a\u0300", "a", "a", "")},
|
||||
}
|
||||
|
||||
var segmentTestsK = []SegmentTest{
|
||||
{"\u3332", []string{"\u30D5", "\u30A1", "\u30E9", "\u30C3", "\u30C8\u3099", ""}},
|
||||
// last segment of multi-segment decomposition needs normalization
|
||||
{"\u3332\u093C", []string{"\u30D5", "\u30A1", "\u30E9", "\u30C3", "\u30C8\u093C\u3099", ""}},
|
||||
// Hangul and Jamo are grouped togeter.
|
||||
{"\uAC00", []string{"\u1100\u1161", ""}},
|
||||
{"\uAC01", []string{"\u1100\u1161\u11A8", ""}},
|
||||
{"\u1100\u1161", []string{"\u1100\u1161", ""}},
|
||||
}
|
||||
|
||||
// Note that, by design, segmentation is equal for composing and decomposing forms.
|
||||
func TestIterSegmentation(t *testing.T) {
|
||||
segmentTest(t, "SegmentTestD", NFD, segmentTests)
|
||||
segmentTest(t, "SegmentTestC", NFC, segmentTests)
|
||||
segmentTest(t, "SegmentTestD", NFKD, segmentTestsK)
|
||||
segmentTest(t, "SegmentTestC", NFKC, segmentTestsK)
|
||||
}
|
||||
|
||||
func segmentTest(t *testing.T, name string, f Form, tests []SegmentTest) {
|
||||
iter := Iter{}
|
||||
for i, tt := range segmentTests {
|
||||
buf := make([]byte, MaxSegmentSize)
|
||||
iter.SetInputString(f, tt.in)
|
||||
for i, tt := range tests {
|
||||
iter.InitString(f, tt.in)
|
||||
for j, seg := range tt.out {
|
||||
if seg == "" {
|
||||
if !iter.Done() {
|
||||
n := iter.Next(buf)
|
||||
res := string(buf[:n])
|
||||
res := string(iter.Next())
|
||||
t.Errorf(`%s:%d:%d: expected Done()==true, found segment "%s"`, name, i, j, res)
|
||||
}
|
||||
continue
|
||||
|
@ -176,10 +179,9 @@ func segmentTest(t *testing.T, name string, f Form, tests []SegmentTest) {
|
|||
if iter.Done() {
|
||||
t.Errorf("%s:%d:%d: Done()==true, want false", name, i, j)
|
||||
}
|
||||
n := iter.Next(buf)
|
||||
seg = f.String(seg)
|
||||
if res := string(buf[:n]); res != seg {
|
||||
t.Errorf(`%s:%d:%d" segment was "%s" (%d); want "%s" (%d)`, name, i, j, res, len(res), seg, len(seg))
|
||||
if res := string(iter.Next()); res != seg {
|
||||
t.Errorf(`%s:%d:%d" segment was "%s" (%d); want "%s" (%d) %X %X`, name, i, j, res, len(res), seg, len(seg), []rune(res), []rune(seg))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -574,7 +574,19 @@ func makeEntry(f *FormInfo) uint16 {
|
|||
|
||||
// decompSet keeps track of unique decompositions, grouped by whether
|
||||
// the decomposition is followed by a trailing and/or leading CCC.
|
||||
type decompSet [4]map[string]bool
|
||||
type decompSet [6]map[string]bool
|
||||
|
||||
const (
|
||||
normalDecomp = iota
|
||||
firstMulti
|
||||
firstCCC
|
||||
endMulti
|
||||
firstLeadingCCC
|
||||
firstCCCZeroExcept
|
||||
lastDecomp
|
||||
)
|
||||
|
||||
var cname = []string{"firstMulti", "firstCCC", "endMulti", "firstLeadingCCC", "firstCCCZeroExcept", "lastDecomp"}
|
||||
|
||||
func makeDecompSet() decompSet {
|
||||
m := decompSet{}
|
||||
|
@ -614,20 +626,30 @@ func printCharInfoTables() int {
|
|||
const msg = "%U: lccc (%d) must be <= tcc (%d)"
|
||||
logger.Fatalf(msg, r, lccc, tccc)
|
||||
}
|
||||
index := 0
|
||||
index := normalDecomp
|
||||
if tccc > 0 || lccc > 0 {
|
||||
s += string([]byte{tccc})
|
||||
index = 1
|
||||
index = endMulti
|
||||
for _, r := range d[1:] {
|
||||
if ccc(r) == 0 {
|
||||
index = firstCCC
|
||||
}
|
||||
}
|
||||
if lccc > 0 {
|
||||
s += string([]byte{lccc})
|
||||
index = 2
|
||||
if index == firstCCC {
|
||||
logger.Fatalf("%U: multi-segment decomposition not supported for decompositions with leading CCC != 0", r)
|
||||
}
|
||||
index = firstLeadingCCC
|
||||
}
|
||||
if cc != lccc {
|
||||
if cc != 0 {
|
||||
logger.Fatalf("%U: for lccc != ccc, expected ccc to be 0; was %d", r, cc)
|
||||
}
|
||||
index = 3
|
||||
index = firstCCCZeroExcept
|
||||
}
|
||||
} else if len(d) > 1 {
|
||||
index = firstMulti
|
||||
}
|
||||
return index, s
|
||||
}
|
||||
|
@ -653,7 +675,6 @@ func printCharInfoTables() int {
|
|||
size := 0
|
||||
positionMap := make(map[string]uint16)
|
||||
decompositions.WriteString("\000")
|
||||
cname := []string{"firstCCC", "firstLeadingCCC", "firstCCCZeroExcept", "lastDecomp"}
|
||||
fmt.Println("const (")
|
||||
for i, m := range decompSet {
|
||||
sa := []string{}
|
||||
|
|
|
@ -6,6 +6,7 @@ package norm
|
|||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
@ -504,12 +505,35 @@ func appendBench(f Form, in []byte) func() {
|
|||
}
|
||||
|
||||
func iterBench(f Form, in []byte) func() {
|
||||
buf := make([]byte, 4*len(in))
|
||||
iter := Iter{}
|
||||
return func() {
|
||||
iter.SetInput(f, in)
|
||||
iter.Init(f, in)
|
||||
for !iter.Done() {
|
||||
iter.Next(buf)
|
||||
iter.Next()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func readerBench(f Form, in []byte) func() {
|
||||
buf := make([]byte, 4*len(in))
|
||||
return func() {
|
||||
r := f.Reader(bytes.NewReader(in))
|
||||
var err error
|
||||
for err == nil {
|
||||
_, err = r.Read(buf)
|
||||
}
|
||||
if err != io.EOF {
|
||||
panic("")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func writerBench(f Form, in []byte) func() {
|
||||
buf := make([]byte, 0, 4*len(in))
|
||||
return func() {
|
||||
r := f.Writer(bytes.NewBuffer(buf))
|
||||
if _, err := r.Write(in); err != nil {
|
||||
panic("")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -517,6 +541,8 @@ func iterBench(f Form, in []byte) func() {
|
|||
func appendBenchmarks(bm []func(), f Form, in []byte) []func() {
|
||||
//bm = append(bm, appendBench(f, in))
|
||||
bm = append(bm, iterBench(f, in))
|
||||
//bm = append(bm, readerBench(f, in))
|
||||
//bm = append(bm, writerBench(f, in))
|
||||
return bm
|
||||
}
|
||||
|
||||
|
|
|
@ -223,13 +223,11 @@ func doTest(t *Test, f norm.Form, gold, test string) {
|
|||
cmpResult(t, "Bytes", f, gold, test, string(result))
|
||||
sresult := f.String(test)
|
||||
cmpResult(t, "String", f, gold, test, sresult)
|
||||
buf := make([]byte, norm.MaxSegmentSize)
|
||||
acc := []byte{}
|
||||
i := norm.Iter{}
|
||||
i.SetInputString(f, test)
|
||||
i.InitString(f, test)
|
||||
for !i.Done() {
|
||||
n := i.Next(buf)
|
||||
acc = append(acc, buf[:n]...)
|
||||
acc = append(acc, i.Next()...)
|
||||
}
|
||||
cmpResult(t, "Iter.Next", f, gold, test, string(acc))
|
||||
for i := range test {
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,190 @@
|
|||
package ssa
|
||||
|
||||
// Simple block optimisations to simplify the control flow graph.
|
||||
|
||||
// TODO(adonovan): instead of creating several "unreachable" blocks
|
||||
// per function in the Builder, reuse a single one (e.g. at Blocks[1])
|
||||
// to reduce garbage.
|
||||
//
|
||||
// TODO(adonovan): in the absence of multiway branch instructions,
|
||||
// each BasicBlock has 0, 1, or 2 successors. We should preallocate
|
||||
// the backing array for the Succs slice inline in BasicBlock.
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
)
|
||||
|
||||
// If true, perform sanity checking and show progress at each
|
||||
// successive iteration of optimizeBlocks. Very verbose.
|
||||
const debugBlockOpt = false
|
||||
|
||||
func hasPhi(b *BasicBlock) bool {
|
||||
_, ok := b.Instrs[0].(*Phi)
|
||||
return ok
|
||||
}
|
||||
|
||||
// prune attempts to prune block b if it is unreachable (i.e. has no
|
||||
// predecessors other than itself), disconnecting it from the CFG.
|
||||
// The result is true if the optimisation was applied. i is the block
|
||||
// index within the function.
|
||||
//
|
||||
func prune(f *Function, i int, b *BasicBlock) bool {
|
||||
if i == 0 {
|
||||
return false // don't prune entry block
|
||||
}
|
||||
if len(b.Preds) == 0 || len(b.Preds) == 1 && b.Preds[0] == b {
|
||||
// Disconnect it from its successors.
|
||||
for _, c := range b.Succs {
|
||||
c.removePred(b)
|
||||
}
|
||||
if debugBlockOpt {
|
||||
fmt.Fprintln(os.Stderr, "prune", b.Name)
|
||||
}
|
||||
|
||||
// Delete b.
|
||||
f.Blocks[i] = nil
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// jumpThreading attempts to apply simple jump-threading to block b,
|
||||
// in which a->b->c become a->c if b is just a Jump.
|
||||
// The result is true if the optimisation was applied.
|
||||
// i is the block index within the function.
|
||||
//
|
||||
func jumpThreading(f *Function, i int, b *BasicBlock) bool {
|
||||
if i == 0 {
|
||||
return false // don't apply to entry block
|
||||
}
|
||||
if b.Instrs == nil {
|
||||
fmt.Println("empty block ", b.Name)
|
||||
return false
|
||||
}
|
||||
if _, ok := b.Instrs[0].(*Jump); !ok {
|
||||
return false // not just a jump
|
||||
}
|
||||
c := b.Succs[0]
|
||||
if c == b {
|
||||
return false // don't apply to degenerate jump-to-self.
|
||||
}
|
||||
if hasPhi(c) {
|
||||
return false // not sound without more effort
|
||||
}
|
||||
for j, a := range b.Preds {
|
||||
a.replaceSucc(b, c)
|
||||
|
||||
// If a now has two edges to c, replace its degenerate If by Jump.
|
||||
if len(a.Succs) == 2 && a.Succs[0] == c && a.Succs[1] == c {
|
||||
jump := new(Jump)
|
||||
jump.SetBlock(a)
|
||||
a.Instrs[len(a.Instrs)-1] = jump
|
||||
a.Succs = a.Succs[:1]
|
||||
c.removePred(b)
|
||||
} else {
|
||||
if j == 0 {
|
||||
c.replacePred(b, a)
|
||||
} else {
|
||||
c.Preds = append(c.Preds, a)
|
||||
}
|
||||
}
|
||||
|
||||
if debugBlockOpt {
|
||||
fmt.Fprintln(os.Stderr, "jumpThreading", a.Name, b.Name, c.Name)
|
||||
}
|
||||
}
|
||||
f.Blocks[i] = nil
|
||||
return true
|
||||
}
|
||||
|
||||
// fuseBlocks attempts to apply the block fusion optimisation to block
// a, in which a->b becomes ab if len(a.Succs)==len(b.Preds)==1.
// The result is true if the optimisation was applied.
//
func fuseBlocks(f *Function, a *BasicBlock) bool {
	// NOTE(review): f is currently unused here; presumably kept for
	// signature symmetry with prune and jumpThreading.
	if len(a.Succs) != 1 {
		return false
	}
	b := a.Succs[0]
	if len(b.Preds) != 1 {
		return false
	}
	// Eliminate jump at end of A, then copy all of B across.
	// (The last instruction of a must be the jump to b, since b is
	// a's sole successor.)
	a.Instrs = append(a.Instrs[:len(a.Instrs)-1], b.Instrs...)
	for _, instr := range b.Instrs {
		instr.SetBlock(a)
	}

	// A inherits B's successors
	a.Succs = b.Succs

	// Fix up Preds links of all successors of B.
	for _, c := range b.Succs {
		c.replacePred(b, a)
	}

	if debugBlockOpt {
		fmt.Fprintln(os.Stderr, "fuseBlocks", a.Name, b.Name)
	}

	// Make b unreachable.  Subsequent pruning will reclaim it.
	b.Preds = nil
	return true
}
|
||||
|
||||
// optimizeBlocks() performs some simple block optimizations on a
// completed function: dead block elimination, block fusion, jump
// threading.
//
// The three passes are iterated until a fixed point is reached, then
// the Blocks slice is compacted to squeeze out the nil entries left
// behind by deleted blocks.
//
func optimizeBlocks(f *Function) {
	// Loop until no further progress.
	changed := true
	for changed {
		changed = false

		if debugBlockOpt {
			f.DumpTo(os.Stderr)
			MustSanityCheck(f, nil)
		}

		for i, b := range f.Blocks {
			// f.Blocks will temporarily contain nils to indicate
			// deleted blocks; we remove them at the end.
			if b == nil {
				continue
			}

			// Prune unreachable blocks (including all empty blocks).
			if prune(f, i, b) {
				changed = true
				continue // (b was pruned)
			}

			// Fuse blocks.  b->c becomes bc.
			if fuseBlocks(f, b) {
				changed = true
			}

			// a->b->c becomes a->c if b contains only a Jump.
			if jumpThreading(f, i, b) {
				changed = true
				continue // (b was disconnected)
			}
		}
	}

	// Eliminate nils from Blocks, preserving order.
	j := 0
	for _, b := range f.Blocks {
		if b != nil {
			f.Blocks[j] = b
			j++
		}
	}
	// Nil out b.Blocks[j:] to aid GC.
	for i := j; i < len(f.Blocks); i++ {
		f.Blocks[i] = nil
	}
	f.Blocks = f.Blocks[:j]
}
|
|
@ -0,0 +1,113 @@
|
|||
// Package ssa defines a representation of the elements of Go programs
|
||||
// (packages, types, functions, variables and constants) using a
|
||||
// static single-assignment (SSA) form intermediate representation
|
||||
// (IR) for the bodies of functions.
|
||||
//
|
||||
// THIS INTERFACE IS EXPERIMENTAL AND IS LIKELY TO CHANGE.
|
||||
//
|
||||
// For an introduction to SSA form, see
|
||||
// http://en.wikipedia.org/wiki/Static_single_assignment_form.
|
||||
// This page provides a broader reading list:
|
||||
// http://www.dcs.gla.ac.uk/~jsinger/ssa.html.
|
||||
//
|
||||
// The level of abstraction of the SSA form is intentionally close to
|
||||
// the source language to facilitate construction of source analysis
|
||||
// tools. It is not primarily intended for machine code generation.
|
||||
//
|
||||
// All looping, branching and switching constructs are replaced with
|
||||
// unstructured control flow. We may add higher-level control flow
|
||||
// primitives in the future to facilitate constant-time dispatch of
|
||||
// switch statements, for example.
|
||||
//
|
||||
// Builder encapsulates the tasks of type-checking (using go/types)
|
||||
// abstract syntax trees (as defined by go/ast) for the source files
|
||||
// comprising a Go program, and the conversion of each function from
|
||||
// Go ASTs to the SSA representation.
|
||||
//
|
||||
// By supplying an instance of the SourceLocator function prototype,
|
||||
// clients may control how the builder locates, loads and parses Go
|
||||
// sources files for imported packages. This package provides
|
||||
// GorootLoader, which uses go/build to locate packages in the Go
|
||||
// source distribution, and go/parser to parse them.
|
||||
//
|
||||
// The builder initially builds a naive SSA form in which all local
|
||||
// variables are addresses of stack locations with explicit loads and
|
||||
// stores. If desired, registerisation and φ-node insertion using
|
||||
// dominance and dataflow can be performed as a later pass to improve
|
||||
// the accuracy and performance of subsequent analyses; this pass is
|
||||
// not yet implemented.
|
||||
//
|
||||
// The program representation constructed by this package is fully
|
||||
// resolved internally, i.e. it does not rely on the names of Values,
|
||||
// Packages, Functions, Types or BasicBlocks for the correct
|
||||
// interpretation of the program. Only the identities of objects and
|
||||
// the topology of the SSA and type graphs are semantically
|
||||
// significant. (There is one exception: Ids, used to identify field
|
||||
// and method names, contain strings.) Avoidance of name-based
|
||||
// operations simplifies the implementation of subsequent passes and
|
||||
// can make them very efficient. Many objects are nonetheless named
|
||||
// to aid in debugging, but it is not essential that the names be
|
||||
// either accurate or unambiguous. The public API exposes a number of
|
||||
// name-based maps for client convenience.
|
||||
//
|
||||
// Given a Go source package such as this:
|
||||
//
|
||||
// package main
|
||||
//
|
||||
// import "fmt"
|
||||
//
|
||||
// const message = "Hello, World!"
|
||||
//
|
||||
// func hello() {
|
||||
// fmt.Println(message)
|
||||
// }
|
||||
//
|
||||
// The SSA Builder creates a *Program containing a main *Package such
|
||||
// as this:
|
||||
//
|
||||
// Package(Name: "main")
|
||||
// Members:
|
||||
// "message": *Literal (Type: untyped string, Value: "Hello, World!")
|
||||
// "init·guard": *Global (Type: *bool)
|
||||
// "hello": *Function (Type: func())
|
||||
// Init: *Function (Type: func())
|
||||
//
|
||||
// The printed representation of the function main.hello is shown
|
||||
// below. Within the function listing, the name of each BasicBlock
|
||||
// such as ".0.entry" is printed left-aligned, followed by the block's
|
||||
// instructions, i.e. implementations of Instruction.
|
||||
// For each instruction that defines an SSA virtual register
|
||||
// (i.e. implements Value), the type of that value is shown in the
|
||||
// right column.
|
||||
//
|
||||
// # Name: main.hello
|
||||
// # Declared at hello.go:7:6
|
||||
// # Type: func()
|
||||
// func hello():
|
||||
// .0.entry:
|
||||
// t0 = new [1]interface{} *[1]interface{}
|
||||
// t1 = &t0[0:untyped integer] *interface{}
|
||||
// t2 = make interface interface{} <- string ("Hello, World!":string) interface{}
|
||||
// *t1 = t2
|
||||
// t3 = slice t0[:] []interface{}
|
||||
// t4 = fmt.Println(t3) (n int, err error)
|
||||
// ret
|
||||
//
|
||||
// TODO(adonovan): demonstrate more features in the example:
|
||||
// parameters and control flow at the least.
|
||||
//
|
||||
// TODO(adonovan): Consider how token.Pos source location information
|
||||
// should be made available generally. Currently it is only present in
|
||||
// Package, Function and CallCommon.
|
||||
//
|
||||
// TODO(adonovan): Provide an example skeleton application that loads
|
||||
// and dumps the SSA form of a program. Accommodate package-at-a-time
|
||||
// vs. whole-program operation.
|
||||
//
|
||||
// TODO(adonovan): Consider the exceptional control-flow implications
|
||||
// of defer and recover().
|
||||
//
|
||||
// TODO(adonovan): build tables/functions that relate source variables
|
||||
// to SSA variables to assist user interfaces that make queries about
|
||||
// specific source entities.
|
||||
package ssa
|
|
@ -0,0 +1,416 @@
|
|||
package ssa
|
||||
|
||||
// This file implements the Function and BasicBlock types.
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"go/ast"
|
||||
"go/types"
|
||||
"io"
|
||||
"os"
|
||||
)
|
||||
|
||||
// BuilderMode is a bitmask of options for diagnostics and checking.
// TODO(adonovan): move these to builder.go once submitted.
type BuilderMode uint

const (
	LogPackages          BuilderMode = 1 << iota // Dump package inventory to stderr
	LogFunctions                                 // Dump function SSA code to stderr
	LogSource                                    // Show source locations as SSA builder progresses
	SanityCheckFunctions                         // Perform sanity checking of function bodies
	UseGCImporter                                // Ignore SourceLoader; use gc-compiled object code for all imports
)
|
||||
|
||||
// addEdge adds a control-flow graph edge from from to to, updating
// both the successor list of from and the predecessor list of to so
// the two adjacency lists stay consistent.
func addEdge(from, to *BasicBlock) {
	from.Succs = append(from.Succs, to)
	to.Preds = append(to.Preds, from)
}
|
||||
|
||||
// emit appends an instruction to the current basic block, setting the
// instruction's owning block in the process.
// If the instruction defines a Value, it is returned; otherwise the
// result is nil.
//
func (b *BasicBlock) emit(i Instruction) Value {
	i.SetBlock(b)
	b.Instrs = append(b.Instrs, i)
	v, _ := i.(Value)
	return v
}
|
||||
|
||||
// phis returns the prefix of b.Instrs containing all the block's φ-nodes.
|
||||
func (b *BasicBlock) phis() []Instruction {
|
||||
for i, instr := range b.Instrs {
|
||||
if _, ok := instr.(*Phi); !ok {
|
||||
return b.Instrs[:i]
|
||||
}
|
||||
}
|
||||
return nil // unreachable in well-formed blocks
|
||||
}
|
||||
|
||||
// replacePred replaces all occurrences of p in b's predecessor list with q.
|
||||
// Ordinarily there should be at most one.
|
||||
//
|
||||
func (b *BasicBlock) replacePred(p, q *BasicBlock) {
|
||||
for i, pred := range b.Preds {
|
||||
if pred == p {
|
||||
b.Preds[i] = q
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// replaceSucc replaces all occurrences of p in b's successor list with q.
|
||||
// Ordinarily there should be at most one.
|
||||
//
|
||||
func (b *BasicBlock) replaceSucc(p, q *BasicBlock) {
|
||||
for i, succ := range b.Succs {
|
||||
if succ == p {
|
||||
b.Succs[i] = q
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// removePred removes all occurrences of p in b's
// predecessor list and φ-nodes.
// Ordinarily there should be at most one.
//
// b.Preds and the Edges slice of each φ-node are parallel arrays
// indexed by predecessor, so both must be compacted in lockstep to
// preserve edge order.
//
func (b *BasicBlock) removePred(p *BasicBlock) {
	phis := b.phis()

	// We must preserve edge order for φ-nodes.
	j := 0
	for i, pred := range b.Preds {
		if pred != p {
			b.Preds[j] = b.Preds[i]
			// Strike out φ-edge too.
			for _, instr := range phis {
				phi := instr.(*Phi)
				phi.Edges[j] = phi.Edges[i]
			}
			j++
		}
	}
	// Nil out b.Preds[j:] and φ-edges[j:] to aid GC.
	for i := j; i < len(b.Preds); i++ {
		b.Preds[i] = nil
		for _, instr := range phis {
			instr.(*Phi).Edges[i] = nil
		}
	}
	b.Preds = b.Preds[:j]
	for _, instr := range phis {
		phi := instr.(*Phi)
		phi.Edges = phi.Edges[:j]
	}
}
|
||||
|
||||
// Destinations associated with unlabelled for/switch/select stmts.
// We push/pop one of these as we enter/leave each construct and for
// each BranchStmt we scan for the innermost target of the right type.
//
type targets struct {
	tail         *targets // rest of stack
	_break       *BasicBlock
	_continue    *BasicBlock
	_fallthrough *BasicBlock
}

// Destinations associated with a labelled block.
// We populate these as labels are encountered in forward gotos or
// labelled statements.
//
type lblock struct {
	_goto     *BasicBlock
	_break    *BasicBlock
	_continue *BasicBlock
}

// funcSyntax holds the syntax tree for the function declaration and body.
type funcSyntax struct {
	recvField    *ast.FieldList // receiver declaration, when present
	paramFields  *ast.FieldList // parameter declarations
	resultFields *ast.FieldList // result declarations
	body         *ast.BlockStmt // function body
}
|
||||
|
||||
// labelledBlock returns the branch target associated with the
|
||||
// specified label, creating it if needed.
|
||||
//
|
||||
func (f *Function) labelledBlock(label *ast.Ident) *lblock {
|
||||
lb := f.lblocks[label.Obj]
|
||||
if lb == nil {
|
||||
lb = &lblock{_goto: f.newBasicBlock("label." + label.Name)}
|
||||
f.lblocks[label.Obj] = lb
|
||||
}
|
||||
return lb
|
||||
}
|
||||
|
||||
// addParam adds a (non-escaping) parameter to f.Params of the
// specified name and type, and returns it.
//
// The parameter's type is the address of typ, mirroring the
// stack-slot representation used for locals.
//
func (f *Function) addParam(name string, typ types.Type) *Parameter {
	v := &Parameter{
		Name_: name,
		Type_: pointer(typ), // address of param
	}
	f.Params = append(f.Params, v)
	return v
}
|
||||
|
||||
// addObjParam adds a parameter for the type object obj, using obj's
// name and type, and records it in f.objects so later lookups of obj
// resolve to this parameter.
func (f *Function) addObjParam(obj types.Object) *Parameter {
	p := f.addParam(obj.GetName(), obj.GetType())
	f.objects[obj] = p
	return p
}
|
||||
|
||||
// start initializes the function prior to generating SSA code for its body.
// Precondition: f.Type() already set.
//
// If f.syntax != nil, f is a Go source function and idents must be a
// mapping from syntactic identifiers to their canonical type objects;
// Otherwise, idents is ignored and the usual set-up for Go source
// functions is skipped.
//
func (f *Function) start(mode BuilderMode, idents map[*ast.Ident]types.Object) {
	if mode&LogSource != 0 {
		fmt.Fprintf(os.Stderr, "build function %s @ %s\n", f.FullName(), f.Prog.Files.Position(f.Pos))
	}
	f.currentBlock = f.newBasicBlock("entry")
	f.objects = make(map[types.Object]Value) // needed for some synthetics, e.g. init
	if f.syntax == nil {
		return // synthetic function; no syntax tree
	}
	f.lblocks = make(map[*ast.Object]*lblock)

	// Receiver (at most one inner iteration).
	if f.syntax.recvField != nil {
		for _, field := range f.syntax.recvField.List {
			for _, n := range field.Names {
				f.addObjParam(idents[n])
			}
			// Unnamed receiver: fall back to the signature's
			// receiver name and type.
			if field.Names == nil {
				f.addParam(f.Signature.Recv.Name, f.Signature.Recv.Type)
			}
		}
	}

	// Parameters.
	if f.syntax.paramFields != nil {
		for _, field := range f.syntax.paramFields.List {
			for _, n := range field.Names {
				f.addObjParam(idents[n])
			}
		}
	}

	// Results.
	if f.syntax.resultFields != nil {
		for _, field := range f.syntax.resultFields.List {
			// Implicit "var" decl of locals for named results.
			for _, n := range field.Names {
				f.results = append(f.results, f.addNamedLocal(idents[n]))
			}
		}
	}
}
|
||||
|
||||
// finish() finalizes the function after SSA code generation of its body.
//
// It discards builder-only state, compacts f.Locals, assigns register
// names to value-defining instructions, runs the block optimizer, and
// emits the diagnostics selected by mode.
func (f *Function) finish(mode BuilderMode) {
	// Builder-only state is no longer needed.
	f.objects = nil
	f.results = nil
	f.currentBlock = nil
	f.lblocks = nil
	f.syntax = nil

	// Remove any f.Locals that are now heap-allocated.
	j := 0
	for _, l := range f.Locals {
		if !l.Heap {
			f.Locals[j] = l
			j++
		}
	}
	// Nil out f.Locals[j:] to aid GC.
	for i := j; i < len(f.Locals); i++ {
		f.Locals[i] = nil
	}
	f.Locals = f.Locals[:j]

	// Ensure all value-defining Instructions have register names.
	// (Non-Instruction Values are named at construction.)
	tmp := 0
	for _, b := range f.Blocks {
		for _, instr := range b.Instrs {
			switch instr := instr.(type) {
			case *Alloc:
				// Local Allocs may already be named.
				if instr.Name_ == "" {
					instr.Name_ = fmt.Sprintf("t%d", tmp)
					tmp++
				}
			case Value:
				instr.(interface {
					setNum(int)
				}).setNum(tmp)
				tmp++
			}
		}
	}
	optimizeBlocks(f)

	if mode&LogFunctions != 0 {
		f.DumpTo(os.Stderr)
	}
	if mode&SanityCheckFunctions != 0 {
		MustSanityCheck(f, nil)
	}
	if mode&LogSource != 0 {
		fmt.Fprintf(os.Stderr, "build function %s done\n", f.FullName())
	}
}
|
||||
|
||||
// addNamedLocal creates a local variable, adds it to function f and
// returns it.  Its name and type are taken from obj.  Subsequent
// calls to f.lookup(obj) will return the same local.
//
// Precondition: f.syntax != nil (i.e. a Go source function).
//
func (f *Function) addNamedLocal(obj types.Object) *Alloc {
	l := f.addLocal(obj.GetType())
	l.Name_ = obj.GetName()
	f.objects[obj] = l
	return l
}
|
||||
|
||||
// addLocal creates an anonymous local variable of type typ, adds it
// to function f and returns it.  The Alloc's value has pointer type
// (address of the slot), and the Alloc instruction is emitted into
// the current block.
//
func (f *Function) addLocal(typ types.Type) *Alloc {
	v := &Alloc{Type_: pointer(typ)}
	f.Locals = append(f.Locals, v)
	f.emit(v)
	return v
}
|
||||
|
||||
// lookup returns the address of the named variable identified by obj
// that is local to function f or one of its enclosing functions.
// If escaping, the reference comes from a potentially escaping pointer
// expression and the referent must be heap-allocated.
//
func (f *Function) lookup(obj types.Object, escaping bool) Value {
	if v, ok := f.objects[obj]; ok {
		if escaping {
			// Mark the referent heap-allocated.
			switch v := v.(type) {
			case *Capture:
				// TODO(adonovan): fix: we must support this case.
				// Requires copying to a 'new' Alloc.
				fmt.Fprintln(os.Stderr, "Error: escaping reference to Capture")
			case *Parameter:
				v.Heap = true
			case *Alloc:
				v.Heap = true
			default:
				panic(fmt.Sprintf("Unexpected Function.objects kind: %T", v))
			}
		}
		return v // function-local var (address)
	}

	// Definition must be in an enclosing function;
	// plumb it through intervening closures.
	if f.Enclosing == nil {
		panic("no Value for type.Object " + obj.GetName())
	}
	// Recursing with escaping=true forces the outer referent onto
	// the heap; memoize the Capture so it is created only once.
	v := &Capture{f.Enclosing.lookup(obj, true)} // escaping
	f.objects[obj] = v
	f.FreeVars = append(f.FreeVars, v)
	return v
}
|
||||
|
||||
// emit emits the specified instruction to function f's current basic
// block, updating the control-flow graph if required.  It returns the
// instruction as a Value if it defines one, else nil.
//
func (f *Function) emit(instr Instruction) Value {
	return f.currentBlock.emit(instr)
}
|
||||
|
||||
// DumpTo prints to w a human readable "disassembly" of the SSA code of
// all basic blocks of function f.
//
func (f *Function) DumpTo(w io.Writer) {
	fmt.Fprintf(w, "# Name: %s\n", f.FullName())
	fmt.Fprintf(w, "# Declared at %s\n", f.Prog.Files.Position(f.Pos))
	fmt.Fprintf(w, "# Type: %s\n", f.Type())

	if f.Enclosing != nil {
		fmt.Fprintf(w, "# Parent: %s\n", f.Enclosing.Name())
	}

	if f.FreeVars != nil {
		io.WriteString(w, "# Free variables:\n")
		for i, fv := range f.FreeVars {
			fmt.Fprintf(w, "# % 3d:\t%s %s\n", i, fv.Name(), fv.Type())
		}
	}

	// Print the signature line; the receiver, if any, is Params[0].
	params := f.Params
	if f.Signature.Recv != nil {
		fmt.Fprintf(w, "func (%s) %s(", params[0].Name(), f.Name())
		params = params[1:]
	} else {
		fmt.Fprintf(w, "func %s(", f.Name())
	}
	for i, v := range params {
		if i > 0 {
			io.WriteString(w, ", ")
		}
		io.WriteString(w, v.Name())
	}
	io.WriteString(w, "):\n")

	for _, b := range f.Blocks {
		if b == nil {
			// Corrupt CFG.
			fmt.Fprintf(w, ".nil:\n")
			continue
		}
		fmt.Fprintf(w, ".%s:\t\t\t\t\t\t\t P:%d S:%d\n", b.Name, len(b.Preds), len(b.Succs))
		if false { // CFG debugging
			fmt.Fprintf(w, "\t# CFG: %s --> %s --> %s\n", blockNames(b.Preds), b.Name, blockNames(b.Succs))
		}
		for _, instr := range b.Instrs {
			io.WriteString(w, "\t")
			if v, ok := instr.(Value); ok {
				l := 80 // for old time's sake.
				// Left-align the instruction.
				if name := v.Name(); name != "" {
					n, _ := fmt.Fprintf(w, "%s = ", name)
					l -= n
				}
				n, _ := io.WriteString(w, instr.String())
				l -= n
				// Right-align the type.
				if t := v.Type(); t != nil {
					fmt.Fprintf(w, "%*s", l-9, t)
				}
			} else {
				io.WriteString(w, instr.String())
			}
			io.WriteString(w, "\n")
		}
	}
	fmt.Fprintf(w, "\n")
}
|
||||
|
||||
// newBasicBlock adds to f a new basic block with a unique name and
|
||||
// returns it. It does not automatically become the current block for
|
||||
// subsequent calls to emit.
|
||||
//
|
||||
func (f *Function) newBasicBlock(name string) *BasicBlock {
|
||||
b := &BasicBlock{
|
||||
Name: fmt.Sprintf("%d.%s", len(f.Blocks), name),
|
||||
Func: f,
|
||||
}
|
||||
f.Blocks = append(f.Blocks, b)
|
||||
return b
|
||||
}
|
|
@ -0,0 +1,137 @@
|
|||
package ssa
|
||||
|
||||
// This file defines the Literal SSA value type.
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"go/types"
|
||||
"math/big"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// newLiteral returns a new literal of the specified value and type.
// val must be valid according to the specification of Literal.Value.
//
func newLiteral(val interface{}, typ types.Type) *Literal {
	// This constructor exists to provide a single place to
	// insert logging/assertions during debugging.
	return &Literal{typ, val}
}
|
||||
|
||||
// intLiteral returns an untyped integer literal that evaluates to i.
func intLiteral(i int64) *Literal {
	return newLiteral(i, types.Typ[types.UntypedInt])
}
|
||||
|
||||
// nilLiteral returns a nil literal of the specified (reference) type,
// using types.NilType as the value representation.
func nilLiteral(typ types.Type) *Literal {
	return newLiteral(types.NilType{}, typ)
}
|
||||
|
||||
// Name returns a human-readable rendering of the literal in the form
// "value:type", e.g. `"Hello, World!...":untyped string`.  Long
// strings are abbreviated; rationals are printed with 20 decimal
// digits.  It panics on an unexpected value representation.
func (l *Literal) Name() string {
	var s string
	switch x := l.Value.(type) {
	case bool:
		s = fmt.Sprintf("%v", l.Value)
	case int64:
		s = fmt.Sprintf("%d", l.Value)
	case *big.Int:
		s = x.String()
	case *big.Rat:
		s = x.FloatString(20)
	case string:
		if len(x) > 20 {
			x = x[:17] + "..." // abbreviate
		}
		s = strconv.Quote(x)
	case types.Complex:
		r := x.Re.FloatString(20)
		i := x.Im.FloatString(20)
		s = fmt.Sprintf("%s+%si", r, i)
	case types.NilType:
		s = "nil"
	default:
		panic(fmt.Sprintf("unexpected literal value: %T", x))
	}
	return s + ":" + l.Type_.String()
}
|
||||
|
||||
// Type returns the type of the literal.
func (l *Literal) Type() types.Type {
	return l.Type_
}
|
||||
|
||||
// IsNil returns true if this literal represents a typed or untyped nil value.
|
||||
func (l *Literal) IsNil() bool {
|
||||
_, ok := l.Value.(types.NilType)
|
||||
return ok
|
||||
}
|
||||
|
||||
// Int64 returns the numeric value of this literal truncated to fit
// a signed 64-bit integer.  It panics on a non-numeric representation.
//
func (l *Literal) Int64() int64 {
	switch x := l.Value.(type) {
	case int64:
		return x
	case *big.Int:
		return x.Int64()
	case *big.Rat:
		// TODO(adonovan): fix: is this the right rounding mode?
		// Quo truncates toward zero.
		var q big.Int
		return q.Quo(x.Num(), x.Denom()).Int64()
	}
	panic(fmt.Sprintf("unexpected literal value: %T", l.Value))
}
|
||||
|
||||
// Uint64 returns the numeric value of this literal truncated to fit
// an unsigned 64-bit integer.  Negative int64 values are clamped to
// zero.  It panics on a non-numeric representation.
//
func (l *Literal) Uint64() uint64 {
	switch x := l.Value.(type) {
	case int64:
		if x < 0 {
			return 0
		}
		return uint64(x)
	case *big.Int:
		return x.Uint64()
	case *big.Rat:
		// TODO(adonovan): fix: is this right?
		var q big.Int
		return q.Quo(x.Num(), x.Denom()).Uint64()
	}
	panic(fmt.Sprintf("unexpected literal value: %T", l.Value))
}
|
||||
|
||||
// Float64 returns the numeric value of this literal truncated to fit
// a float64.  It panics on a non-numeric representation.
//
func (l *Literal) Float64() float64 {
	switch x := l.Value.(type) {
	case int64:
		return float64(x)
	case *big.Int:
		// Convert via big.Rat to get correct rounding.
		var r big.Rat
		f, _ := r.SetInt(x).Float64()
		return f
	case *big.Rat:
		f, _ := x.Float64()
		return f
	}
	panic(fmt.Sprintf("unexpected literal value: %T", l.Value))
}
|
||||
|
||||
// Complex128 returns the complex value of this literal truncated to
// fit a complex128.  Real-valued representations yield a zero
// imaginary part.  It panics on a non-numeric representation.
//
func (l *Literal) Complex128() complex128 {
	switch x := l.Value.(type) {
	case int64, *big.Int, *big.Rat:
		return complex(l.Float64(), 0)
	case types.Complex:
		re64, _ := x.Re.Float64()
		im64, _ := x.Im.Float64()
		return complex(re64, im64)
	}
	panic(fmt.Sprintf("unexpected literal value: %T", l.Value))
}
|
|
@ -0,0 +1,383 @@
|
|||
package ssa
|
||||
|
||||
// This file implements the String() methods for all Value and
|
||||
// Instruction types.
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"go/ast"
|
||||
"go/types"
|
||||
)
|
||||
|
||||
func (id Id) String() string {
|
||||
if id.Pkg == nil {
|
||||
return id.Name
|
||||
}
|
||||
return fmt.Sprintf("%s/%s", id.Pkg.Path, id.Name)
|
||||
}
|
||||
|
||||
// relName returns the name of v relative to i.
// In most cases, this is identical to v.Name(), but for cross-package
// references to Functions (including methods) and Globals, the
// package-qualified FullName is used instead.
//
func relName(v Value, i Instruction) string {
	switch v := v.(type) {
	case *Global:
		if v.Pkg == i.Block().Func.Pkg {
			return v.Name()
		}
		return v.FullName()
	case *Function:
		// Synthetic functions (Pkg == nil) use the bare name.
		if v.Pkg == nil || v.Pkg == i.Block().Func.Pkg {
			return v.Name()
		}
		return v.FullName()
	}
	return v.Name()
}
|
||||
|
||||
// Value.String()
//
// This method is provided only for debugging.
// It never appears in disassembly, which uses Value.Name().

// String renders the literal with its dynamic representation type.
func (v *Literal) String() string {
	return fmt.Sprintf("literal %s rep=%T", v.Name(), v.Value)
}

// String renders the parameter's name and type.
func (v *Parameter) String() string {
	return fmt.Sprintf("parameter %s : %s", v.Name(), v.Type())
}

// String renders the capture's name and type.
func (v *Capture) String() string {
	return fmt.Sprintf("capture %s : %s", v.Name(), v.Type())
}

// String renders the global's name and type.
func (v *Global) String() string {
	return fmt.Sprintf("global %s : %s", v.Name(), v.Type())
}

// String renders the builtin's name and type.
func (v *Builtin) String() string {
	return fmt.Sprintf("builtin %s : %s", v.Name(), v.Type())
}

// String renders the function's name and type.
func (r *Function) String() string {
	return fmt.Sprintf("function %s : %s", r.Name(), r.Type())
}
|
||||
|
||||
// FullName returns the name of this function qualified by the
// package name, unless it is anonymous or synthetic.
//
// TODO(adonovan): move to func.go when it's submitted.
//
func (f *Function) FullName() string {
	if f.Enclosing != nil || f.Pkg == nil {
		return f.Name_ // anonymous or synthetic
	}
	return fmt.Sprintf("%s.%s", f.Pkg.ImportPath, f.Name_)
}
|
||||
|
||||
// FullName returns g's package-qualified name.
// NOTE(review): unlike Function.FullName, this assumes g.Pkg is
// always non-nil — confirm globals are never synthetic.
func (g *Global) FullName() string {
	return fmt.Sprintf("%s.%s", g.Pkg.ImportPath, g.Name_)
}
|
||||
|
||||
// Instruction.String()
|
||||
|
||||
func (v *Alloc) String() string {
|
||||
op := "local"
|
||||
if v.Heap {
|
||||
op = "new"
|
||||
}
|
||||
return fmt.Sprintf("%s %s", op, indirectType(v.Type()))
|
||||
}
|
||||
|
||||
// String renders the φ-node as "phi [pred: edge, ...]", pairing each
// edge value with the name of the corresponding predecessor block.
func (v *Phi) String() string {
	var b bytes.Buffer
	b.WriteString("phi [")
	for i, edge := range v.Edges {
		if i > 0 {
			b.WriteString(", ")
		}
		// Be robust against malformed CFG.
		blockname := "?"
		if v.Block_ != nil && i < len(v.Block_.Preds) {
			blockname = v.Block_.Preds[i].Name
		}
		b.WriteString(blockname)
		b.WriteString(": ")
		b.WriteString(relName(edge, v))
	}
	b.WriteString("]")
	return b.String()
}
|
||||
|
||||
// printCall renders the common part of a call instruction: either a
// direct call "f(args...)" or, when v.Func is nil, a dynamic
// interface-method call "invoke recv.name [#m](args...)".
// prefix is prepended to the output (e.g. "go ", "defer ").
func printCall(v *CallCommon, prefix string, instr Instruction) string {
	var b bytes.Buffer
	b.WriteString(prefix)
	if v.Func != nil {
		b.WriteString(relName(v.Func, instr))
	} else {
		// Dynamic call: resolve the method name from the receiver's
		// interface type by index.
		name := underlyingType(v.Recv.Type()).(*types.Interface).Methods[v.Method].Name
		fmt.Fprintf(&b, "invoke %s.%s [#%d]", relName(v.Recv, instr), name, v.Method)
	}
	b.WriteString("(")
	for i, arg := range v.Args {
		if i > 0 {
			b.WriteString(", ")
		}
		b.WriteString(relName(arg, instr))
	}
	if v.HasEllipsis {
		b.WriteString("...")
	}
	b.WriteString(")")
	return b.String()
}
|
||||
|
||||
// String renders an ordinary call via printCall with no prefix.
func (v *Call) String() string {
	return printCall(&v.CallCommon, "", v)
}

// String renders a binary operation "x op y".
func (v *BinOp) String() string {
	return fmt.Sprintf("%s %s %s", relName(v.X, v), v.Op.String(), relName(v.Y, v))
}

// String renders a unary operation "opx", with a ",ok" suffix when
// the instruction uses comma-ok form.
func (v *UnOp) String() string {
	return fmt.Sprintf("%s%s%s", v.Op, relName(v.X, v), commaOk(v.CommaOk))
}

// String renders a value conversion, showing target and source types.
func (v *Conv) String() string {
	return fmt.Sprintf("convert %s <- %s (%s)", v.Type(), v.X.Type(), relName(v.X, v))
}

// String renders an interface-to-interface change.
func (v *ChangeInterface) String() string {
	return fmt.Sprintf("change interface %s <- %s (%s)", v.Type(), v.X.Type(), relName(v.X, v))
}

// String renders the boxing of a concrete value into an interface.
func (v *MakeInterface) String() string {
	return fmt.Sprintf("make interface %s <- %s (%s)", v.Type(), v.X.Type(), relName(v.X, v))
}
|
||||
|
||||
// String renders closure creation as "make closure fn [bindings...]",
// omitting the bracketed list when there are no free-variable bindings.
func (v *MakeClosure) String() string {
	var b bytes.Buffer
	fmt.Fprintf(&b, "make closure %s", relName(v.Fn, v))
	if v.Bindings != nil {
		b.WriteString(" [")
		for i, c := range v.Bindings {
			if i > 0 {
				b.WriteString(", ")
			}
			b.WriteString(relName(c, v))
		}
		b.WriteString("]")
	}
	return b.String()
}
|
||||
|
||||
func (v *MakeSlice) String() string {
|
||||
var b bytes.Buffer
|
||||
b.WriteString("make slice ")
|
||||
b.WriteString(v.Type().String())
|
||||
b.WriteString(" ")
|
||||
b.WriteString(relName(v.Len, v))
|
||||
b.WriteString(" ")
|
||||
b.WriteString(relName(v.Cap, v))
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func (v *Slice) String() string {
|
||||
var b bytes.Buffer
|
||||
b.WriteString("slice ")
|
||||
b.WriteString(relName(v.X, v))
|
||||
b.WriteString("[")
|
||||
if v.Low != nil {
|
||||
b.WriteString(relName(v.Low, v))
|
||||
}
|
||||
b.WriteString(":")
|
||||
if v.High != nil {
|
||||
b.WriteString(relName(v.High, v))
|
||||
}
|
||||
b.WriteString("]")
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func (v *MakeMap) String() string {
|
||||
res := ""
|
||||
if v.Reserve != nil {
|
||||
res = relName(v.Reserve, v)
|
||||
}
|
||||
return fmt.Sprintf("make %s %s", v.Type(), res)
|
||||
}
|
||||
|
||||
func (v *MakeChan) String() string {
|
||||
return fmt.Sprintf("make %s %s", v.Type(), relName(v.Size, v))
|
||||
}
|
||||
|
||||
func (v *FieldAddr) String() string {
|
||||
fields := underlyingType(indirectType(v.X.Type())).(*types.Struct).Fields
|
||||
// Be robust against a bad index.
|
||||
name := "?"
|
||||
if v.Field >= 0 && v.Field < len(fields) {
|
||||
name = fields[v.Field].Name
|
||||
}
|
||||
return fmt.Sprintf("&%s.%s [#%d]", relName(v.X, v), name, v.Field)
|
||||
}
|
||||
|
||||
func (v *Field) String() string {
|
||||
fields := underlyingType(v.X.Type()).(*types.Struct).Fields
|
||||
// Be robust against a bad index.
|
||||
name := "?"
|
||||
if v.Field >= 0 && v.Field < len(fields) {
|
||||
name = fields[v.Field].Name
|
||||
}
|
||||
return fmt.Sprintf("%s.%s [#%d]", relName(v.X, v), name, v.Field)
|
||||
}
|
||||
|
||||
func (v *IndexAddr) String() string {
|
||||
return fmt.Sprintf("&%s[%s]", relName(v.X, v), relName(v.Index, v))
|
||||
}
|
||||
|
||||
func (v *Index) String() string {
|
||||
return fmt.Sprintf("%s[%s]", relName(v.X, v), relName(v.Index, v))
|
||||
}
|
||||
|
||||
func (v *Lookup) String() string {
|
||||
return fmt.Sprintf("%s[%s]%s", relName(v.X, v), relName(v.Index, v), commaOk(v.CommaOk))
|
||||
}
|
||||
|
||||
func (v *Range) String() string {
|
||||
return "range " + relName(v.X, v)
|
||||
}
|
||||
|
||||
func (v *Next) String() string {
|
||||
return "next " + relName(v.Iter, v)
|
||||
}
|
||||
|
||||
func (v *TypeAssert) String() string {
|
||||
return fmt.Sprintf("typeassert%s %s.(%s)", commaOk(v.CommaOk), relName(v.X, v), v.AssertedType)
|
||||
}
|
||||
|
||||
func (v *Extract) String() string {
|
||||
return fmt.Sprintf("extract %s #%d", relName(v.Tuple, v), v.Index)
|
||||
}
|
||||
|
||||
func (s *Jump) String() string {
|
||||
// Be robust against malformed CFG.
|
||||
blockname := "?"
|
||||
if s.Block_ != nil && len(s.Block_.Succs) == 1 {
|
||||
blockname = s.Block_.Succs[0].Name
|
||||
}
|
||||
return fmt.Sprintf("jump %s", blockname)
|
||||
}
|
||||
|
||||
func (s *If) String() string {
|
||||
// Be robust against malformed CFG.
|
||||
tblockname, fblockname := "?", "?"
|
||||
if s.Block_ != nil && len(s.Block_.Succs) == 2 {
|
||||
tblockname = s.Block_.Succs[0].Name
|
||||
fblockname = s.Block_.Succs[1].Name
|
||||
}
|
||||
return fmt.Sprintf("if %s goto %s else %s", relName(s.Cond, s), tblockname, fblockname)
|
||||
}
|
||||
|
||||
func (s *Go) String() string {
|
||||
return printCall(&s.CallCommon, "go ", s)
|
||||
}
|
||||
|
||||
func (s *Ret) String() string {
|
||||
var b bytes.Buffer
|
||||
b.WriteString("ret")
|
||||
for i, r := range s.Results {
|
||||
if i == 0 {
|
||||
b.WriteString(" ")
|
||||
} else {
|
||||
b.WriteString(", ")
|
||||
}
|
||||
b.WriteString(relName(r, s))
|
||||
}
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func (s *Send) String() string {
|
||||
return fmt.Sprintf("send %s <- %s", relName(s.Chan, s), relName(s.X, s))
|
||||
}
|
||||
|
||||
func (s *Defer) String() string {
|
||||
return printCall(&s.CallCommon, "defer ", s)
|
||||
}
|
||||
|
||||
func (s *Select) String() string {
|
||||
var b bytes.Buffer
|
||||
for i, st := range s.States {
|
||||
if i > 0 {
|
||||
b.WriteString(", ")
|
||||
}
|
||||
if st.Dir == ast.RECV {
|
||||
b.WriteString("<-")
|
||||
b.WriteString(relName(st.Chan, s))
|
||||
} else {
|
||||
b.WriteString(relName(st.Chan, s))
|
||||
b.WriteString("<-")
|
||||
b.WriteString(relName(st.Send, s))
|
||||
}
|
||||
}
|
||||
non := ""
|
||||
if !s.Blocking {
|
||||
non = "non"
|
||||
}
|
||||
return fmt.Sprintf("select %sblocking [%s]", non, b.String())
|
||||
}
|
||||
|
||||
func (s *Store) String() string {
|
||||
return fmt.Sprintf("*%s = %s", relName(s.Addr, s), relName(s.Val, s))
|
||||
}
|
||||
|
||||
func (s *MapUpdate) String() string {
|
||||
return fmt.Sprintf("%s[%s] = %s", relName(s.Map, s), relName(s.Key, s), relName(s.Value, s))
|
||||
}
|
||||
|
||||
// String returns a human-readable summary of the package: a header
// line with import path and source file, then one line per member
// (const/func/type/var), with member names left-padded to a common
// width.  Type members additionally list their methods.
//
// NOTE(review): iteration over p.Members is map order, so the member
// listing is nondeterministic run to run (see TODOs below).
func (p *Package) String() string {
	// TODO(adonovan): prettify output.
	var b bytes.Buffer
	fmt.Fprintf(&b, "Package %s at %s:\n", p.ImportPath, p.Prog.Files.File(p.Pos).Name())

	// TODO(adonovan): make order deterministic.
	// First pass: compute the column width for name alignment.
	maxname := 0
	for name := range p.Members {
		if l := len(name); l > maxname {
			maxname = l
		}
	}

	// Second pass: print each member, dispatching on its concrete type.
	for name, mem := range p.Members {
		switch mem := mem.(type) {
		case *Literal:
			fmt.Fprintf(&b, "  const %-*s %s\n", maxname, name, mem.Name())

		case *Function:
			fmt.Fprintf(&b, "  func  %-*s %s\n", maxname, name, mem.Type())

		case *Type:
			fmt.Fprintf(&b, "  type  %-*s %s\n", maxname, name, mem.NamedType.Underlying)
			// TODO(adonovan): make order deterministic.
			for name, method := range mem.Methods {
				fmt.Fprintf(&b, "    method %s %s\n", name, method.Signature)
			}

		case *Global:
			fmt.Fprintf(&b, "  var   %-*s %s\n", maxname, name, mem.Type())

		}
	}
	return b.String()
}
|
||||
|
||||
// commaOk returns the ",ok" suffix used when printing instructions in
// comma-ok form, or the empty string otherwise.
func commaOk(x bool) string {
	if !x {
		return ""
	}
	return ",ok"
}
|
|
@ -0,0 +1,263 @@
|
|||
package ssa
|
||||
|
||||
// An optional pass for sanity checking invariants of the SSA representation.
|
||||
// Currently it checks CFG invariants but little at the instruction level.
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
)
|
||||
|
||||
// sanity holds the state of one sanity-checking pass over a function:
// where diagnostics go, which function/block is currently being
// examined, and whether any error (as opposed to warning) was found.
type sanity struct {
	reporter io.Writer   // destination for diagnostics; set before a check begins
	fn       *Function   // function currently under examination (nil between checks)
	block    *BasicBlock // block currently under examination (nil outside block checks)
	insane   bool        // true once any error (not merely a warning) is reported
}
|
||||
|
||||
// SanityCheck performs integrity checking of the SSA representation
|
||||
// of the function fn and returns true if it was valid. Diagnostics
|
||||
// are written to reporter if non-nil, os.Stderr otherwise. Some
|
||||
// diagnostics are only warnings and do not imply a negative result.
|
||||
//
|
||||
// Sanity checking is intended to facilitate the debugging of code
|
||||
// transformation passes.
|
||||
//
|
||||
func SanityCheck(fn *Function, reporter io.Writer) bool {
|
||||
if reporter == nil {
|
||||
reporter = os.Stderr
|
||||
}
|
||||
return (&sanity{reporter: reporter}).checkFunction(fn)
|
||||
}
|
||||
|
||||
// MustSanityCheck is like SanityCheck but panics instead of returning
|
||||
// a negative result.
|
||||
//
|
||||
func MustSanityCheck(fn *Function, reporter io.Writer) {
|
||||
if !SanityCheck(fn, reporter) {
|
||||
panic("SanityCheck failed")
|
||||
}
|
||||
}
|
||||
|
||||
// blockNames returns the names of the specified blocks as a
|
||||
// human-readable string.
|
||||
//
|
||||
func blockNames(blocks []*BasicBlock) string {
|
||||
var buf bytes.Buffer
|
||||
for i, b := range blocks {
|
||||
if i > 0 {
|
||||
io.WriteString(&buf, ", ")
|
||||
}
|
||||
io.WriteString(&buf, b.Name)
|
||||
}
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
func (s *sanity) diagnostic(prefix, format string, args ...interface{}) {
|
||||
fmt.Fprintf(s.reporter, "%s: function %s", prefix, s.fn.FullName())
|
||||
if s.block != nil {
|
||||
fmt.Fprintf(s.reporter, ", block %s", s.block.Name)
|
||||
}
|
||||
io.WriteString(s.reporter, ": ")
|
||||
fmt.Fprintf(s.reporter, format, args...)
|
||||
io.WriteString(s.reporter, "\n")
|
||||
}
|
||||
|
||||
func (s *sanity) errorf(format string, args ...interface{}) {
|
||||
s.insane = true
|
||||
s.diagnostic("Error", format, args...)
|
||||
}
|
||||
|
||||
func (s *sanity) warnf(format string, args ...interface{}) {
|
||||
s.diagnostic("Warning", format, args...)
|
||||
}
|
||||
|
||||
// findDuplicate returns an arbitrary basic block that appeared more
|
||||
// than once in blocks, or nil if all were unique.
|
||||
func findDuplicate(blocks []*BasicBlock) *BasicBlock {
|
||||
if len(blocks) < 2 {
|
||||
return nil
|
||||
}
|
||||
if blocks[0] == blocks[1] {
|
||||
return blocks[0]
|
||||
}
|
||||
// Slow path:
|
||||
m := make(map[*BasicBlock]bool)
|
||||
for _, b := range blocks {
|
||||
if m[b] {
|
||||
return b
|
||||
}
|
||||
m[b] = true
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// checkInstr checks the instruction at index idx within the current
// block, for all positions except the last (which is handled by
// checkFinalInstr).  Control-transfer instructions appearing here are
// therefore errors; Phi nodes must be contiguous at the top of the
// block and have one edge per predecessor.
func (s *sanity) checkInstr(idx int, instr Instruction) {
	switch instr := instr.(type) {
	case *If, *Jump, *Ret:
		// These may only appear as the final instruction of a block.
		s.errorf("control flow instruction not at end of block")
	case *Phi:
		if idx == 0 {
			// It suffices to apply this check to just the first phi node.
			if dup := findDuplicate(s.block.Preds); dup != nil {
				s.errorf("phi node in block with duplicate predecessor %s", dup.Name)
			}
		} else {
			// Every non-leading Phi must directly follow another Phi.
			prev := s.block.Instrs[idx-1]
			if _, ok := prev.(*Phi); !ok {
				s.errorf("Phi instruction follows a non-Phi: %T", prev)
			}
		}
		// Edge count must match predecessor count.
		if ne, np := len(instr.Edges), len(s.block.Preds); ne != np {
			s.errorf("phi node has %d edges but %d predecessors", ne, np)
		}

	// The remaining instruction kinds are legal anywhere in a block;
	// no per-instruction invariants are checked for them yet.
	case *Alloc:
	case *Call:
	case *BinOp:
	case *UnOp:
	case *MakeClosure:
	case *MakeChan:
	case *MakeMap:
	case *MakeSlice:
	case *Slice:
	case *Field:
	case *FieldAddr:
	case *IndexAddr:
	case *Index:
	case *Select:
	case *Range:
	case *TypeAssert:
	case *Extract:
	case *Go:
	case *Defer:
	case *Send:
	case *Store:
	case *MapUpdate:
	case *Next:
	case *Lookup:
	case *Conv:
	case *ChangeInterface:
	case *MakeInterface:
		// TODO(adonovan): implement checks.
	default:
		// A type not listed above indicates a bug in the SSA builder.
		panic(fmt.Sprintf("Unknown instruction type: %T", instr))
	}
}
|
||||
|
||||
// checkFinalInstr checks the final instruction of the current block:
// it must be a control-transfer instruction, and the block's successor
// count must agree with it (If: 2 distinct, Jump: 1, Ret: 0).
func (s *sanity) checkFinalInstr(idx int, instr Instruction) {
	switch instr.(type) {
	case *If:
		if nsuccs := len(s.block.Succs); nsuccs != 2 {
			s.errorf("If-terminated block has %d successors; expected 2", nsuccs)
			return
		}
		if s.block.Succs[0] == s.block.Succs[1] {
			// Both edges to the same block would make Phi edges ambiguous.
			s.errorf("If-instruction has same True, False target blocks: %s", s.block.Succs[0].Name)
			return
		}

	case *Jump:
		if nsuccs := len(s.block.Succs); nsuccs != 1 {
			s.errorf("Jump-terminated block has %d successors; expected 1", nsuccs)
			return
		}

	case *Ret:
		if nsuccs := len(s.block.Succs); nsuccs != 0 {
			s.errorf("Ret-terminated block has %d successors; expected none", nsuccs)
			return
		}
		// TODO(adonovan): check number and types of results

	default:
		s.errorf("non-control flow instruction at end of block")
	}
}
|
||||
|
||||
// checkBlock checks the invariants of a single basic block:
// reachability (warning only), duality of predecessor/successor edges,
// non-emptiness, correct Block() back-pointers, and per-instruction
// validity (non-final vs. final positions).  isEntry marks the
// function's entry block, which is implicitly reachable.
func (s *sanity) checkBlock(b *BasicBlock, isEntry bool) {
	s.block = b // so diagnostics name this block

	// Check all blocks are reachable.
	// (The entry block is always implicitly reachable.)
	if !isEntry && len(b.Preds) == 0 {
		s.warnf("unreachable block")
		if b.Instrs == nil {
			// Since this block is about to be pruned,
			// tolerating transient problems in it
			// simplifies other optimisations.
			return
		}
	}

	// Check predecessor and successor relations are dual:
	// every predecessor must list b among its successors...
	for _, a := range b.Preds {
		found := false
		for _, bb := range a.Succs {
			if bb == b {
				found = true
				break
			}
		}
		if !found {
			s.errorf("expected successor edge in predecessor %s; found only: %s", a.Name, blockNames(a.Succs))
		}
	}
	// ...and every successor must list b among its predecessors.
	for _, c := range b.Succs {
		found := false
		for _, bb := range c.Preds {
			if bb == b {
				found = true
				break
			}
		}
		if !found {
			s.errorf("expected predecessor edge in successor %s; found only: %s", c.Name, blockNames(c.Preds))
		}
	}

	// Check each instruction is sane.
	n := len(b.Instrs)
	if n == 0 {
		s.errorf("basic block contains no instructions")
	}
	for j, instr := range b.Instrs {
		// Each instruction must point back at its containing block.
		if b2 := instr.Block(); b2 == nil {
			s.errorf("nil Block() for instruction at index %d", j)
			continue
		} else if b2 != b {
			s.errorf("wrong Block() (%s) for instruction at index %d ", b2.Name, j)
			continue
		}
		// The final instruction has different invariants
		// (control transfer) than the rest.
		if j < n-1 {
			s.checkInstr(j, instr)
		} else {
			s.checkFinalInstr(j, instr)
		}
	}
}
|
||||
|
||||
// checkFunction checks every block of fn (index 0 being the entry
// block) and returns true if no errors were reported.  A nil block in
// fn.Blocks is only warned about, then skipped.
func (s *sanity) checkFunction(fn *Function) bool {
	// TODO(adonovan): check Function invariants:
	// - check owning Package (if any) contains this function.
	// - check params match signature
	// - check locals are all !Heap
	// - check transient fields are nil
	// - check block labels are unique (warning)
	s.fn = fn // so diagnostics name this function
	if fn.Prog == nil {
		s.errorf("nil Prog")
	}
	for i, b := range fn.Blocks {
		if b == nil {
			s.warnf("nil *BasicBlock at f.Blocks[%d]", i)
			continue
		}
		s.checkBlock(b, i == 0) // Blocks[0] is the entry block
	}
	// Clear transient state so a sanity value can be reused.
	s.block = nil
	s.fn = nil
	return !s.insane
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue