2013-11-06 20:49:01 +01:00
|
|
|
// Copyright 2013 The Go Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
package html
|
|
|
|
|
2015-10-31 01:59:47 +01:00
|
|
|
import (
|
|
|
|
"strings"
|
|
|
|
"testing"
|
|
|
|
)
|
2013-11-06 20:49:01 +01:00
|
|
|
|
|
|
|
// unescapeTest is one table entry consumed by TestUnescape: an input that is
// passed to UnescapeString together with the text it is expected to produce.
type unescapeTest struct {
	desc      string // short description of the test case
	html      string // the HTML text given to UnescapeString
	unescaped string // the expected unescaped text
}
|
|
|
|
|
|
|
|
var unescapeTests = []unescapeTest{
|
|
|
|
// Handle no entities.
|
|
|
|
{
|
|
|
|
"copy",
|
|
|
|
"A\ttext\nstring",
|
|
|
|
"A\ttext\nstring",
|
|
|
|
},
|
|
|
|
// Handle simple named entities.
|
|
|
|
{
|
|
|
|
"simple",
|
|
|
|
"& > <",
|
|
|
|
"& > <",
|
|
|
|
},
|
|
|
|
// Handle hitting the end of the string.
|
|
|
|
{
|
|
|
|
"stringEnd",
|
|
|
|
"& &",
|
|
|
|
"& &",
|
|
|
|
},
|
|
|
|
// Handle entities with two codepoints.
|
|
|
|
{
|
|
|
|
"multiCodepoint",
|
|
|
|
"text ⋛︀ blah",
|
|
|
|
"text \u22db\ufe00 blah",
|
|
|
|
},
|
|
|
|
// Handle decimal numeric entities.
|
|
|
|
{
|
|
|
|
"decimalEntity",
|
|
|
|
"Delta = Δ ",
|
|
|
|
"Delta = Δ ",
|
|
|
|
},
|
|
|
|
// Handle hexadecimal numeric entities.
|
|
|
|
{
|
|
|
|
"hexadecimalEntity",
|
|
|
|
"Lambda = λ = λ ",
|
|
|
|
"Lambda = λ = λ ",
|
|
|
|
},
|
|
|
|
// Handle numeric early termination.
|
|
|
|
{
|
|
|
|
"numericEnds",
|
|
|
|
"&# &#x €43 © = ©f = ©",
|
|
|
|
"&# &#x €43 © = ©f = ©",
|
|
|
|
},
|
|
|
|
// Handle numeric ISO-8859-1 entity replacements.
|
|
|
|
{
|
|
|
|
"numericReplacements",
|
|
|
|
"Footnote‡",
|
|
|
|
"Footnote‡",
|
|
|
|
},
|
2014-06-05 01:15:33 +02:00
|
|
|
// Handle single ampersand.
|
|
|
|
{
|
|
|
|
"copySingleAmpersand",
|
|
|
|
"&",
|
|
|
|
"&",
|
|
|
|
},
|
|
|
|
// Handle ampersand followed by non-entity.
|
|
|
|
{
|
|
|
|
"copyAmpersandNonEntity",
|
|
|
|
"text &test",
|
|
|
|
"text &test",
|
|
|
|
},
|
|
|
|
// Handle "&#".
|
|
|
|
{
|
|
|
|
"copyAmpersandHash",
|
|
|
|
"text &#",
|
|
|
|
"text &#",
|
|
|
|
},
|
2013-11-06 20:49:01 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
func TestUnescape(t *testing.T) {
|
|
|
|
for _, tt := range unescapeTests {
|
|
|
|
unescaped := UnescapeString(tt.html)
|
|
|
|
if unescaped != tt.unescaped {
|
|
|
|
t.Errorf("TestUnescape %s: want %q, got %q", tt.desc, tt.unescaped, unescaped)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestUnescapeEscape(t *testing.T) {
|
|
|
|
ss := []string{
|
|
|
|
``,
|
|
|
|
`abc def`,
|
|
|
|
`a & b`,
|
|
|
|
`a&b`,
|
|
|
|
`a & b`,
|
|
|
|
`"`,
|
|
|
|
`"`,
|
|
|
|
`"<&>"`,
|
|
|
|
`"<&>"`,
|
|
|
|
`3&5==1 && 0<1, "0<1", a+acute=á`,
|
|
|
|
`The special characters are: <, >, &, ' and "`,
|
|
|
|
}
|
|
|
|
for _, s := range ss {
|
|
|
|
if got := UnescapeString(EscapeString(s)); got != s {
|
|
|
|
t.Errorf("got %q want %q", got, s)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2015-10-31 01:59:47 +01:00
|
|
|
|
|
|
|
// Inputs shared by the benchmarks below.
var (
	// benchEscapeData repeats a 40-byte chunk containing <, >, &, ' and ".
	benchEscapeData = strings.Repeat("AAAAA < BBBBB > CCCCC & DDDDD ' EEEEE \" ", 100)
	// benchEscapeNone has the same shape with each of those characters
	// replaced by the letter x.
	benchEscapeNone = strings.Repeat("AAAAA x BBBBB x CCCCC x DDDDD x EEEEE x ", 100)
	// benchUnescapeSparse is mostly plain text: one "&amp;" entity follows
	// every 400 bytes of entity-free text.
	benchUnescapeSparse = strings.Repeat(strings.Repeat("AAAAA x BBBBB x CCCCC x DDDDD x EEEEE x ", 10)+"&amp;", 10)
	// benchUnescapeDense consists almost entirely of entities.
	benchUnescapeDense = strings.Repeat("&amp;&lt; &amp; &lt;", 100)
)
|
|
|
|
|
|
|
|
func BenchmarkEscape(b *testing.B) {
|
|
|
|
n := 0
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
n += len(EscapeString(benchEscapeData))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func BenchmarkEscapeNone(b *testing.B) {
|
|
|
|
n := 0
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
n += len(EscapeString(benchEscapeNone))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func BenchmarkUnescape(b *testing.B) {
|
|
|
|
s := EscapeString(benchEscapeData)
|
|
|
|
n := 0
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
n += len(UnescapeString(s))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func BenchmarkUnescapeNone(b *testing.B) {
|
|
|
|
s := EscapeString(benchEscapeNone)
|
|
|
|
n := 0
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
n += len(UnescapeString(s))
|
|
|
|
}
|
|
|
|
}
|
2016-02-03 22:58:02 +01:00
|
|
|
|
|
|
|
func BenchmarkUnescapeSparse(b *testing.B) {
|
|
|
|
n := 0
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
n += len(UnescapeString(benchUnescapeSparse))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func BenchmarkUnescapeDense(b *testing.B) {
|
|
|
|
n := 0
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
n += len(UnescapeString(benchUnescapeDense))
|
|
|
|
}
|
|
|
|
}
|