// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package html

import (
	"bytes"
	"os"
	"testing"
)

type tokenTest struct {
	// A short description of the test case.
	desc string
	// The HTML to parse.
	html string
	// The string representations of the expected tokens.
	tokens []string
}

var tokenTests = []tokenTest{
	// A single text node. The tokenizer should not break text nodes on whitespace,
	// nor should it normalize whitespace within a text node.
	{
		"text",
		"foo  bar",
		[]string{
			"foo  bar",
		},
	},
	// An entity.
	{
		"entity",
		"one &lt; two",
		[]string{
			"one &lt; two",
		},
	},
	// A start, self-closing and end tag. The tokenizer does not care if the start
	// and end tokens don't match; that is the job of the parser.
	{
		"tags",
		"<a>b<c/>d</e>",
		[]string{
			"<a>",
			"b",
			"<c/>",
			"d",
			"</e>",
		},
	},
	// An attribute with a backslash.
	{
		"backslash",
		`<p id="a\"b">`,
		[]string{
			`<p id="a&quot;b">`,
		},
	},
	// Entities, tag name and attribute key lower-casing, and whitespace
	// normalization within a tag.
	{
		"tricky",
		"<p \t\n iD=\"a&quot;B\"  foo=\"bar\"><EM>te&lt;&amp;;xt</em></p>",
		[]string{
			`<p id="a&quot;B" foo="bar">`,
			"<em>",
			"te&lt;&amp;;xt",
			"</em>",
", }, }, // A non-existant entity. Tokenizing and converting back to a string should // escape the "&" to become "&". { "noSuchEntity", `
		`<a b="c&noSuchEntity;d">&lt;&alsoDoesntExist;&`,
		[]string{
			`<a b="c&amp;noSuchEntity;d">`,
			"&lt;&alsoDoesntExist;&amp;",
		},
	},
}

func TestTokenizer(t *testing.T) {
loop:
	for _, tt := range tokenTests {
		z := NewTokenizer(bytes.NewBuffer([]byte(tt.html)))
		for i, s := range tt.tokens {
			if z.Next() == Error {
				t.Errorf("%s token %d: want %q got error %v", tt.desc, i, s, z.Error())
				continue loop
			}
			actual := z.Token().String()
			if s != actual {
				t.Errorf("%s token %d: want %q got %q", tt.desc, i, s, actual)
				continue loop
			}
		}
		z.Next()
		if z.Error() != os.EOF {
			t.Errorf("%s: want EOF got %q", tt.desc, z.Token().String())
		}
	}
}

func TestUnescapeEscape(t *testing.T) {
	ss := []string{
		``,
		`abc def`,
		`a & b`,
		`a&amp;b`,
		`a &amp b`,
		`&quot;`,
		`"`,
		`"<&>"`,
		`&quot;&lt;&amp;&gt;&quot;`,
		`3&5==1 && 0<1, "0&lt;1", a+acute=&aacute;`,
	}
	for _, s := range ss {
		if s != UnescapeString(EscapeString(s)) {
			t.Errorf("s != UnescapeString(EscapeString(s)), s=%q", s)
		}
	}
}

// TestBufAPI exercises the []byte tokenizer API (Next, Text, TagName) by
// collecting the text that appears inside <a> elements.
func TestBufAPI(t *testing.T) {
	s := "0<a>1</a>2<b>3<a>4<b>5</b>6</b>7</a>8<a/>9"
	z := NewTokenizer(bytes.NewBuffer([]byte(s)))
	result := bytes.NewBuffer(nil)
	depth := 0
loop:
	for {
		tt := z.Next()
		switch tt {
		case Error:
			if z.Error() != os.EOF {
				t.Error(z.Error())
			}
			break loop
		case Text:
			if depth > 0 {
				result.Write(z.Text())
			}
		case StartTag, EndTag:
			tn, _ := z.TagName()
			if len(tn) == 1 && tn[0] == 'a' {
				if tt == StartTag {
					depth++
				} else {
					depth--
				}
			}
		}
	}
	u := "14567"
	v := string(result.Bytes())
	if u != v {
		t.Errorf("TestBufAPI: want %q got %q", u, v)
	}
}