// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ebnf

import (
	"go/scanner"
	"go/token"
	"os"
	"strconv"
)
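
// parser holds the parsing state: the file set used for position
// information, the collected errors (scanner.ErrorVector), the scanner,
// and a one-token look-ahead.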
type parser struct {
	fset *token.FileSet
	scanner.ErrorVector
	scanner scanner.Scanner
	pos     token.Pos   // token position
	tok     token.Token // one token look-ahead
	lit     string      // token literal
}
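
// next advances to the next token. Keywords are mapped to IDENT so that
// they can be used as ordinary identifiers in a grammar.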
func (p *parser) next() {
	p.pos, p.tok, p.lit = p.scanner.Scan()
	if p.tok.IsKeyword() {
		// TODO Should keyword mapping always happen outside scanner?
		// Or should there be a flag to scanner to enable keyword mapping?
		p.tok = token.IDENT
	}
}
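
// error reports msg at the source position corresponding to pos.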
func (p *parser) error(pos token.Pos, msg string) {
	p.Error(p.fset.Position(pos), msg)
}
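
// errorExpected reports that msg was expected at pos; if pos is the
// current position, the offending token (and its literal, if any) is
// added to the message.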
func (p *parser) errorExpected(pos token.Pos, msg string) {
	msg = "expected " + msg
	if pos == p.pos {
		// the error happened at the current position;
		// make the error message more specific
		msg += ", found '" + p.tok.String() + "'"
		if p.tok.IsLiteral() {
			msg += " " + p.lit
		}
	}
	p.error(pos, msg)
}
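
// expect checks that the current token matches tok, reports an error
// otherwise, and advances to the next token in either case. It returns
// the position of the expected token.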
func (p *parser) expect(tok token.Token) token.Pos {
	pos := p.pos
	if p.tok != tok {
		p.errorExpected(pos, "'"+tok.String()+"'")
	}
	p.next() // make progress in any case
	return pos
}
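
// parseIdentifier parses an identifier and returns it as a Name node.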
func (p *parser) parseIdentifier() *Name {
	pos := p.pos
	name := p.lit
	p.expect(token.IDENT)
	return &Name{pos, name}
}
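
// parseToken parses a string literal and returns it, unquoted, as a
// Token node.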
func (p *parser) parseToken() *Token {
	pos := p.pos
	value := ""
	if p.tok == token.STRING {
		value, _ = strconv.Unquote(p.lit)
		// Unquote may fail with an error, but only if the scanner found
		// an illegal string in the first place. In this case the error
		// has already been reported.
		p.next()
	} else {
		p.expect(token.STRING)
	}
	return &Token{pos, value}
}
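
// A term is an identifier, a token (string literal), a token range
// ("a" … "z"), a parenthesized group, a bracketed option, or a braced
// repetition.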
// parseTerm returns nil if no term was found.
func (p *parser) parseTerm() (x Expression) {
	pos := p.pos

	switch p.tok {
	case token.IDENT:
		x = p.parseIdentifier()

	case token.STRING:
		tok := p.parseToken()
		x = tok
		const ellipsis = "…" // U+2026, the horizontal ellipsis character
		if p.tok == token.ILLEGAL && p.lit == ellipsis {
			p.next()
			x = &Range{tok, p.parseToken()}
		}

	case token.LPAREN:
		p.next()
		x = &Group{pos, p.parseExpression()}
		p.expect(token.RPAREN)

	case token.LBRACK:
		p.next()
		x = &Option{pos, p.parseExpression()}
		p.expect(token.RBRACK)

	case token.LBRACE:
		p.next()
		x = &Repetition{pos, p.parseExpression()}
		p.expect(token.RBRACE)
	}

	return x
}
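
// parseSequence parses a sequence of terms. A single term is returned
// unwrapped; an empty sequence yields a Bad node.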
func (p *parser) parseSequence() Expression {
	var list Sequence

	for x := p.parseTerm(); x != nil; x = p.parseTerm() {
		list = append(list, x)
	}

	// no need for a sequence if len(list) < 2
	switch len(list) {
	case 0:
		p.errorExpected(p.pos, "term")
		return &Bad{p.pos, "term expected"}
	case 1:
		return list[0]
	}

	return list
}
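
// parseExpression parses a list of sequences separated by '|'. A single
// alternative is returned unwrapped.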
func (p *parser) parseExpression() Expression {
	var list Alternative

	for {
		list = append(list, p.parseSequence())
		if p.tok != token.OR {
			break
		}
		p.next()
	}
	// len(list) > 0

	// no need for an Alternative node if len(list) < 2
	if len(list) == 1 {
		return list[0]
	}

	return list
}
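
// parseProduction parses a single production of the form
//	name = [ expression ] .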
func (p *parser) parseProduction() *Production {
	name := p.parseIdentifier()
	p.expect(token.ASSIGN)
	var expr Expression
	if p.tok != token.PERIOD {
		expr = p.parseExpression()
	}
	p.expect(token.PERIOD)
	return &Production{name, expr}
}
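
// parse parses src into a Grammar, mapping production names to
// productions. A duplicate production name is reported as an error.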
func (p *parser) parse(fset *token.FileSet, filename string, src []byte) Grammar {
	// initialize parser
	p.fset = fset
	p.ErrorVector.Reset()
	p.scanner.Init(fset.AddFile(filename, fset.Base(), len(src)), src, p, scanner.AllowIllegalChars)
	p.next() // initializes pos, tok, lit

	grammar := make(Grammar)
	for p.tok != token.EOF {
		prod := p.parseProduction()
		name := prod.Name.String
		if _, found := grammar[name]; !found {
			grammar[name] = prod
		} else {
			p.error(prod.Pos(), name+" declared already")
		}
	}

	return grammar
}

// Parse parses a set of EBNF productions from source src.
// It returns a set of productions. Errors are reported
// for incorrect syntax and if a production is declared
// more than once. Position information is recorded relative
// to the file set fset.
func Parse(fset *token.FileSet, filename string, src []byte) (Grammar, os.Error) {
	var p parser
	grammar := p.parse(fset, filename, src)
	return grammar, p.GetError(scanner.Sorted)
}
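
// parseExample is an illustrative sketch, not part of the original file:
// it shows how Parse is typically invoked. The file name "example.ebnf"
// and the grammar text below are made up for the example.
func parseExample() (Grammar, os.Error) {
	fset := token.NewFileSet()
	src := []byte(`Program = "begin" { Statement } "end" .
Statement = "print" .
`)
	// Syntax errors and duplicate production names are reported
	// through the returned os.Error.
	return Parse(fset, "example.ebnf", src)
}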