1 files changed, 208 insertions, 0 deletions
diff --git a/libgo/go/ebnf/parser.go b/libgo/go/ebnf/parser.go
new file mode 100644
index 000000000..c38530177
--- /dev/null
+++ b/libgo/go/ebnf/parser.go
@@ -0,0 +1,208 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package ebnf
+
+import (
+	"go/scanner"
+	"go/token"
+	"os"
+	"strconv"
+)
+
+
+type parser struct {
+	fset *token.FileSet
+	scanner.ErrorVector
+	scanner scanner.Scanner
+	pos     token.Pos   // token position
+	tok     token.Token // one token look-ahead
+	lit     []byte      // token literal
+}
+
+
+func (p *parser) next() {
+	p.pos, p.tok, p.lit = p.scanner.Scan()
+	if p.tok.IsKeyword() {
+		// TODO Should keyword mapping always happen outside scanner?
+		//      Or should there be a flag to scanner to enable keyword mapping?
+		p.tok = token.IDENT
+	}
+}
+
+
+func (p *parser) error(pos token.Pos, msg string) {
+	p.Error(p.fset.Position(pos), msg)
+}
+
+
+func (p *parser) errorExpected(pos token.Pos, msg string) {
+	msg = "expected " + msg
+	if pos == p.pos {
+		// the error happened at the current position;
+		// make the error message more specific
+		msg += ", found '" + p.tok.String() + "'"
+		if p.tok.IsLiteral() {
+			msg += " " + string(p.lit)
+		}
+	}
+	p.error(pos, msg)
+}
+
+
+func (p *parser) expect(tok token.Token) token.Pos {
+	pos := p.pos
+	if p.tok != tok {
+		p.errorExpected(pos, "'"+tok.String()+"'")
+	}
+	p.next() // make progress in any case
+	return pos
+}
+
+
+func (p *parser) parseIdentifier() *Name {
+	pos := p.pos
+	name := string(p.lit)
+	p.expect(token.IDENT)
+	return &Name{pos, name}
+}
+
+
+func (p *parser) parseToken() *Token {
+	pos := p.pos
+	value := ""
+	if p.tok == token.STRING {
+		value, _ = strconv.Unquote(string(p.lit))
+		// Unquote may fail with an error, but only if the scanner found
+		// an illegal string in the first place. In this case the error
+		// has already been reported.
+		p.next()
+	} else {
+		p.expect(token.STRING)
+	}
+	return &Token{pos, value}
+}
+
+
+func (p *parser) parseTerm() (x Expression) {
+	pos := p.pos
+
+	switch p.tok {
+	case token.IDENT:
+		x = p.parseIdentifier()
+
+	case token.STRING:
+		tok := p.parseToken()
+		x = tok
+		if p.tok == token.ELLIPSIS {
+			p.next()
+			x = &Range{tok, p.parseToken()}
+		}
+
+	case token.LPAREN:
+		p.next()
+		x = &Group{pos, p.parseExpression()}
+		p.expect(token.RPAREN)
+
+	case token.LBRACK:
+		p.next()
+		x = &Option{pos, p.parseExpression()}
+		p.expect(token.RBRACK)
+
+	case token.LBRACE:
+		p.next()
+		x = &Repetition{pos, p.parseExpression()}
+		p.expect(token.RBRACE)
+	}
+
+	return x
+}
+
+
+func (p *parser) parseSequence() Expression {
+	var list Sequence
+
+	for x := p.parseTerm(); x != nil; x = p.parseTerm() {
+		list = append(list, x)
+	}
+
+	// no need for a sequence if list.Len() < 2
+	switch len(list) {
+	case 0:
+		return nil
+	case 1:
+		return list[0]
+	}
+
+	return list
+}
+
+
+func (p *parser) parseExpression() Expression {
+	var list Alternative
+
+	for {
+		if x := p.parseSequence(); x != nil {
+			list = append(list, x)
+		}
+		if p.tok != token.OR {
+			break
+		}
+		p.next()
+	}
+
+	// no need for an Alternative node if list.Len() < 2
+	switch len(list) {
+	case 0:
+		return nil
+	case 1:
+		return list[0]
+	}
+
+	return list
+}
+
+
+func (p *parser) parseProduction() *Production {
+	name := p.parseIdentifier()
+	p.expect(token.ASSIGN)
+	expr := p.parseExpression()
+	p.expect(token.PERIOD)
+	return &Production{name, expr}
+}
+
+
+func (p *parser) parse(fset *token.FileSet, filename string, src []byte) Grammar {
+	// initialize parser
+	p.fset = fset
+	p.ErrorVector.Reset()
+	p.scanner.Init(fset.AddFile(filename, fset.Base(), len(src)), src, p, 0)
+	p.next() // initializes pos, tok, lit
+
+	grammar := make(Grammar)
+	for p.tok != token.EOF {
+		prod := p.parseProduction()
+		name := prod.Name.String
+		if _, found := grammar[name]; !found {
+			grammar[name] = prod
+		} else {
+			p.error(prod.Pos(), name+" declared already")
+		}
+	}
+
+	return grammar
+}
+
+
+// Parse parses a set of EBNF productions from source src.
+// It returns a set of productions. Errors are reported
+// for incorrect syntax and if a production is declared
+// more than once. Position information is recorded relative
+// to the file set fset.
+//
+func Parse(fset *token.FileSet, filename string, src []byte) (Grammar, os.Error) {
+	var p parser
+	grammar := p.parse(fset, filename, src)
+	return grammar, p.GetError(scanner.Sorted)
+}