From 554fd8c5195424bdbcabf5de30fdc183aba391bd Mon Sep 17 00:00:00 2001 From: upstream source tree Date: Sun, 15 Mar 2015 20:14:05 -0400 Subject: obtained gcc-4.6.4.tar.bz2 from upstream website; verified gcc-4.6.4.tar.bz2.sig; imported gcc-4.6.4 source tree from verified upstream tarball. downloading a git-generated archive based on the 'upstream' tag should provide you with a source tree that is binary identical to the one extracted from the above tarball. if you have obtained the source via the command 'git clone', however, do note that line-endings of files in your working directory might differ from line-endings of the respective files in the upstream repository. --- libgo/go/ebnf/ebnf.go | 248 +++++++++++++++++++++++++++++++++++++++++++++ libgo/go/ebnf/ebnf_test.go | 77 ++++++++++++++ libgo/go/ebnf/parser.go | 208 +++++++++++++++++++++++++++++++++++++ 3 files changed, 533 insertions(+) create mode 100644 libgo/go/ebnf/ebnf.go create mode 100644 libgo/go/ebnf/ebnf_test.go create mode 100644 libgo/go/ebnf/parser.go (limited to 'libgo/go/ebnf') diff --git a/libgo/go/ebnf/ebnf.go b/libgo/go/ebnf/ebnf.go new file mode 100644 index 000000000..e5aabd582 --- /dev/null +++ b/libgo/go/ebnf/ebnf.go @@ -0,0 +1,248 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// A library for EBNF grammars. The input is text ([]byte) satisfying +// the following grammar (represented itself in EBNF): +// +// Production = name "=" Expression "." . +// Expression = Alternative { "|" Alternative } . +// Alternative = Term { Term } . +// Term = name | token [ "..." token ] | Group | Option | Repetition . +// Group = "(" Expression ")" . +// Option = "[" Expression "]" . +// Repetition = "{" Expression "}" . +// +// A name is a Go identifier, a token is a Go string, and comments +// and white space follow the same rules as for the Go language. +// Production names starting with an uppercase Unicode letter denote +// non-terminal productions (i.e., productions which allow white-space +// and comments between tokens); all other production names denote +// lexical productions. +// +package ebnf + +import ( + "go/scanner" + "go/token" + "os" + "unicode" + "utf8" +) + + +// ---------------------------------------------------------------------------- +// Internal representation + +type ( + // An Expression node represents a production expression. + Expression interface { + // Pos is the position of the first character of the syntactic construct + Pos() token.Pos + } + + // An Alternative node represents a non-empty list of alternative expressions. + Alternative []Expression // x | y | z + + // A Sequence node represents a non-empty list of sequential expressions. + Sequence []Expression // x y z + + // A Name node represents a production name. + Name struct { + StringPos token.Pos + String string + } + + // A Token node represents a literal. + Token struct { + StringPos token.Pos + String string + } + + // A List node represents a range of characters. + Range struct { + Begin, End *Token // begin ... end + } + + // A Group node represents a grouped expression. + Group struct { + Lparen token.Pos + Body Expression // (body) + } + + // An Option node represents an optional expression. + Option struct { + Lbrack token.Pos + Body Expression // [body] + } + + // A Repetition node represents a repeated expression. + Repetition struct { + Lbrace token.Pos + Body Expression // {body} + } + + // A Production node represents an EBNF production. + Production struct { + Name *Name + Expr Expression + } + + // A Grammar is a set of EBNF productions. The map + // is indexed by production name. + // + Grammar map[string]*Production +) + + +func (x Alternative) Pos() token.Pos { return x[0].Pos() } // the parser always generates non-empty Alternative +func (x Sequence) Pos() token.Pos { return x[0].Pos() } // the parser always generates non-empty Sequences +func (x *Name) Pos() token.Pos { return x.StringPos } +func (x *Token) Pos() token.Pos { return x.StringPos } +func (x *Range) Pos() token.Pos { return x.Begin.Pos() } +func (x *Group) Pos() token.Pos { return x.Lparen } +func (x *Option) Pos() token.Pos { return x.Lbrack } +func (x *Repetition) Pos() token.Pos { return x.Lbrace } +func (x *Production) Pos() token.Pos { return x.Name.Pos() } + + +// ---------------------------------------------------------------------------- +// Grammar verification + +func isLexical(name string) bool { + ch, _ := utf8.DecodeRuneInString(name) + return !unicode.IsUpper(ch) +} + + +type verifier struct { + fset *token.FileSet + scanner.ErrorVector + worklist []*Production + reached Grammar // set of productions reached from (and including) the root production + grammar Grammar +} + + +func (v *verifier) error(pos token.Pos, msg string) { + v.Error(v.fset.Position(pos), msg) +} + + +func (v *verifier) push(prod *Production) { + name := prod.Name.String + if _, found := v.reached[name]; !found { + v.worklist = append(v.worklist, prod) + v.reached[name] = prod + } +} + + +func (v *verifier) verifyChar(x *Token) int { + s := x.String + if utf8.RuneCountInString(s) != 1 { + v.error(x.Pos(), "single char expected, found "+s) + return 0 + } + ch, _ := utf8.DecodeRuneInString(s) + return ch +} + + +func (v *verifier) verifyExpr(expr Expression, lexical bool) { + switch x := expr.(type) { + case nil: + // empty expression + case Alternative: + for _, e := range x { + v.verifyExpr(e, lexical) + } + case Sequence: + for _, e := range x { + v.verifyExpr(e, lexical) + } + case *Name: + // a production with this name must exist; + // add it to the worklist if not yet processed + if prod, found := v.grammar[x.String]; found { + v.push(prod) + } else { + v.error(x.Pos(), "missing production "+x.String) + } + // within a lexical production references + // to non-lexical productions are invalid + if lexical && !isLexical(x.String) { + v.error(x.Pos(), "reference to non-lexical production "+x.String) + } + case *Token: + // nothing to do for now + case *Range: + i := v.verifyChar(x.Begin) + j := v.verifyChar(x.End) + if i >= j { + v.error(x.Pos(), "decreasing character range") + } + case *Group: + v.verifyExpr(x.Body, lexical) + case *Option: + v.verifyExpr(x.Body, lexical) + case *Repetition: + v.verifyExpr(x.Body, lexical) + default: + panic("unreachable") + } +} + + +func (v *verifier) verify(fset *token.FileSet, grammar Grammar, start string) { + // find root production + root, found := grammar[start] + if !found { + // token.NoPos doesn't require a file set; + // ok to set v.fset only afterwards + v.error(token.NoPos, "no start production "+start) + return + } + + // initialize verifier + v.fset = fset + v.ErrorVector.Reset() + v.worklist = v.worklist[0:0] + v.reached = make(Grammar) + v.grammar = grammar + + // work through the worklist + v.push(root) + for { + n := len(v.worklist) - 1 + if n < 0 { + break + } + prod := v.worklist[n] + v.worklist = v.worklist[0:n] + v.verifyExpr(prod.Expr, isLexical(prod.Name.String)) + } + + // check if all productions were reached + if len(v.reached) < len(v.grammar) { + for name, prod := range v.grammar { + if _, found := v.reached[name]; !found { + v.error(prod.Pos(), name+" is unreachable") + } + } + } +} + + +// Verify checks that: +// - all productions used are defined +// - all productions defined are used when beginning at start +// - lexical productions refer only to other lexical productions +// +// Position information is interpreted relative to the file set fset. +// +func Verify(fset *token.FileSet, grammar Grammar, start string) os.Error { + var v verifier + v.verify(fset, grammar, start) + return v.GetError(scanner.Sorted) +} diff --git a/libgo/go/ebnf/ebnf_test.go b/libgo/go/ebnf/ebnf_test.go new file mode 100644 index 000000000..bbe530c27 --- /dev/null +++ b/libgo/go/ebnf/ebnf_test.go @@ -0,0 +1,77 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ebnf + +import ( + "go/token" + "io/ioutil" + "testing" +) + + +var fset = token.NewFileSet() + + +var grammars = []string{ + `Program = . + `, + + `Program = foo . + foo = "foo" . + `, + + `Program = "a" | "b" "c" . + `, + + `Program = "a" ... "z" . + `, + + `Program = Song . + Song = { Note } . + Note = Do | (Re | Mi | Fa | So | La) | Ti . + Do = "c" . + Re = "d" . + Mi = "e" . + Fa = "f" . + So = "g" . + La = "a" . + Ti = ti . + ti = "b" . + `, +} + + +func check(t *testing.T, filename string, src []byte) { + grammar, err := Parse(fset, filename, src) + if err != nil { + t.Errorf("Parse(%s) failed: %v", src, err) + } + if err = Verify(fset, grammar, "Program"); err != nil { + t.Errorf("Verify(%s) failed: %v", src, err) + } +} + + +func TestGrammars(t *testing.T) { + for _, src := range grammars { + check(t, "", []byte(src)) + } +} + + +var files = []string{ +// TODO(gri) add some test files +} + + +func TestFiles(t *testing.T) { + for _, filename := range files { + src, err := ioutil.ReadFile(filename) + if err != nil { + t.Fatal(err) + } + check(t, filename, src) + } +} diff --git a/libgo/go/ebnf/parser.go b/libgo/go/ebnf/parser.go new file mode 100644 index 000000000..c38530177 --- /dev/null +++ b/libgo/go/ebnf/parser.go @@ -0,0 +1,208 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ebnf + +import ( + "go/scanner" + "go/token" + "os" + "strconv" +) + + +type parser struct { + fset *token.FileSet + scanner.ErrorVector + scanner scanner.Scanner + pos token.Pos // token position + tok token.Token // one token look-ahead + lit []byte // token literal +} + + +func (p *parser) next() { + p.pos, p.tok, p.lit = p.scanner.Scan() + if p.tok.IsKeyword() { + // TODO Should keyword mapping always happen outside scanner? + // Or should there be a flag to scanner to enable keyword mapping? + p.tok = token.IDENT + } +} + + +func (p *parser) error(pos token.Pos, msg string) { + p.Error(p.fset.Position(pos), msg) +} + + +func (p *parser) errorExpected(pos token.Pos, msg string) { + msg = "expected " + msg + if pos == p.pos { + // the error happened at the current position; + // make the error message more specific + msg += ", found '" + p.tok.String() + "'" + if p.tok.IsLiteral() { + msg += " " + string(p.lit) + } + } + p.error(pos, msg) +} + + +func (p *parser) expect(tok token.Token) token.Pos { + pos := p.pos + if p.tok != tok { + p.errorExpected(pos, "'"+tok.String()+"'") + } + p.next() // make progress in any case + return pos +} + + +func (p *parser) parseIdentifier() *Name { + pos := p.pos + name := string(p.lit) + p.expect(token.IDENT) + return &Name{pos, name} +} + + +func (p *parser) parseToken() *Token { + pos := p.pos + value := "" + if p.tok == token.STRING { + value, _ = strconv.Unquote(string(p.lit)) + // Unquote may fail with an error, but only if the scanner found + // an illegal string in the first place. In this case the error + // has already been reported. + p.next() + } else { + p.expect(token.STRING) + } + return &Token{pos, value} +} + + +func (p *parser) parseTerm() (x Expression) { + pos := p.pos + + switch p.tok { + case token.IDENT: + x = p.parseIdentifier() + + case token.STRING: + tok := p.parseToken() + x = tok + if p.tok == token.ELLIPSIS { + p.next() + x = &Range{tok, p.parseToken()} + } + + case token.LPAREN: + p.next() + x = &Group{pos, p.parseExpression()} + p.expect(token.RPAREN) + + case token.LBRACK: + p.next() + x = &Option{pos, p.parseExpression()} + p.expect(token.RBRACK) + + case token.LBRACE: + p.next() + x = &Repetition{pos, p.parseExpression()} + p.expect(token.RBRACE) + } + + return x +} + + +func (p *parser) parseSequence() Expression { + var list Sequence + + for x := p.parseTerm(); x != nil; x = p.parseTerm() { + list = append(list, x) + } + + // no need for a sequence if list.Len() < 2 + switch len(list) { + case 0: + return nil + case 1: + return list[0] + } + + return list +} + + +func (p *parser) parseExpression() Expression { + var list Alternative + + for { + if x := p.parseSequence(); x != nil { + list = append(list, x) + } + if p.tok != token.OR { + break + } + p.next() + } + + // no need for an Alternative node if list.Len() < 2 + switch len(list) { + case 0: + return nil + case 1: + return list[0] + } + + return list +} + + +func (p *parser) parseProduction() *Production { + name := p.parseIdentifier() + p.expect(token.ASSIGN) + expr := p.parseExpression() + p.expect(token.PERIOD) + return &Production{name, expr} +} + + +func (p *parser) parse(fset *token.FileSet, filename string, src []byte) Grammar { + // initialize parser + p.fset = fset + p.ErrorVector.Reset() + p.scanner.Init(fset.AddFile(filename, fset.Base(), len(src)), src, p, 0) + p.next() // initializes pos, tok, lit + + grammar := make(Grammar) + for p.tok != token.EOF { + prod := p.parseProduction() + name := prod.Name.String + if _, found := grammar[name]; !found { + grammar[name] = prod + } else { + p.error(prod.Pos(), name+" declared already") + } + } + + return grammar +} + + +// Parse parses a set of EBNF productions from source src. +// It returns a set of productions. Errors are reported +// for incorrect syntax and if a production is declared +// more than once. Position information is recorded relative +// to the file set fset. +// +func Parse(fset *token.FileSet, filename string, src []byte) (Grammar, os.Error) { + var p parser + grammar := p.parse(fset, filename, src) + return grammar, p.GetError(scanner.Sorted) +} -- cgit v1.2.3