Diffstat (limited to 'libgo/go/exp/datafmt/parser.go')
-rw-r--r--   libgo/go/exp/datafmt/parser.go   386
1 file changed, 386 insertions, 0 deletions
diff --git a/libgo/go/exp/datafmt/parser.go b/libgo/go/exp/datafmt/parser.go
new file mode 100644
index 000000000..c6d140264
--- /dev/null
+++ b/libgo/go/exp/datafmt/parser.go
@@ -0,0 +1,386 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package datafmt
+
+import (
+ "container/vector"
+ "go/scanner"
+ "go/token"
+ "os"
+ "strconv"
+ "strings"
+)
+
+// ----------------------------------------------------------------------------
+// Parsing
+
+type parser struct {
+ scanner.ErrorVector
+ scanner scanner.Scanner
+ file *token.File
+ pos token.Pos // token position
+ tok token.Token // one token look-ahead
+ lit []byte // token literal
+
+ packs map[string]string // PackageName -> ImportPath
+ rules map[string]expr // RuleName -> Expression
+}
+
+
+func (p *parser) next() {
+ p.pos, p.tok, p.lit = p.scanner.Scan()
+ switch p.tok {
+ case token.CHAN, token.FUNC, token.INTERFACE, token.MAP, token.STRUCT:
+ // Go keywords for composite types are type names
+ // returned by reflect. Accept them as identifiers.
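+ // For example, a rule such as  map = ...  or  struct = ...
+ // is accepted here.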
+ p.tok = token.IDENT // p.lit is already set correctly
+ }
+}
+
+
+func (p *parser) init(fset *token.FileSet, filename string, src []byte) {
+ p.ErrorVector.Reset()
+ p.file = fset.AddFile(filename, fset.Base(), len(src))
+ p.scanner.Init(p.file, src, p, scanner.AllowIllegalChars) // return '@' as token.ILLEGAL w/o error message
+ p.next() // initializes pos, tok, lit
+ p.packs = make(map[string]string)
+ p.rules = make(map[string]expr)
+}
+
+
+func (p *parser) error(pos token.Pos, msg string) {
+ p.Error(p.file.Position(pos), msg)
+}
+
+
+func (p *parser) errorExpected(pos token.Pos, msg string) {
+ msg = "expected " + msg
+ if pos == p.pos {
+ // the error happened at the current position;
+ // make the error message more specific
+ msg += ", found '" + p.tok.String() + "'"
+ if p.tok.IsLiteral() {
+ msg += " " + string(p.lit)
+ }
+ }
+ p.error(pos, msg)
+}
+
+
+func (p *parser) expect(tok token.Token) token.Pos {
+ pos := p.pos
+ if p.tok != tok {
+ p.errorExpected(pos, "'"+tok.String()+"'")
+ }
+ p.next() // make progress in any case
+ return pos
+}
+
+
+func (p *parser) parseIdentifier() string {
+ name := string(p.lit)
+ p.expect(token.IDENT)
+ return name
+}
+
+
+func (p *parser) parseTypeName() (string, bool) {
+ pos := p.pos
+ name, isIdent := p.parseIdentifier(), true
+ if p.tok == token.PERIOD {
+ // got a package name, lookup package
+ if importPath, found := p.packs[name]; found {
+ name = importPath
+ } else {
+ p.error(pos, "package not declared: "+name)
+ }
+ p.next()
+ name, isIdent = name+"."+p.parseIdentifier(), false
+ }
+ return name, isIdent
+}
+
+
+// Parses a rule name and returns it. If the rule name is
+// a package-qualified type name, the package name is resolved.
+// The 2nd result value is true iff the rule name consists of a
+// single identifier only (and thus could be a package name).
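+// For example, given a preceding package declaration  ast "go/ast";
+// the rule name  ast.Expr  is returned as "go/ast.Expr".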
+//
+func (p *parser) parseRuleName() (string, bool) {
+ name, isIdent := "", false
+ switch p.tok {
+ case token.IDENT:
+ name, isIdent = p.parseTypeName()
+ case token.DEFAULT:
+ name = "default"
+ p.next()
+ case token.QUO:
+ name = "/"
+ p.next()
+ default:
+ p.errorExpected(p.pos, "rule name")
+ p.next() // make progress in any case
+ }
+ return name, isIdent
+}
+
+
+func (p *parser) parseString() string {
+ s := ""
+ if p.tok == token.STRING {
+ s, _ = strconv.Unquote(string(p.lit))
+ // Unquote may fail with an error, but only if the scanner found
+ // an illegal string in the first place. In this case the error
+ // has already been reported.
+ p.next()
+ return s
+ } else {
+ p.expect(token.STRING)
+ }
+ return s
+}
+
+
+func (p *parser) parseLiteral() literal {
+ s := []byte(p.parseString())
+
+ // A string literal may contain %-format specifiers. To simplify
+ // and speed up printing of the literal, split it into segments
+ // that start with "%", possibly preceded by a first segment that
+ // starts with some other character.
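+ // For example, "x = %d (%s)" is split into the three segments
+ // "x = ", "%d (", and "%s)"; a literal without format specifiers
+ // remains a single segment.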
+ var list vector.Vector
+ i0 := 0
+ for i := 0; i < len(s); i++ {
+ if s[i] == '%' && i+1 < len(s) {
+ // the next segment starts with a % format
+ if i0 < i {
+ // the current segment is not empty, split it off
+ list.Push(s[i0:i])
+ i0 = i
+ }
+ i++ // skip %; let loop skip over char after %
+ }
+ }
+ // the final segment may start with any character
+ // (it is empty iff the string is empty)
+ list.Push(s[i0:])
+
+ // convert list into a literal
+ lit := make(literal, list.Len())
+ for i := 0; i < list.Len(); i++ {
+ lit[i] = list.At(i).([]byte)
+ }
+
+ return lit
+}
+
+
+func (p *parser) parseField() expr {
+ var fname string
+ switch p.tok {
+ case token.ILLEGAL:
+ if string(p.lit) != "@" {
+ return nil
+ }
+ fname = "@"
+ p.next()
+ case token.MUL:
+ fname = "*"
+ p.next()
+ case token.IDENT:
+ fname = p.parseIdentifier()
+ default:
+ return nil
+ }
+
+ var ruleName string
+ if p.tok == token.COLON {
+ p.next()
+ ruleName, _ = p.parseRuleName()
+ }
+
+ return &field{fname, ruleName}
+}
+
+
+func (p *parser) parseOperand() (x expr) {
+ switch p.tok {
+ case token.STRING:
+ x = p.parseLiteral()
+
+ case token.LPAREN:
+ p.next()
+ x = p.parseExpression()
+ if p.tok == token.SHR {
+ p.next()
+ x = &group{x, p.parseExpression()}
+ }
+ p.expect(token.RPAREN)
+
+ case token.LBRACK:
+ p.next()
+ x = &option{p.parseExpression()}
+ p.expect(token.RBRACK)
+
+ case token.LBRACE:
+ p.next()
+ x = p.parseExpression()
+ var div expr
+ if p.tok == token.QUO {
+ p.next()
+ div = p.parseExpression()
+ }
+ x = &repetition{x, div}
+ p.expect(token.RBRACE)
+
+ default:
+ x = p.parseField() // may be nil
+ }
+
+ return x
+}
+
+
+func (p *parser) parseSequence() expr {
+ var list vector.Vector
+
+ for x := p.parseOperand(); x != nil; x = p.parseOperand() {
+ list.Push(x)
+ }
+
+ // no need for a sequence if list.Len() < 2
+ switch list.Len() {
+ case 0:
+ return nil
+ case 1:
+ return list.At(0).(expr)
+ }
+
+ // convert list into a sequence
+ seq := make(sequence, list.Len())
+ for i := 0; i < list.Len(); i++ {
+ seq[i] = list.At(i).(expr)
+ }
+ return seq
+}
+
+
+func (p *parser) parseExpression() expr {
+ var list vector.Vector
+
+ for {
+ x := p.parseSequence()
+ if x != nil {
+ list.Push(x)
+ }
+ if p.tok != token.OR {
+ break
+ }
+ p.next()
+ }
+
+ // no need for alternatives if list.Len() < 2
+ switch list.Len() {
+ case 0:
+ return nil
+ case 1:
+ return list.At(0).(expr)
+ }
+
+ // convert list into alternatives
+ alt := make(alternatives, list.Len())
+ for i := 0; i < list.Len(); i++ {
+ alt[i] = list.At(i).(expr)
+ }
+ return alt
+}
+
+
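+// Informally, the grammar accepted by this parser is roughly:
+//
+//   Format     = Entry { ";" Entry } .
+//   Entry      = PackageName ImportPath | RuleName "=" Expression .
+//   RuleName   = TypeName | "default" | "/" .
+//   TypeName   = identifier [ "." identifier ] .
+//   Expression = [ Sequence ] { "|" [ Sequence ] } .
+//   Sequence   = Operand { Operand } .
+//   Operand    = string | Group | Option | Repetition | Field .
+//   Group      = "(" Expression [ ">>" Expression ] ")" .
+//   Option     = "[" Expression "]" .
+//   Repetition = "{" Expression [ "/" Expression ] "}" .
+//   Field      = ( "@" | "*" | identifier ) [ ":" RuleName ] .
+//
+// where PackageName is a single identifier and ImportPath is a string
+// literal. This sketch follows the parsing functions in this file.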
+func (p *parser) parseFormat() {
+ for p.tok != token.EOF {
+ pos := p.pos
+
+ name, isIdent := p.parseRuleName()
+ switch p.tok {
+ case token.STRING:
+ // package declaration
+ importPath := p.parseString()
+
+ // add package declaration
+ if !isIdent {
+ p.error(pos, "illegal package name: "+name)
+ } else if _, found := p.packs[name]; !found {
+ p.packs[name] = importPath
+ } else {
+ p.error(pos, "package already declared: "+name)
+ }
+
+ case token.ASSIGN:
+ // format rule
+ p.next()
+ x := p.parseExpression()
+
+ // add rule
+ if _, found := p.rules[name]; !found {
+ p.rules[name] = x
+ } else {
+ p.error(pos, "format rule already declared: "+name)
+ }
+
+ default:
+ p.errorExpected(p.pos, "package declaration or format rule")
+ p.next() // make progress in any case
+ }
+
+ if p.tok == token.SEMICOLON {
+ p.next()
+ } else {
+ break
+ }
+ }
+ p.expect(token.EOF)
+}
+
+
+func remap(p *parser, name string) string {
+ i := strings.Index(name, ".")
+ if i >= 0 {
+ packageName, suffix := name[0:i], name[i:]
+ // lookup package
+ if importPath, found := p.packs[packageName]; found {
+ name = importPath + suffix
+ } else {
+ var invalidPos token.Position
+ p.Error(invalidPos, "package not declared: "+packageName)
+ }
+ }
+ return name
+}
+
+
+// Parse parses a set of format productions from source src. Custom
+// formatters may be provided via a map of formatter functions. If
+// there are no errors, the result is a Format and the error is nil.
+// Otherwise the format is nil and a non-empty ErrorList is returned.
+//
+func Parse(fset *token.FileSet, filename string, src []byte, fmap FormatterMap) (Format, os.Error) {
+ // parse source
+ var p parser
+ p.init(fset, filename, src)
+ p.parseFormat()
+
+ // add custom formatters, if any
+ for name, form := range fmap {
+ name = remap(&p, name)
+ if _, found := p.rules[name]; !found {
+ p.rules[name] = &custom{name, form}
+ } else {
+ var invalidPos token.Position
+ p.Error(invalidPos, "formatter already declared: "+name)
+ }
+ }
+
+ return p.rules, p.GetError(scanner.NoMultiples)
+}
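
For context, a minimal sketch of how the Parse entry point above might be driven. The rule source, the file name, and the nil formatter map are illustrative assumptions, as is the exp/datafmt import path; the sketch targets the pre-Go 1 API used in this file (os.Error, FormatterMap).

package main

import (
	"exp/datafmt" // assumed import path within this tree
	"fmt"
	"go/token"
	"os"
)

func main() {
	// One illustrative rule: format values of type int with %d.
	src := []byte(`int = "%d"`)

	fset := token.NewFileSet()
	format, err := datafmt.Parse(fset, "rules", src, nil) // nil: no custom formatters
	if err != nil {
		fmt.Fprintln(os.Stderr, err) // non-nil on parse errors
		return
	}
	_ = format // Format maps rule names to their parsed expressions
}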